Code example #1
File: weijen_register.py  Project: ChengHsi/rucyio
import sys
import hashlib

from sqlalchemy import create_engine

from rucio.client.didclient import DIDClient
from rucio.client.ruleclient import RuleClient
from rucio.common.exception import SubscriptionDuplicate


def main(argv):
    try:
        did_client = DIDClient()
        rule_client = RuleClient()
        openfile_name = sys.argv[1]
        scope_name = 'twgrid-user-wchang'
        # Database credentials are redacted in the original source.
        mysql_engine = create_engine("mysql://*****:*****@rucio-db01.grid.sinica.edu.tw/rucio")
        with open(openfile_name) as input_file:
            # Each input line is expected to hold: dataset, file name, file size, account.
            for line in input_file:
                connection = mysql_engine.connect()
                data = line.strip('\n')
                dataset = data.split(" ")[0]
                file_name = data.split(" ")[1]
                file_size = int(data.split(" ")[2])
                account = data.split(" ")[3]
                pre_md5 = 'twgrid-user-wchang:' + file_name
                md5_sum = hashlib.md5(pre_md5.encode()).hexdigest()
                files = [{'scope': scope_name, 'name': file_name, 'md5': md5_sum,
                          'bytes': file_size, 'adler32': '0cc737eb'}]
                # Check whether the file DID is already registered (direct lookup in the Rucio DB).
                contact_db = connection.execute("select * from dids where scope='%s' and name='%s';" % (scope_name, file_name))

                num_rows = contact_db.rowcount
                if num_rows == 0:
                    print("Register File : %s " % file_name)
                    did_client.add_files_to_dataset(scope=scope_name, name=dataset, files=files, rse='TW-DPM01_TWGRIDSCRATCHDISK')
                    # rule_client.add_replication_rule(dids=[{'scope': scope_name, 'name': file_name}], account=account, copies=1,
                    #                                  rse_expression='TW-DPM01_TWGRIDSCRATCHDISK', grouping='DATASET')
                else:
                    print("Attach File : %s To %s " % (file_name, dataset))
                    did_client.attach_dids(scope=scope_name, name=dataset, dids=files)

                connection.close()

    except SubscriptionDuplicate as e:
        print(e)
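Note that the script checks for an existing DID by querying the Rucio database tables directly. The same check can also be expressed through the client API; the snippet below is only an illustrative sketch (the helper name did_exists is not part of the original script) and assumes the standard DIDClient.get_did call and the DataIdentifierNotFound exception from rucio.common.exception.

from rucio.client.didclient import DIDClient
from rucio.common.exception import DataIdentifierNotFound

def did_exists(did_client, scope, name):
    # Hypothetical helper: ask the Rucio server instead of querying the DB tables.
    try:
        did_client.get_did(scope=scope, name=name)
        return True
    except DataIdentifierNotFound:
        return False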
Code example #2
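The test class below is an excerpt and does not show its imports. It appears to rely on the nose assertion helpers, the Rucio client classes, and a few test utilities; the module paths in the following sketch are assumptions based on the usual Rucio source layout and should be checked against the version in use.

# Assumed imports (paths based on the typical Rucio layout, not shown in the excerpt).
from datetime import datetime, timedelta

from nose.tools import (assert_equal, assert_not_equal, assert_true, assert_in,
                        assert_not_in, assert_raises, raises)

from rucio.client.accountclient import AccountClient
from rucio.client.didclient import DIDClient
from rucio.client.metaclient import MetaClient
from rucio.client.replicaclient import ReplicaClient
from rucio.client.rseclient import RSEClient
from rucio.client.scopeclient import ScopeClient
from rucio.common.exception import (DataIdentifierAlreadyExists, DataIdentifierNotFound,
                                    FileAlreadyExists, FileConsistencyMismatch,
                                    InvalidPath, KeyNotFound, ScopeNotFound,
                                    UnsupportedOperation, UnsupportedStatus)
from rucio.common.types import InternalAccount
from rucio.common.utils import generate_uuid
from rucio.core.account_limit import set_local_account_limit
from rucio.core.rse import get_rse_id
from rucio.tests.common import rse_name_generator, scope_name_generator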
class TestDIDClients:

    def setup(self):
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'

        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 1)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_*_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 3)
        results = []

        filters = {'name': 'file*', 'created_after': datetime.utcnow() - timedelta(hours=1)}
        for result in self.did_client.list_dids(tmp_scope, filters):
            results.append(result)
        assert_equal(len(results), 0)
        with assert_raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype')

    def test_list_recursive(self):
        """ DATA IDENTIFIERS (CLIENT): List did recursive """
        # Create nested containers and datasets
        tmp_scope_1 = 'list-did-recursive'
        tmp_scope_2 = 'list-did-recursive-2'
        self.scope_client.add_scope('root', tmp_scope_1)
        self.scope_client.add_scope('root', tmp_scope_2)

        tmp_container_1 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_1)

        tmp_container_2 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_2)

        tmp_dataset_1 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_2, name=tmp_dataset_1)

        tmp_dataset_2 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_1, name=tmp_dataset_2)

        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_2, 'name': tmp_dataset_1}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_2, dids=[{'scope': tmp_scope_1, 'name': tmp_dataset_2}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_1, 'name': tmp_container_2}])

        # List DIDs not recursive - only the first container is expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=False, type='all', filters={'name': tmp_container_1})]
        assert_equal(dids, [tmp_container_1])

        # List DIDs recursive - first container and all attached collections are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='all', filters={'name': tmp_container_1})]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 in dids)
        assert_true(tmp_dataset_2 in dids)
        assert_equal(len(dids), 4)

        # List DIDs recursive - only containers are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='container', filters={'name': tmp_container_1})]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 not in dids)
        assert_true(tmp_dataset_2 not in dids)
        assert_equal(len(dids), 2)

    def test_list_by_length(self):
        """ DATA IDENTIFIERS (CLIENT): List did with length """
        tmp_scope = 'mock'

        tmp_dsn = 'dsn_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)

        dids = self.did_client.list_dids(tmp_scope, {'length.gt': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_not_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length.gt': -1, 'length.lt': 1})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn1)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn2)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)

        with assert_raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'})

    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate, list did content and create a sample"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()
        root = InternalAccount('root')
        set_local_account_limit(root, get_rse_id('MOCK'), -1)
        set_local_account_limit(root, get_rse_id('CERN-PROD_TZERO'), -1)

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        with assert_raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        files = [{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb'}, ]
        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files)

        files = []
        for i in range(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})

        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2), 'account': 'root'}]

        with assert_raises(InvalidPath):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files, rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files_without_pfn, rse=tmp_rse)

        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files = []
        for i in range(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]

        with assert_raises(InvalidPath):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)
        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files_without_pfn, rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

        tmp_dsn_output = 'dsn_%s' % generate_uuid()
        self.did_client.create_did_sample(input_scope=tmp_scope, input_name=tmp_dsn, output_scope=tmp_scope, output_name=tmp_dsn_output, nbfiles=2)
        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=tmp_dsn_output)]
        assert_equal(len(files), 2)

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
        for i in range(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            for i in range(nb_files):
                files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                              'bytes': 724963570, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()), 'events': 100}})
            attachment['dids'] = files
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)

        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        dsns_l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]

        assert_equal([dsn], dsns_l)

        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        with assert_raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}])

    def test_add_files_to_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Add files to Datasets"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        dsn1 = 'dsn.%s' % str(generate_uuid())
        dsn2 = 'dsn.%s' % str(generate_uuid())
        meta = {'transient': True}
        files1, files2, nb_files = [], [], 5
        for i in range(nb_files):
            files1.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})
            files2.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})

        self.did_client.add_dataset(scope=tmp_scope, name=dsn1, files=files1,
                                    rse=tmp_rse, meta=meta)
        self.did_client.add_dataset(scope=tmp_scope, name=dsn2, files=files2,
                                    rse=tmp_rse, meta=meta)

        attachments = [{'scope': tmp_scope, 'name': dsn1, 'dids': files2, 'rse': tmp_rse},
                       {'scope': tmp_scope, 'name': dsn2, 'dids': files1, 'rse': tmp_rse}]

        self.did_client.add_files_to_datasets(attachments)

        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert_equal(len(files), 10)

        with assert_raises(FileAlreadyExists):
            self.did_client.add_files_to_datasets(attachments)

        for attachment in attachments:
            for i in range(nb_files):
                attachment['dids'].append({'scope': tmp_scope,
                                           'name': 'lfn.%s' % str(generate_uuid()),
                                           'bytes': 724963570,
                                           'adler32': '0cc737eb',
                                           'meta': {'guid': str(generate_uuid()),
                                                    'events': 100}})

        self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert_equal(len(files), 15)

        # Corrupt meta-data
        files = []
        for attachment in attachments:
            for file in attachment['dids']:
                file['bytes'] = 1000
                break

        with assert_raises(FileConsistencyMismatch):
            self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_dsn)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = list()
        for i in range(500):
            tmp_dsn = {'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'meta': {'project': 'data13_hip'}}
            dsns.append(tmp_dsn)
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_file)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(4)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #    assert_equal(r['type'], 'container')
            #    assert_equal(r['level'], 0)
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]):
            #    assert_equal(r['level'], 0)
            # else:
            #     assert_equal(r['level'], 1)

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(5)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(2)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(5):
            self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None)

        for i in range(5):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}])

        with assert_raises(UnsupportedOperation):
            self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}])

        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}])
        self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}])
        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            if r['name'] == dst[1]:
                assert_equal(r['level'], 0)
            if r['type'] == 'file':
                if (r['name'] in file[6:9]):
                    assert_equal(r['level'], 0)
                else:
                    assert_not_equal(r['level'], 0)

        with assert_raises(UnsupportedOperation):
            self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}])

        self.did_client.close(scope, dst[4])
        metadata = self.did_client.get_metadata(scope, dst[4])
        i_bytes, i_length = metadata['bytes'], metadata['length']
        metadata = self.did_client.get_metadata(scope, file[8])
        file1_bytes = metadata['bytes']
        metadata = self.did_client.get_metadata(scope, file[9])
        file2_bytes = metadata['bytes']
        self.did_client.detach_dids(scope, dst[4], [{'scope': scope, 'name': file[8]}, {'scope': scope, 'name': file[9]}])
        metadata = self.did_client.get_metadata(scope, dst[4])
        f_bytes, f_length = metadata['bytes'], metadata['length']
        assert_equal(i_bytes, f_bytes + file1_bytes + file2_bytes)
        assert_equal(i_length, f_length + 1 + 1)

    def test_scope_list(self):
        """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """

        # create some dummy data
        self.tmp_accounts = ['jdoe' for i in range(3)]
        self.tmp_scopes = [scope_name_generator() for i in range(3)]
        self.tmp_rses = [rse_name_generator() for i in range(3)]
        self.tmp_files = ['file_%s' % generate_uuid() for i in range(3)]
        self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in range(3)]
        self.tmp_containers = ['container_%s' % generate_uuid() for i in range(3)]

        # add dummy data to the catalogue
        for i in range(3):
            self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i])
            self.rse_client.add_rse(self.tmp_rses[i])
            self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1, '0cc737eb')

        # put files in datasets
        for i in range(3):
            for j in range(3):
                files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1, 'adler32': '0cc737eb'}]
                self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j])
                self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files)

        # put datasets in containers
        for i in range(3):
            for j in range(3):
                datasets = [{'scope': self.tmp_scopes[j], 'name': self.tmp_datasets[j]}]
                self.did_client.add_container(self.tmp_scopes[i], self.tmp_containers[j])
                self.did_client.add_datasets_to_container(self.tmp_scopes[i], self.tmp_containers[j], datasets)

        # reverse check if everything is in order
        for i in range(3):
            result = self.did_client.scope_list(self.tmp_scopes[i], recursive=True)

            r_topdids = []
            r_otherscopedids = []
            r_scope = []
            for r in result:
                if r['level'] == 0:
                    r_topdids.append(r['scope'] + ':' + r['name'])
                    r_scope.append(r['scope'])
                if r['scope'] != self.tmp_scopes[i]:
                    r_otherscopedids.append(r['scope'] + ':' + r['name'])
                    assert_in(r['level'], [1, 2])

            for j in range(3):
                assert_equal(self.tmp_scopes[i], r_scope[j])
                if j != i:
                    assert_in(self.tmp_scopes[j] + ':' + self.tmp_files[j], r_otherscopedids)
            assert_not_in(self.tmp_scopes[i] + ':' + self.tmp_files[i], r_topdids)

    def test_get_did(self):
        """ DATA IDENTIFIERS (CLIENT): add a new data identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        file = generate_uuid()
        dsn = generate_uuid()

        self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb')

        did = self.did_client.get_did(scope, file)

        assert_equal(did['scope'], scope)
        assert_equal(did['name'], file)

        self.did_client.add_dataset(scope=scope, name=dsn, lifetime=10000000)
        did2 = self.did_client.get_did(scope, dsn)
        assert_equal(type(did2['expired_at']), datetime)

    def test_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): add a new meta data for an identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        file = generate_uuid()
        keys = ['project', 'run_number']
        values = ['data13_hip', 12345678]

        self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb')
        for i in range(2):
            self.did_client.set_metadata(scope, file, keys[i], values[i])

        meta = self.did_client.get_metadata(scope, file)

        for i in range(2):
            assert_equal(meta[keys[i]], values[i])

    def test_list_content(self):
        """ DATA IDENTIFIERS (CLIENT): test to list contents for an identifier"""
        rse = 'MOCK'
        scope = 'mock'
        nbfiles = 5
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)]
        files2 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)]

        self.did_client.add_dataset(scope, dataset1)

        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope, dataset1)

        self.did_client.add_files_to_dataset(scope, dataset1, files1, rse=rse)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2, rse=rse)

        self.did_client.add_container(scope, container)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_datasets_to_container(scope, container, datasets)

        contents = self.did_client.list_content(scope, container)

        datasets_s = [d['name'] for d in contents]
        assert_in(dataset1, datasets_s)
        assert_in(dataset2, datasets_s)

    def test_list_files(self):
        """ DATA IDENTIFIERS (CLIENT): List files for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = []
        files2 = []
        for i in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        for i in range(10):
            self.replica_client.add_replica(rse, scope, files1[i]['name'], 1, '0cc737eb')
            self.replica_client.add_replica(rse, scope, files2[i]['name'], 1, '0cc737eb')

        self.did_client.add_dataset(scope, dataset1)
        self.did_client.add_files_to_dataset(scope, dataset1, files1)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_container(scope, container)
        self.did_client.add_datasets_to_container(scope, container, datasets)

        # List file content
        content = self.did_client.list_files(scope, files1[i]['name'])
        assert_true(content is not None)
        for d in content:
            assert_true(d['name'] == files1[i]['name'])

        # List container content
        for d in [{'name': x['name'], 'scope': x['scope'], 'bytes': x['bytes'], 'adler32': x['adler32']} for x in self.did_client.list_files(scope, container)]:
            assert_in(d, files1 + files2)

        # List non-existing data identifier content
        with assert_raises(DataIdentifierNotFound):
            self.did_client.list_files(scope, 'Nimportnawak')

    def test_list_replicas(self):
        """ DATA IDENTIFIERS (CLIENT): List replicas for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dsn1 = generate_uuid()
        dsn2 = generate_uuid()
        cnt = generate_uuid()
        files1 = []
        files2 = []
        for i in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        self.did_client.add_dataset(scope, dsn1)
        self.did_client.add_files_to_dataset(scope, dsn1, files1, rse=rse)

        self.did_client.add_dataset(scope, dsn2)
        self.did_client.add_files_to_dataset(scope, dsn2, files2, rse=rse)

        self.did_client.add_container(scope, cnt)
        self.did_client.add_datasets_to_container(scope, cnt, [{'scope': scope, 'name': dsn1}, {'scope': scope, 'name': dsn2}])

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': dsn1}])
        assert_true(replicas is not None)

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': cnt}])
        assert_true(replicas is not None)

    @raises(UnsupportedOperation)
    def test_close(self):
        """ DATA IDENTIFIERS (CLIENT): test to close data identifiers"""

        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with assert_raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.attach_dids(scope=tmp_scope, name=tmp_dataset, dids=files)

    @raises
    def test_open(self):
        """ DATA IDENTIFIERS (CLIENT): test to re-open data identifiers for priv account"""

        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with assert_raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=True)

    def test_bulk_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): Add a new meta data for a list of DIDs and try to retrieve them back"""
        key = 'project'
        rse = 'MOCK'
        scope = 'mock'
        files = ['file_%s' % generate_uuid() for _ in range(4)]
        dst = ['dst_%s' % generate_uuid() for _ in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        meta_mapping = {}
        list_dids = []
        for idx in range(4):
            self.replica_client.add_replica(rse, scope, files[idx], 1, '0cc737eb')
            self.did_client.set_metadata(scope, files[idx], key, 'file_%s' % idx)
            list_dids.append({'scope': scope, 'name': files[idx]})
            meta_mapping['%s:%s' % (scope, files[idx])] = (key, 'file_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, dst[idx], 'DATASET', statuses=None, meta={key: 'dsn_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': dst[idx]})
            meta_mapping['%s:%s' % (scope, dst[idx])] = (key, 'dsn_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, cnt[idx], 'CONTAINER', statuses=None, meta={key: 'cnt_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': cnt[idx]})
            meta_mapping['%s:%s' % (scope, cnt[idx])] = (key, 'cnt_%s' % idx)
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        res_list_dids = [{'scope': entry['scope'], 'name': entry['name']} for entry in list_meta]
        res_list_dids.sort()
        list_dids.sort()
        assert_equal(list_dids, res_list_dids)
        for meta in list_meta:
            did = '%s:%s' % (meta['scope'], meta['name'])
            met = meta_mapping[did]
            assert_equal((key, meta[key]), met)
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert_equal(len(list_meta), 12)
        list_dids = []
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert_equal(len(list_meta), 0)
Code example #3
    def test_list_dataset_replicas_archive(self):
        """ REPLICA (CLIENT): List dataset replicas with archives. """

        replica_client = ReplicaClient()
        did_client = DIDClient()
        rule_client = RuleClient()

        scope = 'mock'

        rse = 'APERTURE_%s' % rse_name_generator()
        rse_id = add_rse(rse, **self.vo)
        add_protocol(rse_id=rse_id,
                     parameter={
                         'scheme': 'root',
                         'hostname': 'root.aperture.com',
                         'port': 1409,
                         'prefix': '//test/chamber/',
                         'impl': 'rucio.rse.protocols.xrootd.Default',
                         'domains': {
                             'lan': {
                                 'read': 1,
                                 'write': 1,
                                 'delete': 1
                             },
                             'wan': {
                                 'read': 1,
                                 'write': 1,
                                 'delete': 1
                             }
                         }
                     })

        rse2 = 'BLACKMESA_%s' % rse_name_generator()
        rse2_id = add_rse(rse2, **self.vo)
        add_protocol(rse_id=rse2_id,
                     parameter={
                         'scheme': 'root',
                         'hostname': 'root.blackmesa.com',
                         'port': 1409,
                         'prefix': '//underground/facility',
                         'impl': 'rucio.rse.protocols.xrootd.Default',
                         'domains': {
                             'lan': {
                                 'read': 1,
                                 'write': 1,
                                 'delete': 1
                             },
                             'wan': {
                                 'read': 1,
                                 'write': 1,
                                 'delete': 1
                             }
                         }
                     })

        # register archive
        archive = {
            'scope': scope,
            'name': 'another.%s.zip' % generate_uuid(),
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'deedbeaf'
        }
        replica_client.add_replicas(rse=rse, files=[archive])
        replica_client.add_replicas(rse=rse2, files=[archive])

        archived_files = [{'scope': scope,
                           'name': 'zippedfile-%i-%s' % (i, str(generate_uuid())),
                           'type': 'FILE',
                           'bytes': 4322,
                           'adler32': 'deaddead'} for i in range(2)]
        replica_client.add_replicas(rse=rse2, files=archived_files)
        did_client.add_files_to_archive(scope=scope,
                                        name=archive['name'],
                                        files=archived_files)

        dataset_name = 'find_me.' + str(generate_uuid())
        did_client.add_dataset(scope=scope, name=dataset_name)
        did_client.attach_dids(scope=scope,
                               name=dataset_name,
                               dids=archived_files)
        rule_client.add_replication_rule(dids=[{
            'scope': scope,
            'name': dataset_name
        }],
                                         account='root',
                                         copies=1,
                                         rse_expression=rse,
                                         grouping='DATASET')

        res = [
            r for r in replica_client.list_dataset_replicas(scope=scope,
                                                            name=dataset_name)
        ]
        assert len(res) == 1
        assert res[0]['state'] == 'UNAVAILABLE'

        res = [
            r for r in replica_client.list_dataset_replicas(
                scope=scope, name=dataset_name, deep=True)
        ]

        assert len(res) == 3
        assert res[0]['state'] == 'AVAILABLE'
        assert res[1]['state'] == 'AVAILABLE'
        assert res[2]['state'] == 'AVAILABLE'

        del_rse(rse_id)
Code example #4
class TestAbacusCollectionReplica():

    def setUp(self):
        self.account = 'root'
        self.scope = 'mock'
        self.rule_client = RuleClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.upload_client = UploadClient()
        self.file_sizes = 2
        self.dataset = 'dataset_%s' % generate_uuid()
        self.rse = 'MOCK5'
        self.rse_id = get_rse_id(rse=self.rse)

    def tearDown(self):
        undertaker.run(once=True)
        cleaner.run(once=True)
        reaper.run(once=True, rses=[self.rse], greedy=True)

    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
        self.files = [{'did_scope': self.scope, 'did_name': 'file_' + generate_uuid(), 'path': file_generator(size=self.file_sizes), 'rse': self.rse, 'lifetime': -1} for i in range(0, 2)]
        self.did_client.add_did(self.scope, self.dataset, DIDType.DATASET, lifetime=-1)
        self.upload_client.upload(self.files)
        self.did_client.attach_dids(scope=self.scope, name=self.dataset, dids=[{'name': file['did_name'], 'scope': file['did_scope']} for file in self.files])
        self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
        [os.remove(file['path']) for file in self.files]

        # Check dataset replica after rule creation - initial data
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
        assert_equal(dataset_replica['bytes'], 0)
        assert_equal(dataset_replica['length'], 0)
        assert_equal(dataset_replica['available_bytes'], 0)
        assert_equal(dataset_replica['available_length'], 0)
        assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
        assert_equal(dataset_replica['bytes'], len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['length'], len(self.files))
        assert_equal(dataset_replica['available_bytes'], len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['available_length'], len(self.files))
        assert_equal(str(dataset_replica['state']), 'AVAILABLE')

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        delete_replicas(rse_id=self.rse_id, files=[{'name': self.files[0]['did_name'], 'scope': InternalScope(self.files[0]['did_scope'])}])
        self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
        assert_equal(dataset_replica['length'], len(self.files))
        assert_equal(dataset_replica['bytes'], len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['available_length'], len(self.files) - 1)
        assert_equal(dataset_replica['available_bytes'], (len(self.files) - 1) * self.file_sizes)
        assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

        # Delete all files -> collection replica should be deleted
        cleaner.run(once=True)
        reaper.run(once=True, rses=[self.rse], greedy=True)
        self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)]
        assert_equal(len(dataset_replica), 0)
Code example #5
class TestAbacusCollectionReplica(unittest.TestCase):
    def setUp(self):
        self.account = 'root'
        self.scope = 'mock'
        self.rse = 'MOCK5'
        self.file_sizes = 2
        self.dataset = 'dataset_%s' % generate_uuid()

        self.rule_client = RuleClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.upload_client = UploadClient()

        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        else:
            self.vo = {}

        self.rse_id = get_rse_id(rse=self.rse, **self.vo)

    def tearDown(self):
        undertaker.run(once=True)
        cleaner.run(once=True)
        if self.vo:
            reaper.run(once=True,
                       include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse),
                       greedy=True)
        else:
            reaper.run(once=True, include_rses=self.rse, greedy=True)

    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
        self.files = [{
            'did_scope': self.scope,
            'did_name': 'file_' + generate_uuid(),
            'path': file_generator(size=self.file_sizes),
            'rse': self.rse,
            'lifetime': -1
        } for i in range(0, 2)]
        self.did_client.add_did(self.scope,
                                self.dataset,
                                DIDType.DATASET,
                                lifetime=-1)
        self.upload_client.upload(self.files)
        self.did_client.attach_dids(scope=self.scope,
                                    name=self.dataset,
                                    dids=[{
                                        'name': file['did_name'],
                                        'scope': file['did_scope']
                                    } for file in self.files])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        [os.remove(file['path']) for file in self.files]

        # Check dataset replica after rule creation - initial data
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['bytes'] == 0
        assert dataset_replica['length'] == 0
        assert dataset_replica['available_bytes'] == 0
        assert dataset_replica['available_length'] == 0
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['available_bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files)
        assert str(dataset_replica['state']) == 'AVAILABLE'

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        delete_replicas(rse_id=self.rse_id,
                        files=[{'name': self.files[0]['did_name'],
                                'scope': InternalScope(self.files[0]['did_scope'], **self.vo)}])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files) - 1
        assert dataset_replica['available_bytes'] == (len(self.files) -
                                                      1) * self.file_sizes
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Delete all files -> collection replica should be deleted
        cleaner.run(once=True)
        if self.vo:
            reaper.run(once=True,
                       include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse),
                       greedy=True)
        else:
            reaper.run(once=True, include_rses=self.rse, greedy=True)
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }],
                                              1,
                                              self.rse,
                                              lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ]
        assert len(dataset_replica) == 0
Code example #6
class RunSync(object):
    """
    Synchronize the replica of a given run at WIPAC-ORIG to
    the corresponding Rucio site.
    """
    def __init__(self,
                 run,
                 originrse=DEFAULT_ORIGIN_RSE,
                 destrse=None,
                 scope=DEFAULT_SCOPE,
                 check=True,
                 lifetime=None,
                 dry_run=False,
                 container=None):
        """
           :param run: Run path (dataset) to synchronize with Rucio, e.g. IceCube/2016/filtered/level2pass2/0101/Run00127347.
           :param originrse: RSE where the run currently resides (WIPAC-ORIG by default).
           :param destrse: Destination RSE for the replication rule.
           :param scope: Rucio scope used to register the data.
           :param container: Optional container to attach the run dataset to.
        """
        self.run = run
        self.originrse = originrse
        self.destrse = destrse
        self.scope = scope
        self.check = check
        self.lifetime = lifetime
        self.dry_run = dry_run
        self.container = container

        self.rucio_datasets = {}
        self.run_files = {}
        self.existent_replica_files = {}
        self.url = ''
        self.gfal = Gfal2Context()

        self.run_Number = None

        self.get_run_Number()
        self.files_storage = {}
        self.get_global_url()

        self.didc = DIDClient()
        self.repc = ReplicaClient()
        self.rulesClient = RuleClient()

        # Right now obtaining the metadata from the storage at WIPAC.
        # Hopefully in the future from JADE.  # TODO
        self.get_run_Files()
        self.get_rucio_metadata()
        self.update_run_Files()
        self.get_files_metadata()

    def update_run_Files(self):
        """
        Updating the run files, keeping only the files that have not been registered yet
        """
        for f in self.existent_replica_files:
            file_name = f.split('/')[-1:][0]
            if file_name in self.run_files:
                print("File: %s already registered. Skipping it" % file_name)
                self.run_files.pop(file_name)

    def get_files_metadata(self):
        for f in self.run_files:
            if self.run + '/' + f not in self.existent_replica_files:
                self.obtain_metadata(f)
        print("Metadat initialization done")

    def obtain_metadata(self, filename):
        """
        Get the size and checksum for every file in the run from the GridFTP server
        """
        url = self.get_file_url(filename)
        print("checking metadata for url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            adler32 = self.gfal.checksum(str(url), 'adler32')
            print(
                "got size and adler 32checksum of file: pfn=%s size=%s checksum=%s"
                % (url, size, adler32))
            self.run_files[filename] = {
                'size': size,
                'adler32': adler32,
                'name': self.run + '/' + filename
            }
        except GError:
            print("no file found at %s" % url)
            return False

    def get_file_url(self, filename):
        return self.url + '/' + self.run + '/' + filename

    def get_global_url(self):
        """
        Return the base path of the rucio url
        """
        print("Getting parameters for rse %s" % self.originrse)
        rse = rsemgr.get_rse_info(self.originrse)
        proto = rse['protocols'][0]

        schema = proto['scheme']
        prefix = proto['prefix'] + self.scope.replace('.', '/')
        if schema == 'srm':
            prefix = proto['extended_attributes']['web_service_path'] + prefix
        url = schema + '://' + proto['hostname']
        if proto['port'] != 0:
            url = url + ':' + str(proto['port'])
        self.url = url + prefix
        print("Determined base url %s" % self.url)

    def get_run_Number(self):
        """
        Obtain the run number out of the whole run path, e.g. IceCube/2016/filtered/level2pass2/0101/Run00127347
        """
        print("Obtaining run number out of run(dataset): %s" % self.run)
        self.run_Number = self.run.split("/")[-1]
        print("Run number (dataset): %s" % self.run_Number)

    def get_run_Files(self):
        """
        Gets the list of files for a given run and their checksums from the storage
        """
        self.run_url = self.url + '/' + self.run
        print("Listin files from url : %s" % self.run_url)
        run_files = []
        try:
            run_files = self.gfal.listdir(str(self.run_url))
        except GError:
            print("No files found at %s" % str(self.run_url))
        print("Files found in storage:")
        count = 0
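        # Skip very short entry names and cap the number of files collected
        # per run at 5000.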
        for f in run_files:
            if len(f) > 3:
                if count < 5000:
                    self.run_files[f] = {}
                    count = count + 1
                else:
                    break

    def get_rucio_metadata(self):
        """                                                                                                                                         
        Gets the list of datasets at the Rucio RSE, the files, and the metadata.                                                                           
        """
        print(
            "Initializing Rucio... getting the list of blocks and files at %s"
            % self.originrse)
        registered_datasets = self.repc.list_datasets_per_rse(self.originrse)
        for dataset in registered_datasets:
            self.rucio_datasets[dataset] = {}

        replica_info = self.repc.list_replicas(
            [{"scope": self.scope, "name": '/' + self.run_Number}],
            rse_expression="rse=%s" % self.originrse)
        replica_files = set()
        for file_info in replica_info:
            name = file_info['name']
            if self.originrse in file_info['rses']:
                replica_files.add(name)

        self.existent_replica_files = replica_files
        print("Rucio initialization done.")

    def register(self):
        """
        Create the container, the datasets and attach them to the container.
        """
        print("Registering...")
        self.register_dataset(self.run_Number)
        self.register_replicas(self.run_files)
        self.register_container(self.container)
        self.attach_dataset_to_container(self.run_Number, self.container)
        self.add_replica_rule(dataset=self.run_Number, destRSE=self.destrse)

    def register_container(self, container):
        """
        Registering the container
        """
        print("Registering the container %s with scope: %s" %
              (container, self.scope))
        if container is None:
            print('No container added, not registering any container')
            return
        if self.dry_run:
            print('Dry run only, not registering the container')
            return
        try:
            self.didc.add_container(scope=self.scope,
                                    name=container,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print("Container %s already exists" % container)
        except InvalidObject:
            print("Problem with container name: %s" % container)

    def attach_dataset_to_container(self, dataset, container):
        """
        Attaching the dataset to a container
        """
        print("Attaching dataset %s, to container: %s" % (dataset, container))
        if container is None:
            print('No container added, not registering dataset in container')
            return
        if self.dry_run:
            print('Dry run only, not attaching dataset container')
            return
        try:
            self.didc.attach_dids(scope=self.scope,
                                  name=container,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': '/' + dataset
                                  }])
        except RucioException:
            print("dataset already attached to container")
        return

    def register_dataset(self, run):
        """
        Registering a dataset in the rucio database
        """
        print("registering dataset %s" % run)
        if self.dry_run:
            print(' Dry run only. Not creating dataset.')
            return
        try:
            self.didc.add_dataset(scope=self.scope,
                                  name=run,
                                  lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Dataset %s already exists" % run)

    def register_replicas(self, replicas):
        """
        Register file replica.
        """
        if not replicas:
            return
        print("registering files in Rucio: %s" %
              ", ".join([replicas[filemd]['name'] for filemd in replicas]))
        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return
        try:
            self.repc.add_replicas(rse=self.originrse,
                                   files=[{
                                       'scope': self.scope,
                                       'name': replicas[filemd]['name'],
                                       'adler32': replicas[filemd]['adler32'],
                                       'bytes': replicas[filemd]['size'],
                                   } for filemd in replicas])
            print("Adding files to dataset: %s" % self.run_Number)
        except InvalidObject:
            print("Problem with file name does not match pattern")

        for filemd in replicas:
            try:
                self.didc.attach_dids(scope=self.scope,
                                      name=self.run_Number,
                                      dids=[{
                                          'scope': self.scope,
                                          'name': replicas[filemd]['name']
                                      }])
            except FileAlreadyExists:
                print("File already attached")

    def add_replica_rule(self, destRSE, dataset):
        """
        Create a replication rule for one dataset "Run" at an RSE
        """
        print("Creating replica rule for dataset %s at rse: %s" %
              (dataset, destRSE))
        if self.dry_run:
            print(' Dry run only. Not creating rules')
            return
        if destRSE:
            try:
                self.rulesClient.add_replication_rule(
                    [{"scope": self.scope, "name": "/" + dataset}],
                    copies=1,
                    rse_expression=destRSE)
            except DuplicateRule:
                print('Rule already exists')
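# A minimal driver sketch for the class above. The class name and exact
# constructor signature are not shown in this excerpt, so "RunSync" and the
# keyword arguments below are assumptions inferred from the attributes the
# methods use (run, originrse, destrse, scope, container, lifetime, dry_run).
if __name__ == '__main__':
    sync = RunSync(run='IceCube/2016/filtered/level2pass2/0101/Run00127347',
                   originrse='ORIGIN_RSE',    # placeholder RSE names
                   destrse='DEST_RSE',
                   scope='icecube',           # placeholder scope
                   container='IceCube/2016/filtered/level2pass2/0101',
                   lifetime=None,
                   dry_run=True)              # dry run: only print actions
    sync.register()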
Code example #7
File: cmsexample.py  Project: astroclark/ligo-rucio
class DatasetInjector(object):
    """
    General Class for injecting a cms dataset in rucio
    """
    def __init__(self,
                 dataset,
                 site,
                 rse=None,
                 scope=DEFAULT_SCOPE,
                 uuid=None,
                 check=True,
                 lifetime=None,
                 dry_run=False):
        self.dataset = dataset
        self.site = site
        if rse is None:
            rse = site
        self.rse = rse
        self.scope = scope
        self.uuid = uuid
        self.check = check
        self.lifetime = lifetime
        self.dry_run = dry_run

        self.blocks = []
        self.url = ''

        self.getmetadata()
        self.get_global_url()
        self.didc = DIDClient()
        self.repc = ReplicaClient()

        self.gfal = Gfal2Context()

    def get_file_url(self, lfn):
        """
        Return the rucio url of a file.
        """
        return self.url + '/' + lfn

    def get_global_url(self):
        """
        Return the base path of the rucio url
        """
        print("Getting parameters for rse %s" % self.rse)
        rse = rsemgr.get_rse_info(self.rse)
        proto = rse['protocols'][0]

        schema = proto['scheme']
        prefix = proto['prefix'] + '/' + self.scope.replace('.', '/')
        if schema == 'srm':
            prefix = proto['extended_attributes']['web_service_path'] + prefix
        url = schema + '://' + proto['hostname']
        if proto['port'] != 0:
            url = url + ':' + str(proto['port'])
        self.url = url + prefix
        print("Determined base url %s" % self.url)

    def getmetadata(self):
        """
        Gets the list of blocks at a site, their files and their metadata
        """
        print("Initializing... getting the list of blocks and files")
        blocks = das_go_client("block dataset=%s site=%s system=phedex" %
                               (self.dataset, self.site))
        for item in blocks:
            uuid = item['block'][0]['name'].split('#')[1]
            if (self.uuid is None) or (uuid == self.uuid):
                block = {'name': item['block'][0]['name'], 'files': []}
                files = das_go_client("file block=%s site=%s system=phedex" %
                                      (block['name'], self.site))
                for item2 in files:
                    cksum = re.match(r"adler32:([^,]+)",
                                     item2['file'][0]['checksum'])
                    cksum = cksum.group(1)
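                    # The adler32 value from PhEDEx may be shorter than 8 hex
                    # digits; the next line left-pads it with zeros, e.g.
                    # "{0:0{1}x}".format(int("1a2b", 16), 8) -> "00001a2b".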
                    cksum = "{0:0{1}x}".format(int(cksum, 16), 8)
                    block['files'].append({
                        'name': item2['file'][0]['name'],
                        'checksum': cksum,
                        'size': item2['file'][0]['size']
                    })
                self.blocks.append(block)
        print("Initalization done.")

    def register(self):
        """
        Create the container and the datasets, and attach the datasets to the container.
        """
        print("Registering...")
        self.register_container()
        for block in self.blocks:
            self.register_dataset(block['name'])
            for filemd in block['files']:
                self.register_replica(filemd)
                self.attach_file(filemd['name'], block['name'])
        print("All datasets, blocks and files registered")

    def register_container(self):
        """
        Create the container.
        """

        print("registering container %s" % self.dataset)
        if self.dry_run:
            print(' Dry run only. Not creating container.')
            return

        try:
            self.didc.add_container(scope=self.scope,
                                    name=self.dataset,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Container %s already exists" % self.dataset)

    def register_dataset(self, block):
        """
        Create the dataset and attach it to the container
        """
        print("registering dataset %s" % block)

        if self.dry_run:
            print(' Dry run only. Not creating dataset.')
            return

        try:
            self.didc.add_dataset(scope=self.scope,
                                  name=block,
                                  lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Dataset %s already exists" % block)

        try:
            print("attaching dataset %s to container %s" %
                  (block, self.dataset))
            self.didc.attach_dids(scope=self.scope,
                                  name=self.dataset,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': block
                                  }])
        except RucioException:
            print(" Dataset already attached")

    def attach_file(self, lfn, block):
        """
        Attach the file to its block (dataset)
        """

        if self.dry_run:
            print(' Dry run only. Not attaching files.')
            return

        try:
            print("attaching file %s" % lfn)
            self.didc.attach_dids(scope=self.scope,
                                  name=block,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': lfn
                                  }])
        except FileAlreadyExists:
            print("File already attached")

    def register_replica(self, filemd):
        """
        Register file replica.
        """
        print("registering file %s" % filemd['name'])

        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return

        if self.check:
            self.check_storage(filemd)
        if not self.check_replica(filemd['name']):
            self.repc.add_replicas(rse=self.rse,
                                   files=[{
                                       'scope': self.scope,
                                       'name': filemd['name'],
                                       'adler32': filemd['checksum'],
                                       'bytes': filemd['size'],
                                       'pfn': self.get_file_url(filemd['name'])
                                   }])

    def check_storage(self, filemd):
        """
        Check size and checksum of a file on storage
        """
        url = self.get_file_url(filemd['name'])
        print("checking url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            checksum = self.gfal.checksum(str(url), 'adler32')
            print("got size and checksum of file: pfn=%s size=%s checksum=%s" %
                  (url, size, checksum))
        except GError:
            print("no file found at %s" % url)
            return False
        if str(size) != str(filemd['size']):
            print("wrong size for file %s. Expected %s got %s" %
                  (filemd['name'], filemd['size'], size))
            return False
        if str(checksum) != str(filemd['checksum']):
            print("wrong checksum for file %s. Expected %s git %s" %
                  (filemd['name'], filemd['checksum'], checksum))
            return False
        print("size and checksum are ok")
        return True

    def check_replica(self, lfn):
        """
        Check if a replica of the given file at the site already exists.
        """
        print("checking if file %s with scope %s has already a replica at %s" %
              (lfn, self.scope, self.rse))
        replicas = list(
            self.repc.list_replicas([{
                'scope': self.scope,
                'name': lfn
            }]))
        if replicas:
            replicas = replicas[0]
            if 'rses' in replicas:
                if self.rse in replicas['rses']:
                    print("file %s with scope %s has already a replica at %s" %
                          (lfn, self.scope, self.rse))
                    return True
        print("no existing replicas")
        return False
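# A minimal usage sketch for DatasetInjector. The dataset and site names below
# are illustrative placeholders; DEFAULT_SCOPE and das_go_client are assumed to
# be defined elsewhere in cmsexample.py.
if __name__ == '__main__':
    injector = DatasetInjector(dataset='/Primary/Processed-v1/AOD',  # placeholder CMS dataset
                               site='T2_US_Example',                 # placeholder PhEDEx site
                               dry_run=True)                         # print actions, do not register
    injector.register()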
Code example #8
            print("Skipping block %s at %s" % (block, firstSite))
            continue
        # Make a Rucio dataset corresponding to the CMS block and attach it to
        # the container representing the CMS dataset.
        rucio_block_ds = block.replace('/', '', 1).replace('/', PATTERN)
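        # NOTE: the next line overrides the transformed name, so the original
        # block name is kept as the Rucio dataset name.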
        rucio_block_ds = block
        block_datasets.append({'scope': 'cms', 'name': rucio_block_ds})
        try:
            print(" Adding (block) dataset %s" % rucio_block_ds)

            status = dClient.add_dataset(scope='cms', name=rucio_block_ds, lifetime=DAYS_TO_LIVE*24*3600)
            print('  Status for add_dataset', status)
        except DataIdentifierAlreadyExists:
            print('Dataset already exists')

        try:
            status = dClient.attach_dids(scope='cms', name=RUCIO_CONTAINER,
                                         dids=[{'scope': 'cms', 'name': rucio_block_ds}])
            print('  Status for attach dataset', status)
        except RucioException:
            print("Attach faild, probabably already done.")

        print(' Creating files for block %s' % block)

        phedex_files = json.loads(subprocess.check_output(DAS + [DAS_FILE_PHEDEX % block]))
        dbs_files = json.loads(subprocess.check_output(DAS + [DAS_FILE_DBS % block]).strip())

        replicas = []
        for fileDict in phedex_files:

            phedex_bytes = fileDict['file'][0]['bytes']
            adler32 = None
            for checksum in fileDict['file'][0]['checksum'].split(','):