Ejemplo n.º 1
0
    def get_file(self, name):
        ## Get the file data from PhEDEx

        result = self._phedex.make_request('data',
                                           ['file=' + name, 'level=file'])

        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
            file_entry = block_entry['file'][0]
        except:
            return None

        if not self.check_allowed_deataset(dataset_entry['name']):
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
        block = Block(block_name, dataset)

        lfile = self._create_file(file_entry, block)

        return lfile
Ejemplo n.º 2
0
    def _load_blocks(self, inventory, id_dataset_map, id_block_maps):
        sql = 'SELECT b.`id`, b.`dataset_id`, b.`name`, b.`size`, b.`num_files`, b.`is_open`, UNIX_TIMESTAMP(b.`last_update`) FROM `blocks` AS b'

        if self._mysql.table_exists('datasets_load_tmp'):
            sql += ' INNER JOIN `datasets_load_tmp` AS t ON t.`id` = b.`dataset_id`'

        sql += ' ORDER BY b.`dataset_id`'

        _dataset_id = 0
        dataset = None
        for block_id, dataset_id, name, size, num_files, is_open, last_update in self._mysql.xquery(
                sql):
            if dataset_id != _dataset_id:
                _dataset_id = dataset_id

                dataset = id_dataset_map[dataset_id]
                dataset.blocks.clear()
                dataset.size = 0
                dataset.num_files = 0

                id_block_map = id_block_maps[dataset_id] = {}

            block = Block(Block.to_internal_name(name), dataset, size,
                          num_files, (is_open == 1), last_update)

            dataset.blocks.add(block)
            dataset.size += block.size
            dataset.num_files += block.num_files

            id_block_map[block_id] = block
Ejemplo n.º 3
0
    def _create_block(self, block_entry, dataset):
        """
        Create a block object with files from a PhEDEx block entry
        """

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        block = Block(block_name,
                      dataset,
                      size=block_entry['bytes'],
                      num_files=block_entry['files'],
                      is_open=(block_entry['is_open'] == 'y'))

        if 'time_update' in block_entry and block_entry[
                'time_update'] is not None:
            block.last_update = int(block_entry['time_update'])
        else:
            block.last_update = int(block_entry['time_create'])

        return block
Ejemplo n.º 4
0
    def make_block_replicas(dataset_entries, replica_maker):
        """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

        block_replicas = []

        for dataset_entry in dataset_entries:
            dataset = Dataset(dataset_entry['name'])

            for block_entry in dataset_entry['block']:
                name = block_entry['name']
                try:
                    block_name = Block.to_internal_name(name[name.find('#') +
                                                             1:])
                except ValueError:  # invalid name
                    continue

                block = Block(block_name, dataset, block_entry['bytes'])

                block_replicas.extend(replica_maker(block, block_entry))

        return block_replicas
Ejemplo n.º 5
0
    def get_file(self, name, block=None):
        ## Get the file data from PhEDEx

        result = self._phedex.make_request('data',
                                           ['file=' + name, 'level=file'])

        try:
            block_entry = result[0]['dataset'][0]['block'][0]
            file_entry = block_entry['file'][0]
        except:
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        if block is None:
            link_block = False
            # Just need a named object
            dataset = Dataset(dataset_entry['name'])
            block = Block(block_name, dataset)
        else:
            link_block = True
            if block.name != block_name:
                raise IntegrityError(
                    'Inconsistent block %s passed to get_file(%s)',
                    block.full_name(), name)

        lfile = self._create_file(file_entry, block)

        if link_block:
            # Caution - by adding this file we edit the block properties too

            existing = block.find_file(lfile.fid())
            if existing is None:
                block.add_file(lfile)
            else:
                block.remove_file(existing)
                block.add_file(lfile)

        return lfile
Ejemplo n.º 6
0
    def get_files(self, dataset_or_block):  #override
        files = set()

        if type(dataset_or_block) is Dataset:
            result = self._phedex.make_request(
                'data', ['dataset=' + dataset_or_block.name, 'level=file'])
            blocks = dict((b.name, b) for b in dataset_or_block.blocks)
        else:
            result = self._phedex.make_request(
                'data',
                ['block=' + dataset_or_block.full_name(), 'level=file'])
            blocks = {dataset_or_block.name: dataset_or_block}

        try:
            block_entries = result[0]['dataset'][0]['block']
        except:
            return files

        for block_entry in block_entries:
            try:
                file_entries = block_entry['file']
            except:
                continue

            bname = block_entry['name']
            block_name = Block.to_internal_name(bname[bname.find('#') + 1:])
            try:
                block = blocks[block_name]
            except:
                # unknown block! maybe should raise?
                continue

            for file_entry in file_entries:
                files.add(self._create_file(file_entry, block))

        return files
Ejemplo n.º 7
0
    def update(self, inventory):
        for dataset in inventory.datasets.itervalues():
            try:
                dataset.attr.pop('locked_blocks')
            except KeyError:
                pass

        for source, content_type, site_pattern, lock_url in self._sources.itervalues(
        ):
            if lock_url is not None:
                # check that the lock files themselves are not locked
                while True:
                    # Hacky but this is temporary any way
                    opener = urllib2.build_opener(
                        webservice.HTTPSCertKeyHandler(Configuration()))
                    opener.addheaders.append(('Accept', 'application/json'))
                    request = urllib2.Request(lock_url)
                    try:
                        opener.open(request)
                    except urllib2.HTTPError as err:
                        if err.code == 404:
                            # file not found -> no lock
                            break
                        else:
                            raise

                    LOG.info(
                        'Lock files are being produced. Waiting 60 seconds.')
                    time.sleep(60)

            LOG.info('Retrieving lock information from %s', source.url_base)

            data = source.make_request()

            if content_type == WebReplicaLock.LIST_OF_DATASETS:
                # simple list of datasets
                for dataset_name in data:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s',
                                  source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if dataset.replicas is None:
                        continue

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    for replica in dataset.replicas:
                        if site_pattern is not None and not fnmatch.fnmatch(
                                replica.site.name, site_pattern):
                            continue

                        if replica.site in locked_blocks:
                            locked_blocks[replica.site].update(
                                brep.block for brep in replica.block_replicas)
                        else:
                            locked_blocks[replica.site] = set(
                                brep.block for brep in replica.block_replicas)

            elif content_type == WebReplicaLock.CMSWEB_LIST_OF_DATASETS:
                # data['result'] -> simple list of datasets
                for dataset_name in data['result']:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s',
                                  source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if dataset.replicas is None:
                        continue

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    for replica in dataset.replicas:
                        if site_pattern is not None and not fnmatch.fnmatch(
                                replica.site.name, site_pattern):
                            continue

                        if replica.site in locked_blocks:
                            locked_blocks[replica.site].update(
                                brep.block for brep in replica.block_replicas)
                        else:
                            locked_blocks[replica.site] = set(
                                brep.block for brep in replica.block_replicas)

            elif content_type == WebReplicaLock.SITE_TO_DATASETS:
                # data = {site: {dataset: info}}
                for site_name, objects in data.items():
                    try:
                        site = inventory.sites[site_name]
                    except KeyError:
                        LOG.debug('Unknown site %s in %s', site_name,
                                  source.url_base)
                        continue

                    for object_name, info in objects.items():
                        if not info['lock']:
                            LOG.debug('Object %s is not locked at %s',
                                      object_name, site_name)
                            continue

                        if '#' in object_name:
                            dataset_name, block_real_name = object_name.split(
                                '#')
                        else:
                            dataset_name = object_name
                            block_real_name = None

                        try:
                            dataset = inventory.datasets[dataset_name]
                        except KeyError:
                            LOG.debug('Unknown dataset %s in %s', dataset_name,
                                      source.url_base)
                            continue

                        replica = site.find_dataset_replica(dataset)
                        if replica is None:
                            LOG.debug('Replica of %s is not at %s in %s',
                                      dataset_name, site_name, source.url_base)
                            continue

                        if block_real_name is None:
                            blocks = list(dataset.blocks)
                        else:
                            block = dataset.find_block(
                                Block.to_internal_name(block_real_name))
                            if block is None:
                                LOG.debug('Unknown block %s of %s in %s',
                                          block_real_name, dataset_name,
                                          source.url_base)
                                continue

                            blocks = [block]

                        try:
                            locked_blocks = dataset.attr['locked_blocks']
                        except KeyError:
                            locked_blocks = dataset.attr['locked_blocks'] = {}

                        if site in locked_blocks:
                            locked_blocks[site].update(blocks)
                        else:
                            locked_blocks[site] = set(blocks)