def __init__(self, config=None):
        config = Configuration(config)

        DeletionInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.auto_approval = config.get('auto_approval', True)
        self.allow_tape_deletion = config.get('allow_tape_deletion', True)
        self.tape_auto_approval = config.get('tape_auto_approval', False)

        self.deletion_chunk_size = config.get('chunk_size', 50.) * 1.e+12
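
A note on the unit conversion above: chunk_size is configured in terabytes and multiplied out to bytes. A minimal stand-alone check (the names here are illustrative, not from the source):

chunk_tb = 50.                   # the default passed to config.get above
chunk_bytes = chunk_tb * 1.e+12  # 50 TB expressed in bytes
assert chunk_bytes == 5.e+13
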
class TapeCopyRequested(object):
    """
    Check for pending tape transfer requests.
    Sets one attr:
      tape_copy_requested
    """

    produces = ['tape_copy_requested']

    def __init__(self, config):
        self._phedex = PhEDEx(config.get('phedex', None))

    def load(self, inventory):
        for site in inventory.sites.itervalues():
            if site.storage_type != Site.TYPE_MSS:
                continue

            requests = self._phedex.make_request(
                'transferrequests', ['node=' + site.name, 'approval=pending'])
            for request in requests:
                for dest in request['destinations']['node']:
                    if dest['name'] != site.name:
                        continue

                    if 'decided_by' in dest:
                        break

                    for dataset_entry in request['data']['dbs']['dataset']:
                        try:
                            dataset = inventory.datasets[dataset_entry['name']]
                        except KeyError:
                            continue

                        dataset.attr['tape_copy_requested'] = True

                    for block_entry in request['data']['dbs']['block']:
                        dataset_name, block_name = Block.from_full_name(
                            block_entry['name'])
                        try:
                            dataset = inventory.datasets[dataset_name]
                        except KeyError:
                            continue

                        # just label the entire dataset
                        dataset.attr['tape_copy_requested'] = True
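
A hedged usage sketch for the plugin above. The driver below is hypothetical (the inventory object and config layout are assumptions inferred from the calls the class makes), so it is left as comments:

# checker = TapeCopyRequested(Configuration({'phedex': None}))
# checker.load(inventory)
# flagged = [d.name for d in inventory.datasets.itervalues()
#            if d.attr.get('tape_copy_requested')]
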
class PhEDExReplicaInfoSource(ReplicaInfoSource):
    """ReplicaInfoSource using PhEDEx."""
    def __init__(self, config=None):
        if config is None:
            config = Configuration()

        ReplicaInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
        self._parallelizer_config = config

    def replica_exists_at_site(self, site, item):  #override
        options = ['node=' + site.name]
        if type(item) == Dataset:
            options += ['dataset=' + item.name, 'show_dataset=y']
        elif type(item) == DatasetReplica:
            options += ['dataset=' + item.dataset.name, 'show_dataset=y']
        elif type(item) == Block:
            options += ['block=' + item.full_name()]
        elif type(item) == BlockReplica:
            options += ['block=' + item.block.full_name()]
        else:
            raise RuntimeError('Invalid input passed: ' + repr(item))

        source = self._phedex.make_request('blockreplicas',
                                           options,
                                           timeout=600)

        if len(source) != 0:
            return True

        options = ['node=' + site.name]
        if type(item) == Dataset:
            # check both dataset-level and block-level subscriptions
            options += ['dataset=' + item.name, 'block=%s#*' % item.name]
        elif type(item) == DatasetReplica:
            options += [
                'dataset=' + item.dataset.name,
                'block=%s#*' % item.dataset.name
            ]
        elif type(item) == Block:
            options += ['block=' + item.full_name()]
        elif type(item) == BlockReplica:
            options += ['block=' + item.block.full_name()]

        # blockreplicas can lag by up to ~20 minutes, so a fresh subscription may be visible only here
        source = self._phedex.make_request('subscriptions',
                                           options,
                                           timeout=600)

        return len(source) != 0

    def get_replicas(self, site=None, dataset=None, block=None):  #override
        if site is None:
            site_check = self.check_allowed_site
        else:
            site_check = None
            if not self.check_allowed_site(site):
                return []

        if dataset is None and block is None:
            dataset_check = self.check_allowed_dataset
        else:
            dataset_check = None
            if dataset is not None:
                if not self.check_allowed_dataset(dataset):
                    return []
            if block is not None:
                if not self.check_allowed_dataset(block[:block.find('#')]):
                    return []

        options = []
        if site is not None:
            options.append('node=' + site)
        if dataset is not None:
            options.append('dataset=' + dataset)
        if block is not None:
            options.append('block=' + block)

        LOG.info('get_replicas(' + ','.join(options) +
                 ')  Fetching the list of replicas from PhEDEx')

        if len(options) == 0:
            return []

        block_entries = self._phedex.make_request('blockreplicas',
                                                  options,
                                                  timeout=7200)

        parallelizer = Map()
        parallelizer.timeout = 7200

        # Worker threads start automatically as block entries are added as input
        combine_file = parallelizer.get_starter(self._combine_file_info)

        for block_entry in block_entries:
            for replica_entry in block_entry['replica']:
                if replica_entry['complete'] == 'n':
                    break
            else:
                continue

            # there is at least one incomplete replica
            try:
                dataset_name, block_name = Block.from_full_name(
                    block_entry['name'])
            except ObjectError:  # invalid name
                continue

            if dataset_check and not dataset_check(dataset_name):
                continue

            combine_file.add_input(block_entry)

        combine_file.close()

        # _combine_file_info alters block_entries directly - no need to deal with output
        combine_file.get_outputs()

        block_replicas = PhEDExReplicaInfoSource.make_block_replicas(
            block_entries,
            PhEDExReplicaInfoSource.maker_blockreplicas,
            site_check=site_check,
            dataset_check=dataset_check)

        # Also use subscriptions call which has a lower latency than blockreplicas
        # For example, group change on a block replica at time T may not show up in blockreplicas until up to T + 15 minutes
        # while in subscriptions it is visible within a few seconds
        # But subscriptions call without a dataset or block takes too long
        if dataset is None and block is None:
            return block_replicas

        indexed = collections.defaultdict(dict)
        for replica in block_replicas:
            indexed[(replica.site.name,
                     replica.block.dataset.name)][replica.block.name] = replica

        dataset_entries = self._phedex.make_request('subscriptions',
                                                    options,
                                                    timeout=3600)

        for dataset_entry in dataset_entries:
            dataset_name = dataset_entry['name']

            if not self.check_allowed_dataset(dataset_name):
                continue

            try:
                subscriptions = dataset_entry['subscription']
            except KeyError:
                pass
            else:
                for sub_entry in subscriptions:
                    site_name = sub_entry['node']

                    if not self.check_allowed_site(site_name):
                        continue

                    replicas = indexed[(site_name, dataset_name)]

                    for replica in replicas.itervalues():
                        replica.group = Group(sub_entry['group'])
                        replica.is_custodial = (sub_entry['custodial'] == 'y')

            try:
                block_entries = dataset_entry['block']
            except KeyError:
                pass
            else:
                for block_entry in block_entries:
                    try:
                        _, block_name = Block.from_full_name(
                            block_entry['name'])
                    except ObjectError:
                        continue

                    try:
                        subscriptions = block_entry['subscription']
                    except KeyError:
                        continue

                    for sub_entry in subscriptions:
                        site_name = sub_entry['node']

                        if not self.check_allowed_site(site_name):
                            continue

                        try:
                            replica = indexed[(site_name,
                                               dataset_name)][block_name]
                        except KeyError:
                            continue

                        replica.group = Group(sub_entry['group'])

                        if sub_entry['node_bytes'] == block_entry['bytes']:
                            # complete
                            replica.size = sub_entry['node_bytes']
                            if replica.size is None:
                                replica.size = 0
                            replica.files = None
                        else:
                            # incomplete - since we cannot know what files are there, we'll just have to pretend there are none
                            replica.size = 0
                            replica.files = tuple()

                        replica.is_custodial = (sub_entry['custodial'] == 'y')

                        if sub_entry['time_update'] is None:
                            replica.last_update = 0
                        else:
                            replica.last_update = int(sub_entry['time_update'])

        return block_replicas
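
get_replicas above overlays the lower-latency subscriptions data onto the blockreplicas result through a (site name, dataset name) -> {block name: replica} index. The indexing idiom in isolation, with placeholder values:

import collections

indexed = collections.defaultdict(dict)
indexed[('T1_US_FNAL_Disk', '/Prim/Proc/TIER')]['blockA'] = 'replica object'
assert 'blockA' in indexed[('T1_US_FNAL_Disk', '/Prim/Proc/TIER')]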

    def get_updated_replicas(self, updated_since, inventory):  #override
        LOG.info(
            'get_updated_replicas(%d)  Fetching the list of replicas from PhEDEx',
            updated_since)

        nodes = []
        for entry in self._phedex.make_request('nodes', timeout=600):
            if not self.check_allowed_site(entry['name']):
                continue

            if entry['name'] not in inventory.sites:
                continue

            nodes.append(entry['name'])

        try:
            tmpconfig = Configuration(
                self._parallelizer_config.get('parallel', None))
        except Exception as e:
            LOG.error(str(e))
            tmpconfig = Configuration()

        parallelizer = Map(tmpconfig)
        parallelizer.timeout = 5400

        def get_node_replicas(node):
            options = ['update_since=%d' % updated_since, 'node=%s' % node]
            results = self._phedex.make_request('blockreplicas', options)

            return node, results

        # Use async to fire threads on demand
        node_results = parallelizer.execute(get_node_replicas,
                                            nodes,
                                            async=True)

        # Worker threads start automatically as block entries are added as input
        combine_file = parallelizer.get_starter(self._combine_file_info)

        all_block_entries = []

        for node, block_entries in node_results:
            site = inventory.sites[node]

            for block_entry in block_entries:
                all_block_entries.append(block_entry)

                replica_entry = block_entry['replica'][0]

                if replica_entry['complete'] == 'y':
                    continue

                # incomplete block replica - should we fetch file info?
                try:
                    dataset_name, block_name = Block.from_full_name(
                        block_entry['name'])
                except ObjectError:
                    pass
                else:
                    try:
                        dataset = inventory.datasets[dataset_name]
                        block = dataset.find_block(block_name)
                        replica = block.find_replica(site)
                        if replica.file_ids is None:
                            num_files = block.num_files
                        else:
                            num_files = len(replica.file_ids)

                        if (replica.size == replica_entry['bytes']
                                and num_files == replica_entry['files']):
                            # sizes and file counts agree - no need to fetch file info
                            continue
                    except:
                        # At any point of the above lookups we may hit a None object or KeyError or what not
                        pass

                LOG.debug(
                    'Replica %s:%s is incomplete. Fetching file information.',
                    replica_entry['node'], block_entry['name'])
                combine_file.add_input(block_entry)

        combine_file.close()

        # _combine_file_info alters block_entries directly - no need to deal with output
        combine_file.get_outputs()

        LOG.info('get_updated_replicas(%d) Got outputs', updated_since)

        return PhEDExReplicaInfoSource.make_block_replicas(
            all_block_entries,
            PhEDExReplicaInfoSource.maker_blockreplicas,
            dataset_check=self.check_allowed_dataset)

    def get_deleted_replicas(self, deleted_since):  #override
        LOG.info(
            'get_deleted_replicas(%d)  Fetching the list of replicas from PhEDEx',
            deleted_since)

        result = self._phedex.make_request(
            'deletions', ['complete_since=%d' % deleted_since], timeout=7200)
        # result is by dataset
        block_entries = []
        for dataset_entry in result:
            block_entries.extend(dataset_entry['block'])

        return PhEDExReplicaInfoSource.make_block_replicas(
            block_entries, PhEDExReplicaInfoSource.maker_deletions)

    def _combine_file_info(self, block_entry):
        try:
            LOG.debug(
                '_combine_file_info(%s) Fetching file replicas from PhEDEx',
                block_entry['name'])
            file_info = self._phedex.make_request(
                'filereplicas', ['block=%s' % block_entry['name']])[0]['file']
        except (IndexError, KeyError):
            # Somehow PhEDEx didn't have a filereplicas entry for this block at this node
            block_entry['file'] = []
        else:
            block_entry['file'] = file_info
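
_combine_file_info mutates the block entry in place, which is why its callers discard get_outputs(). A self-contained illustration of the same in-place pattern, with a stand-in for the PhEDEx call:

def annotate(entry, fetch):
    # fetch stands in for the 'filereplicas' request
    try:
        entry['file'] = fetch(entry['name'])[0]['file']
    except (IndexError, KeyError):
        entry['file'] = []  # no file-level data available

entry = {'name': 'blockA'}
annotate(entry, lambda name: [])  # empty response exercises the except path
assert entry['file'] == []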

    @staticmethod
    def make_block_replicas(block_entries,
                            replica_maker,
                            site_check=None,
                            dataset_check=None):
        """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

        dataset = None
        block_replicas = []

        for block_entry in block_entries:
            try:
                dataset_name, block_name = Block.from_full_name(
                    block_entry['name'])
            except ObjectError:  # invalid name
                continue

            if dataset is None or dataset.name != dataset_name:
                if dataset_check and not dataset_check(dataset_name):
                    continue

                try:
                    dataset = Dataset(dataset_name)
                except ObjectError:
                    # invalid name
                    dataset = None

            if dataset is None:
                continue

            block = Block(block_name, dataset, block_entry['bytes'])
            if block.size is None:
                block.size = 0

            block_replicas.extend(
                replica_maker(block, block_entry, site_check=site_check))

        return block_replicas

    @staticmethod
    def maker_blockreplicas(block, block_entry, site_check=None):
        """Return a list of block replicas using blockreplicas data or a combination of blockreplicas and filereplicas calls."""

        sites = {}
        invalid_sites = set()
        groups = {}

        block_replicas = {}

        for replica_entry in block_entry['replica']:
            site_name = replica_entry['node']
            try:
                site = sites[site_name]
            except KeyError:
                if site_check:
                    if site_name in invalid_sites:
                        continue
                    if not site_check(site_name):
                        invalid_sites.add(site_name)
                        continue

                site = sites[site_name] = Site(site_name)

            group_name = replica_entry['group']
            try:
                group = groups[group_name]
            except KeyError:
                group = groups[group_name] = Group(group_name)

            try:
                time_update = int(replica_entry['time_update'])
            except TypeError:
                # time_update was None
                time_update = 0

            block_replica = BlockReplica(
                block,
                site,
                group,
                is_custodial=(replica_entry['custodial'] == 'y'),
                last_update=time_update)

            block_replicas[site_name] = block_replica

            if replica_entry['complete'] == 'n':
                # temporarily make this a list
                block_replica.file_ids = []
                block_replica.size = 0
                LOG.info("Incomplete %s" % str(block_replica))

        if 'file' in block_entry:
            for file_entry in block_entry['file']:
                for replica_entry in file_entry['replica']:
                    site_name = replica_entry['node']
                    try:
                        block_replica = block_replicas[site_name]
                    except KeyError:
                        continue

                    if block_replica.file_ids is None:
                        continue

                    # add LFN instead of file id
                    block_replica.file_ids.append(file_entry['name'])
                    file_size = file_entry['bytes']
                    if file_size is not None:
                        block_replica.size += file_size

                    try:
                        time_create = int(replica_entry['time_create'])
                    except TypeError:
                        pass
                    else:
                        if time_create > block_replica.last_update:
                            block_replica.last_update = time_create

        for block_replica in block_replicas.itervalues():
            if block_replica.file_ids is not None:
                block_replica.file_ids = tuple(block_replica.file_ids)

        return block_replicas.values()

    @staticmethod
    def maker_deletions(block, block_entry, site_check=None):
        replicas = []

        for deletion_entry in block_entry['deletion']:
            if site_check and not site_check(deletion_entry['node']):
                continue

            block_replica = BlockReplica(block, Site(deletion_entry['node']),
                                         Group.null_group)

            replicas.append(block_replica)

        return replicas
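
Several snippets in this listing split PhEDEx full block names on '#' (Block.from_full_name and the find('#') slices presumably wrap logic like this). A stdlib-only sketch with an illustrative name:

full_name = '/Primary/Processed/TIER#abcd-1234'
dataset_name, _, block_name = full_name.partition('#')
assert dataset_name == '/Primary/Processed/TIER'
assert block_name == 'abcd-1234'
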
Example #4
    def __init__(self, config):
        CopyInterface.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)

        self.subscription_chunk_size = config.get('chunk_size', 50.) * 1.e+12
Example #5
class PhEDExCopyInterface(CopyInterface):
    """Copy using PhEDEx."""
    def __init__(self, config):
        CopyInterface.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)

        self.subscription_chunk_size = config.get('chunk_size', 50.) * 1.e+12

    def schedule_copy(self, replica, comments=''):  #override
        request_mapping = {}

        subscription_list = []

        if type(replica) is DatasetReplica:
            blocks_by_group = collections.defaultdict(set)
            for block_replica in replica.block_replicas:
                blocks_by_group[block_replica.group].add(block_replica.block)

            if len(blocks_by_group) > 1:
                # this was called as a dataset-level copy, but we actually have multiple
                # sets of blocks with different groups -> use block-level schedule_copies instead
                return self.schedule_copies(replica.block_replicas, comments)

            group, block_replicas = blocks_by_group.items()[0]

            if block_replicas == replica.dataset.blocks:
                subscription_list.append(replica.dataset)
                level = 'dataset'
            else:
                subscription_list.extend(block_replicas)
                level = 'block'

        else:  #BlockReplica
            group = replica.group
            subscription_list.append(replica.block)
            level = 'block'

        self._run_subscription_request(request_mapping, replica.site, group,
                                       level, subscription_list, comments)

        return request_mapping

    def schedule_copies(self, replicas, comments=''):  #override
        request_mapping = {}

        replicas_by_site = collections.defaultdict(list)
        for replica in replicas:
            replicas_by_site[replica.site].append(replica)

        for site, replica_list in replicas_by_site.iteritems():
            # sort the subscriptions into dataset level / block level and by groups
            subscription_lists = {}
            subscription_lists['dataset'] = collections.defaultdict(
                list)  # {group: [datasets]}
            subscription_lists['block'] = collections.defaultdict(
                list)  # {group: [blocks]}

            for replica in replica_list:
                if type(replica) is DatasetReplica:
                    blocks_by_group = collections.defaultdict(set)
                    for block_replica in replica.block_replicas:
                        blocks_by_group[block_replica.group].add(
                            block_replica.block)

                    for group, blocks in blocks_by_group.iteritems():
                        if blocks == replica.dataset.blocks:
                            subscription_lists['dataset'][group].append(
                                replica.dataset)
                        else:
                            subscription_lists['block'][group].extend(blocks)
                else:
                    subscription_lists['block'][replica.group].append(
                        replica.block)

            for level in ['dataset', 'block']:
                for group, items in subscription_lists[level].iteritems():
                    self._run_subscription_request(request_mapping, site,
                                                   group, level, items,
                                                   comments)

        return request_mapping

    def _run_subscription_request(self, request_mapping, site, group, level,
                                  subscription_list, comments):
        # Make a subscription request for potentially multiple datasets or blocks, but to a single site and group
        full_catalog = collections.defaultdict(list)

        if level == 'dataset':
            for dataset in subscription_list:
                full_catalog[dataset] = []
        elif level == 'block':
            for block in subscription_list:
                full_catalog[block.dataset].append(block)

        LOG.info('Subscribing %d datasets for %s at %s', len(full_catalog),
                 group.name, site.name)

        # make requests in chunks
        request_catalog = {}
        chunk_size = 0
        items = []
        while len(full_catalog) != 0:
            dataset, blocks = full_catalog.popitem()
            request_catalog[dataset] = blocks

            if level == 'dataset':
                chunk_size += dataset.size
                items.append(dataset)
            elif level == 'block':
                chunk_size += sum(b.size for b in blocks)
                items.extend(blocks)

            if chunk_size < self.subscription_chunk_size and len(
                    full_catalog) != 0:
                continue

            options = {
                'node': site.name,
                'data': self._phedex.form_catalog_xml(request_catalog),
                'level': level,
                'priority': 'normal',
                'move': 'n',
                'static': 'n',
                'custodial': 'n',
                'group': group.name,
                'request_only': 'n',
                'no_mail': 'n',
                'comments': comments
            }

            # result = [{'id': <id>}] (item 'request_created' of PhEDEx response)
            if self.dry_run:
                result = [{'id': '0'}]
            else:
                try:
                    result = self._phedex.make_request('subscribe',
                                                       options,
                                                       method=POST)
                except:
                    result = []

            if len(result) != 0:
                request_id = int(result[0]['id'])  # return value is a string
                LOG.warning('PhEDEx subscription request id: %d', request_id)
                request_mapping[request_id] = (True, site, items)
            else:
                LOG.error('Copy %s failed.', str(options))
                # we should probably do something here

            request_catalog = {}
            chunk_size = 0
            items = []

    def copy_status(self, request_id):  #override
        request = self._phedex.make_request('transferrequests',
                                            'request=%d' % request_id)
        if len(request) == 0:
            return {}

        site_name = request[0]['destinations']['node'][0]['name']

        dataset_names = []
        for ds_entry in request[0]['data']['dbs']['dataset']:
            dataset_names.append(ds_entry['name'])

        block_names = []
        for ds_entry in request[0]['data']['dbs']['block']:
            block_names.append(ds_entry['name'])

        subscriptions = []

        if len(dataset_names) != 0:
            chunks = [
                dataset_names[i:i + 35]
                for i in xrange(0, len(dataset_names), 35)
            ]
            for chunk in chunks:
                subscriptions.extend(
                    self._phedex.make_request(
                        'subscriptions', ['node=%s' % site_name] +
                        ['dataset=%s' % n for n in chunk]))

        if len(block_names) != 0:
            chunks = [
                block_names[i:i + 35] for i in xrange(0, len(block_names), 35)
            ]
            for chunk in chunks:
                subscriptions.extend(
                    self._phedex.make_request('subscriptions',
                                              ['node=%s' % site_name] +
                                              ['block=%s' % n for n in chunk]))

        status = {}
        for dataset in subscriptions:
            try:
                cont = dataset['subscription'][0]
                bytes = dataset['bytes']
                node_bytes = cont['node_bytes']
                time_update = cont['time_update']
            except KeyError:
                # this was a block-level subscription (no 'subscription' field for the dataset)
                bytes = 0
                node_bytes = 0
                time_update = 0
                for block in dataset['block']:
                    cont = block['subscription'][0]
                    bytes += block['bytes']
                    node_bytes += cont['node_bytes']
                    time_update = max(time_update, cont['time_update'])

            status[(site_name, dataset['name'])] = (bytes, node_bytes,
                                                    time_update)

        return status
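
_run_subscription_request above accumulates items until a byte threshold before firing each subscribe call. The same batching pattern as a self-contained generator (names are illustrative):

def chunk_by_size(named_sizes, max_bytes):
    chunk, total = [], 0
    for name, size in named_sizes:
        chunk.append(name)
        total += size
        if total >= max_bytes:
            yield chunk
            chunk, total = [], 0
    if chunk:
        yield chunk  # flush the remainder, as the while loop above does

assert list(chunk_by_size([('a', 30), ('b', 30), ('c', 30)], 50)) == \
    [['a', 'b'], ['c']]
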
Example #6
    def __init__(self, config):
        DatasetInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)
        self._dbs = RESTService(config.dbs)
Example #7
class PhEDExDatasetInfoSource(DatasetInfoSource):
    """DatasetInfoSource using PhEDEx and DBS."""
    def __init__(self, config):
        DatasetInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)
        self._dbs = RESTService(config.dbs)

    def get_dataset_names(self, include=['*'], exclude=[]):
        dataset_names = []

        exclude_exps = []
        for pattern in exclude:
            exclude_exps.append(re.compile(fnmatch.translate(pattern)))

        def add_datasets(result):
            for entry in result:
                name = entry['dataset']
                for ex_exp in exclude_exps:
                    if ex_exp.match(name):
                        break
                else:
                    # not excluded
                    dataset_names.append(name)

        if len(include) == 1 and include[0] == '/*/*/*':
            # all datasets requested - will do this efficiently
            result = self._dbs.make_request('acquisitioneras')
            sds = [entry['acquisition_era_name'] for entry in result]

            # query DBS in parallel
            args = [('datasets', ['acquisition_era_name=' + sd]) for sd in sds]
            results = Map().execute(self._dbs.make_request, args)
            for result in results:
                add_datasets(result)

        for in_pattern in include:
            result = self._dbs.make_request('datasets',
                                            ['dataset=' + in_pattern])
            add_datasets(result)

        return dataset_names
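
get_dataset_names compiles the glob-style exclude patterns into regular expressions once and reuses them for every candidate name. The same filter, stand-alone:

import fnmatch
import re

exclude = ['/Test*/*/*']
exclude_exps = [re.compile(fnmatch.translate(p)) for p in exclude]

names = ['/TestPrimary/Proc/RAW', '/Physics/Proc/RAW']
kept = [n for n in names if not any(x.match(n) for x in exclude_exps)]
assert kept == ['/Physics/Proc/RAW']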

    def get_updated_datasets(self, updated_since):  #override
        LOG.warning(
            'PhEDExDatasetInfoSource can only return a list of datasets and blocks that are created since the given timestamp.'
        )

        # query block-level data across all datasets created since the timestamp
        result = self._phedex.make_request('data', [
            'dataset=/*/*/*', 'level=block',
            'create_since=%d' % updated_since
        ])

        if len(result) == 0 or 'dataset' not in result[0]:
            return []

        updated_datasets = []

        for dataset_entry in result[0]['dataset']:
            dataset = self._create_dataset(dataset_entry)
            updated_datasets.append(dataset)

        return updated_datasets

    def get_dataset(self, name, with_files=False):  #override
        ## Get the full dataset-block-file data from PhEDEx

        if with_files:
            level = 'file'
        else:
            level = 'block'

        result = self._phedex.make_request(
            'data', ['dataset=' + name, 'level=' + level])

        try:
            dataset_entry = result[0]['dataset'][0]
        except:
            return None

        ## Create the dataset object
        dataset = self._create_dataset(dataset_entry)

        ## Fill block and file data
        if 'block' in dataset_entry:
            for block_entry in dataset_entry['block']:
                block = self._create_block(block_entry, dataset)
                dataset.blocks.add(block)

                # size and num_files are left 0 in _create_dataset (PhEDEx does not tell)
                dataset.size += block.size
                dataset.num_files += block.num_files

                if with_files and 'file' in block_entry:
                    # See comments in get_block
                    block._files = set()
                    for file_entry in block_entry['file']:
                        block._files.add(self._create_file(file_entry, block))

        return dataset

    def get_block(self, name, dataset=None, with_files=False):  #override
        ## Get the full block-file data from PhEDEx

        if with_files:
            level = 'file'
        else:
            level = 'block'

        result = self._phedex.make_request('data',
                                           ['block=' + name, 'level=' + level])

        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
        except:
            return None

        if dataset is None:
            link_dataset = False
            # Just need a named object
            dataset = Dataset(dataset_entry['name'])
        else:
            link_dataset = True
            if dataset.name != dataset_entry['name']:
                raise IntegrityError(
                    'Inconsistent dataset %s passed to get_block(%s)',
                    dataset.name, name)

        block = self._create_block(block_entry, dataset)

        if with_files and 'file' in block_entry:
            # _create_block sets size and num_files; just need to update the files list
            # Directly creating the _files set
            # This list will persist (unlike the weak proxy version loaded from inventory), but the returned block
            # from this function is only used temporarily anyway
            block._files = set()
            for file_entry in block_entry['file']:
                block._files.add(self._create_file(file_entry, block))

        if link_dataset:
            existing = dataset.find_block(block.name)
            if existing is None:
                dataset.blocks.add(block)
                dataset.size += block.size
                dataset.num_files += block.num_files
            else:
                dataset.blocks.remove(existing)
                dataset.size += block.size - existing.size
                dataset.num_files += block.num_files - existing.num_files

        return block

    def get_file(self, name, block=None):
        ## Get the file data from PhEDEx

        result = self._phedex.make_request('data',
                                           ['file=' + name, 'level=file'])

        try:
            block_entry = result[0]['dataset'][0]['block'][0]
            file_entry = block_entry['file'][0]
        except:
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        if block is None:
            link_block = False
            # Just need named objects; the dataset name is the portion of the
            # full block name before '#'
            dataset = Dataset(bname[:bname.find('#')])
            block = Block(block_name, dataset)
        else:
            link_block = True
            if block.name != block_name:
                raise IntegrityError(
                    'Inconsistent block %s passed to get_file(%s)',
                    block.full_name(), name)

        lfile = self._create_file(file_entry, block)

        if link_block:
            # Caution - by adding this file we edit the block properties too

            existing = block.find_file(lfile.fid())
            if existing is None:
                block.add_file(lfile)
            else:
                block.remove_file(existing)
                block.add_file(lfile)

        return lfile

    def get_files(self, dataset_or_block):  #override
        files = set()

        if type(dataset_or_block) is Dataset:
            result = self._phedex.make_request(
                'data', ['dataset=' + dataset_or_block.name, 'level=file'])
            blocks = dict((b.name, b) for b in dataset_or_block.blocks)
        else:
            result = self._phedex.make_request(
                'data',
                ['block=' + dataset_or_block.full_name(), 'level=file'])
            blocks = {dataset_or_block.name: dataset_or_block}

        try:
            block_entries = result[0]['dataset'][0]['block']
        except:
            return files

        for block_entry in block_entries:
            try:
                file_entries = block_entry['file']
            except:
                continue

            bname = block_entry['name']
            block_name = Block.to_internal_name(bname[bname.find('#') + 1:])
            try:
                block = blocks[block_name]
            except:
                # unknown block! maybe should raise?
                continue

            for file_entry in file_entries:
                files.add(self._create_file(file_entry, block))

        return files

    def _create_dataset(self, dataset_entry):
        """
        Create a dataset object with blocks and files from a PhEDEx dataset entry
        """

        dataset = Dataset(dataset_entry['name'],
                          is_open=(dataset_entry['is_open'] == 'y'))

        if dataset_entry.get('time_update') is not None:
            dataset.last_update = int(dataset_entry['time_update'])
        else:
            dataset.last_update = int(dataset_entry['time_create'])

        ## Get other details of the dataset from DBS
        self._fill_dataset_details(dataset)

        return dataset

    def _create_block(self, block_entry, dataset):
        """
        Create a block object with files from a PhEDEx block entry
        """

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        block = Block(block_name,
                      dataset,
                      size=block_entry['bytes'],
                      num_files=block_entry['files'],
                      is_open=(block_entry['is_open'] == 'y'))

        if block_entry.get('time_update') is not None:
            block.last_update = int(block_entry['time_update'])
        else:
            block.last_update = int(block_entry['time_create'])

        return block

    def _create_file(self, file_entry, block):
        lfile = File(file_entry['lfn'], block=block, size=file_entry['size'])

        return lfile

    def _fill_dataset_details(self, dataset):
        # 1. status and PD type

        result = self._dbs.make_request('datasets', [
            'dataset=' + dataset.name, 'dataset_access_type=*', 'detail=True'
        ])

        if len(result) != 0:
            dbs_entry = result[0]
            dataset.status = Dataset.status_val(
                dbs_entry['dataset_access_type'])
            dataset.data_type = Dataset.data_type_val(
                dbs_entry['primary_ds_type'])
        else:
            dataset.status = Dataset.STAT_UNKNOWN
            dataset.data_type = Dataset.TYPE_UNKNOWN

        # 2. software version

        result = self._dbs.make_request('releaseversions',
                                        ['dataset=' + dataset.name])
        if len(result) != 0:
            try:
                version = result[0]['release_version'][0]
            except KeyError:
                pass
            else:
                matches = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)',
                                   version)
                if matches:
                    cycle, major, minor = map(
                        int, [matches.group(i) for i in range(1, 4)])

                    if matches.group(4):
                        suffix = matches.group(4)[1:]
                    else:
                        suffix = ''

                    dataset.software_version = (cycle, major, minor, suffix)
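
Stand-alone, the CMSSW version regex at the end of _fill_dataset_details decomposes a release string as below (the version shown is an example):

import re

matches = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)',
                   'CMSSW_10_2_5_patch1')
cycle, major, minor = map(int, [matches.group(i) for i in range(1, 4)])
suffix = matches.group(4)[1:] if matches.group(4) else ''
assert (cycle, major, minor, suffix) == (10, 2, 5, 'patch1')
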
Example #8
class PhEDExReplicaInfoSource(ReplicaInfoSource):
    """ReplicaInfoSource using PhEDEx."""
    def __init__(self, config):
        ReplicaInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)

    def replica_exists_at_site(self, site, item):  #override
        options = ['node=' + site.name]

        if type(item) == Dataset:
            options += ['dataset=' + item.name, 'show_dataset=y']
        elif type(item) == DatasetReplica:
            options += ['dataset=' + item.dataset.name, 'show_dataset=y']
        elif type(item) == Block:
            options += ['block=' + item.full_name()]
        elif type(item) == BlockReplica:
            options += ['block=' + item.block.full_name()]
        else:
            raise RuntimeError('Invalid input passed: ' + repr(item))

        source = self._phedex.make_request('blockreplicas', options)

        return len(source) != 0

    def get_replicas(self, site=None, dataset=None, block=None):  #override
        options = []
        if site is not None:
            options.append('node=' + site)
        if dataset is not None:
            options.append('dataset=' + dataset)
        if block is not None:
            options.append('block=' + block)

        LOG.info('get_replicas(' + ','.join(options) +
                 ')  Fetching the list of replicas from PhEDEx')

        if len(options) == 0:
            return []

        result = self._phedex.make_request('blockreplicas',
                                           ['show_dataset=y'] + options)

        return PhEDExReplicaInfoSource.make_block_replicas(
            result, PhEDExReplicaInfoSource.maker_blockreplicas)

    def get_updated_replicas(self, updated_since):  #override
        LOG.info(
            'get_updated_replicas(%d)  Fetching the list of replicas from PhEDEx',
            updated_since)

        result = self._phedex.make_request(
            'blockreplicas',
            ['show_dataset=y',
             'update_since=%d' % updated_since])

        return PhEDExReplicaInfoSource.make_block_replicas(
            result, PhEDExReplicaInfoSource.maker_blockreplicas)

    def get_deleted_replicas(self, deleted_since):  #override
        LOG.info(
            'get_deleted_replicas(%d)  Fetching the list of replicas from PhEDEx',
            deleted_since)

        result = self._phedex.make_request(
            'deletions', ['complete_since=%d' % deleted_since])

        return PhEDExReplicaInfoSource.make_block_replicas(
            result, PhEDExReplicaInfoSource.maker_deletions)

    @staticmethod
    def make_block_replicas(dataset_entries, replica_maker):
        """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

        block_replicas = []

        for dataset_entry in dataset_entries:
            dataset = Dataset(dataset_entry['name'])

            for block_entry in dataset_entry['block']:
                name = block_entry['name']
                try:
                    block_name = Block.to_internal_name(name[name.find('#') +
                                                             1:])
                except ValueError:  # invalid name
                    continue

                block = Block(block_name, dataset, block_entry['bytes'])

                block_replicas.extend(replica_maker(block, block_entry))

        return block_replicas

    @staticmethod
    def maker_blockreplicas(block, block_entry):
        replicas = []

        for replica_entry in block_entry['replica']:
            block_replica = BlockReplica(
                block,
                Site(replica_entry['node']),
                Group(replica_entry['group']),
                is_complete=(replica_entry['bytes'] == block.size),
                is_custodial=(replica_entry['custodial'] == 'y'),
                size=replica_entry['bytes'],
                last_update=int(replica_entry['time_update']))

            replicas.append(block_replica)

        return replicas

    @staticmethod
    def maker_deletions(block, block_entry):
        replicas = []

        for deletion_entry in block_entry['deletion']:
            block_replica = BlockReplica(block, Site(deletion_entry['node']),
                                         Group.null_group)

            replicas.append(block_replica)

        return replicas
Example #9
class PhEDExGroupInfoSource(GroupInfoSource):
    """GroupInfoSource using PhEDEx."""
    def __init__(self, config):
        GroupInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)

    def get_group(self, name):  #override
        if self.include is not None:
            matched = False
            for pattern in self.include:
                if fnmatch.fnmatch(name, pattern):
                    matched = True
                    break

            if not matched:
                LOG.info('get_group(%s)  %s is not included by configuration',
                         name, name)
                return None

        if self.exclude is not None:
            for pattern in self.exclude:
                if fnmatch.fnmatch(name, pattern):
                    LOG.info('get_group(%s)  %s is excluded by configuration',
                             name, name)
                    return None

        LOG.info('get_group(%s)  Fetching info on group %s', name, name)

        result = self._phedex.make_request('groups', ['group=' + name])
        if len(result) == 0:
            return None

        group = Group(name)

        if name in self.dataset_level_groups:
            group.olevel = Dataset
        else:
            group.olevel = Block

        return group

    def get_group_list(self):  #override
        LOG.info('get_group_list  Fetching the list of groups from PhEDEx')
        LOG.debug('Groups with dataset-level ownership: %s',
                  str(self.dataset_level_groups))

        group_list = []

        for entry in self._phedex.make_request('groups'):
            if self.include is not None:
                matched = False
                for pattern in self.include:
                    if fnmatch.fnmatch(entry['name'], pattern):
                        matched = True
                        break

                if not matched:
                    continue

            if self.exclude is not None:
                matched = False
                for pattern in self.exclude:
                    if fnmatch.fnmatch(entry['name'], pattern):
                        matched = True
                        break

                if matched:
                    continue

            if entry['name'] in self.dataset_level_groups:
                olevel = Dataset
            else:
                olevel = Block

            group_list.append(Group(entry['name'], olevel=olevel))

        return group_list
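
The include/exclude logic in get_group and get_group_list reduces to a small predicate. A stand-alone equivalent (the helper name is made up):

import fnmatch

def allowed(name, include=None, exclude=None):
    # include=None means everything passes; exclude=None means nothing is dropped
    if include is not None and not any(fnmatch.fnmatch(name, p) for p in include):
        return False
    if exclude is not None and any(fnmatch.fnmatch(name, p) for p in exclude):
        return False
    return True

assert allowed('AnalysisOps', include=['*Ops'], exclude=['Deprecated*'])
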
Example #10
    def __init__(self, config):
        self._phedex = PhEDEx(config.get('phedex', None))
Example #11
class PhEDExCopyInterface(CopyInterface):
    """Copy using PhEDEx."""
    def __init__(self, config=None):
        config = Configuration(config)

        CopyInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.subscription_chunk_size = config.get('chunk_size', 50.) * 1.e+12

    def schedule_copies(self,
                        replica_list,
                        operation_id,
                        comments=''):  #override
        sites = set(r.site for r in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_copies should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        LOG.info(
            'Scheduling copy of %d replicas to %s using PhEDEx (operation %d)',
            len(replica_list), site, operation_id)

        # sort the subscriptions into dataset level / block level and by groups
        subscription_lists = {}
        subscription_lists['dataset'] = collections.defaultdict(
            list)  # {group: [datasets]}
        subscription_lists['block'] = collections.defaultdict(
            list)  # {group: [blocks]}

        for replica in replica_list:
            if replica.growing:
                subscription_lists['dataset'][replica.group].append(
                    replica.dataset)
            else:
                blocks_by_group = collections.defaultdict(set)
                for block_replica in replica.block_replicas:
                    blocks_by_group[block_replica.group].add(
                        block_replica.block)

                for group, blocks in blocks_by_group.iteritems():
                    subscription_lists['block'][group].extend(blocks)

        # for convenience, mapping dataset -> replica
        result = {}

        for level in ['dataset', 'block']:
            for group, items in subscription_lists[level].iteritems():
                success = self._run_subscription_request(
                    operation_id, site, group, level, items, comments)

                for replica in success:
                    if replica.dataset in result:
                        booked = result[replica.dataset]
                        # need to merge
                        for block_replica in replica.block_replicas:
                            # there shouldn't be any block replica overlap but we will be careful
                            if booked.find_block_replica(
                                    block_replica.block) is None:
                                booked.block_replicas.add(block_replica)
                    else:
                        result[replica.dataset] = replica

        return result.values()

    def _run_subscription_request(self, operation_id, site, group, level,
                                  subscription_list, comments):
        # Make a subscription request for potentially multiple datasets or blocks, but to a single site and group
        full_catalog = collections.defaultdict(list)

        if level == 'dataset':
            for dataset in subscription_list:
                full_catalog[dataset] = []
        elif level == 'block':
            for block in subscription_list:
                full_catalog[block.dataset].append(block)

        history_sql = 'INSERT INTO `phedex_requests` (`id`, `operation_type`, `operation_id`, `approved`) VALUES (%s, \'copy\', %s, %s)'

        success = []

        # make requests in chunks
        request_catalog = {}
        chunk_size = 0
        items = []
        while len(full_catalog) != 0:
            dataset, blocks = full_catalog.popitem()
            request_catalog[dataset] = blocks

            if level == 'dataset':
                chunk_size += dataset.size
                items.append(dataset)
            elif level == 'block':
                chunk_size += sum(b.size for b in blocks)
                items.extend(blocks)

            if chunk_size < self.subscription_chunk_size and len(
                    full_catalog) != 0:
                continue

            options = {
                'node': site.name,
                'data': self._phedex.form_catalog_xml(request_catalog),
                'level': level,
                'priority': 'low',
                'move': 'n',
                'static': 'n',
                'custodial': 'n',
                'group': group.name,
                'request_only': 'n',
                'no_mail': 'n',
                'comments': comments
            }

            try:
                if self._read_only:
                    result = [{'id': 0}]
                else:
                    result = self._phedex.make_request('subscribe',
                                                       options,
                                                       method=POST)
            except:
                LOG.error('Copy %s failed.', str(options))
                # we should probably do something here
            else:
                request_id = int(result[0]['id'])  # return value is a string
                LOG.warning('PhEDEx subscription request id: %d', request_id)
                if not self._read_only:
                    self._history.db.query(history_sql, request_id,
                                           operation_id, True)

                for dataset, blocks in request_catalog.iteritems():
                    if level == 'dataset':
                        replica = DatasetReplica(dataset,
                                                 site,
                                                 growing=True,
                                                 group=group)
                        for block in dataset.blocks:
                            replica.block_replicas.add(
                                BlockReplica(block,
                                             site,
                                             group,
                                             size=0,
                                             last_update=int(time.time())))

                    else:
                        replica = DatasetReplica(dataset, site, growing=False)
                        for block in blocks:
                            replica.block_replicas.add(
                                BlockReplica(block,
                                             site,
                                             group,
                                             size=0,
                                             last_update=int(time.time())))

                    success.append(replica)

            request_catalog = {}
            chunk_size = 0
            items = []

        return success

    def copy_status(self, history_record, inventory):  #override
        request_ids = self._history.db.query(
            'SELECT `id` FROM `phedex_requests` WHERE `operation_type` = \'copy\' AND `operation_id` = %s',
            history_record.operation_id)

        if len(request_ids) == 0:
            return {}

        return self.transfer_request_status(request_ids)

    def transfer_request_status(self, request_ids):
        status = {}

        LOG.debug('Querying PhEDEx transferrequests for requests %s',
                  request_ids)
        requests = self._phedex.make_request('transferrequests',
                                             [('request', i)
                                              for i in request_ids],
                                             method=POST)
        if len(requests) == 0:
            return status

        for request in requests:
            # A single request can have multiple destinations
            site_names = [d['name'] for d in request['destinations']['node']]

            dataset_names = []
            for ds_entry in request['data']['dbs']['dataset']:
                dataset_names.append(ds_entry['name'])

            block_names = []
            for ds_entry in request['data']['dbs']['block']:
                block_names.append(ds_entry['name'])

        if len(dataset_names) != 0:
            # Process dataset-level subscriptions

            subscriptions = []
            chunks = [
                dataset_names[i:i + 35]
                for i in xrange(0, len(dataset_names), 35)
            ]
            for site_name in site_names:
                for chunk in chunks:
                    subscriptions.extend(
                        self._phedex.make_request(
                            'subscriptions', ['node=%s' % site_name] +
                            ['dataset=%s' % n for n in chunk]))

            for dataset in subscriptions:
                dataset_name = dataset['name']
                try:
                    cont = dataset['subscription'][0]
                except KeyError:
                    LOG.error('Subscription of %s should exist but doesn\'t',
                              dataset_name)
                    continue

                site_name = cont['node']
                bytes = dataset['bytes']

                node_bytes = cont['node_bytes']
                if node_bytes is None:
                    node_bytes = 0
                elif node_bytes != bytes:
                    # it's possible that there were block-level deletions
                    blocks = self._phedex.make_request(
                        'blockreplicas',
                        ['node=%s' % site_name,
                         'dataset=%s' % dataset_name])
                    bytes = sum(b['bytes'] for b in blocks)

                status[(site_name, dataset_name)] = (bytes, node_bytes,
                                                     cont['time_update'])

        if len(block_names) != 0:
            # Process block-level subscriptions

            subscriptions = []
            chunks = [
                block_names[i:i + 35] for i in xrange(0, len(block_names), 35)
            ]
            for site_name in site_names:
                for chunk in chunks:
                    subscriptions.extend(
                        self._phedex.make_request(
                            'subscriptions', ['node=%s' % site_name] +
                            ['block=%s' % n for n in chunk]))

            overridden = set()

            for dataset in subscriptions:
                dataset_name = dataset['name']

                try:
                    blocks = dataset['block']
                except KeyError:
                    try:
                        cont = dataset['subscription'][0]
                    except KeyError:
                        LOG.error(
                            'Subscription of %s neither block-level nor dataset-level',
                            dataset_name)
                        continue

                    site_name = cont['node']

                    if (site_name, dataset_name) in overridden:
                        # this is a dataset-level subscription and we've processed this dataset already
                        continue

                    overridden.add((site_name, dataset_name))

                    LOG.debug(
                        'Block-level subscription of %s at %s is overridden',
                        dataset_name, site_name)

                    requested_blocks = [
                        name for name in block_names
                        if name.startswith(dataset_name + '#')
                    ]

                    blocks = self._phedex.make_request(
                        'blockreplicas',
                        ['node=%s' % site_name,
                         'dataset=%s' % dataset_name])
                    for block in blocks:
                        block_name = block['name']
                        if block_name not in requested_blocks:
                            continue

                        replica = block['replica'][0]
                        status[(site_name,
                                block_name)] = (block['bytes'],
                                                replica['bytes'],
                                                replica['time_update'])

                    continue

                for block in blocks:
                    block_name = block['name']
                    try:
                        cont = block['subscription'][0]
                    except KeyError:
                        LOG.error(
                            'Subscription of %s should exist but doesn\'t',
                            block_name)
                        continue

                    node_bytes = cont['node_bytes']
                    if node_bytes is None:
                        node_bytes = 0

                    status[(cont['node'],
                            block_name)] = (block['bytes'], node_bytes,
                                            cont['time_update'])

        # now we pick up whatever did not appear in the subscriptions call
        for site_name in site_names:
            for dataset_name in dataset_names:
                key = (site_name, dataset_name)
                if key not in status:
                    status[key] = None

            for block_name in block_names:
                key = (site_name, block_name)
                if key not in status:
                    status[key] = None

        return status
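A minimal usage sketch for the status dictionary returned above; the interface instance and request id are hypothetical. Each entry maps (site, item) to (total bytes, bytes at node, last update), or None when the subscription no longer exists in PhEDEx.

# Hypothetical usage; 'copy_interface' is a configured instance of the
# class above and 123456 is a made-up PhEDEx request id.
status = copy_interface.transfer_request_status([123456])
for (site_name, item_name), stat in status.iteritems():
    if stat is None:
        LOG.info('%s at %s: subscription lost', item_name, site_name)
    elif stat[1] == stat[0]:
        LOG.info('%s at %s: transfer complete', item_name, site_name)
    else:
        LOG.info('%s at %s: %d of %d bytes', item_name, site_name,
                 stat[1], stat[0])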
Example #12
class PhEDExSiteInfoSource(SiteInfoSource):
    """SiteInfoSource for PhEDEx. Also use CMS Site Status Board for additional information."""
    def __init__(self, config=None):
        config = Configuration(config)

        SiteInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
        self._ssb = SiteStatusBoard(config.get('ssb', None))

        self.ssb_cache_lifetime = config.get('ssb_cache_lifetime', 3600)
        self._ssb_cache_timestamp = 0
        self._caching_lock = threading.Lock()

        self._waitroom_sites = set()
        self._morgue_sites = set()

    def get_site(self, name):  #override
        if not self.check_allowed_site(name):
            LOG.info('get_site(%s)  %s is excluded by configuration.', name,
                     name)
            return None

        LOG.info('get_site(%s)  Fetching information of %s from PhEDEx', name,
                 name)

        # General site info
        result = self._phedex.make_request('nodes', ['node=' + name])
        if len(result) == 0:
            return None

        entry = result[0]

        host = entry['se']
        storage_type = Site.storage_type_val(entry['kind'])

        return Site(name, host=host, storage_type=storage_type)

    def get_site_list(self, inventory):  #override
        LOG.info('get_site_list  Fetching the list of nodes from PhEDEx')

        site_list = []

        for entry in self._phedex.make_request('nodes'):
            site_name = entry['name']
            if not self.check_allowed_site(site_name):
                continue

            siteObj_new = Site(site_name,
                               host=entry['se'],
                               storage_type=Site.storage_type_val(
                                   entry['kind']))
            if site_name in inventory.sites:
                siteObj_old = inventory.sites[site_name]
                siteObj_new.backend = siteObj_old.backend
                siteObj_new.x509proxy = siteObj_old.x509proxy

            site_list.append(siteObj_new)

        return site_list

    def get_site_status(self, site_name):  #override
        with self._caching_lock:
            if time.time(
            ) > self._ssb_cache_timestamp + self.ssb_cache_lifetime:
                self._waitroom_sites = set()
                self._morgue_sites = set()

                latest_status = {}

                # get list of sites in waiting room (153) and morgue (199)
                for colid, stat, sitelist in [
                    (153, Site.STAT_WAITROOM, self._waitroom_sites),
                    (199, Site.STAT_MORGUE, self._morgue_sites)
                ]:
                    result = self._ssb.make_request(
                        'getplotdata',
                        'columnid=%d&time=2184&dateFrom=&dateTo=&sites=all&clouds=undefined&batch=1'
                        % colid)
                    for entry in result:
                        site = entry['VOName']

                        # entry['Time'] is UTC but we are only interested in relative times here
                        timestamp = time.mktime(
                            time.strptime(entry['Time'], '%Y-%m-%dT%H:%M:%S'))
                        if site in latest_status and latest_status[site][
                                0] > timestamp:
                            continue

                        if entry['Status'] == 'in':
                            latest_status[site] = (timestamp, stat)
                        else:
                            latest_status[site] = (timestamp, Site.STAT_READY)

                for site, (_, stat) in latest_status.items():
                    if stat == Site.STAT_WAITROOM:
                        self._waitroom_sites.add(site)
                    elif stat == Site.STAT_MORGUE:
                        self._morgue_sites.add(site)

                self._ssb_cache_timestamp = time.time()

        if site_name in self._waitroom_sites:
            return Site.STAT_WAITROOM
        elif site_name in self._morgue_sites:
            return Site.STAT_MORGUE
        else:
            return Site.STAT_READY

    def get_filename_mapping(self, site_name):  #override
        tfc = self._phedex.make_request('tfc', ['node=' + site_name])['array']

        conversions = {}
        for elem in tfc:
            if elem['element_name'] != 'lfn-to-pfn':
                continue

            if 'destination-match' in elem and re.match(
                    elem['destination-match'], site_name) is None:
                continue

            if 'chain' in elem:
                chain = elem['chain']
            else:
                chain = None

            result = elem['result']
            i = 1
            while '$' in result:
                result = result.replace('$%d' % i, '{%d}' % (i - 1))
                i += 1
                if i == 100:
                    # sanity cutoff; a TFC rule can't possibly be right
                    # with this many capture groups
                    break

            result = result.replace('\\', '')

            if elem['protocol'] in conversions:
                conversions[elem['protocol']].append(
                    (elem['path-match'], result, chain))
            else:
                conversions[elem['protocol']] = [(elem['path-match'], result,
                                                  chain)]

        def make_mapping_chains(rule):
            if rule[2] is None:
                return [[(rule[0], rule[1])]]
            else:
                if rule[2] not in conversions:
                    return None

                chains = []
                for chained_rule in conversions[rule[2]]:
                    mapped_chains = make_mapping_chains(chained_rule)
                    if mapped_chains is None:
                        continue

                    chains.extend(mapped_chains)

                for chain in chains:
                    chain.append((rule[0], rule[1]))

                return chains

        mappings = {}

        for protocol, rules in conversions.items():
            if protocol == 'direct':
                continue

            if protocol == 'srmv2':
                # for historic reasons PhEDEx calls gfal2 srmv2
                protocol = 'gfal2'

            mapping = []

            for rule in rules:
                chains = make_mapping_chains(rule)
                if chains is None:
                    continue

                mapping.extend(chains)

            mappings[protocol] = mapping

        return mappings
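To make the structure of the returned mappings concrete, here is a sketch of how one chain of (path-match, result) pairs could be applied to an LFN. The rule below is made up; real rules come from the site's TFC, with '$1' already rewritten to '{0}' by the code above.

import re

# Hypothetical single-link chain, in the format built by get_filename_mapping
chain = [('/+store/(.*)', 'root://xrootd.example.org//store/{0}')]

def lfn_to_pfn(chain, lfn):
    # apply each (pattern, format) link in order, feeding each result
    # into the next link
    for pattern, result in chain:
        match = re.match(pattern, lfn)
        if match is None:
            return None
        lfn = result.format(*match.groups())
    return lfn

# lfn_to_pfn(chain, '/store/data/Run2016/file.root')
# -> 'root://xrootd.example.org//store/data/Run2016/file.root'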
class PhEDExDeletionInterface(DeletionInterface):
    """Deletion using PhEDEx."""
    def __init__(self, config=None):
        config = Configuration(config)

        DeletionInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.auto_approval = config.get('auto_approval', True)
        self.allow_tape_deletion = config.get('allow_tape_deletion', True)
        self.tape_auto_approval = config.get('tape_auto_approval', False)

        self.deletion_chunk_size = config.get('chunk_size', 50.) * 1.e+12

    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions must be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        if site.storage_type == Site.TYPE_MSS and not self.allow_tape_deletion:
            LOG.warning('Deletion from MSS not allowed by configuration.')
            return []

        if self.allow_tape_deletion and self.auto_approval:
            LOG.warning(
                'You cannot have auto-approved tape deletions. Set auto-approval to False.'
            )
            return []

        # execute the deletions in two steps: one for dataset-level and one for block-level
        datasets = []
        blocks = []

        # maps used later for cloning
        # getting ugly here.. should come up with a better way of making clones
        replica_map = {}
        block_replica_map = {}

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                datasets.append(dataset_replica.dataset)
            else:
                blocks.extend(br.block for br in block_replicas)

                replica_map[dataset_replica.dataset] = dataset_replica
                block_replica_map.update(
                    (br.block, br) for br in block_replicas)

        success = []

        deleted_datasets = self._run_deletion_request(operation_id, site,
                                                      'dataset', datasets,
                                                      comments)

        for dataset in deleted_datasets:
            replica = DatasetReplica(dataset,
                                     site,
                                     growing=False,
                                     group=Group.null_group)
            success.append((replica, None))

        tmp_map = dict((dataset, []) for dataset in replica_map.iterkeys())

        deleted_blocks = self._run_deletion_request(operation_id, site,
                                                    'block', blocks, comments)

        for block in deleted_blocks:
            tmp_map[block.dataset].append(block)

        for dataset, blocks in tmp_map.iteritems():
            replica = DatasetReplica(dataset, site)
            replica.copy(replica_map[dataset])

            success.append((replica, []))
            for block in blocks:
                block_replica = BlockReplica(block, site, Group.null_group)
                block_replica.copy(block_replica_map[block])
                block_replica.last_update = int(time.time())
                success[-1][1].append(block_replica)

        return success

    def _run_deletion_request(self, operation_id, site, level, deletion_list,
                              comments):
        full_catalog = collections.defaultdict(list)

        if level == 'dataset':
            for dataset in deletion_list:
                full_catalog[dataset] = []
        elif level == 'block':
            for block in deletion_list:
                full_catalog[block.dataset].append(block)

        history_sql = 'INSERT INTO `phedex_requests` (`id`, `operation_type`, `operation_id`, `approved`) VALUES (%s, \'deletion\', %s, %s)'

        deleted_items = []

        request_catalog = {}
        chunk_size = 0
        items = []
        while len(full_catalog) != 0:
            dataset, blocks = full_catalog.popitem()
            request_catalog[dataset] = blocks

            if level == 'dataset':
                chunk_size += dataset.size
                items.append(dataset)
            elif level == 'block':
                chunk_size += sum(b.size for b in blocks)
                items.extend(blocks)

            if chunk_size < self.deletion_chunk_size and len(
                    full_catalog) != 0:
                continue

            options = {
                'node': site.name,
                'data': self._phedex.form_catalog_xml(request_catalog),
                'level': level,
                'rm_subscriptions': 'y',
                'comments': comments
            }

            # result = [{'id': <id>}] (item 'request_created' of PhEDEx response) if successful
            try:
                if self._read_only:
                    result = [{'id': 0}]
                else:
                    result = self._phedex.make_request('delete',
                                                       options,
                                                       method=POST)
            except:
                LOG.error('Deletion %s failed.', str(options))

                if self._phedex.last_errorcode == 400:
                    # Sometimes we have invalid data in the list of objects to delete.
                    # PhEDEx throws a 400 error in such a case. We have to then try to identify the
                    # problematic item through trial and error.
                    if len(items) == 1:
                        LOG.error('Could not delete %s from %s', str(items[0]),
                                  site.name)
                    else:
                        LOG.info('Retrying with a reduced item list.')
                        deleted_items.extend(
                            self._run_deletion_request(operation_id, site,
                                                       level,
                                                       items[:len(items) / 2],
                                                       comments))
                        deleted_items.extend(
                            self._run_deletion_request(operation_id, site,
                                                       level,
                                                       items[len(items) / 2:],
                                                       comments))
                else:
                    raise
            else:
                request_id = int(result[0]['id'])  # return value is a string
                LOG.warning('PhEDEx deletion request id: %d', request_id)

                approved = False

                if self._read_only:
                    approved = True

                elif self.auto_approval:
                    try:
                        result = self._phedex.make_request('updaterequest', {
                            'decision': 'approve',
                            'request': request_id,
                            'node': site.name
                        },
                                                           method=POST)
                    except:
                        LOG.error('deletion approval of request %d failed.',
                                  request_id)
                    else:
                        approved = True

                if not self._read_only:
                    self._history.db.query(history_sql, request_id,
                                           operation_id, approved)

                if approved:
                    deleted_items.extend(items)

            request_catalog = {}
            chunk_size = 0
            items = []

        return deleted_items

    def deletion_status(self, request_id):  #override
        request = self._phedex.make_request('deleterequests',
                                            'request=%d' % request_id)
        if len(request) == 0:
            return {}

        node_info = request[0]['nodes']['node'][0]
        site_name = node_info['name']
        last_update = node_info['decided_by']['time_decided']

        status = {}
        for ds_entry in request[0]['data']['dbs']['dataset']:
            status[ds_entry['name']] = (ds_entry['bytes'], ds_entry['bytes'],
                                        last_update)

        return status
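A hedged sketch of the calling convention for schedule_deletions above: the replica list pairs each DatasetReplica with either None (dataset-level deletion) or a list of its BlockReplicas (block-level deletion), and all replicas must be at the same site. The objects below are assumed to come from a loaded inventory.

# Hypothetical usage; 'replica' is a DatasetReplica from the inventory
# and 'deletion_interface' is a configured instance of the class above.
success = deletion_interface.schedule_deletions(
    [(replica, None)],  # None -> delete the entire dataset replica
    operation_id=1,
    comments='disk cleanup')
# 'success' holds cloned (DatasetReplica, None or [BlockReplica]) pairs
# for the items PhEDEx accepted.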
    def __init__(self, config):
        GroupInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
Example #15
class PhEDExDatasetInfoSource(DatasetInfoSource):
    """DatasetInfoSource using PhEDEx and DBS."""
    def __init__(self, config=None):
        if config is None:
            config = Configuration()

        DatasetInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
        self._dbs = DBS(config.get('dbs', None))

    def get_dataset_names(self, include=['*'], exclude=[]):
        dataset_names = []

        exclude_exps = []
        for pattern in exclude:
            exclude_exps.append(re.compile(fnmatch.translate(pattern)))

        def add_datasets(result):
            for entry in result:
                name = entry['dataset']
                for ex_exp in exclude_exps:
                    if ex_exp.match(name):
                        break
                else:
                    # not excluded by args, now check my include/exclude list
                    if self.check_allowed_dataset(name):
                        dataset_names.append(name)

        if len(include) == 1 and include[0] == '/*/*/*':
            # all datasets requested - will do this efficiently
            result = self._dbs.make_request('acquisitioneras')
            sds = [entry['acquisition_era_name'] for entry in result]

            # query DBS in parallel
            args = [('datasets', ['acquisition_era_name=' + sd]) for sd in sds]
            results = Map().execute(self._dbs.make_request, args)
            for result in results:
                add_datasets(result)

            # everything is already collected; skip the per-pattern queries
            return dataset_names

        for in_pattern in include:
            if in_pattern.startswith('/') and in_pattern.count('/') == 3:
                result = self._dbs.make_request('datasets',
                                                ['dataset=' + in_pattern])
                add_datasets(result)

        return dataset_names

    def get_updated_datasets(self, updated_since):  #override
        LOG.warning(
            'PhEDExDatasetInfoSource can only return a list of datasets and blocks that are created since the given timestamp.'
        )

        # query all datasets at block level; PhEDEx filters by creation time
        result = self._phedex.make_request('data', [
            'dataset=/*/*/*', 'level=block',
            'create_since=%d' % updated_since
        ])

        try:
            dataset_entries = result[0]['dataset']
        except:
            return []

        if self.include is not None or self.exclude is not None:
            ientry = 0
            while ientry != len(dataset_entries):
                if self.check_allowed_dataset(dataset_entries[ientry]['name']):
                    ientry += 1
                else:
                    dataset_entries.pop(ientry)

        return Map().execute(self._create_dataset, dataset_entries)

    def get_dataset(self, name, with_files=False):  #override
        ## Get the full dataset-block-file data from PhEDEx
        if not name.startswith('/') or name.count('/') != 3:
            return None

        if not self.check_allowed_dataset(name):
            return None

        def get_dbs_datasets(name, dbs_data):
            dbs_data['datasets'] = self._dbs.make_request(
                'datasets',
                ['dataset=' + name, 'dataset_access_type=*', 'detail=True'])

        def get_dbs_releaseversions(name, dbs_data):
            dbs_data['releaseversions'] = self._dbs.make_request(
                'releaseversions', ['dataset=' + name])

        dbs_data = {}
        th1 = threading.Thread(target=get_dbs_datasets, args=(name, dbs_data))
        th1.start()
        th2 = threading.Thread(target=get_dbs_releaseversions,
                               args=(name, dbs_data))
        th2.start()

        if with_files:
            level = 'file'
        else:
            level = 'block'

        result = self._phedex.make_request(
            'data', ['dataset=' + name, 'level=' + level])

        th1.join()
        th2.join()

        try:
            dataset_entry = result[0]['dataset'][0]
        except:
            return None

        ## Create the dataset object
        dataset = self._create_dataset(dataset_entry, dbs_data)

        ## Fill block and file data
        if 'block' in dataset_entry:
            for block_entry in dataset_entry['block']:
                block = self._create_block(block_entry, dataset)
                dataset.blocks.add(block)

                if with_files and 'file' in block_entry:
                    # See comments in get_block
                    block._files = set()
                    for file_entry in block_entry['file']:
                        block._files.add(self._create_file(file_entry, block))

        return dataset

    def get_block(self, name, with_files=False):  #override
        ## Get the full block-file data from PhEDEx
        if not name.startswith('/') or name.count('/') != 3 or '#' not in name:
            return None

        if not self.check_allowed_dataset(name[:name.find('#')]):
            return None

        if with_files:
            level = 'file'
        else:
            level = 'block'

        result = self._phedex.make_request('data',
                                           ['block=' + name, 'level=' + level])

        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
        except:
            return None

        # Just need a named object
        dataset = Dataset(dataset_entry['name'])

        block = self._create_block(block_entry, dataset)

        if with_files and 'file' in block_entry:
            # _create_block sets size and num_files; just need to update the files list
            # Directly creating the _files set
            # This list will persist (unlike the weak proxy version loaded from inventory), but the returned block
            # from this function is only used temporarily anyway
            block._files = set()
            for file_entry in block_entry['file']:
                block._files.add(self._create_file(file_entry, block))

        return block

    def get_file(self, name):
        ## Get the file data from PhEDEx

        result = self._phedex.make_request('data',
                                           ['file=' + name, 'level=file'])

        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
            file_entry = block_entry['file'][0]
        except:
            return None

        if not self.check_allowed_dataset(dataset_entry['name']):
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
        block = Block(block_name, dataset)

        lfile = self._create_file(file_entry, block)

        return lfile

    def get_files(self, dataset_or_block):  #override
        files = set()

        if type(dataset_or_block) is Dataset:
            result = self._phedex.make_request(
                'data', ['dataset=' + dataset_or_block.name, 'level=file'])
            blocks = dict((b.name, b) for b in dataset_or_block.blocks)
        else:
            result = self._phedex.make_request(
                'data',
                ['block=' + dataset_or_block.full_name(), 'level=file'])
            blocks = {dataset_or_block.name: dataset_or_block}

        try:
            block_entries = result[0]['dataset'][0]['block']
        except:
            return files

        for block_entry in block_entries:
            try:
                file_entries = block_entry['file']
            except:
                continue

            bname = block_entry['name']
            block_name = Block.to_internal_name(bname[bname.find('#') + 1:])
            try:
                block = blocks[block_name]
            except:
                # unknown block! maybe should raise?
                continue

            for file_entry in file_entries:
                files.add(self._create_file(file_entry, block))

        return files

    def _create_dataset(self, dataset_entry, dbs_data=None):
        """
        Create a dataset object with blocks and files from a PhEDEx dataset entry
        """

        dataset = Dataset(dataset_entry['name'],
                          is_open=(dataset_entry['is_open'] == 'y'))

        if 'time_update' in dataset_entry and dataset_entry[
                'time_update'] is not None:
            dataset.last_update = int(dataset_entry['time_update'])
        else:
            dataset.last_update = int(dataset_entry['time_create'])

        ## Get other details of the dataset from DBS
        self._fill_dataset_details(dataset, dbs_data)

        return dataset

    def _create_block(self, block_entry, dataset):
        """
        Create a block object with files from a PhEDEx block entry
        """

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        block = Block(block_name,
                      dataset,
                      size=block_entry['bytes'],
                      num_files=block_entry['files'],
                      is_open=(block_entry['is_open'] == 'y'))

        if 'time_update' in block_entry and block_entry[
                'time_update'] is not None:
            block.last_update = int(block_entry['time_update'])
        else:
            block.last_update = int(block_entry['time_create'])

        return block

    def _create_file(self, file_entry, block):
        adler32 = ''
        crc32 = 0
        for cksum in file_entry['checksum'].split(','):
            if cksum.startswith('adler32'):
                adler32 = cksum[8:]
            elif cksum.startswith('cksum'):
                crc32 = int(cksum[6:])

        lfile = File(file_entry['lfn'],
                     block=block,
                     size=file_entry['size'],
                     checksum=(crc32, adler32))

        return lfile

    def _fill_dataset_details(self, dataset, dbs_data=None):
        if dbs_data is None:
            dbs_data = {}

            if dataset.name.startswith('/') and dataset.name.count('/') == 3:
                dbs_data['datasets'] = self._dbs.make_request(
                    'datasets', [
                        'dataset=' + dataset.name, 'dataset_access_type=*',
                        'detail=True'
                    ])
            else:
                dbs_data['datasets'] = []

            dbs_data['releaseversions'] = self._dbs.make_request(
                'releaseversions', ['dataset=' + dataset.name])

        # 1. status and PD type

        if len(dbs_data['datasets']) != 0:
            dbs_entry = dbs_data['datasets'][0]
            dataset.status = Dataset.status_val(
                dbs_entry['dataset_access_type'])
            dataset.data_type = Dataset.data_type_val(
                dbs_entry['primary_ds_type'])
        else:
            dataset.status = Dataset.STAT_UNKNOWN
            dataset.data_type = Dataset.TYPE_UNKNOWN

        # 2. software version

        if len(dbs_data['releaseversions']) != 0:
            try:
                version = dbs_data['releaseversions'][0]['release_version'][0]
            except KeyError:
                pass
            else:
                matches = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)',
                                   version)
                if matches:
                    cycle, major, minor = map(
                        int, [matches.group(i) for i in range(1, 4)])

                    if matches.group(4):
                        suffix = matches.group(4)[1:]
                    else:
                        suffix = ''

                    dataset.software_version = (cycle, major, minor, suffix)
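As a worked example of the release-version parsing at the end of _fill_dataset_details, the regex splits a CMSSW release name into a numeric (cycle, major, minor) triplet plus an optional suffix:

import re

matches = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)',
                   'CMSSW_10_2_5_patch1')
cycle, major, minor = map(int, [matches.group(i) for i in range(1, 4)])
suffix = matches.group(4)[1:] if matches.group(4) else ''
# (cycle, major, minor, suffix) -> (10, 2, 5, 'patch1');
# for 'CMSSW_10_2_5' the suffix would be ''.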
Example #16
class PhEDExDeletionInterface(DeletionInterface):
    """Deletion using PhEDEx."""
    def __init__(self, config):
        DeletionInterface.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)

        self.auto_approval = config.auto_approval
        self.allow_tape_deletion = config.allow_tape_deletion
        self.tape_auto_approval = config.tape_auto_approval

        self.deletion_chunk_size = config.chunk_size * 1.e+12

    def schedule_deletion(self, replica, comments=''):  #override
        request_mapping = {}

        if replica.site.storage_type == Site.TYPE_MSS and not self.allow_tape_deletion:
            LOG.warning('Deletion from MSS is not allowed by configuration.')
            return request_mapping

        deletion_list = []
        if type(replica) is DatasetReplica:
            replica_blocks = set(r.block for r in replica.block_replicas)

            if replica_blocks == replica.dataset.blocks:
                deletion_list.append(replica.dataset)
                level = 'dataset'
            else:
                deletion_list.extend(replica_blocks)
                level = 'block'

        else:  #BlockReplica
            deletion_list.append(replica.block)
            level = 'block'

        self._run_deletion_request(request_mapping, replica.site, level,
                                   deletion_list, comments)

        return request_mapping

    def schedule_deletions(self, replica_list, comments=''):  #override
        request_mapping = {}

        replicas_by_site = collections.defaultdict(list)
        for replica in replica_list:
            replicas_by_site[replica.site].append(replica)

            if replica.site.storage_type == Site.TYPE_MSS and not self.allow_tape_deletion:
                LOG.warning('Deletion from MSS not allowed by configuration.')
                return {}

        for site, replica_list in replicas_by_site.iteritems():
            # execute the deletions in two steps: one for dataset-level and one for block-level
            deletion_lists = {'dataset': [], 'block': []}

            for replica in replica_list:
                if type(replica) is DatasetReplica:
                    blocks = set(r.block for r in replica.block_replicas)

                    if blocks == replica.dataset.blocks:
                        deletion_lists['dataset'].append(replica.dataset)
                    else:
                        deletion_lists['block'].extend(blocks)

                else:  #BlockReplica
                    deletion_lists['block'].append(replica.block)

            self._run_deletion_request(request_mapping, site, 'dataset',
                                       deletion_lists['dataset'], comments)
            self._run_deletion_request(request_mapping, site, 'block',
                                       deletion_lists['block'], comments)

        return request_mapping

    def _run_deletion_request(self, request_mapping, site, level,
                              deletion_list, comments):
        full_catalog = collections.defaultdict(list)

        if level == 'dataset':
            for dataset in deletion_list:
                full_catalog[dataset] = []
        elif level == 'block':
            for block in deletion_list:
                full_catalog[block.dataset].append(block)

        request_catalog = {}
        chunk_size = 0
        items = []
        while len(full_catalog) != 0:
            dataset, blocks = full_catalog.popitem()
            request_catalog[dataset] = blocks

            if level == 'dataset':
                chunk_size += dataset.size
                items.append(dataset)
            elif level == 'block':
                chunk_size += sum(b.size for b in blocks)
                items.extend(blocks)

            if chunk_size < self.deletion_chunk_size and len(
                    full_catalog) != 0:
                continue

            options = {
                'node': site.name,
                'data': self._phedex.form_catalog_xml(request_catalog),
                'level': level,
                'rm_subscriptions': 'y',
                'comments': comments
            }

            # result = [{'id': <id>}] (item 'request_created' of PhEDEx response) if successful
            if self.dry_run:
                result = [{'id': '0'}]
            else:
                try:
                    result = self._phedex.make_request('delete',
                                                       options,
                                                       method=POST)
                except:
                    if self._phedex.last_errorcode == 400:
                        # Sometimes we have invalid data in the list of objects to delete.
                        # PhEDEx throws a 400 error in such a case. We have to then try to identify the
                        # problematic item through trial and error.
                        if len(items) == 1:
                            LOG.error('Could not delete %s from %s',
                                      str(items[0]), site.name)
                            result = []
                        else:
                            self._run_deletion_request(request_mapping, site,
                                                       level,
                                                       items[:len(items) / 2],
                                                       comments)
                            self._run_deletion_request(request_mapping, site,
                                                       level,
                                                       items[len(items) / 2:],
                                                       comments)
                            result = []
                    else:
                        result = []

            if len(result) != 0:
                request_id = int(result[0]['id'])  # return value is a string
                LOG.warning('PhEDEx deletion request id: %d', request_id)

                approved = False

                if self.dry_run:
                    approved = True

                elif self.auto_approval:
                    try:
                        result = self._phedex.make_request('updaterequest', {
                            'decision': 'approve',
                            'request': request_id,
                            'node': site.name
                        },
                                                           method=POST)
                    except:
                        LOG.error('deletion approval of request %d failed.',
                                  request_id)
                    else:
                        approved = True

                request_mapping[request_id] = (approved, site, items)

            else:
                LOG.error('Deletion %s failed.', str(options))
                # we should probably do something here

            request_catalog = {}
            chunk_size = 0
            items = []

    def deletion_status(self, request_id):  #override
        request = self._phedex.make_request('deleterequests',
                                            'request=%d' % request_id)
        if len(request) == 0:
            return {}

        node_info = request[0]['nodes']['node'][0]
        site_name = node_info['name']
        last_update = node_info['decided_by']['time_decided']

        status = {}
        for ds_entry in request[0]['data']['dbs']['dataset']:
            status[ds_entry['name']] = (ds_entry['bytes'], ds_entry['bytes'],
                                        last_update)

        return status
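Both versions of _run_deletion_request above rely on the same size-based chunking; the standalone sketch below (with a made-up size accessor) shows the pattern in isolation: a chunk is flushed as soon as it reaches the threshold or the input runs out.

def chunk_by_size(items, size_of, limit):
    # 'size_of' maps an item to its size in bytes; 'limit' plays the role
    # of deletion_chunk_size (50 TB by default)
    chunk, total = [], 0
    for item in items:
        chunk.append(item)
        total += size_of(item)
        if total >= limit:
            yield chunk
            chunk, total = [], 0
    if chunk:
        yield chunk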
class InvalidationRequest(WebModule):
    def __init__(self, config):
        WebModule.__init__(self, config)

        self.dbs = DBS()
        self.phedex = PhEDEx()
        self.registry = RegistryDatabase()
        self.authorized_users = list(config.file_invalidation.authorized_users)

    def run(self, caller, request, inventory):
        if caller.name not in self.authorized_users:
            raise AuthorizationError()

        try:
            item = request['item']
        except KeyError:
            raise MissingParameter('item')

        if type(item) is list:
            items = item
        else:
            items = [item]

        invalidated_items = []

        sql = 'INSERT INTO `invalidations` (`item`, `db`, `user_id`, `timestamp`) VALUES (%s, %s, %s, NOW())'

        for item in items:
            invalidated = False

            if item in inventory.datasets:
                # item is a dataset

                result = self.dbs.make_request('datasets', [
                    'dataset=' + item, 'dataset_access_type=*', 'detail=true'
                ])
                if len(result) != 0:
                    status = result[0]['dataset_access_type']
                    if status in ('VALID', 'PRODUCTION'):
                        self.registry.db.query(sql, item, 'dbs', caller.id)

                    for entry in self.dbs.make_request(
                            'files', ['dataset=' + item, 'validFileOnly=1']):
                        self.registry.db.query(sql, entry['logical_file_name'],
                                               'dbs', caller.id)

                    invalidated = True

                result = self.phedex.make_request(
                    'data', ['dataset=' + item, 'level=block'])
                if len(result) != 0:
                    self.registry.db.query(sql, item, 'tmdb', caller.id)
                    invalidated = True

            else:
                try:
                    dataset_name, block_name = Block.from_full_name(item)
                except:
                    lfile = inventory.find_file(item)
                    if lfile is not None:
                        # item is a file

                        result = self.dbs.make_request(
                            'files',
                            ['logical_file_name=' + item, 'validFileOnly=1'])
                        if len(result) != 0:
                            self.registry.db.query(
                                sql, result[0]['logical_file_name'], 'dbs',
                                caller.id)
                            invalidated = True

                        result = self.phedex.make_request(
                            'data', ['file=' + item])
                        if len(result) != 0:
                            self.registry.db.query(sql, item, 'tmdb',
                                                   caller.id)
                            invalidated = True

                else:
                    # item is a block

                    for entry in self.dbs.make_request(
                            'files',
                        ['block_name=' + item, 'validFileOnly=1']):
                        self.registry.db.query(sql, entry['logical_file_name'],
                                               'dbs', caller.id)
                        invalidated = True

                    result = self.phedex.make_request(
                        'data', ['block=' + item, 'level=block'])
                    if len(result) != 0:
                        self.registry.db.query(sql, item, 'tmdb', caller.id)
                        invalidated = True

            if invalidated:
                invalidated_items.append({'item': item})

        return invalidated_items
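A hedged sketch of how this web module might be invoked; the request dict shape follows the code above, while the caller and inventory objects are assumed to be supplied by the web framework.

# Hypothetical invocation: 'item' may be a dataset name, a block full
# name ('/A/B/C#uuid'), an LFN, or a list of any of these.
request = {'item': ['/A/B/C', '/A/B/C#abcd-1234']}
invalidated = module.run(caller, request, inventory)
# -> [{'item': '/A/B/C'}, ...] for each item found in DBS and/or TMDB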
Example #18
    def __init__(self, config):
        GroupInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)
Example #19
    def __init__(self, config):
        ReplicaInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)