Example #1
    def _load_blocks(self, inventory, id_dataset_map, id_block_maps):
        sql = 'SELECT b.`id`, b.`dataset_id`, b.`name`, b.`size`, b.`num_files`, b.`is_open`, UNIX_TIMESTAMP(b.`last_update`) FROM `blocks` AS b'

        if self._mysql.table_exists('datasets_load_tmp'):
            sql += ' INNER JOIN `datasets_load_tmp` AS t ON t.`id` = b.`dataset_id`'

        sql += ' ORDER BY b.`dataset_id`'

        _dataset_id = 0
        dataset = None
        for block_id, dataset_id, name, size, num_files, is_open, last_update in self._mysql.xquery(sql):
            if dataset_id != _dataset_id:
                _dataset_id = dataset_id

                dataset = id_dataset_map[dataset_id]
                dataset.blocks.clear()
                dataset.size = 0
                dataset.num_files = 0

                id_block_map = id_block_maps[dataset_id] = {}

            block = Block(Block.to_internal_name(name), dataset, size,
                          num_files, (is_open == 1), last_update)

            dataset.blocks.add(block)
            dataset.size += block.size
            dataset.num_files += block.num_files

            id_block_map[block_id] = block
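
Because the query orders rows by b.`dataset_id`, all blocks of a dataset arrive consecutively, and the `_dataset_id` sentinel lets the loop reset per-dataset state exactly once per group. A minimal sketch of that grouping pattern, with hypothetical plain tuples standing in for the MySQL rows:

    # Group consecutive rows by dataset_id, as _load_blocks does.
    # The rows are hypothetical stand-ins for self._mysql.xquery(sql).
    rows = [
        (1, 10, 'blockA'),
        (2, 10, 'blockB'),
        (3, 11, 'blockC'),
    ]

    current_id = None
    groups = {}
    for block_id, dataset_id, name in rows:
        if dataset_id != current_id:
            current_id = dataset_id
            groups[dataset_id] = []  # per-dataset state is reset on the boundary
        groups[dataset_id].append(name)

    assert groups == {10: ['blockA', 'blockB'], 11: ['blockC']}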
Example #2
    def get_file(self, name):
        ## Get the file data from PhEDEx

        result = self._phedex.make_request('data',
                                           ['file=' + name, 'level=file'])

        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
            file_entry = block_entry['file'][0]
        except (IndexError, KeyError):
            return None

        if not self.check_allowed_dataset(dataset_entry['name']):
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
        block = Block(block_name, dataset)

        lfile = self._create_file(file_entry, block)

        return lfile
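
Several snippets in this section split a PhEDEx block name on the '#' separator: the part before it is the dataset name, the part after it is the block's own name, which then goes through Block.to_internal_name. A standalone sketch of the split (the helper name is ours, not project code):

    def split_full_block_name(full_name):
        # Hypothetical helper: '/A/B/C#abcd-1234' -> ('/A/B/C', 'abcd-1234')
        pos = full_name.find('#')
        if pos == -1:
            raise ValueError('not a block full name: %s' % full_name)
        return full_name[:pos], full_name[pos + 1:]

    assert split_full_block_name('/A/B/C#abcd') == ('/A/B/C', 'abcd')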
Example #3
    def load(self, inventory):
        for site in inventory.sites.itervalues():
            if site.storage_type != Site.TYPE_MSS:
                continue

            requests = self._phedex.make_request(
                'transferrequests', ['node=' + site.name, 'approval=pending'])
            for request in requests:
                for dest in request['destinations']['node']:
                    if dest['name'] != site.name:
                        continue

                    if 'decided_by' in dest:
                        break

                    for dataset_entry in request['data']['dbs']['dataset']:
                        try:
                            dataset = inventory.datasets[dataset_entry['name']]
                        except KeyError:
                            continue

                        dataset.attr['tape_copy_requested'] = True

                    for block_entry in request['data']['dbs']['block']:
                        dataset_name, block_name = Block.from_full_name(
                            block_entry['name'])
                        try:
                            dataset = inventory.datasets[dataset_name]
                        except KeyError:
                            continue

                        # just label the entire dataset
                        dataset.attr['tape_copy_requested'] = True
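
The loop communicates with later stages purely by setting a flag in each dataset's attr dictionary. A minimal sketch of that tagging pattern, with a hypothetical stand-in for the inventory Dataset object:

    class FakeDataset(object):
        # Hypothetical stand-in for the inventory Dataset.
        def __init__(self, name):
            self.name = name
            self.attr = {}

    inventory_datasets = {'/A/B/C': FakeDataset('/A/B/C')}

    for name in ['/A/B/C', '/X/Y/Z']:  # names from pending requests
        try:
            dataset = inventory_datasets[name]
        except KeyError:
            continue  # not in the inventory: skipped, as load() does
        dataset.attr['tape_copy_requested'] = True

    assert 'tape_copy_requested' in inventory_datasets['/A/B/C'].attr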
Example #4
    def load(self, inventory):
        # collect the names of items that are not yet activated, or are activated but not queued
        sql = 'SELECT i.`item` FROM `copy_request_items` AS i INNER JOIN `copy_requests` AS r ON r.`id` = i.`request_id`'
        sql += ' WHERE r.`status` = \'new\''
        items = self.registry.db.query(sql)
        items += self.registry.db.query(
            'SELECT `item` FROM `active_copies` WHERE `status` = \'new\'')

        for item_name in items:
            try:
                dataset_name, block_name = Block.from_full_name(item_name)
            except ObjectError:
                dataset_name, block_name = item_name, None

            try:
                dataset = inventory.datasets[dataset_name]
            except KeyError:
                continue

            if block_name is not None:
                block = dataset.find_block(block_name)
                if block is None:
                    continue

            dataset.attr['unhandled_copy_exists'] = True
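
Block.from_full_name raises ObjectError for anything without a block part, so the except clause doubles as the dataset-name branch. A sketch of the same dispatch with stand-ins for the project classes:

    class ObjectError(Exception):
        # Stand-in for the project's ObjectError.
        pass

    def from_full_name(item):
        # Stand-in for Block.from_full_name.
        if '#' not in item:
            raise ObjectError(item)
        dataset_name, block_name = item.split('#', 1)
        return dataset_name, block_name

    for item in ['/A/B/C#blk1', '/A/B/C']:
        try:
            dataset_name, block_name = from_full_name(item)
        except ObjectError:
            dataset_name, block_name = item, None
        # dataset-level items arrive here with block_name None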
Example #5
    def _create_block(self, block_entry, dataset):
        """
        Create a block object with files from a PhEDEx block entry
        """

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        block = Block(block_name,
                      dataset,
                      size=block_entry['bytes'],
                      num_files=block_entry['files'],
                      is_open=(block_entry['is_open'] == 'y'))

        if block_entry.get('time_update') is not None:
            block.last_update = int(block_entry['time_update'])
        else:
            block.last_update = int(block_entry['time_create'])

        return block
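
The update timestamp falls back to the creation timestamp when PhEDEx returns no time_update (or a null one). The same logic reads more compactly with dict.get; a sketch under the PhEDEx block-entry layout assumed above:

    def pick_last_update(block_entry):
        # Prefer time_update, fall back to time_create.
        value = block_entry.get('time_update')
        if value is None:
            value = block_entry['time_create']
        return int(value)

    assert pick_last_update({'time_update': None, 'time_create': 5}) == 5
    assert pick_last_update({'time_update': 7, 'time_create': 5}) == 7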
Example #6
    def make_block_replicas(dataset_entries, replica_maker):
        """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

        block_replicas = []

        for dataset_entry in dataset_entries:
            dataset = Dataset(dataset_entry['name'])

            for block_entry in dataset_entry['block']:
                name = block_entry['name']
                try:
                    block_name = Block.to_internal_name(name[name.find('#') + 1:])
                except ValueError:  # invalid name
                    continue

                block = Block(block_name, dataset, block_entry['bytes'])

                block_replicas.extend(replica_maker(block, block_entry))

        return block_replicas
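
make_block_replicas leaves all site- and group-specific work to the replica_maker callback, so the same walk over PhEDEx entries can serve several backends. A sketch of a trivial callback under that assumed signature:

    # Hypothetical replica_maker: emits (block, node_name) pairs instead of
    # real BlockReplica objects, one pair per 'replica' sub-entry.
    def pair_maker(block, block_entry):
        return [(block, entry['node']) for entry in block_entry.get('replica', [])]

    # usage sketch: block_replicas = make_block_replicas(dataset_entries, pair_maker)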
Example #7
    def make_block_replicas(block_entries,
                            replica_maker,
                            site_check=None,
                            dataset_check=None):
        """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

        dataset = None
        block_replicas = []

        for block_entry in block_entries:
            try:
                dataset_name, block_name = Block.from_full_name(
                    block_entry['name'])
            except ObjectError:  # invalid name
                continue

            if dataset is None or dataset.name != dataset_name:
                if dataset_check and not dataset_check(dataset_name):
                    continue

                try:
                    dataset = Dataset(dataset_name)
                except ObjectError:
                    # invalid name
                    dataset = None

            if dataset is None:
                continue

            block = Block(block_name, dataset, block_entry['bytes'])
            if block.size is None:
                block.size = 0

            block_replicas.extend(
                replica_maker(block, block_entry, site_check=site_check))

        return block_replicas
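
Both optional checks follow the convention that None means "no filter". A compact sketch of that convention:

    def keep(name, check=None):
        # None disables the filter, as site_check/dataset_check do above.
        return check is None or check(name)

    assert keep('/A/B/C')
    assert not keep('/A/B/C', lambda n: n.startswith('/Test/'))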
Example #8
    def run(self, caller, request, inventory):
        if caller.name not in self.authorized_users:
            raise AuthorizationError()

        try:
            item = request['item']
        except KeyError:
            raise MissingParameter('item')

        if type(item) is list:
            items = item
        else:
            items = [item]

        cancelled_items = []

        sql = 'DELETE FROM `invalidations` WHERE `item` = %s AND `user_id` = %s'

        for item in items:
            deleted = self.registry.db.query(sql, item, caller.id)
            if deleted != 0:
                cancelled_items.append({'item': item})

            if item in inventory.datasets:
                # item is a dataset

                for entry in self.dbs.make_request(
                        'files', ['dataset=' + item, 'validFileOnly=1']):
                    self.registry.db.query(sql, entry['logical_file_name'],
                                           caller.id)

            else:
                try:
                    dataset_name, block_name = Block.from_full_name(item)
                except ObjectError:
                    pass
                else:
                    # item is a block

                    for entry in self.dbs.make_request(
                            'files', ['block_name=' + item, 'validFileOnly=1']):
                        self.registry.db.query(sql, entry['logical_file_name'],
                                               caller.id)

        return cancelled_items
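
The same parameterized DELETE serves both the item itself and every logical file name DBS reports under it, so cancelling a dataset or block also cancels its file-level invalidations. A sketch of the fan-out with a hypothetical in-memory table in place of the registry:

    # Hypothetical stand-ins for the registry table and the DBS file listing.
    invalidations = set([('/A/B/C', 42), ('/store/f1.root', 42), ('/store/f2.root', 42)])
    files_in = {'/A/B/C': ['/store/f1.root', '/store/f2.root']}

    def delete(item, user_id):
        # Mimics the DELETE query: returns the number of rows removed.
        try:
            invalidations.remove((item, user_id))
            return 1
        except KeyError:
            return 0

    item, user = '/A/B/C', 42
    delete(item, user)
    for lfn in files_in.get(item, []):  # fan out to file-level entries
        delete(lfn, user)

    assert len(invalidations) == 0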
Example #9
    def postprocess(self, cycle_number, copy_list):  # override
        """
        Create active copy entries for accepted copies.
        """

        for request in self.activated_requests:
            updated = False

            for action in request.actions:
                try:
                    dataset_name, block_name = Block.from_full_name(action.item)
                except ObjectError:
                    dataset_name = action.item
                    block_name = None

                for replica in copy_list:
                    if replica.site.name != action.site:
                        continue

                    if replica.growing:
                        # full dataset copy - dataset and block requests are both queued
                        if dataset_name == replica.dataset.name:
                            action.status = RequestAction.ST_QUEUED

                    else:
                        # match block-by-block
                        if block_name is None:
                            # dataset request
                            continue

                        for block_replica in replica.block_replicas:
                            if block_name == block_replica.block.real_name():
                                action.status = RequestAction.ST_QUEUED
                                break

                    if action.status == RequestAction.ST_QUEUED:
                        updated = True
                        # action got queued - no need to check other replicas
                        break

            if updated:
                self.request_manager.update_request(request)
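
An action is marked queued as soon as one matching replica is found; the break then stops the scan over copy_list for that action. The shape is a first-match-wins search, sketched generically:

    def first_match(items, predicate):
        # Returns the first item satisfying predicate, or None.
        for item in items:
            if predicate(item):
                return item
        return None

    assert first_match([1, 4, 6], lambda x: x % 2 == 0) == 4
    assert first_match([1, 3, 5], lambda x: x % 2 == 0) is None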
Example #12
    def get_files(self, dataset_or_block):  #override
        files = set()

        if type(dataset_or_block) is Dataset:
            result = self._phedex.make_request(
                'data', ['dataset=' + dataset_or_block.name, 'level=file'])
            blocks = dict((b.name, b) for b in dataset_or_block.blocks)
        else:
            result = self._phedex.make_request(
                'data',
                ['block=' + dataset_or_block.full_name(), 'level=file'])
            blocks = {dataset_or_block.name: dataset_or_block}

        try:
            block_entries = result[0]['dataset'][0]['block']
        except (IndexError, KeyError):
            return files

        for block_entry in block_entries:
            try:
                file_entries = block_entry['file']
            except KeyError:
                continue

            bname = block_entry['name']
            block_name = Block.to_internal_name(bname[bname.find('#') + 1:])
            try:
                block = blocks[block_name]
            except KeyError:
                # unknown block! maybe should raise?
                continue

            for file_entry in file_entries:
                files.add(self._create_file(file_entry, block))

        return files
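
File entries come back grouped by block name, so the snippet builds a name-to-block lookup up front and silently skips blocks it does not know. A sketch of that tolerant lookup with plain dictionaries:

    known_blocks = {'b1': 'block-object-1'}  # hypothetical internal blocks
    response = [{'name': '/A/B/C#b1'}, {'name': '/A/B/C#b9'}]  # PhEDEx-style entries

    matched = []
    for entry in response:
        bname = entry['name']
        block = known_blocks.get(bname[bname.find('#') + 1:])
        if block is None:
            continue  # unknown block: skipped, as get_files does
        matched.append(block)

    assert matched == ['block-object-1']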
Example #13
    def update(self, inventory):
        for dataset in inventory.datasets.itervalues():
            try:
                dataset.attr.pop('locked_blocks')
            except KeyError:
                pass

        for source, content_type, site_pattern, lock_url in self._sources.itervalues():
            if lock_url is not None:
                # check that the lock files themselves are not locked
                while True:
                    # Hacky, but this is temporary anyway
                    opener = urllib2.build_opener(
                        webservice.HTTPSCertKeyHandler(Configuration()))
                    opener.addheaders.append(('Accept', 'application/json'))
                    request = urllib2.Request(lock_url)
                    try:
                        opener.open(request)
                    except urllib2.HTTPError as err:
                        if err.code == 404:
                            # file not found -> no lock
                            break
                        else:
                            raise

                    LOG.info(
                        'Lock files are being produced. Waiting 60 seconds.')
                    time.sleep(60)

            LOG.info('Retrieving lock information from %s', source.url_base)

            data = source.make_request()

            if content_type == WebReplicaLock.LIST_OF_DATASETS:
                # simple list of datasets
                for dataset_name in data:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s',
                                  source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if dataset.replicas is None:
                        continue

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    for replica in dataset.replicas:
                        if site_pattern is not None and not fnmatch.fnmatch(
                                replica.site.name, site_pattern):
                            continue

                        if replica.site in locked_blocks:
                            locked_blocks[replica.site].update(
                                brep.block for brep in replica.block_replicas)
                        else:
                            locked_blocks[replica.site] = set(
                                brep.block for brep in replica.block_replicas)

            elif content_type == WebReplicaLock.CMSWEB_LIST_OF_DATASETS:
                # data['result'] -> simple list of datasets
                for dataset_name in data['result']:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s',
                                  source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if dataset.replicas is None:
                        continue

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    for replica in dataset.replicas:
                        if site_pattern is not None and not fnmatch.fnmatch(
                                replica.site.name, site_pattern):
                            continue

                        if replica.site in locked_blocks:
                            locked_blocks[replica.site].update(
                                brep.block for brep in replica.block_replicas)
                        else:
                            locked_blocks[replica.site] = set(
                                brep.block for brep in replica.block_replicas)

            elif content_type == WebReplicaLock.SITE_TO_DATASETS:
                # data = {site: {dataset: info}}
                for site_name, objects in data.items():
                    try:
                        site = inventory.sites[site_name]
                    except KeyError:
                        LOG.debug('Unknown site %s in %s', site_name,
                                  source.url_base)
                        continue

                    for object_name, info in objects.items():
                        if not info['lock']:
                            LOG.debug('Object %s is not locked at %s',
                                      object_name, site_name)
                            continue

                        if '#' in object_name:
                            dataset_name, block_real_name = object_name.split('#')
                        else:
                            dataset_name = object_name
                            block_real_name = None

                        try:
                            dataset = inventory.datasets[dataset_name]
                        except KeyError:
                            LOG.debug('Unknown dataset %s in %s', dataset_name,
                                      source.url_base)
                            continue

                        replica = site.find_dataset_replica(dataset)
                        if replica is None:
                            LOG.debug('Replica of %s is not at %s in %s',
                                      dataset_name, site_name, source.url_base)
                            continue

                        if block_real_name is None:
                            blocks = list(dataset.blocks)
                        else:
                            block = dataset.find_block(
                                Block.to_internal_name(block_real_name))
                            if block is None:
                                LOG.debug('Unknown block %s of %s in %s',
                                          block_real_name, dataset_name,
                                          source.url_base)
                                continue

                            blocks = [block]

                        try:
                            locked_blocks = dataset.attr['locked_blocks']
                        except KeyError:
                            locked_blocks = dataset.attr['locked_blocks'] = {}

                        if site in locked_blocks:
                            locked_blocks[site].update(blocks)
                        else:
                            locked_blocks[site] = set(blocks)
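
Every branch that populates locked_blocks implements "get or create a set for this site, then add blocks to it". dict.setdefault expresses the same thing in one call; a sketch:

    locked_blocks = {}

    def lock(site, blocks):
        # Equivalent to the update()/set() branches above.
        locked_blocks.setdefault(site, set()).update(blocks)

    lock('T1_X_Buffer', ['blk1'])
    lock('T1_X_Buffer', ['blk2'])
    assert locked_blocks == {'T1_X_Buffer': set(['blk1', 'blk2'])}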
Example #14
    def get_requests(self, inventory, policy):  #override
        """
        1. Request all active transfers in new state (these were not queued in the last cycle)
        2. Find all transfer requests with status new.
        3. Decide whether to accept the request. Set status accordingly.
        4. Find the destinations if wildcard was used.
        """

        partition = inventory.partitions[policy.partition_name]

        overwritten_groups = [
            inventory.groups[name] for name in self.overwritten_groups
        ]

        self.activated_requests = []

        # full list of blocks to be proposed to Dealer
        blocks_to_propose = {}  # {site: {dataset: set of blocks}}

        now = int(time.time())

        # Re-request new actions within activated requests

        self.request_manager.lock()
        active_requests = self.request_manager.get_requests(
            statuses=[Request.ST_ACTIVATED])

        activation_list = []

        for request in active_requests.itervalues():
            updated = False
            to_be_activated = False

            for action in request.actions:
                if action.status != RequestAction.ST_NEW:
                    continue

                try:
                    site = inventory.sites[action.site]
                except KeyError:
                    action.status = RequestAction.ST_FAILED
                    action.last_update = now
                    updated = True
                    continue

                try:
                    dataset_name, block_name = Block.from_full_name(action.item)
                except ObjectError:
                    # action.item is (supposed to be) a dataset name

                    try:
                        dataset = inventory.datasets[action.item]
                    except KeyError:
                        action.status = RequestAction.ST_FAILED
                        action.last_update = now
                        updated = True
                        continue

                    existing_replica = site.find_dataset_replica(dataset)

                    if existing_replica is not None:
                        if existing_replica.is_complete():
                            action.status = RequestAction.ST_COMPLETED
                        else:
                            # it was queued by someone
                            action.status = RequestAction.ST_QUEUED
                        action.last_update = now
                        updated = True

                    else:
                        activation_list.append((dataset, site))
                        to_be_activated = True

                else:
                    # action.item is a block name

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        action.status = RequestAction.ST_FAILED
                        action.last_update = now
                        updated = True
                        continue

                    block = dataset.find_block(block_name)

                    if block is None:
                        action.status = RequestAction.ST_FAILED
                        action.last_update = now
                        updated = True
                        continue

                    existing_replica = block.find_replica(site)

                    if existing_replica is not None:
                        if existing_replica.is_complete():
                            action.status = RequestAction.ST_COMPLETED
                        else:
                            action.status = RequestAction.ST_QUEUED
                        action.last_update = now
                        updated = True

                    else:
                        activation_list.append((block, site))
                        to_be_activated = True

            if updated:
                self.request_manager.update_request(request)

            if to_be_activated:
                self.activated_requests.append(request)

        self.request_manager.unlock()

        for item, site in activation_list:
            try:
                site_blocks = blocks_to_propose[site]
            except KeyError:
                site_blocks = blocks_to_propose[site] = {}

            if type(item) is Dataset:
                site_blocks[item] = set(item.blocks)
            else:
                dataset = item.dataset
                try:
                    blocks = site_blocks[dataset]
                except KeyError:
                    blocks = site_blocks[dataset] = set()

                blocks.add(item)

        ## deal with new requests
        self.request_manager.lock()
        new_requests = self.request_manager.get_requests(
            statuses=[Request.ST_NEW])

        def reject(request, reason):
            request.status = Request.ST_REJECTED
            request.reject_reason = reason
            self.request_manager.update_request(request)

        for request in new_requests.itervalues():
            try:
                group = inventory.groups[request.group]
            except KeyError:
                reject(request, 'Invalid group name %s' % request.group)
                continue

            invalid_items = []
            datasets = request.find_items(inventory, invalid_items)
            sites = filter(lambda s: s in policy.target_sites,
                           request.find_sites(inventory))

            if len(invalid_items) != 0:
                reject(request,
                       'Invalid item names: [%s]' % ','.join(invalid_items))
                continue

            if len(sites) == 0:
                reject(request, 'Target sites not available for transfers')
                continue

            # convert to DealerRequests
            proto_dealer_requests = []

            # process the items list
            for dataset, blocks in datasets.iteritems():
                if blocks is None:
                    if dataset.size > self.max_size:
                        reject(
                            request, 'Dataset %s is too large (>%.0f TB)' %
                            (dataset.name, self.max_size * 1.e-12))
                        break

                    item = dataset

                else:
                    total_size = sum(b.size for b in blocks)

                    if total_size > self.max_size:
                        reject(
                            request,
                            'Request size for %s too large (>%.0f TB)' %
                            (dataset.name, self.max_size * 1.e-12))
                        break

                    if total_size > float(
                            dataset.size) * self.block_request_max:
                        # if the total size of requested blocks is large enough, just copy the dataset
                        # covers the case where we actually have the full list of blocks (if block_request_max is less than 1)
                        item = dataset
                    else:
                        item = list(blocks)

                proto_dealer_requests.append(DealerRequest(item, group=group))

            if request.status == Request.ST_REJECTED:
                continue

            # list of (item, site) to be activated (not necessarily proposed to dealer - there can be another request for the same item-site)
            activation_list = []
            # list of dealer proposals
            new_dealer_requests = []

            # find destinations (request.n times) for each item
            for proto_request in proto_dealer_requests:
                # try to make a dealer request for all requests, except when there is a full copy of the item

                if request.n == 0:
                    # make one copy at each site

                    for destination in sites:
                        dealer_request = DealerRequest(proto_request.item(),
                                                       destination=destination)

                        if dealer_request.item_already_exists() == 2:
                            # nothing to do for this one
                            continue

                        rejection_reason = policy.check_destination(
                            dealer_request, partition)
                        if rejection_reason is not None:
                            reject(
                                request, 'Cannot copy %s to %s' %
                                (dealer_request.item_name(), destination.name))
                            break

                        new_dealer_requests.append(dealer_request)

                    if request.status == Request.ST_REJECTED:
                        break

                else:
                    # total of n copies
                    candidate_sites = []
                    num_new = request.n

                    # bring sites where the item already exists first (may want to just "flip" the ownership)
                    sites_and_existence = []
                    for destination in sites:
                        exists = proto_request.item_already_exists(
                            destination)  # 0, 1, or 2
                        if exists != 0:
                            sites_and_existence.insert(0,
                                                       (destination, exists))
                        else:
                            sites_and_existence.append((destination, exists))

                    for destination, exists in sites_and_existence:
                        if num_new == 0:
                            break

                        dealer_request = DealerRequest(proto_request.item(),
                                                       destination=destination)

                        # copies proposed by other requests -> just activate
                        try:
                            proposed_blocks = blocks_to_propose[destination][
                                dealer_request.dataset]
                        except KeyError:
                            pass
                        else:
                            if dealer_request.blocks is not None:
                                if set(dealer_request.blocks) <= proposed_blocks:
                                    num_new -= 1
                                    for block in dealer_request.blocks:
                                        activation_list.append(
                                            (block.full_name(),
                                             dealer_request.destination.name,
                                             now))

                                    continue

                            else:
                                if dealer_request.dataset.blocks == proposed_blocks:
                                    num_new -= 1
                                    activation_list.append(
                                        (dealer_request.item_name(),
                                         dealer_request.destination.name, now))

                                    continue

                        # if the item already exists, it's a complete copy - don't activate, don't propose
                        if exists == 2:
                            num_new -= 1
                        elif exists == 1:
                            # if the current group can be overwritten, make a request
                            # otherwise skip
                            single_owner = dealer_request.item_owned_by()  # None if owned by multiple groups
                            if single_owner is not None and single_owner in overwritten_groups:
                                new_dealer_requests.append(dealer_request)
                                num_new -= 1
                        else:
                            candidate_sites.append(destination)

                    for icopy in range(num_new):
                        dealer_request = DealerRequest(proto_request.item())
                        # pick a destination randomly (weighted by available space)
                        policy.find_destination_for(dealer_request,
                                                    partition,
                                                    candidates=candidate_sites)

                        if dealer_request.destination is None:
                            # if an item cannot fill all of its num_new destinations, reject the request
                            reject(
                                request,
                                'Destination %d for %s not available' %
                                (icopy, dealer_request.item_name()))
                            break

                        candidate_sites.remove(dealer_request.destination)
                        new_dealer_requests.append(dealer_request)

                # if request.n == 0, else

                if request.status == Request.ST_REJECTED:
                    break

            # for each item in request

            if request.status == Request.ST_REJECTED:
                continue

            if len(new_dealer_requests) == 0 and len(activation_list) == 0:
                # nothing to do
                request.status = Request.ST_COMPLETED
                self.request_manager.update_request(request)
                continue

            # finally add to the returned requests
            for dealer_request in new_dealer_requests:
                try:
                    site_blocks = blocks_to_propose[dealer_request.destination]
                except KeyError:
                    site_blocks = blocks_to_propose[
                        dealer_request.destination] = {}

                if dealer_request.blocks is not None:
                    try:
                        blocks = site_blocks[dealer_request.dataset]
                    except KeyError:
                        blocks = site_blocks[dealer_request.dataset] = set()

                    blocks.update(dealer_request.blocks)

                    for block in dealer_request.blocks:
                        activation_list.append(
                            (block.full_name(),
                             dealer_request.destination.name, now))

                else:
                    site_blocks[dealer_request.dataset] = set(
                        dealer_request.dataset.blocks)

                    activation_list.append(
                        (dealer_request.item_name(),
                         dealer_request.destination.name, now))

            # create actions and set request status to ACTIVATED
            request.activate(activation_list)
            self.request_manager.update_request(request)

            self.activated_requests.append(request)

        self.request_manager.unlock()

        # throw away all the DealerRequest objects we've been using and form the final proposal
        dealer_requests = []
        for site, block_list in blocks_to_propose.iteritems():
            for dataset, blocks in block_list.iteritems():
                if blocks == dataset.blocks:
                    dealer_requests.append(
                        DealerRequest(dataset, destination=site))
                else:
                    dealer_requests.append(
                        DealerRequest(list(blocks), destination=site))

        return dealer_requests
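
The closing loop collapses the accumulated {site: {dataset: blocks}} map into one proposal per site and dataset, promoting a block set to a whole-dataset request when it covers every block. A sketch of that promotion test with hypothetical block names:

    dataset_blocks = set(['b1', 'b2', 'b3'])  # hypothetical full block set

    def proposal(blocks):
        # Whole dataset if the set is complete, otherwise the explicit list.
        if blocks == dataset_blocks:
            return 'dataset'
        return sorted(blocks)

    assert proposal(set(['b1', 'b2', 'b3'])) == 'dataset'
    assert proposal(set(['b1'])) == ['b1']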
Example #16
    def get_list(self, inventory):
        all_locks = []  # [(item, site)]

        for source, content_type, site_pattern, lock_url in self._sources.itervalues():
            if lock_url is not None and isinstance(lock_url, basestring):
                # check that the lock files themselves are not locked
                while True:
                    # Hacky, but this is temporary anyway
                    opener = urllib2.build_opener(
                        webservice.HTTPSCertKeyHandler(Configuration()))
                    opener.addheaders.append(('Accept', 'application/json'))
                    request = urllib2.Request(lock_url)
                    try:
                        opener.open(request)
                    except urllib2.HTTPError as err:
                        if err.code == 404:
                            # file not found -> no lock
                            break
                        else:
                            raise

                    LOG.info(
                        'Lock files are being produced. Waiting 60 seconds.')
                    time.sleep(60)
            elif lock_url is not None:
                # lock_url is a pair of Oracle DB queries: the first checks whether
                # the locks themselves are locked, the second retrieves the locks
                # source is automatically an OracleService
                try:
                    locked = True
                    while locked:
                        locked = False
                        locks = source.make_request(lock_url[0].replace('`', '"'))
                        for lock in locks:
                            if lock:
                                locked = True
                                break
                        if not locked:
                            break

                        LOG.info(
                            'Locks are being produced. Waiting 60 seconds.')
                        time.sleep(60)
                except Exception as err:
                    LOG.error(err)

            if site_pattern is None:
                site_re = None
            else:
                site_re = re.compile(fnmatch.translate(site_pattern))

            LOG.info('Retrieving lock information from %s', source)

            try:
                data = source.make_request()
                LOG.info("This was a standard WEB RESTAPI request")
            except TypeError:
                # OracleService expects a query text
                data = source.make_request(lock_url[1].replace('`', '"'))
                LOG.info("This was an Oracle DB request")

            if content_type == WebReplicaLock.LIST_OF_DATASETS:
                # simple list of datasets
                for dataset_name in data:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s', source)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source)
                        continue

                    if site_re is not None:
                        for replica in dataset.replicas:
                            if not site_re.match(replica.site.name):
                                continue

                            all_locks.append((dataset, replica.site))
                    else:
                        all_locks.append((dataset, None))

            elif content_type == WebReplicaLock.CMSWEB_LIST_OF_DATASETS:
                # data['result'] -> simple list of datasets
                for dataset_name in data['result']:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s', source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if site_re is not None:
                        for replica in dataset.replicas:
                            if not site_re.match(replica.site.name):
                                continue

                            all_locks.append((dataset, replica.site))
                    else:
                        all_locks.append((dataset, None))

            elif content_type == WebReplicaLock.CMSWEB_LIST_OF_PARENT_DATASETS:
                # data['result'][0]['parentlocks'] -> simple list of datasets
                for dataset_name in data['result'][0]['parentlocks']:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s', source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if site_re is not None:
                        for replica in dataset.replicas:
                            if not site_re.match(replica.site.name):
                                continue

                            all_locks.append((dataset, replica.site))
                    else:
                        all_locks.append((dataset, None))

            elif content_type == WebReplicaLock.SITE_TO_DATASETS:
                # data = {site: {dataset: info}}
                for site_name, objects in data.items():
                    try:
                        site = inventory.sites[site_name]
                    except KeyError:
                        LOG.debug('Unknown site %s in %s', site_name,
                                  source.url_base)
                        continue

                    for object_name, info in objects.items():
                        if not info['lock']:
                            LOG.debug('Object %s is not locked at %s',
                                      object_name, site_name)
                            continue

                        try:
                            dataset_name, block_name = Block.from_full_name(
                                object_name)
                        except ObjectError:
                            dataset_name, block_name = object_name, None

                        try:
                            dataset = inventory.datasets[dataset_name]
                        except KeyError:
                            LOG.debug('Unknown dataset %s in %s', dataset_name,
                                      source.url_base)
                            continue

                        replica = site.find_dataset_replica(dataset)
                        if replica is None:
                            LOG.debug('Replica of %s is not at %s in %s',
                                      dataset_name, site_name, source.url_base)
                            continue

                        if block_name is None:
                            all_locks.append((dataset, site))
                        else:
                            block_replica = replica.find_block_replica(
                                block_name)
                            if block_replica is None:
                                LOG.debug('Unknown block %s in %s',
                                          object_name, source.url_base)
                                continue

                            all_locks.append((block_replica.block, site))

        return all_locks
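
A minimal standalone sketch (hypothetical site names) of the fnmatch-to-regex translation used above to restrict locks to matching replica sites:

import fnmatch
import re

# fnmatch.translate turns a shell-style wildcard into an anchored regex,
# which is how site_re is built from site_pattern above
site_re = re.compile(fnmatch.translate('SITE_US_*'))

for name in ('SITE_US_A', 'SITE_DE_B', 'SITE_US_C'):
    if site_re.match(name):
        print(name + ' matches the site pattern')
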
    def get_replicas(self, site=None, dataset=None, block=None):  #override
        if site is None:
            site_check = self.check_allowed_site
        else:
            site_check = None
            if not self.check_allowed_site(site):
                return []

        if dataset is None and block is None:
            dataset_check = self.check_allowed_dataset
        else:
            dataset_check = None
            if dataset is not None:
                if not self.check_allowed_dataset(dataset):
                    return []
            if block is not None:
                if not self.check_allowed_dataset(block[:block.find('#')]):
                    return []

        options = []
        if site is not None:
            options.append('node=' + site)
        if dataset is not None:
            options.append('dataset=' + dataset)
        if block is not None:
            options.append('block=' + block)

        LOG.info('get_replicas(%s)  Fetching the list of replicas from PhEDEx',
                 ','.join(options))

        if len(options) == 0:
            return []

        block_entries = self._phedex.make_request('blockreplicas',
                                                  options,
                                                  timeout=7200)

        parallelizer = Map()
        parallelizer.timeout = 7200

        # Automatically starts a thread as we add inputs (block entries)
        combine_file = parallelizer.get_starter(self._combine_file_info)

        for block_entry in block_entries:
            for replica_entry in block_entry['replica']:
                if replica_entry['complete'] == 'n':
                    break
            else:
                continue

            # there is at least one incomplete replica
            try:
                dataset_name, block_name = Block.from_full_name(
                    block_entry['name'])
            except ObjectError:  # invalid name
                continue

            if dataset_check and not dataset_check(dataset_name):
                continue

            combine_file.add_input(block_entry)

        combine_file.close()

        # _combine_file_info alters block_entries directly - no need to deal with output
        combine_file.get_outputs()

        block_replicas = PhEDExReplicaInfoSource.make_block_replicas(
            block_entries,
            PhEDExReplicaInfoSource.maker_blockreplicas,
            site_check=site_check,
            dataset_check=dataset_check)

        # Also use subscriptions call which has a lower latency than blockreplicas
        # For example, group change on a block replica at time T may not show up in blockreplicas until up to T + 15 minutes
        # while in subscriptions it is visible within a few seconds
        # But subscriptions call without a dataset or block takes too long
        if dataset is None and block is None:
            return block_replicas

        indexed = collections.defaultdict(dict)
        for replica in block_replicas:
            indexed[(replica.site.name,
                     replica.block.dataset.name)][replica.block.name] = replica

        dataset_entries = self._phedex.make_request('subscriptions',
                                                    options,
                                                    timeout=3600)

        for dataset_entry in dataset_entries:
            dataset_name = dataset_entry['name']

            if not self.check_allowed_dataset(dataset_name):
                continue

            try:
                subscriptions = dataset_entry['subscription']
            except KeyError:
                pass
            else:
                for sub_entry in subscriptions:
                    site_name = sub_entry['node']

                    if not self.check_allowed_site(site_name):
                        continue

                    replicas = indexed[(site_name, dataset_name)]

                    for replica in replicas.itervalues():
                        replica.group = Group(sub_entry['group'])
                        replica.is_custodial = (sub_entry['custodial'] == 'y')

            try:
                block_entries = dataset_entry['block']
            except KeyError:
                pass
            else:
                for block_entry in block_entries:
                    try:
                        _, block_name = Block.from_full_name(
                            block_entry['name'])
                    except ObjectError:
                        continue

                    try:
                        subscriptions = block_entry['subscription']
                    except KeyError:
                        continue

                    for sub_entry in subscriptions:
                        site_name = sub_entry['node']

                        if not self.check_allowed_site(site_name):
                            continue

                        try:
                            replica = indexed[(site_name,
                                               dataset_name)][block_name]
                        except KeyError:
                            continue

                        replica.group = Group(sub_entry['group'])

                        if sub_entry['node_bytes'] == block_entry['bytes']:
                            # complete
                            replica.size = sub_entry['node_bytes']
                            if replica.size is None:
                                replica.size = 0
                            replica.files = None
                        else:
                            # incomplete - since we cannot know what files are there, we'll just have to pretend there are none
                            replica.size = 0
                            replica.files = tuple()

                        replica.is_custodial = (sub_entry['custodial'] == 'y')

                        if sub_entry['time_update'] is None:
                            replica.last_update = 0
                        else:
                            replica.last_update = int(sub_entry['time_update'])

        return block_replicas
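
A minimal standalone sketch (plain dicts standing in for BlockReplica objects; all names hypothetical) of the overlay pattern used in get_replicas above: index the slower blockreplicas result by (site, dataset), then patch fresher group fields from the subscriptions result.

import collections

# Hypothetical simplified records standing in for BlockReplica objects
block_replicas = [
    {'site': 'SITE_X', 'dataset': '/A/B/C', 'block': 'blk1', 'group': None},
    {'site': 'SITE_X', 'dataset': '/A/B/C', 'block': 'blk2', 'group': None},
]

# Index by (site, dataset), as get_replicas does
indexed = collections.defaultdict(dict)
for rep in block_replicas:
    indexed[(rep['site'], rep['dataset'])][rep['block']] = rep

# A fresher subscription entry overrides the stale view
sub = {'site': 'SITE_X', 'dataset': '/A/B/C', 'block': 'blk1', 'group': 'AnalysisOps'}
try:
    indexed[(sub['site'], sub['dataset'])][sub['block']]['group'] = sub['group']
except KeyError:
    pass  # the subscription refers to a replica we did not fetch
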
    def get_updated_replicas(self, updated_since, inventory):  #override
        LOG.info(
            'get_updated_replicas(%d)  Fetching the list of replicas from PhEDEx',
            updated_since)

        nodes = []
        for entry in self._phedex.make_request('nodes', timeout=600):
            if not self.check_allowed_site(entry['name']):
                continue

            if entry['name'] not in inventory.sites:
                continue

            nodes.append(entry['name'])

        try:
            tmpconfig = Configuration(
                self._parallelizer_config.get('parallel', None))
        except Exception as e:
            LOG.error(str(e))
            tmpconfig = Configuration()

        parallelizer = Map(tmpconfig)
        parallelizer.timeout = 5400

        def get_node_replicas(node):
            options = ['update_since=%d' % updated_since, 'node=%s' % node]
            results = self._phedex.make_request('blockreplicas', options)

            return node, results

        # Use async to fire threads on demand
        node_results = parallelizer.execute(get_node_replicas,
                                            nodes,
                                            async=True)

        # Automatically starts a thread as we add inputs (block entries)
        combine_file = parallelizer.get_starter(self._combine_file_info)

        all_block_entries = []

        for node, block_entries in node_results:
            site = inventory.sites[node]

            for block_entry in block_entries:
                all_block_entries.append(block_entry)

                replica_entry = block_entry['replica'][0]

                if replica_entry['complete'] == 'y':
                    continue

                # incomplete block replica - should we fetch file info?
                try:
                    dataset_name, block_name = Block.from_full_name(
                        block_entry['name'])
                except ObjectError:
                    pass
                else:
                    try:
                        dataset = inventory.datasets[dataset_name]
                        block = dataset.find_block(block_name)
                        replica = block.find_replica(site)
                        if replica.file_ids is None:
                            num_files = block.num_files
                        else:
                            num_files = len(replica.file_ids)

                        if (replica.size == replica_entry['bytes'] and
                                num_files == replica_entry['files']):
                            # no we don't have to
                            continue
                    except Exception:
                        # At any point of the above lookups we may hit a None object or KeyError or what not
                        pass

                LOG.debug(
                    'Replica %s:%s is incomplete. Fetching file information.',
                    replica_entry['node'], block_entry['name'])
                combine_file.add_input(block_entry)

        combine_file.close()

        # _combine_file_info alters block_entries directly - no need to deal with output
        combine_file.get_outputs()

        LOG.info('get_updated_replicas(%d) Got outputs', updated_since)

        return PhEDExReplicaInfoSource.make_block_replicas(
            all_block_entries,
            PhEDExReplicaInfoSource.maker_blockreplicas,
            dataset_check=self.check_allowed_dataset)
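
The Map parallelizer above is project-specific; as a rough standalone sketch of the same per-node fan-out (hypothetical node names, placeholder work function), a thread-backed Pool from the standard library behaves similarly:

from multiprocessing.dummy import Pool  # thread-backed Pool from the stdlib

def get_node_replicas(node):
    # placeholder for the per-node PhEDEx 'blockreplicas' call
    return node, ['entry_for_' + node]

nodes = ['NODE_A', 'NODE_B']  # hypothetical node names

pool = Pool(4)
try:
    # imap_unordered yields results as workers finish, much like async=True above
    for node, entries in pool.imap_unordered(get_node_replicas, nodes):
        print(node + ': %d entries' % len(entries))
finally:
    pool.close()
    pool.join()
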
Exemple #19
0
    def load(self, inventory):
        for dataset in inventory.datasets.itervalues():
            try:
                dataset.attr.pop('locked_blocks')
            except KeyError:
                pass

        if len(self.users) != 0:
            entries = self._mysql.select_many('detox_locks', ('item', 'sites', 'groups'), ('user_id', 'role_id'), self.users)
        else:
            query = 'SELECT `item`, `sites`, `groups` FROM `detox_locks`'
            entries = self._mysql.query(query)

        for item_name, sites_pattern, groups_pattern in entries:
            # wildcard not allowed in block name
            try:
                dataset_pattern, block_name = Block.from_full_name(item_name)
            except ObjectError:
                dataset_pattern, block_name = item_name, None

            if '*' in dataset_pattern:
                pat_exp = re.compile(fnmatch.translate(dataset_pattern))
                
                datasets = []
                for dataset in inventory.datasets.values():
                    # this is highly inefficient but I can't think of a better way
                    if pat_exp.match(dataset.name):
                        datasets.append(dataset)
            else:
                try:
                    dataset = inventory.datasets[dataset_pattern]
                except KeyError:
                    LOG.debug('Cannot lock unknown dataset %s', dataset_pattern)
                    continue

                datasets = [dataset]

            specified_sites = []
            if sites_pattern:
                if sites_pattern == '*':
                    pass
                elif '*' in sites_pattern:
                    pat_exp = re.compile(fnmatch.translate(sites_pattern))
                    specified_sites.extend(s for n, s in inventory.sites.iteritems() if pat_exp.match(n))
                else:
                    try:
                        specified_sites.append(inventory.sites[sites_pattern])
                    except KeyError:
                        pass

            specified_groups = []
            if groups_pattern:
                if groups_pattern == '*':
                    pass
                elif '*' in groups_pattern:
                    pat_exp = re.compile(fnmatch.translate(groups_pattern))
                    specified_groups.extend(g for n, g in inventory.groups.iteritems() if pat_exp.match(n))
                else:
                    try:
                        specified_groups.append(inventory.groups[groups_pattern])
                    except KeyError:
                        pass

            for dataset in datasets:
                sites = set(specified_sites)
                groups = set(specified_groups)

                if len(sites) == 0:
                    # either sites_pattern was not given (global lock) or no sites matched (typo?)
                    # we will treat this as a global lock
                    sites.update(r.site for r in dataset.replicas)
    
                if len(groups) == 0:
                    # if no group matches the pattern, we will be on the safe side and treat it as a global lock
                    for replica in dataset.replicas:
                        groups.update(brep.group for brep in replica.block_replicas)
    
                try:
                    locked_blocks = dataset.attr['locked_blocks']
                except KeyError:
                    locked_blocks = dataset.attr['locked_blocks'] = {}

                if block_name is None:
                    for replica in dataset.replicas:
                        if replica.site not in sites:
                            continue
        
                        if replica.site not in locked_blocks:
                            locked_blocks[replica.site] = set()
        
                        for block_replica in replica.block_replicas:
                            if block_replica.group not in groups:
                                continue
        
                            locked_blocks[replica.site].add(block_replica.block)
                else:
                    block = dataset.find_block(block_name)
                    if block is None:
                        LOG.debug('Cannot lock unknown block %s', block_name)
                        continue

                    for replica in block.replicas:
                        if replica.site not in sites:
                            continue

                        if replica.group not in groups:
                            continue
        
                        if replica.site not in locked_blocks:
                            locked_blocks[replica.site] = set([block])
                        else:
                            locked_blocks[replica.site].add(block)
                            
        for dataset in inventory.datasets.itervalues():
            try:
                locked_blocks = dataset.attr['locked_blocks']
            except KeyError:
                continue

            for site, blocks in locked_blocks.items():
                if blocks is None:
                    continue

                # if all blocks are locked, set to None (dataset-level lock)
                if blocks == dataset.blocks:
                    locked_blocks[site] = None

        LOG.info('Locked %d items.', len(entries))
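
A minimal standalone sketch (block names standing in for Block objects, site names for Site objects) of the normalization pass above: once every block of a dataset is locked at a site, the per-site set collapses to None, which downstream code reads as a dataset-level lock.

# Hypothetical stand-ins: plain names instead of Block/Site objects
dataset_blocks = set(['blk1', 'blk2'])
locked_blocks = {'SITE_A': set(['blk1']), 'SITE_B': set(['blk1', 'blk2'])}

for site, blocks in locked_blocks.items():
    if blocks is not None and blocks == dataset_blocks:
        # all blocks locked -> promote to a dataset-level lock
        locked_blocks[site] = None

# locked_blocks['SITE_B'] is now None; SITE_A keeps its partial set
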
Exemple #20
0
    def get_file(self, name, block=None):
        ## Get the file data from PhEDEx

        result = self._phedex.make_request('data',
                                           ['file=' + name, 'level=file'])

        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
            file_entry = block_entry['file'][0]
        except (IndexError, KeyError):
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        if block is None:
            link_block = False
            # Just need a named object
            dataset = Dataset(dataset_entry['name'])
            block = Block(block_name, dataset)
        else:
            link_block = True
            if block.name != block_name:
                raise IntegrityError(
                    'Inconsistent block %s passed to get_file(%s)',
                    block.full_name(), name)

        lfile = self._create_file(file_entry, block)

        if link_block:
            # Caution - by adding this file we edit the block properties too

            existing = block.find_file(lfile.fid())
            if existing is None:
                block.add_file(lfile)
            else:
                block.remove_file(existing)
                block.add_file(lfile)

        return lfile
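
A self-contained sketch (MiniBlock is a hypothetical stand-in, not the real Block class) of the replace-then-add step in the link_block branch above, which keeps the block's file list consistent when a file is re-fetched:

class MiniBlock(object):
    # hypothetical stand-in for Block, keyed by file id
    def __init__(self):
        self._files = {}

    def find_file(self, fid):
        return self._files.get(fid)

    def add_file(self, f):
        self._files[f['fid']] = f

    def remove_file(self, f):
        del self._files[f['fid']]

block = MiniBlock()
newfile = {'fid': '/store/hypothetical/file.root', 'size': 42}

existing = block.find_file(newfile['fid'])
if existing is not None:
    block.remove_file(existing)  # drop the stale record first
block.add_file(newfile)
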
    def run(self, caller, request, inventory):
        if caller.name not in self.authorized_users:
            raise AuthorizationError()

        try:
            item = request['item']
        except KeyError:
            raise MissingParameter('item')

        if type(item) is list:
            items = item
        else:
            items = [item]

        invalidated_items = []

        sql = 'INSERT INTO `invalidations` (`item`, `db`, `user_id`, `timestamp`) VALUES (%s, %s, %s, NOW())'

        for item in items:
            invalidated = False

            if item in inventory.datasets:
                # item is a dataset

                result = self.dbs.make_request('datasets', [
                    'dataset=' + item, 'dataset_access_type=*', 'detail=true'
                ])
                if len(result) != 0:
                    status = result[0]['dataset_access_type']
                    if status in ('VALID', 'PRODUCTION'):
                        self.registry.db.query(sql, item, 'dbs', caller.id)

                    for entry in self.dbs.make_request(
                            'files', ['dataset=' + item, 'validFileOnly=1']):
                        self.registry.db.query(sql, entry['logical_file_name'],
                                               'dbs', caller.id)

                    invalidated = True

                result = self.phedex.make_request(
                    'data', ['dataset=' + item, 'level=block'])
                if len(result) != 0:
                    self.registry.db.query(sql, item, 'tmdb', caller.id)
                    invalidated = True

            else:
                try:
                    dataset_name, block_name = Block.from_full_name(item)
                except ObjectError:
                    lfile = inventory.find_file(item)
                    if lfile is not None:
                        # item is a file

                        result = self.dbs.make_request(
                            'files',
                            ['logical_file_name=' + item, 'validFileOnly=1'])
                        if len(result) != 0:
                            self.registry.db.query(
                                sql, result[0]['logical_file_name'], 'dbs',
                                caller.id)
                            invalidated = True

                        result = self.phedex.make_request(
                            'data', ['file=' + item])
                        if len(result) != 0:
                            self.registry.db.query(sql, item, 'tmdb',
                                                   caller.id)
                            invalidated = True

                else:
                    # item is a block

                    for entry in self.dbs.make_request(
                            'files', ['block_name=' + item, 'validFileOnly=1']):
                        self.registry.db.query(sql, entry['logical_file_name'],
                                               'dbs', caller.id)
                        invalidated = True

                    result = self.phedex.make_request(
                        'data', ['block=' + item, 'level=block'])
                    if len(result) != 0:
                        self.registry.db.query(sql, item, 'tmdb', caller.id)
                        invalidated = True

            if invalidated:
                invalidated_items.append({'item': item})

        return invalidated_items
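
A standalone sketch (stand-in parser, hypothetical names) of the dataset/block/file triage performed by run above: a '#' marks a block full name, a name found in the inventory is a dataset, and anything else is treated as a file LFN.

known_datasets = set(['/A/B/C'])  # hypothetical inventory contents

def classify(item):
    if item in known_datasets:
        return 'dataset'
    if '#' in item:  # stand-in for Block.from_full_name succeeding
        return 'block'
    return 'file'

for item in ('/A/B/C', '/A/B/C#abcd-1234', '/store/hypothetical/file.root'):
    print(item + ' -> ' + classify(item))
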
Exemple #22
0
    def _build_partition(self, inventory):
        """Create a mini-inventory consisting only of replicas in the partition."""

        partition_repository = ObjectRepository()
        partition_repository._store = inventory._store

        LOG.info('Identifying target sites.')

        partition = inventory.partitions[self.policy.partition_name]

        partition.embed_tree(partition_repository)

        # Ask each site if deletion should be triggered.
        target_sites = set()  # target sites of this detox cycle
        tape_is_target = False
        for site in inventory.sites.itervalues():
            # target_site_defs are SiteConditions, which take site_partition as the argument
            site_partition = site.partitions[partition]

            for targdef in self.policy.target_site_def:
                if targdef.match(site_partition):
                    target_sites.add(site)
                    if site.storage_type == Site.TYPE_MSS:
                        tape_is_target = True

                    break

        if len(target_sites) == 0:
            LOG.info('No site matches the target definition.')
            return partition_repository

        # Safety measure - if there are empty (no block rep) tape replicas, create block replicas with size 0 and
        # add them into the partition. We will not report back to the main process though (i.e. won't call inventory.update).
        if tape_is_target:
            for site in filter(lambda s: s.storage_type == Site.TYPE_MSS,
                               target_sites):
                for replica in site.dataset_replicas():
                    if len(replica.block_replicas) != 0:
                        continue

                    for block in replica.dataset.blocks:
                        block_replica = BlockReplica(block,
                                                     site,
                                                     Group.null_group,
                                                     size=0)
                        replica.block_replicas.add(block_replica)
                        block.replicas.add(block_replica)

                    # Add to the site partition
                    site.partitions[partition].replicas[replica] = None

        # Create a copy of the inventory, limiting to the current partition
        # We will be stripping replicas off the image as we process the policy in iterations
        LOG.info('Creating a partition image.')

        for group in inventory.groups.itervalues():
            group.embed_into(partition_repository)

        # Now clone the sites, datasets, and replicas
        # Basically a copy-paste of various embed_into() functions omitting the checks

        # make a map to avoid excessive lookups
        block_to_clone = {}
        for site in target_sites:
            site_clone = site.embed_into(partition_repository)

            site_partition = site.partitions[partition]
            site_partition_clone = site_partition.embed_tree(
                partition_repository)

            for dataset_replica, block_replica_set in site_partition.replicas.iteritems():
                dataset = dataset_replica.dataset

                try:
                    dataset_clone = partition_repository.datasets[dataset.name]
                except KeyError:
                    dataset_clone = dataset.embed_into(partition_repository)

                    for block in dataset.blocks:
                        block_clone = Block(block.name,
                                            dataset_clone,
                                            size=block.size,
                                            num_files=block.num_files,
                                            is_open=block.is_open,
                                            last_update=block.last_update,
                                            bid=block.id)
                        dataset_clone.blocks.add(block_clone)

                        block_to_clone[block] = block_clone

                if dataset_replica.group is None:
                    group = None
                else:
                    group = partition_repository.groups[
                        dataset_replica.group.name]

                replica_clone = DatasetReplica(dataset_clone,
                                               site_clone,
                                               growing=dataset_replica.growing,
                                               group=group)
                dataset_clone.replicas.add(replica_clone)
                site_clone.add_dataset_replica(replica_clone,
                                               add_block_replicas=False)

                if block_replica_set is None:
                    # all block reps in partition
                    block_replica_set = dataset_replica.block_replicas
                    full_replica = True
                    site_partition_clone.replicas[replica_clone] = None
                else:
                    full_replica = False
                    block_replica_clone_set = site_partition_clone.replicas[
                        replica_clone] = set()

                for block_replica in block_replica_set:
                    block_clone = block_to_clone[block_replica.block]
                    if block_replica.is_complete():
                        size = -1
                    else:
                        size = block_replica.size

                    block_replica_clone = BlockReplica(
                        block_clone,
                        site_clone,
                        partition_repository.groups[block_replica.group.name],
                        is_custodial=block_replica.is_custodial,
                        size=size,
                        last_update=block_replica.last_update,
                        file_ids=block_replica.file_ids)

                    replica_clone.block_replicas.add(block_replica_clone)
                    block_clone.replicas.add(block_replica_clone)

                    if not full_replica:
                        block_replica_clone_set.add(block_replica_clone)

        return partition_repository
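
The block_to_clone map above clones each Block exactly once and reuses the clone for every replica that references it; a minimal standalone sketch (hypothetical MiniBlock objects) of the same memoized-clone pattern:

class MiniBlock(object):
    # hypothetical stand-in for Block
    def __init__(self, name):
        self.name = name

originals = [MiniBlock('blk1'), MiniBlock('blk2')]
# block replicas reference blocks, possibly many replicas per block
referenced = [originals[0], originals[0], originals[1]]

block_to_clone = {}
for block in originals:
    block_to_clone[block] = MiniBlock(block.name)  # clone once

for block in referenced:
    clone = block_to_clone[block]  # O(1) reuse, no repeated name lookup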