Example #1
    def schedule_copies(self, replica_list, operation_id, comments = ''): #override
        sites = set(r.site for r in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

        LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

        result = []

        for replica in replica_list:
            # Function spec is to return clones (so that if a specific block fails to copy, we can return a dataset replica without the block)
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)

            for block_replica in replica.block_replicas:
                LOG.debug('Subscribing files for %s', str(block_replica))

                if block_replica.file_ids is None:
                    LOG.debug('No file to subscribe for %s', str(block_replica))
                    # nothing to subscribe for this block replica; skip it
                    continue
        
                all_files = block_replica.block.files
                missing_files = all_files - block_replica.files()

                for lfile in missing_files:
                    self.rlfsm.subscribe_file(block_replica.site, lfile)

                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        # no external dependency - everything is a success
        return result
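
The comment at the top of the loop explains the return convention: if a block cannot be scheduled, the returned dataset replica simply lacks that block replica, so a caller can diff the clone against the request to see what was dropped. A minimal sketch of such a check, assuming only that both objects expose block_replicas whose elements carry a .block attribute (as in the code above):

    def dropped_blocks(requested_replica, clone_replica):
        """Blocks present in the request but missing from the returned clone."""
        requested = set(br.block for br in requested_replica.block_replicas)
        cloned = set(br.block for br in clone_replica.block_replicas)
        return requested - cloned
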
Example #2
    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        LOG.info('Ignoring deletion schedule of %d replicas (operation %d)',
                 len(replica_list), operation_id)

        result = []

        for replica, block_replicas in replica_list:
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)

            if block_replicas is None:
                result.append((clone_replica, None))
            else:
                clone_block_replicas = []

                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site,
                                                       block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clone_block_replicas.append(clone_block_replica)

                result.append((clone_replica, clone_block_replicas))

        return result
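
This variant performs no external operation at all; it only builds clones stamped with the current time, which is enough to satisfy the interface of a dummy backend. Note the ordering used in every example: copy() first, then last_update, presumably because copy() carries over the original's fields, including its timestamp. The recurring clone-and-stamp step in isolation (a minimal sketch; the import path is an assumption):

    import time

    from dynamo.dataformat import BlockReplica  # assumed import path

    def clone_and_stamp(block_replica):
        clone = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
        clone.copy(block_replica)             # carries over the original's fields...
        clone.last_update = int(time.time())  # ...so the fresh timestamp must be set last
        return clone
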
Example #3
    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        LOG.info(
            'Scheduling deletion of %d replicas from %s using RLFSM (operation %d)',
            len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            clone_replica = DatasetReplica(dataset_replica.dataset,
                                           dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        return clones
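
Both RLFSM-backed methods drive everything through two per-file calls: subscribe_file (schedule a transfer to the site) and desubscribe_file (schedule a removal from it). A toy stand-in that merely records what those calls would queue, purely to make the interface concrete (the real RLFSM persists its state in a database and executes transfers and deletions asynchronously):

    class ToyRLFSM(object):
        """Toy bookkeeper mimicking the two calls used in the examples above."""

        def __init__(self):
            self.transfer_queue = set()  # (site, lfile) pairs to copy in
            self.deletion_queue = set()  # (site, lfile) pairs to remove

        def subscribe_file(self, site, lfile):
            self.transfer_queue.add((site, lfile))

        def desubscribe_file(self, site, lfile):
            self.deletion_queue.add((site, lfile))
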
Example #4
    def schedule_copies(self, replica_list, operation_id, comments = ''): #override
        LOG.info('Ignoring copy schedule of %d replicas (operation %d)', len(replica_list), operation_id)

        result = []

        for replica in replica_list:
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)
            
            for block_replica in replica.block_replicas:
                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        return result
Example #5
    def schedule_copies(self, replica_list, operation_id, comments = ''): #override
        sites = set(r.site for r in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

        LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

        result = []

        for replica in replica_list:
            # Function spec is to return clones (so that if a specific block fails to copy, we can return a dataset replica without the block)
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)

            for block_replica in replica.block_replicas:
                LOG.debug('Subscribing files for %s', str(block_replica))

                if block_replica.file_ids is None:
                    LOG.debug('No file to subscribe for %s', str(block_replica))
                    # nothing to subscribe for this block replica; skip it
                    continue
        
                all_files = block_replica.block.files
                missing_files = all_files - block_replica.files()

                for lfile in missing_files:
                    self.rlfsm.subscribe_file(block_replica.site, lfile)

                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        if not self._read_only:
            for clone_replica in result:
                if clone_replica.growing:
                    self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, clone_replica.dataset.name, clone_replica.site.name, clone_replica.group.name)
                else:
                    for block_replica in clone_replica.block_replicas:
                        self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, block_replica.block.full_name(), clone_replica.site.name, block_replica.group.name)

        # no external dependency - everything is a success
        return result
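
The two database branches differ only in what goes into the item column: the dataset name for a growing replica, or each block's full name otherwise. A hypothetical helper that factors out the duplicated INSERT (same table and columns as above; not part of the original class):

    def _reserve_transfer(self, operation_id, item, site_name, group_name):
        self.mysql.query(
            'INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)',
            operation_id, item, site_name, group_name)
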
Example #6
    def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
        LOG.info('Ignoring deletion schedule of %d replicas (operation %d)', len(replica_list), operation_id)

        result = []

        for replica, block_replicas in replica_list:
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)

            if block_replicas is None:
                result.append((clone_replica, None))
            else:
                clone_block_replicas = []
    
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clone_block_replicas.append(clone_block_replica)
                    
                result.append((clone_replica, clone_block_replicas))

        return result
Example #7
    def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_deletions should be called with a list of replicas at a single site.')

        site = list(sites)[0]

        LOG.info('Scheduling deletion of %d replicas from %s using RLFSM (operation %d)', len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            clone_replica = DatasetReplica(dataset_replica.dataset, dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block, block_replica.site)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        return clones
Example #8
    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        LOG.info(
            'Scheduling deletion of %d replicas from %s using RLFSM (operation %d)',
            len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            clone_replica = DatasetReplica(dataset_replica.dataset,
                                           dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site,
                                                       block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        if not self._read_only:
            for clone_replica, block_replicas in clones:
                if block_replicas is None:
                    self.mysql.query(
                        'INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)',
                        operation_id, clone_replica.dataset.name,
                        clone_replica.site.name)
                else:
                    for block_replica in block_replicas:
                        self.mysql.query(
                            'INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)',
                            operation_id, block_replica.block.full_name(),
                            clone_replica.site.name)

        return clones
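
The returned list follows the same convention as the input: a (dataset_replica, block_replicas) pair where None means the whole dataset replica was scheduled. A sketch of how a caller might read the result (hypothetical consumer, Python 2 to match the code above):

    def summarize(clones):
        for replica, block_replicas in clones:
            if block_replicas is None:
                print '%s deleted as a whole at %s' % (replica.dataset.name, replica.site.name)
            else:
                print '%s: %d block replicas deleted at %s' % (
                    replica.dataset.name, len(block_replicas), replica.site.name)
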
Example #9
    def _build_partition(self, inventory):
        """Create a mini-inventory consisting only of replicas in the partition."""

        partition_repository = ObjectRepository()

        LOG.info('Identifying target sites.')

        partition = inventory.partitions[self.policy.partition_name]

        partition.embed_tree(partition_repository)

        # Ask each site if deletion should be triggered.
        target_sites = set()  # target sites of this detox cycle
        tape_is_target = False
        for site in inventory.sites.itervalues():
            # target_site_defs are SiteConditions, which take site_partition as the argument
            site_partition = site.partitions[partition]

            for targdef in self.policy.target_site_def:
                if targdef.match(site_partition):
                    target_sites.add(site)
                    if site.storage_type == Site.TYPE_MSS:
                        tape_is_target = True

                    break

        if len(target_sites) == 0:
            LOG.info('No site matches the target definition.')
            return partition_repository

        # Safety measure - if there are empty (no block rep) tape replicas, create block replicas with size 0 and
        # add them into the partition. We will not report back to the main process though (i.e. won't call inventory.update).
        if tape_is_target:
            for site in filter(lambda s: s.storage_type == Site.TYPE_MSS,
                               target_sites):
                for replica in site.dataset_replicas():
                    if len(replica.block_replicas) != 0:
                        continue

                    for block in replica.dataset.blocks:
                        block_replica = BlockReplica(block,
                                                     site,
                                                     Group.null_group,
                                                     size=0)
                        replica.block_replicas.add(block_replica)
                        block.replicas.add(block_replica)

                    # Add to the site partition
                    site.partitions[partition].replicas[replica] = None

        # Create a copy of the inventory, limiting to the current partition
        # We will be stripping replicas off the image as we process the policy in iterations
        LOG.info('Creating a partition image.')

        for group in inventory.groups.itervalues():
            group.embed_into(partition_repository)

        # Now clone the sites, datasets, and replicas
        # Basically a copy-paste of various embed_into() functions omitting the checks

        # make a map to avoid excessive lookups
        block_to_clone = {}
        for site in target_sites:
            site_clone = site.embed_into(partition_repository)

            site_partition = site.partitions[partition]
            site_partition_clone = site_partition.embed_tree(
                partition_repository)

            for dataset_replica, block_replica_set in site_partition.replicas.iteritems():
                dataset = dataset_replica.dataset

                try:
                    dataset_clone = partition_repository.datasets[dataset.name]

                except KeyError:
                    dataset_clone = dataset.embed_into(partition_repository)

                    for block in dataset.blocks:
                        block_clone = Block(block.name, dataset_clone)
                        block_clone.copy(block)
                        dataset_clone.blocks.add(block_clone)

                        block_to_clone[block] = block_clone

                replica_clone = DatasetReplica(dataset_clone, site_clone)
                dataset_clone.replicas.add(replica_clone)
                site_clone.add_dataset_replica(replica_clone,
                                               add_block_replicas=False)

                if block_replica_set is None:
                    # all block reps in partition
                    block_replica_set = dataset_replica.block_replicas
                    full_replica = True
                    site_partition_clone.replicas[replica_clone] = None
                else:
                    full_replica = False
                    block_replica_clone_set = site_partition_clone.replicas[replica_clone] = set()

                for block_replica in block_replica_set:
                    block_clone = block_to_clone[block_replica.block]

                    block_replica_clone = BlockReplica(block_clone, site_clone,
                                                       block_replica.group)
                    block_replica_clone.copy(block_replica)
                    # group has to be reset to the clone
                    block_replica_clone.group = partition_repository.groups[block_replica.group.name]

                    replica_clone.block_replicas.add(block_replica_clone)
                    block_clone.replicas.add(block_replica_clone)

                    if not full_replica:
                        block_replica_clone_set.add(block_replica_clone)

        return partition_repository
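
The block_to_clone map is plain memoization: each original Block is cloned exactly once (when its dataset is first embedded), and every BlockReplica clone then looks up that same Block clone, preserving object identity inside the partition image. The pattern in isolation (hypothetical helper, not part of the code above):

    def clone_once(original, cache, make_clone):
        """Return the cached clone of original, creating it on first request."""
        try:
            return cache[original]
        except KeyError:
            clone = cache[original] = make_clone(original)
            return clone
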
Example #10
    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        if site.storage_type == Site.TYPE_MSS and not self.allow_tape_deletion:
            LOG.warning('Deletion from MSS not allowed by configuration.')
            return []

        if self.allow_tape_deletion and self.auto_approval:
            LOG.warning(
                'You cannot have auto-approved tape deletions. Set auto-approval to False.'
            )
            return []

        # execute the deletions in two steps: one for dataset-level and one for block-level
        datasets = []
        blocks = []

        # maps used later for cloning
        # getting ugly here.. should come up with a better way of making clones
        replica_map = {}
        block_replica_map = {}

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                datasets.append(dataset_replica.dataset)
            else:
                blocks.extend(br.block for br in block_replicas)

                replica_map[dataset_replica.dataset] = dataset_replica
                block_replica_map.update(
                    (br.block, br) for br in block_replicas)

        success = []

        deleted_datasets = self._run_deletion_request(operation_id, site,
                                                      'dataset', datasets,
                                                      comments)

        for dataset in deleted_datasets:
            replica = DatasetReplica(dataset,
                                     site,
                                     growing=False,
                                     group=Group.null_group)
            success.append((replica, None))

        tmp_map = dict((dataset, []) for dataset in replica_map.iterkeys())

        deleted_blocks = self._run_deletion_request(operation_id, site,
                                                    'block', blocks, comments)

        for block in deleted_blocks:
            tmp_map[block.dataset].append(block)

        for dataset, blocks in tmp_map.iteritems():
            replica = DatasetReplica(dataset, site)
            replica.copy(replica_map[dataset])

            success.append((replica, []))
            for block in blocks:
                block_replica = BlockReplica(block, site, Group.null_group)
                block_replica.copy(block_replica_map[block])
                block_replica.last_update = int(time.time())
                success[-1][1].append(block_replica)

        return success
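
The two-step split at the top of the method sends one deletion request per granularity: whole datasets where block_replicas is None, individual blocks otherwise. Reduced to its core (a sketch over the same tuple convention used throughout these examples):

    def split_by_granularity(replica_list):
        datasets, blocks = [], []
        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                datasets.append(dataset_replica.dataset)
            else:
                blocks.extend(br.block for br in block_replicas)
        return datasets, blocks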