def schedule_copies(self, replica_list, operation_id, comments = ''): #override
    sites = set(r.site for r in replica_list)
    if len(sites) != 1:
        raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

    LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

    result = []

    for replica in replica_list:
        # Function spec is to return clones (so that if a specific block fails to copy, we can return a dataset replica without the block)
        clone_replica = DatasetReplica(replica.dataset, replica.site)
        clone_replica.copy(replica)
        result.append(clone_replica)

        for block_replica in replica.block_replicas:
            LOG.debug('Subscribing files for %s', str(block_replica))

            if block_replica.file_ids is None:
                # Nothing to subscribe; skip this block replica rather than aborting the whole operation.
                LOG.debug('No file to subscribe for %s', str(block_replica))
                continue

            all_files = block_replica.block.files
            missing_files = all_files - block_replica.files()

            for lfile in missing_files:
                self.rlfsm.subscribe_file(block_replica.site, lfile)

            clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
            clone_block_replica.copy(block_replica)
            clone_block_replica.last_update = int(time.time())
            clone_replica.block_replicas.add(clone_block_replica)

    # no external dependency - everything is a success
    return result
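# Usage sketch (illustrative, not part of the original source): schedule_copies
# requires that every replica in one call share a single site, or it raises
# OperationalError. A caller holding replicas at multiple sites would bucket
# them first. `fileop` (an instance of the class above), `all_replicas` and
# `operation_id` are assumed names for illustration.
from collections import defaultdict

def schedule_copies_by_site(fileop, all_replicas, operation_id):
    by_site = defaultdict(list)
    for replica in all_replicas:
        by_site[replica.site].append(replica)

    clones = []
    for site_replicas in by_site.itervalues():
        # each call now satisfies the single-site requirement enforced above
        clones.extend(fileop.schedule_copies(site_replicas, operation_id))

    return clones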
def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
    LOG.info('Ignoring deletion schedule of %d replicas (operation %d)', len(replica_list), operation_id)

    result = []

    for replica, block_replicas in replica_list:
        clone_replica = DatasetReplica(replica.dataset, replica.site)
        clone_replica.copy(replica)

        if block_replicas is None:
            result.append((clone_replica, None))
        else:
            clone_block_replicas = []

            for block_replica in block_replicas:
                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_block_replicas.append(clone_block_replica)

            result.append((clone_replica, clone_block_replicas))

    return result
def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
    sites = set(r.site for r, b in replica_list)
    if len(sites) != 1:
        raise OperationalError('schedule_deletions should be called with a list of replicas at a single site.')

    site = list(sites)[0]

    LOG.info('Scheduling deletion of %d replicas from %s using RLFSM (operation %d)', len(replica_list), site.name, operation_id)

    clones = []

    for dataset_replica, block_replicas in replica_list:
        if block_replicas is None:
            to_delete = dataset_replica.block_replicas
        else:
            to_delete = block_replicas

        for block_replica in to_delete:
            for lfile in block_replica.files():
                self.rlfsm.desubscribe_file(block_replica.site, lfile)

        # No external dependency -> all operations are successful

        clone_replica = DatasetReplica(dataset_replica.dataset, dataset_replica.site)
        clone_replica.copy(dataset_replica)

        if block_replicas is None:
            clones.append((clone_replica, None))
        else:
            clones.append((clone_replica, []))
            for block_replica in block_replicas:
                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clones[-1][1].append(clone_block_replica)

    return clones
def schedule_copies(self, replica_list, operation_id, comments = ''): #override
    LOG.info('Ignoring copy schedule of %d replicas (operation %d)', len(replica_list), operation_id)

    result = []

    for replica in replica_list:
        clone_replica = DatasetReplica(replica.dataset, replica.site)
        clone_replica.copy(replica)
        result.append(clone_replica)

        for block_replica in replica.block_replicas:
            clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
            clone_block_replica.copy(block_replica)
            clone_block_replica.last_update = int(time.time())
            clone_replica.block_replicas.add(clone_block_replica)

    return result
def schedule_copies(self, replica_list, operation_id, comments = ''): #override
    sites = set(r.site for r in replica_list)
    if len(sites) != 1:
        raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

    LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

    result = []

    for replica in replica_list:
        # Function spec is to return clones (so that if a specific block fails to copy, we can return a dataset replica without the block)
        clone_replica = DatasetReplica(replica.dataset, replica.site)
        clone_replica.copy(replica)
        result.append(clone_replica)

        for block_replica in replica.block_replicas:
            LOG.debug('Subscribing files for %s', str(block_replica))

            if block_replica.file_ids is None:
                # Nothing to subscribe; skip this block replica rather than aborting the whole operation.
                LOG.debug('No file to subscribe for %s', str(block_replica))
                continue

            all_files = block_replica.block.files
            missing_files = all_files - block_replica.files()

            for lfile in missing_files:
                self.rlfsm.subscribe_file(block_replica.site, lfile)

            clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
            clone_block_replica.copy(block_replica)
            clone_block_replica.last_update = int(time.time())
            clone_replica.block_replicas.add(clone_block_replica)

    if not self._read_only:
        for clone_replica in result:
            if clone_replica.growing:
                self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, clone_replica.dataset.name, clone_replica.site.name, clone_replica.group.name)
            else:
                for block_replica in clone_replica.block_replicas:
                    self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, block_replica.block.full_name(), clone_replica.site.name, block_replica.group.name)

    # no external dependency - everything is a success
    return result
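# The INSERT statements above imply a bookkeeping table roughly like the
# following. This is a sketch only: the column names come straight from the
# query, but the types and sizes are assumptions, not the actual schema.
#
#   CREATE TABLE `phedex_transfer_reservations` (
#     `operation_id` INT UNSIGNED NOT NULL,
#     `item` VARCHAR(512) NOT NULL,   -- dataset name (growing replica) or block full name
#     `site` VARCHAR(64) NOT NULL,
#     `group` VARCHAR(64) NOT NULL
#   );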
def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
    sites = set(r.site for r, b in replica_list)
    if len(sites) != 1:
        raise OperationalError('schedule_deletions should be called with a list of replicas at a single site.')

    site = list(sites)[0]

    LOG.info('Scheduling deletion of %d replicas from %s using RLFSM (operation %d)', len(replica_list), site.name, operation_id)

    clones = []

    for dataset_replica, block_replicas in replica_list:
        if block_replicas is None:
            to_delete = dataset_replica.block_replicas
        else:
            to_delete = block_replicas

        for block_replica in to_delete:
            for lfile in block_replica.files():
                self.rlfsm.desubscribe_file(block_replica.site, lfile)

        # No external dependency -> all operations are successful

        clone_replica = DatasetReplica(dataset_replica.dataset, dataset_replica.site)
        clone_replica.copy(dataset_replica)

        if block_replicas is None:
            clones.append((clone_replica, None))
        else:
            clones.append((clone_replica, []))
            for block_replica in block_replicas:
                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clones[-1][1].append(clone_block_replica)

    if not self._read_only:
        for clone_replica, block_replicas in clones:
            if block_replicas is None:
                self.mysql.query('INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)', operation_id, clone_replica.dataset.name, clone_replica.site.name)
            else:
                for block_replica in block_replicas:
                    self.mysql.query('INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)', operation_id, block_replica.block.full_name(), clone_replica.site.name)

    return clones
def _build_partition(self, inventory):
    """Create a mini-inventory consisting only of replicas in the partition."""

    partition_repository = ObjectRepository()

    LOG.info('Identifying target sites.')

    partition = inventory.partitions[self.policy.partition_name]

    partition.embed_tree(partition_repository)

    # Ask each site if deletion should be triggered.
    target_sites = set() # target sites of this detox cycle
    tape_is_target = False

    for site in inventory.sites.itervalues():
        # target_site_defs are SiteConditions, which take site_partition as the argument
        site_partition = site.partitions[partition]

        for targdef in self.policy.target_site_def:
            if targdef.match(site_partition):
                target_sites.add(site)
                if site.storage_type == Site.TYPE_MSS:
                    tape_is_target = True

                break

    if len(target_sites) == 0:
        LOG.info('No site matches the target definition.')
        return partition_repository

    # Safety measure - if there are empty (no block rep) tape replicas, create block replicas with size 0 and
    # add them into the partition. We will not report back to the main process though (i.e. won't call inventory.update).
    if tape_is_target:
        for site in filter(lambda s: s.storage_type == Site.TYPE_MSS, target_sites):
            for replica in site.dataset_replicas():
                if len(replica.block_replicas) != 0:
                    continue

                for block in replica.dataset.blocks:
                    block_replica = BlockReplica(block, site, Group.null_group, size = 0)
                    replica.block_replicas.add(block_replica)
                    block.replicas.add(block_replica)

                # Add to the site partition
                site.partitions[partition].replicas[replica] = None

    # Create a copy of the inventory, limiting to the current partition
    # We will be stripping replicas off the image as we process the policy in iterations
    LOG.info('Creating a partition image.')

    for group in inventory.groups.itervalues():
        group.embed_into(partition_repository)

    # Now clone the sites, datasets, and replicas
    # Basically a copy-paste of various embed_into() functions omitting the checks

    # make a map to avoid excessive lookups
    block_to_clone = {}

    for site in target_sites:
        site_clone = site.embed_into(partition_repository)

        site_partition = site.partitions[partition]
        site_partition_clone = site_partition.embed_tree(partition_repository)

        for dataset_replica, block_replica_set in site_partition.replicas.iteritems():
            dataset = dataset_replica.dataset

            try:
                dataset_clone = partition_repository.datasets[dataset.name]
            except KeyError:
                # first time seeing this dataset: embed it and clone its blocks once
                dataset_clone = dataset.embed_into(partition_repository)

                for block in dataset.blocks:
                    block_clone = Block(block.name, dataset_clone)
                    block_clone.copy(block)
                    dataset_clone.blocks.add(block_clone)

                    block_to_clone[block] = block_clone

            replica_clone = DatasetReplica(dataset_clone, site_clone)
            dataset_clone.replicas.add(replica_clone)
            site_clone.add_dataset_replica(replica_clone, add_block_replicas = False)

            if block_replica_set is None:
                # all block reps in partition
                block_replica_set = dataset_replica.block_replicas
                full_replica = True
                site_partition_clone.replicas[replica_clone] = None
            else:
                full_replica = False
                block_replica_clone_set = site_partition_clone.replicas[replica_clone] = set()

            for block_replica in block_replica_set:
                block_clone = block_to_clone[block_replica.block]

                block_replica_clone = BlockReplica(block_clone, site_clone, block_replica.group)
                block_replica_clone.copy(block_replica)
                # group has to be reset to the clone
                block_replica_clone.group = partition_repository.groups[block_replica.group.name]

                replica_clone.block_replicas.add(block_replica_clone)
                block_clone.replicas.add(block_replica_clone)

                if not full_replica:
                    block_replica_clone_set.add(block_replica_clone)

    return partition_repository
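# Minimal usage sketch (assumed names: `detox` is the object defining
# _build_partition, `inventory` is the live inventory it receives):
#
#     image = detox._build_partition(inventory)
#
# The returned ObjectRepository is a private clone restricted to the target
# sites of the partition; policy iterations can strip replicas off this image
# without mutating the shared inventory.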
def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
    sites = set(r.site for r, b in replica_list)
    if len(sites) != 1:
        raise OperationalError('schedule_deletions should be called with a list of replicas at a single site.')

    site = list(sites)[0]

    if site.storage_type == Site.TYPE_MSS and not self.allow_tape_deletion:
        LOG.warning('Deletion from MSS not allowed by configuration.')
        return []

    if self.allow_tape_deletion and self.auto_approval:
        LOG.warning('You cannot have auto-approved tape deletions. Set auto-approval to False.')
        return []

    # execute the deletions in two steps: one for dataset-level and one for block-level
    datasets = []
    blocks = []

    # maps used later for cloning
    # getting ugly here.. should come up with a better way of making clones
    replica_map = {}
    block_replica_map = {}

    for dataset_replica, block_replicas in replica_list:
        if block_replicas is None:
            datasets.append(dataset_replica.dataset)
        else:
            blocks.extend(br.block for br in block_replicas)

            replica_map[dataset_replica.dataset] = dataset_replica
            block_replica_map.update((br.block, br) for br in block_replicas)

    success = []

    deleted_datasets = self._run_deletion_request(operation_id, site, 'dataset', datasets, comments)

    for dataset in deleted_datasets:
        replica = DatasetReplica(dataset, site, growing = False, group = Group.null_group)
        success.append((replica, None))

    tmp_map = dict((dataset, []) for dataset in replica_map.iterkeys())

    deleted_blocks = self._run_deletion_request(operation_id, site, 'block', blocks, comments)

    for block in deleted_blocks:
        tmp_map[block.dataset].append(block)

    for dataset, blocks in tmp_map.iteritems():
        replica = DatasetReplica(dataset, site)
        replica.copy(replica_map[dataset])

        success.append((replica, []))

        for block in blocks:
            block_replica = BlockReplica(block, site, Group.null_group)
            block_replica.copy(block_replica_map[block])
            block_replica.last_update = int(time.time())

            success[-1][1].append(block_replica)

    return success
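# Sketch of how a caller might interpret the return value above (illustrative;
# `scheduled` stands for the list returned by schedule_deletions). A None in
# the second slot marks a dataset-level deletion; a list marks the block
# replicas that were actually scheduled.
def summarize_deletions(scheduled):
    for dataset_replica, block_replicas in scheduled:
        if block_replicas is None:
            # dataset-level deletion: the whole replica was scheduled
            print 'dataset: %s' % dataset_replica.dataset.name
        else:
            # block-level deletion: only the listed block replicas were scheduled
            for block_replica in block_replicas:
                print 'block: %s' % block_replica.block.full_name()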