def load(self, inventory):
    # collect the name of items that are not yet activated or are activated but not queued
    sql = 'SELECT i.`item` FROM `copy_request_items` AS i INNER JOIN `copy_requests` AS r ON r.`id` = i.`request_id`'
    sql += ' WHERE r.`status` = \'new\''
    items = self.registry.db.query(sql)
    items += self.registry.db.query('SELECT `item` FROM `active_copies` WHERE `status` = \'new\'')

    for item_name in items:
        try:
            dataset_name, block_name = Block.from_full_name(item_name)
        except ObjectError:
            dataset_name, block_name = item_name, None

        try:
            dataset = inventory.datasets[dataset_name]
        except KeyError:
            continue

        if block_name is not None:
            block = dataset.find_block(block_name)
            if block is None:
                continue

        dataset.attr['unhandled_copy_exists'] = True
def load(self, inventory):
    for site in inventory.sites.itervalues():
        if site.storage_type != Site.TYPE_MSS:
            continue

        requests = self._phedex.make_request('transferrequests', ['node=' + site.name, 'approval=pending'])

        for request in requests:
            for dest in request['destinations']['node']:
                if dest['name'] != site.name:
                    continue

                if 'decided_by' in dest:
                    break

                for dataset_entry in request['data']['dbs']['dataset']:
                    try:
                        dataset = inventory.datasets[dataset_entry['name']]
                    except KeyError:
                        continue

                    dataset.attr['tape_copy_requested'] = True

                for block_entry in request['data']['dbs']['block']:
                    dataset_name, block_name = Block.from_full_name(block_entry['name'])

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        continue

                    # just label the entire dataset
                    dataset.attr['tape_copy_requested'] = True
def run(self, caller, request, inventory):
    if caller.name not in self.authorized_users:
        raise AuthorizationError()

    try:
        item = request['item']
    except KeyError:
        raise MissingParameter('item')

    if type(item) is list:
        items = item
    else:
        items = [item]

    cancelled_items = []

    sql = 'DELETE FROM `invalidations` WHERE `item` = %s AND `user_id` = %s'

    for item in items:
        deleted = self.registry.db.query(sql, item, caller.id)
        if deleted != 0:
            cancelled_items.append({'item': item})

        if item in inventory.datasets:
            # item is a dataset
            for entry in self.dbs.make_request('files', ['dataset=' + item, 'validFileOnly=1']):
                self.registry.db.query(sql, entry['logical_file_name'], caller.id)
        else:
            try:
                dataset_name, block_name = Block.from_full_name(item)
            except:
                pass
            else:
                # item is a block
                for entry in self.dbs.make_request('files', ['block_name=' + item, 'validFileOnly=1']):
                    self.registry.db.query(sql, entry['logical_file_name'], caller.id)

    return cancelled_items
def postprocess(self, cycle_number, copy_list): # override
    """
    Create active copy entries for accepted copies.
    """

    for request in self.activated_requests:
        updated = False

        for action in request.actions:
            try:
                dataset_name, block_name = Block.from_full_name(action.item)
            except ObjectError:
                dataset_name = action.item
                block_name = None

            for replica in copy_list:
                if replica.site.name != action.site:
                    continue

                if replica.growing:
                    # full dataset copy - dataset and block requests are both queued
                    if dataset_name == replica.dataset.name:
                        action.status = RequestAction.ST_QUEUED
                else:
                    # match block-by-block
                    if block_name is None:
                        # dataset request
                        continue

                    for block_replica in replica.block_replicas:
                        if block_name == block_replica.block.real_name():
                            action.status = RequestAction.ST_QUEUED
                            break

                if action.status == RequestAction.ST_QUEUED:
                    updated = True
                    # action got queued - no need to check other replicas
                    break

        if updated:
            self.request_manager.update_request(request)
def make_block_replicas(block_entries, replica_maker, site_check=None, dataset_check=None):
    """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

    dataset = None
    block_replicas = []

    for block_entry in block_entries:
        try:
            dataset_name, block_name = Block.from_full_name(block_entry['name'])
        except ObjectError: # invalid name
            continue

        if dataset is None or dataset.name != dataset_name:
            if dataset_check and not dataset_check(dataset_name):
                continue

            try:
                dataset = Dataset(dataset_name)
            except ObjectError: # invalid name
                dataset = None

        if dataset is None:
            continue

        block = Block(block_name, dataset, block_entry['bytes'])
        if block.size is None:
            block.size = 0

        block_replicas.extend(replica_maker(block, block_entry, site_check=site_check))

    return block_replicas
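
# Illustrative sketch (not part of the original module), assuming the PhEDEx 'blockreplicas'
# entry layout used elsewhere in this file: a `replica_maker` callback passed to
# make_block_replicas receives the constructed Block plus the raw block entry, is expected to
# honor the optional site_check filter, and returns a list of block-replica objects. The
# function name and the dict stand-ins below are hypothetical.
def example_replica_maker(block, block_entry, site_check=None):
    replicas = []
    for replica_entry in block_entry.get('replica', []):
        node_name = replica_entry['node']
        if site_check and not site_check(node_name):
            # skip replicas at sites the caller filtered out
            continue
        # a real maker would construct a BlockReplica here; a plain dict keeps the sketch self-contained
        replicas.append({'block': block, 'site': node_name})
    return replicas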
def get_requests(self, inventory, policy): #override
    """
    1. Request all active transfers in new state (these were not queued in the last cycle)
    2. Find all transfer requests with status new.
    3. Decide whether to accept the request. Set status accordingly.
    4. Find the destinations if wildcard was used.
    """

    partition = inventory.partitions[policy.partition_name]

    overwritten_groups = [inventory.groups[name] for name in self.overwritten_groups]

    self.activated_requests = []

    # full list of blocks to be proposed to Dealer
    blocks_to_propose = {} # {site: {dataset: set of blocks}}

    now = int(time.time())

    # Re-request new actions within activated requests

    self.request_manager.lock()

    active_requests = self.request_manager.get_requests(statuses=[Request.ST_ACTIVATED])

    activation_list = []

    for request in active_requests.itervalues():
        updated = False
        to_be_activated = False

        for action in request.actions:
            if action.status != RequestAction.ST_NEW:
                continue

            try:
                site = inventory.sites[action.site]
            except KeyError:
                action.status = RequestAction.ST_FAILED
                action.last_update = now
                updated = True
                continue

            try:
                dataset_name, block_name = Block.from_full_name(action.item)
            except ObjectError:
                # action.item is (supposed to be) a dataset name
                try:
                    dataset = inventory.datasets[action.item]
                except KeyError:
                    action.status = RequestAction.ST_FAILED
                    action.last_update = now
                    updated = True
                    continue

                existing_replica = site.find_dataset_replica(dataset)

                if existing_replica is not None:
                    if existing_replica.is_complete():
                        action.status = RequestAction.ST_COMPLETED
                    else:
                        # it was queued by someone
                        action.status = RequestAction.ST_QUEUED

                    action.last_update = now
                    updated = True
                else:
                    activation_list.append((dataset, site))
                    to_be_activated = True

            else:
                # action.item is a block name
                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    action.status = RequestAction.ST_FAILED
                    action.last_update = now
                    updated = True
                    continue

                block = dataset.find_block(block_name)
                if block is None:
                    action.status = RequestAction.ST_FAILED
                    action.last_update = now
                    updated = True
                    continue

                existing_replica = block.find_replica(site)

                if existing_replica is not None:
                    if existing_replica.is_complete():
                        action.status = RequestAction.ST_COMPLETED
                    else:
                        action.status = RequestAction.ST_QUEUED

                    action.last_update = now
                    updated = True
                else:
                    activation_list.append((block, site))
                    to_be_activated = True

        if updated:
            self.request_manager.update_request(request)

        if to_be_activated:
            self.activated_requests.append(request)

    self.request_manager.unlock()

    for item, site in activation_list:
        try:
            site_blocks = blocks_to_propose[site]
        except KeyError:
            site_blocks = blocks_to_propose[site] = {}

        if type(item) is Dataset:
            site_blocks[item] = set(item.blocks)
        else:
            dataset = item.dataset
            try:
                blocks = site_blocks[dataset]
            except KeyError:
                blocks = site_blocks[dataset] = set()

            blocks.add(item)

    ## deal with new requests

    self.request_manager.lock()

    new_requests = self.request_manager.get_requests(statuses=[Request.ST_NEW])

    def reject(request, reason):
        request.status = Request.ST_REJECTED
        request.reject_reason = reason
        self.request_manager.update_request(request)

    for request in new_requests.itervalues():
        try:
            group = inventory.groups[request.group]
        except KeyError:
            reject(request, 'Invalid group name %s' % request.group)
            continue

        invalid_items = []
        datasets = request.find_items(inventory, invalid_items)
        sites = filter(lambda s: s in policy.target_sites, request.find_sites(inventory))

        if len(invalid_items) != 0:
            reject(request, 'Invalid item names: [%s]' % ','.join(invalid_items))
            continue

        if len(sites) == 0:
            reject(request, 'Target sites not available for transfers')
            continue

        # convert to DealerRequests

        proto_dealer_requests = []

        # process the items list
        for dataset, blocks in datasets.iteritems():
            if blocks is None:
                if dataset.size > self.max_size:
                    reject(request, 'Dataset %s is too large (>%.0f TB)' % (dataset.name, self.max_size * 1.e-12))
                    break

                item = dataset
            else:
                total_size = sum(b.size for b in blocks)
                if total_size > self.max_size:
                    reject(request, 'Request size for %s too large (>%.0f TB)' % (dataset.name, self.max_size * 1.e-12))
                    break

                if total_size > float(dataset.size) * self.block_request_max:
                    # if the total size of requested blocks is large enough, just copy the dataset
                    # covers the case where we actually have the full list of blocks (if block_request_max is less than 1)
                    item = dataset
                else:
                    item = list(blocks)

            proto_dealer_requests.append(DealerRequest(item, group=group))

        if request.status == Request.ST_REJECTED:
            continue

        # list of (item, site) to be activated (not necessarily proposed to dealer - there can be another request for the same item-site)
        activation_list = []
        # list of dealer proposals
        new_dealer_requests = []

        # find destinations (request.n times) for each item
        for proto_request in proto_dealer_requests:
            # try to make a dealer request for all requests, except when there is a full copy of the item

            if request.n == 0:
                # make one copy at each site
                for destination in sites:
                    dealer_request = DealerRequest(proto_request.item(), destination=destination)

                    if dealer_request.item_already_exists() == 2:
                        # nothing to do for this one
                        continue

                    rejection_reason = policy.check_destination(dealer_request, partition)
                    if rejection_reason is not None:
                        reject(request, 'Cannot copy %s to %s' % (dealer_request.item_name(), destination.name))
                        break

                    new_dealer_requests.append(dealer_request)

                if request.status == Request.ST_REJECTED:
                    break

            else:
                # total of n copies
                candidate_sites = []
                num_new = request.n

                # bring sites where the item already exists first (may want to just "flip" the ownership)
                sites_and_existence = []
                for destination in sites:
                    exists = proto_request.item_already_exists(destination) # 0, 1, or 2
                    if exists != 0:
                        sites_and_existence.insert(0, (destination, exists))
                    else:
                        sites_and_existence.append((destination, exists))

                for destination, exists in sites_and_existence:
                    if num_new == 0:
                        break

                    dealer_request = DealerRequest(proto_request.item(), destination=destination)

                    # copies proposed by other requests -> just activate
                    try:
                        proposed_blocks = blocks_to_propose[destination][dealer_request.dataset]
                    except KeyError:
                        pass
                    else:
                        if dealer_request.blocks is not None:
                            if set(dealer_request.blocks) <= proposed_blocks:
                                num_new -= 1
                                for block in dealer_request.blocks:
                                    activation_list.append((block.full_name(), dealer_request.destination.name, now))

                                continue
                        else:
                            if dealer_request.dataset.blocks == proposed_blocks:
                                num_new -= 1
                                activation_list.append((dealer_request.item_name(), dealer_request.destination.name, now))
                                continue

                    # if the item already exists, it's a complete copy - don't activate, don't propose
                    if exists == 2:
                        num_new -= 1
                    elif exists == 1:
                        # if the current group can be overwritten, make a request
                        # otherwise skip
                        single_owner = dealer_request.item_owned_by() # None if owned by multiple groups
                        if single_owner is not None and single_owner in overwritten_groups:
                            new_dealer_requests.append(dealer_request)
                            num_new -= 1
                    else:
                        candidate_sites.append(destination)

                for icopy in range(num_new):
                    dealer_request = DealerRequest(proto_request.item())

                    # pick a destination randomly (weighted by available space)
                    policy.find_destination_for(dealer_request, partition, candidates=candidate_sites)

                    if dealer_request.destination is None:
                        # if any of the item cannot find any of the num_new destinations, reject the request
                        reject(request, 'Destination %d for %s not available' % (icopy, dealer_request.item_name()))
                        break

                    candidate_sites.remove(dealer_request.destination)

                    new_dealer_requests.append(dealer_request)

            # if request.n == 0, else

            if request.status == Request.ST_REJECTED:
                break

        # for each item in request

        if request.status == Request.ST_REJECTED:
            continue

        if len(new_dealer_requests) == 0 and len(activation_list) == 0:
            # nothing to do
            request.status = Request.ST_COMPLETED
            self.request_manager.update_request(request)
            continue

        # finally add to the returned requests
        for dealer_request in new_dealer_requests:
            try:
                site_blocks = blocks_to_propose[dealer_request.destination]
            except KeyError:
                site_blocks = blocks_to_propose[dealer_request.destination] = {}

            if dealer_request.blocks is not None:
                try:
                    blocks = site_blocks[dealer_request.dataset]
                except KeyError:
                    blocks = site_blocks[dealer_request.dataset] = set()

                blocks.update(dealer_request.blocks)

                for block in dealer_request.blocks:
                    activation_list.append((block.full_name(), dealer_request.destination.name, now))
            else:
                site_blocks[dealer_request.dataset] = set(dealer_request.dataset.blocks)
                activation_list.append((dealer_request.item_name(), dealer_request.destination.name, now))

        # create actions and set request status to ACTIVATED
        request.activate(activation_list)
        self.request_manager.update_request(request)
        self.activated_requests.append(request)

    self.request_manager.unlock()

    # throw away all the DealerRequest objects we've been using and form the final proposal
    dealer_requests = []

    for site, block_list in blocks_to_propose.iteritems():
        for dataset, blocks in block_list.iteritems():
            if blocks == dataset.blocks:
                dealer_requests.append(DealerRequest(dataset, destination=site))
            else:
                dealer_requests.append(DealerRequest(list(blocks), destination=site))

    return dealer_requests
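
# Illustrative sketch (not part of the original module): blocks_to_propose in get_requests
# above is a plain nested mapping {site: {dataset: set of blocks}}. A dataset-level item
# claims every block of the dataset, while a block-level item only adds itself to the
# per-dataset set. The helper name and its arguments are hypothetical.
def add_to_proposal(blocks_to_propose, site, dataset, blocks=None, dataset_blocks=()):
    site_blocks = blocks_to_propose.setdefault(site, {})
    if blocks is None:
        # dataset-level proposal: claim all blocks of the dataset
        site_blocks[dataset] = set(dataset_blocks)
    else:
        # block-level proposal: accumulate into the existing set
        site_blocks.setdefault(dataset, set()).update(blocks)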
def get_list(self, inventory):
    all_locks = [] # [(item, site)]

    for source, content_type, site_pattern, lock_url in self._sources.itervalues():
        if lock_url is not None and isinstance(lock_url, basestring):
            # check that the lock files themselves are not locked
            while True:
                # Hacky but this is temporary anyway
                opener = urllib2.build_opener(webservice.HTTPSCertKeyHandler(Configuration()))
                opener.addheaders.append(('Accept', 'application/json'))
                request = urllib2.Request(lock_url)
                try:
                    opener.open(request)
                except urllib2.HTTPError as err:
                    if err.code == 404:
                        # file not found -> no lock
                        break
                    else:
                        raise

                LOG.info('Lock files are being produced. Waiting 60 seconds.')
                time.sleep(60)

        elif not isinstance(lock_url, basestring):
            # lock_url is a tuple of Oracle db queries (a,b): a - checking for lock of locks, b - locks themselves
            # source is automatically an OracleService
            try:
                locked = True
                while locked:
                    locked = False
                    locks = source.make_request(lock_url[0].replace('`', '"'))
                    for lock in locks:
                        if lock:
                            locked = True
                            break

                    if not locked:
                        break

                    LOG.info('Locks are being produced. Waiting 60 seconds.')
                    time.sleep(60)
            except:
                e = sys.exc_info()[0]
                LOG.error(e)

        if site_pattern is None:
            site_re = None
        else:
            site_re = re.compile(fnmatch.translate(site_pattern))

        LOG.info('Retrieving lock information from %s', source)

        try:
            data = source.make_request()
            LOG.info('This was a standard web REST API request')
        except TypeError:
            # OracleService expects a query text
            data = source.make_request(lock_url[1].replace('`', '"'))
            LOG.info('This was an Oracle DB request')

        if content_type == WebReplicaLock.LIST_OF_DATASETS:
            # simple list of datasets
            for dataset_name in data:
                if dataset_name is None:
                    LOG.debug('Dataset name None found in %s', source)
                    continue

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    LOG.debug('Unknown dataset %s in %s', dataset_name, source)
                    continue

                if site_re is not None:
                    for replica in dataset.replicas:
                        if not site_re.match(replica.site.name):
                            continue

                        all_locks.append((dataset, replica.site))
                else:
                    all_locks.append((dataset, None))

        elif content_type == WebReplicaLock.CMSWEB_LIST_OF_DATASETS:
            # data['result'] -> simple list of datasets
            for dataset_name in data['result']:
                if dataset_name is None:
                    LOG.debug('Dataset name None found in %s', source.url_base)
                    continue

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    LOG.debug('Unknown dataset %s in %s', dataset_name, source.url_base)
                    continue

                if site_re is not None:
                    for replica in dataset.replicas:
                        if not site_re.match(replica.site.name):
                            continue

                        all_locks.append((dataset, replica.site))
                else:
                    all_locks.append((dataset, None))

        elif content_type == WebReplicaLock.CMSWEB_LIST_OF_PARENT_DATASETS:
            # data['result'][0]['parentlocks'] -> simple list of datasets
            for dataset_name in data['result'][0]['parentlocks']:
                if dataset_name is None:
                    LOG.debug('Dataset name None found in %s', source.url_base)
                    continue

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    LOG.debug('Unknown dataset %s in %s', dataset_name, source.url_base)
                    continue

                if site_re is not None:
                    for replica in dataset.replicas:
                        if not site_re.match(replica.site.name):
                            continue

                        all_locks.append((dataset, replica.site))
                else:
                    all_locks.append((dataset, None))

        elif content_type == WebReplicaLock.SITE_TO_DATASETS:
            # data = {site: {dataset: info}}
            for site_name, objects in data.items():
                try:
                    site = inventory.sites[site_name]
                except KeyError:
                    LOG.debug('Unknown site %s in %s', site_name, source.url_base)
                    continue

                for object_name, info in objects.items():
                    if not info['lock']:
                        LOG.debug('Object %s is not locked at %s', object_name, site_name)
                        continue

                    try:
                        dataset_name, block_name = Block.from_full_name(object_name)
                    except ObjectError:
                        dataset_name, block_name = object_name, None

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name, source.url_base)
                        continue

                    replica = site.find_dataset_replica(dataset)
                    if replica is None:
                        LOG.debug('Replica of %s is not at %s in %s', dataset_name, site_name, source.url_base)
                        continue

                    if block_name is None:
                        all_locks.append((dataset, site))
                    else:
                        block_replica = replica.find_block_replica(block_name)
                        if block_replica is None:
                            LOG.debug('Unknown block %s in %s', object_name, source.url_base)
                            continue

                        all_locks.append((block_replica.block, site))

    return all_locks
def get_replicas(self, site=None, dataset=None, block=None): #override
    if site is None:
        site_check = self.check_allowed_site
    else:
        site_check = None
        if not self.check_allowed_site(site):
            return []

    if dataset is None and block is None:
        dataset_check = self.check_allowed_dataset
    else:
        dataset_check = None
        if dataset is not None:
            if not self.check_allowed_dataset(dataset):
                return []

        if block is not None:
            if not self.check_allowed_dataset(block[:block.find('#')]):
                return []

    options = []
    if site is not None:
        options.append('node=' + site)
    if dataset is not None:
        options.append('dataset=' + dataset)
    if block is not None:
        options.append('block=' + block)

    LOG.info('get_replicas(' + ','.join(options) + ') Fetching the list of replicas from PhEDEx')

    if len(options) == 0:
        return []

    block_entries = self._phedex.make_request('blockreplicas', options, timeout=7200)

    parallelizer = Map()
    parallelizer.timeout = 7200

    # Automatically starts a thread as we add the output of block_entries
    combine_file = parallelizer.get_starter(self._combine_file_info)

    for block_entry in block_entries:
        for replica_entry in block_entry['replica']:
            if replica_entry['complete'] == 'n':
                break
        else:
            continue

        # there is at least one incomplete replica
        try:
            dataset_name, block_name = Block.from_full_name(block_entry['name'])
        except ObjectError: # invalid name
            continue

        if dataset_check and not dataset_check(dataset_name):
            continue

        combine_file.add_input(block_entry)

    combine_file.close()

    # _combine_file_info alters block_entries directly - no need to deal with output
    combine_file.get_outputs()

    block_replicas = PhEDExReplicaInfoSource.make_block_replicas(block_entries, PhEDExReplicaInfoSource.maker_blockreplicas, site_check=site_check, dataset_check=dataset_check)

    # Also use subscriptions call which has a lower latency than blockreplicas
    # For example, group change on a block replica at time T may not show up in blockreplicas until up to T + 15 minutes
    # while in subscriptions it is visible within a few seconds
    # But subscriptions call without a dataset or block takes too long
    if dataset is None and block is None:
        return block_replicas

    indexed = collections.defaultdict(dict)
    for replica in block_replicas:
        indexed[(replica.site.name, replica.block.dataset.name)][replica.block.name] = replica

    dataset_entries = self._phedex.make_request('subscriptions', options, timeout=3600)

    for dataset_entry in dataset_entries:
        dataset_name = dataset_entry['name']

        if not self.check_allowed_dataset(dataset_name):
            continue

        try:
            subscriptions = dataset_entry['subscription']
        except KeyError:
            pass
        else:
            for sub_entry in subscriptions:
                site_name = sub_entry['node']

                if not self.check_allowed_site(site_name):
                    continue

                replicas = indexed[(site_name, dataset_name)]

                for replica in replicas.itervalues():
                    replica.group = Group(sub_entry['group'])
                    replica.is_custodial = (sub_entry['custodial'] == 'y')

        try:
            block_entries = dataset_entry['block']
        except KeyError:
            pass
        else:
            for block_entry in block_entries:
                try:
                    _, block_name = Block.from_full_name(block_entry['name'])
                except ObjectError:
                    continue

                try:
                    subscriptions = block_entry['subscription']
                except KeyError:
                    continue

                for sub_entry in subscriptions:
                    site_name = sub_entry['node']

                    if not self.check_allowed_site(site_name):
                        continue

                    try:
                        replica = indexed[(site_name, dataset_name)][block_name]
                    except KeyError:
                        continue

                    replica.group = Group(sub_entry['group'])

                    if sub_entry['node_bytes'] == block_entry['bytes']:
                        # complete
                        replica.size = sub_entry['node_bytes']
                        if replica.size is None:
                            replica.size = 0
                        replica.files = None
                    else:
                        # incomplete - since we cannot know what files are there, we'll just have to pretend there is none
                        replica.size = 0
                        replica.files = tuple()

                    replica.is_custodial = (sub_entry['custodial'] == 'y')

                    if sub_entry['time_update'] is not None:
                        replica.last_update = int(sub_entry['time_update'])
                    else:
                        replica.last_update = 0

    return block_replicas
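
# Illustrative sketch (not part of the original module): get_replicas above cross-references
# the lower-latency 'subscriptions' data against the 'blockreplicas' result by indexing block
# replicas as (site name, dataset name) -> {block name: replica}. The helper below repeats
# that indexing step in isolation; the function name is hypothetical.
import collections

def index_block_replicas(block_replicas):
    indexed = collections.defaultdict(dict)
    for replica in block_replicas:
        indexed[(replica.site.name, replica.block.dataset.name)][replica.block.name] = replica
    return indexed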
def get_updated_replicas(self, updated_since, inventory): #override
    LOG.info('get_updated_replicas(%d) Fetching the list of replicas from PhEDEx', updated_since)

    nodes = []
    for entry in self._phedex.make_request('nodes', timeout=600):
        if not self.check_allowed_site(entry['name']):
            continue

        if entry['name'] not in inventory.sites:
            continue

        nodes.append(entry['name'])

    try:
        tmpconfig = Configuration(self._parallelizer_config.get('parallel', None))
    except Exception as e:
        LOG.error(str(e))
        tmpconfig = Configuration()

    parallelizer = Map(tmpconfig)
    parallelizer.timeout = 5400

    def get_node_replicas(node):
        options = ['update_since=%d' % updated_since, 'node=%s' % node]
        results = self._phedex.make_request('blockreplicas', options)
        return node, results

    # Use async to fire threads on demand
    node_results = parallelizer.execute(get_node_replicas, nodes, async=True)

    # Automatically starts a thread as we add the output of block_replicas
    combine_file = parallelizer.get_starter(self._combine_file_info)

    all_block_entries = []

    for node, block_entries in node_results:
        site = inventory.sites[node]

        for block_entry in block_entries:
            all_block_entries.append(block_entry)

            replica_entry = block_entry['replica'][0]

            if replica_entry['complete'] == 'y':
                continue

            # incomplete block replica - should we fetch file info?
            try:
                dataset_name, block_name = Block.from_full_name(block_entry['name'])
            except ObjectError:
                pass
            else:
                try:
                    dataset = inventory.datasets[dataset_name]
                    block = dataset.find_block(block_name)
                    replica = block.find_replica(site)
                    if replica.file_ids is None:
                        num_files = block.num_files
                    else:
                        num_files = len(replica.file_ids)

                    if replica.size == replica_entry['bytes'] and num_files == replica_entry['files']:
                        # no we don't have to
                        continue
                except:
                    # At any point of the above lookups we may hit a None object or KeyError or what not
                    pass

            LOG.debug('Replica %s:%s is incomplete. Fetching file information.', replica_entry['node'], block_entry['name'])
            combine_file.add_input(block_entry)

    combine_file.close()

    # _combine_file_info alters block_entries directly - no need to deal with output
    combine_file.get_outputs()

    LOG.info('get_updated_replicas(%d) Got outputs' % updated_since)

    return PhEDExReplicaInfoSource.make_block_replicas(all_block_entries, PhEDExReplicaInfoSource.maker_blockreplicas, dataset_check=self.check_allowed_dataset)
def run(self, caller, request, inventory):
    if caller.name not in self.authorized_users:
        raise AuthorizationError()

    try:
        item = request['item']
    except KeyError:
        raise MissingParameter('item')

    if type(item) is list:
        items = item
    else:
        items = [item]

    invalidated_items = []

    sql = 'INSERT INTO `invalidations` (`item`, `db`, `user_id`, `timestamp`) VALUES (%s, %s, %s, NOW())'

    for item in items:
        invalidated = False

        if item in inventory.datasets:
            # item is a dataset
            result = self.dbs.make_request('datasets', ['dataset=' + item, 'dataset_access_type=*', 'detail=true'])
            if len(result) != 0:
                status = result[0]['dataset_access_type']
                if status in ('VALID', 'PRODUCTION'):
                    self.registry.db.query(sql, item, 'dbs', caller.id)
                    for entry in self.dbs.make_request('files', ['dataset=' + item, 'validFileOnly=1']):
                        self.registry.db.query(sql, entry['logical_file_name'], 'dbs', caller.id)

                    invalidated = True

            result = self.phedex.make_request('data', ['dataset=' + item, 'level=block'])
            if len(result) != 0:
                self.registry.db.query(sql, item, 'tmdb', caller.id)
                invalidated = True
        else:
            try:
                dataset_name, block_name = Block.from_full_name(item)
            except:
                lfile = inventory.find_file(item)
                if lfile is not None:
                    # item is a file
                    result = self.dbs.make_request('files', ['logical_file_name=' + item, 'validFileOnly=1'])
                    if len(result) != 0:
                        self.registry.db.query(sql, result[0]['logical_file_name'], 'dbs', caller.id)
                        invalidated = True

                    result = self.phedex.make_request('data', ['file=' + item])
                    if len(result) != 0:
                        self.registry.db.query(sql, item, 'tmdb', caller.id)
                        invalidated = True
            else:
                # item is a block
                for entry in self.dbs.make_request('files', ['block_name=' + item, 'validFileOnly=1']):
                    self.registry.db.query(sql, entry['logical_file_name'], 'dbs', caller.id)
                    invalidated = True

                result = self.phedex.make_request('data', ['block=' + item, 'level=block'])
                if len(result) != 0:
                    self.registry.db.query(sql, item, 'tmdb', caller.id)
                    invalidated = True

        if invalidated:
            invalidated_items.append({'item': item})

    return invalidated_items
def load(self, inventory):
    for dataset in inventory.datasets.itervalues():
        try:
            dataset.attr.pop('locked_blocks')
        except KeyError:
            pass

    if len(self.users) != 0:
        entries = self._mysql.select_many('detox_locks', ('item', 'sites', 'groups'), ('user_id', 'role_id'), self.users)
    else:
        query = 'SELECT `item`, `sites`, `groups` FROM `detox_locks`'
        entries = self._mysql.query(query)

    for item_name, sites_pattern, groups_pattern in entries:
        # wildcard not allowed in block name
        try:
            dataset_pattern, block_name = Block.from_full_name(item_name)
        except ObjectError:
            dataset_pattern, block_name = item_name, None

        if '*' in dataset_pattern:
            pat_exp = re.compile(fnmatch.translate(dataset_pattern))

            datasets = []
            for dataset in inventory.datasets.values():
                # this is highly inefficient but I can't think of a better way
                if pat_exp.match(dataset.name):
                    datasets.append(dataset)
        else:
            try:
                dataset = inventory.datasets[dataset_pattern]
            except KeyError:
                LOG.debug('Cannot lock unknown dataset %s', dataset_pattern)
                continue

            datasets = [dataset]

        specified_sites = []
        if sites_pattern:
            if sites_pattern == '*':
                pass
            elif '*' in sites_pattern:
                pat_exp = re.compile(fnmatch.translate(sites_pattern))
                specified_sites.extend(s for n, s in inventory.sites.iteritems() if pat_exp.match(n))
            else:
                try:
                    specified_sites.append(inventory.sites[sites_pattern])
                except KeyError:
                    pass

        specified_groups = []
        if groups_pattern:
            if groups_pattern == '*':
                pass
            elif '*' in groups_pattern:
                pat_exp = re.compile(fnmatch.translate(groups_pattern))
                specified_groups.extend(g for n, g in inventory.groups.iteritems() if pat_exp.match(n))
            else:
                try:
                    specified_groups.append(inventory.groups[groups_pattern])
                except KeyError:
                    pass

        for dataset in datasets:
            sites = set(specified_sites)
            groups = set(specified_groups)

            if len(sites) == 0:
                # either sites_pattern was not given (global lock) or no sites matched (typo?)
                # we will treat this as a global lock
                sites.update(r.site for r in dataset.replicas)

            if len(groups) == 0:
                # if no group matches the pattern, we will be on the safe side and treat it as a global lock
                for replica in dataset.replicas:
                    groups.update(brep.group for brep in replica.block_replicas)

            try:
                locked_blocks = dataset.attr['locked_blocks']
            except KeyError:
                locked_blocks = dataset.attr['locked_blocks'] = {}

            if block_name is None:
                for replica in dataset.replicas:
                    if replica.site not in sites:
                        continue

                    if replica.site not in locked_blocks:
                        locked_blocks[replica.site] = set()

                    for block_replica in replica.block_replicas:
                        if block_replica.group not in groups:
                            continue

                        locked_blocks[replica.site].add(block_replica.block)
            else:
                block = dataset.find_block(block_name)
                if block is None:
                    LOG.debug('Cannot lock unknown block %s', block_name)
                    continue

                for replica in block.replicas:
                    if replica.site not in sites:
                        continue

                    if replica.group not in groups:
                        continue

                    if replica.site not in locked_blocks:
                        locked_blocks[replica.site] = set([block])
                    else:
                        locked_blocks[replica.site].add(block)

    for dataset in inventory.datasets.itervalues():
        try:
            locked_blocks = dataset.attr['locked_blocks']
        except KeyError:
            continue

        for site, blocks in locked_blocks.items():
            if blocks is None:
                continue

            # if all blocks are locked, set to None (dataset-level lock)
            if blocks == dataset.blocks:
                locked_blocks[site] = None

    LOG.info('Locked %d items.', len(entries))