def make_request(self, resource='', options=[], method=GET, format='url', retry_on_error=True, timeout=0): #override
    """Strip the "header" and return the body JSON."""

    response = RESTService.make_request(self, resource, options=options, method=method, format=format, retry_on_error=retry_on_error, timeout=timeout)

    try:
        result = response['csvdata']
    except KeyError:
        LOG.error(response)
        return None

    return result
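# Illustrative only: the envelope stripped above is assumed to be a
# single-key JSON document of roughly this shape (field names hypothetical):
#
#   {'csvdata': [{'name': ..., 'value': ...}, ...]}
#
# Callers therefore receive just the list under 'csvdata', or None on error.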
def make_request(self, resource='', options=[], method=GET, format='url', retry_on_error=True, timeout=0): #override
    """Pass the request through to RESTService.make_request and return the response unmodified."""

    return RESTService.make_request(self, resource, options=options, method=method, format=format, retry_on_error=retry_on_error, timeout=timeout)
def make_request(self, resource='', options=[], method=GET, format='url', retry_on_error=True, timeout=1800): #override
    """Strip the "phedex" header and return the body JSON."""

    LOG.debug('%s %s', resource, options)

    response = RESTService.make_request(self, resource, options=options, method=method, format=format, retry_on_error=retry_on_error, timeout=timeout)

    try:
        result = response['phedex']
    except KeyError:
        LOG.error(response)
        return None

    # drop the request metadata fields; only the payload should remain
    for metadata in ['request_timestamp', 'instance', 'request_url', 'request_version', 'request_call', 'call_time', 'request_date']:
        result.pop(metadata, None)

    if LOG.getEffectiveLevel() == logging.DEBUG:
        res_str = pprint.pformat(result)
        if len(res_str) > 100:
            res_str = res_str[:99] + '..\n' + res_str[-1]
        LOG.debug(res_str)

    # the only item left in the result should be the body; fetch its key
    # without assuming dict.keys() returns a list (works in Python 2 and 3)
    key = next(iter(result))
    body = result[key]

    return body
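# For reference, a PhEDEx data service response is assumed to arrive in an
# envelope of roughly this shape (abridged; the API-specific payload key
# varies by call):
#
#   {'phedex': {'request_timestamp': ..., 'instance': ..., 'request_url': ...,
#               'request_version': ..., 'request_call': ..., 'call_time': ...,
#               'request_date': ..., '<api>': <body>}}
#
# Once the seven metadata fields are popped, the single remaining key maps to
# the body that the override above returns.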
class PhEDExDatasetInfoSource(DatasetInfoSource):
    """DatasetInfoSource using PhEDEx and DBS."""

    def __init__(self, config):
        DatasetInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.phedex)
        self._dbs = RESTService(config.dbs)

    def get_dataset_names(self, include=['*'], exclude=[]):
        dataset_names = []

        exclude_exps = []
        for pattern in exclude:
            exclude_exps.append(re.compile(fnmatch.translate(pattern)))

        def add_datasets(result):
            for entry in result:
                name = entry['dataset']
                for ex_exp in exclude_exps:
                    if ex_exp.match(name):
                        break
                else:
                    # not excluded
                    dataset_names.append(name)

        if len(include) == 1 and include[0] == '/*/*/*':
            # all datasets requested - do this efficiently by querying per acquisition era
            result = self._dbs.make_request('acquisitioneras')
            sds = [entry['acquisition_era_name'] for entry in result]

            # query DBS in parallel
            args = [('datasets', ['acquisition_era_name=' + sd]) for sd in sds]
            results = Map().execute(self._dbs.make_request, args)
            for result in results:
                add_datasets(result)

            # everything is collected already; skip the per-pattern queries below
            return dataset_names

        for in_pattern in include:
            result = self._dbs.make_request('datasets', ['dataset=' + in_pattern])
            add_datasets(result)

        return dataset_names

    def get_updated_datasets(self, updated_since): #override
        LOG.warning('PhEDExDatasetInfoSource can only return a list of datasets and blocks that were created since the given timestamp.')

        # query over all datasets (wildcard) at block level
        result = self._phedex.make_request('data', ['dataset=/*/*/*', 'level=block', 'create_since=%d' % updated_since])

        if not result or 'dataset' not in result[0]:
            return []

        updated_datasets = []

        for dataset_entry in result[0]['dataset']:
            dataset = self._create_dataset(dataset_entry)
            updated_datasets.append(dataset)

        return updated_datasets

    def get_dataset(self, name, with_files=False): #override
        ## Get the full dataset-block-file data from PhEDEx
        if with_files:
            level = 'file'
        else:
            level = 'block'

        result = self._phedex.make_request('data', ['dataset=' + name, 'level=' + level])
        try:
            dataset_entry = result[0]['dataset'][0]
        except (TypeError, IndexError, KeyError):
            return None

        ## Create the dataset object
        dataset = self._create_dataset(dataset_entry)

        ## Fill block and file data
        if 'block' in dataset_entry:
            for block_entry in dataset_entry['block']:
                block = self._create_block(block_entry, dataset)
                dataset.blocks.add(block)

                # size and num_files are left 0 in _create_dataset (PhEDEx does not tell)
                dataset.size += block.size
                dataset.num_files += block.num_files

                if with_files and 'file' in block_entry:
                    # See comments in get_block
                    block._files = set()
                    for file_entry in block_entry['file']:
                        block._files.add(self._create_file(file_entry, block))

        return dataset

    def get_block(self, name, dataset=None, with_files=False): #override
        ## Get the full block-file data from PhEDEx
        if with_files:
            level = 'file'
        else:
            level = 'block'

        result = self._phedex.make_request('data', ['block=' + name, 'level=' + level])
        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
        except (TypeError, IndexError, KeyError):
            return None

        if dataset is None:
            link_dataset = False
            # Just need a named object
            dataset = Dataset(dataset_entry['name'])
        else:
            link_dataset = True
            if dataset.name != dataset_entry['name']:
                raise IntegrityError('Inconsistent dataset %s passed to get_block(%s)' % (dataset.name, name))

        block = self._create_block(block_entry, dataset)

        if with_files and 'file' in block_entry:
            # _create_block sets size and num_files; just need to update the files list.
            # Directly creating the _files set. This set will persist (unlike the weak
            # proxy version loaded from inventory), but the block returned from this
            # function is only used temporarily anyway.
            block._files = set()
            for file_entry in block_entry['file']:
                block._files.add(self._create_file(file_entry, block))

        if link_dataset:
            existing = dataset.find_block(block.name)
            if existing is None:
                dataset.blocks.add(block)
                dataset.size += block.size
                dataset.num_files += block.num_files
            else:
                dataset.blocks.remove(existing)
                dataset.size += block.size - existing.size
                dataset.num_files += block.num_files - existing.num_files

        return block

    def get_file(self, name, block=None):
        ## Get the file data from PhEDEx
        result = self._phedex.make_request('data', ['file=' + name, 'level=file'])
        try:
            dataset_entry = result[0]['dataset'][0]
            block_entry = dataset_entry['block'][0]
            file_entry = block_entry['file'][0]
        except (TypeError, IndexError, KeyError):
            return None

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        if block is None:
            link_block = False
            # Just need named objects
            dataset = Dataset(dataset_entry['name'])
            block = Block(block_name, dataset)
        else:
            link_block = True
            if block.name != block_name:
                raise IntegrityError('Inconsistent block %s passed to get_file(%s)' % (block.full_name(), name))

        lfile = self._create_file(file_entry, block)

        if link_block:
            # Caution - by adding this file we edit the block properties too
            existing = block.find_file(lfile.fid())
            if existing is None:
                block.add_file(lfile)
            else:
                block.remove_file(existing)
                block.add_file(lfile)

        return lfile

    def get_files(self, dataset_or_block): #override
        files = set()

        if type(dataset_or_block) is Dataset:
            result = self._phedex.make_request('data', ['dataset=' + dataset_or_block.name, 'level=file'])
            blocks = dict((b.name, b) for b in dataset_or_block.blocks)
        else:
            result = self._phedex.make_request('data', ['block=' + dataset_or_block.full_name(), 'level=file'])
            blocks = {dataset_or_block.name: dataset_or_block}

        try:
            block_entries = result[0]['dataset'][0]['block']
        except (TypeError, IndexError, KeyError):
            return files

        for block_entry in block_entries:
            try:
                file_entries = block_entry['file']
            except KeyError:
                continue

            bname = block_entry['name']
            block_name = Block.to_internal_name(bname[bname.find('#') + 1:])
            try:
                block = blocks[block_name]
            except KeyError:
                # unknown block! maybe should raise?
                continue

            for file_entry in file_entries:
                files.add(self._create_file(file_entry, block))

        return files

    def _create_dataset(self, dataset_entry):
        """
        Create a dataset object with blocks and files from a PhEDEx dataset entry.
        """

        dataset = Dataset(dataset_entry['name'], is_open=(dataset_entry['is_open'] == 'y'))

        if 'time_update' in dataset_entry and dataset_entry['time_update'] is not None:
            dataset.last_update = int(dataset_entry['time_update'])
        else:
            dataset.last_update = int(dataset_entry['time_create'])

        ## Get other details of the dataset from DBS
        self._fill_dataset_details(dataset)

        return dataset

    def _create_block(self, block_entry, dataset):
        """
        Create a block object with files from a PhEDEx block entry.
        """

        bname = block_entry['name']
        block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

        block = Block(
            block_name,
            dataset,
            size=block_entry['bytes'],
            num_files=block_entry['files'],
            is_open=(block_entry['is_open'] == 'y')
        )

        if 'time_update' in block_entry and block_entry['time_update'] is not None:
            block.last_update = int(block_entry['time_update'])
        else:
            block.last_update = int(block_entry['time_create'])

        return block

    def _create_file(self, file_entry, block):
        lfile = File(file_entry['lfn'], block=block, size=file_entry['size'])
        return lfile

    def _fill_dataset_details(self, dataset):
        # 1. status and PD type
        result = self._dbs.make_request('datasets', ['dataset=' + dataset.name, 'dataset_access_type=*', 'detail=True'])

        if len(result) != 0:
            dbs_entry = result[0]
            dataset.status = Dataset.status_val(dbs_entry['dataset_access_type'])
            dataset.data_type = Dataset.data_type_val(dbs_entry['primary_ds_type'])
        else:
            dataset.status = Dataset.STAT_UNKNOWN
            dataset.data_type = Dataset.TYPE_UNKNOWN

        # 2. software version
        result = self._dbs.make_request('releaseversions', ['dataset=' + dataset.name])
        if len(result) != 0:
            try:
                version = result[0]['release_version'][0]
            except KeyError:
                pass
            else:
                matches = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)', version)
                if matches:
                    # e.g. 'CMSSW_10_2_5_patch1' -> (10, 2, 5, 'patch1')
                    cycle, major, minor = map(int, [matches.group(i) for i in range(1, 4)])

                    if matches.group(4):
                        suffix = matches.group(4)[1:]
                    else:
                        suffix = ''

                    dataset.software_version = (cycle, major, minor, suffix)
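# Minimal usage sketch (hypothetical: assumes a 'config' object carrying
# 'phedex' and 'dbs' sections as __init__ requires; the patterns are examples):
#
#   source = PhEDExDatasetInfoSource(config)
#   names = source.get_dataset_names(include=['/*/*/*'], exclude=['/*/*/RAW'])
#   if names:
#       dataset = source.get_dataset(names[0], with_files=True)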