def run(self, caller, request, inventory):
    datasets = []

    # collect information from the inventory and registry according to the requests
    if 'dataset' in request:
        match_name = request['dataset']
        if '*' in match_name:
            pattern = re.compile(fnmatch.translate(match_name))
            for name in inventory.datasets.iterkeys():
                if pattern.match(name):
                    datasets.append(inventory.datasets[name])
        else:
            try:
                datasets.append(inventory.datasets[match_name])
            except KeyError:
                pass

    response = []
    for dataset in datasets:
        response.append({'name': dataset.name, 'size': dataset.size, 'num_files': dataset.num_files,
                         'status': Dataset.status_name(dataset.status),
                         'type': Dataset.data_type_name(dataset.data_type)})

    # return any JSONizable python object (maybe should be limited to a list)
    return response
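# A minimal usage sketch for the handler above; the sample dataset names are
# made up for illustration, and only the request keys and attribute names
# match what `run` actually reads. `inventory.datasets` maps full dataset
# names to Dataset objects.
#
#   request = {'dataset': '/SingleMuon/*/MINIAOD'}        # fnmatch pattern branch
#   request = {'dataset': '/SingleMuon/Run2016B/MINIAOD'} # exact dict lookup
#
# Either way the return value is a JSON-serializable list of dicts with the
# keys 'name', 'size', 'num_files', 'status', and 'type'.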
def _load_datasets(self, inventory, dataset_names, id_dataset_map):
    sql = 'SELECT d.`id`, d.`name`, d.`size`, d.`num_files`, d.`status`+0, d.`data_type`+0,'
    sql += ' s.`cycle`, s.`major`, s.`minor`, s.`suffix`, UNIX_TIMESTAMP(d.`last_update`), d.`is_open`'
    sql += ' FROM `datasets` AS d'
    sql += ' LEFT JOIN `software_versions` AS s ON s.`id` = d.`software_version_id`'

    if dataset_names is not None:
        # first dump the dataset ids into a temporary table, then constrain the original table
        self._mysql.query('CREATE TABLE `datasets_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))')
        sqlbase = 'INSERT INTO `datasets_load_tmp` SELECT `id` FROM `datasets`'
        self._mysql.execute_many(sqlbase, 'name', dataset_names)

        sql += ' INNER JOIN `datasets_load_tmp` AS t ON t.`id` = d.`id`'

    for dataset_id, name, size, num_files, status, data_type, sw_cycle, sw_major, sw_minor, sw_suffix, last_update, is_open in self._mysql.xquery(sql):
        # size and num_files are reset when loading blocks
        dataset = Dataset(name, size=size, num_files=num_files, status=int(status), data_type=int(data_type),
                          last_update=last_update, is_open=(is_open == 1))

        if sw_cycle is None:
            dataset.software_version = None
        else:
            dataset.software_version = (sw_cycle, sw_major, sw_minor, sw_suffix)

        inventory.datasets[name] = dataset
        id_dataset_map[dataset_id] = dataset
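# A sketch of the temporary-table pattern above, under the assumption (not
# shown in this module) that `execute_many` expands `sqlbase` into chunked
# IN clauses over the given column, roughly:
#
#   INSERT INTO `datasets_load_tmp`
#       SELECT `id` FROM `datasets` WHERE `name` IN ('nameA', 'nameB', ...)
#
# The INNER JOIN against `datasets_load_tmp` then restricts the main SELECT
# to the requested datasets without interpolating one enormous IN list into
# `sql` itself.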
def get_block(self, name, dataset=None, with_files=False): #override
    ## Get the full block-file data from PhEDEx
    if with_files:
        level = 'file'
    else:
        level = 'block'

    result = self._phedex.make_request('data', ['block=' + name, 'level=' + level])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
    except (IndexError, KeyError):
        return None

    if dataset is None:
        link_dataset = False
        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
    else:
        link_dataset = True
        if dataset.name != dataset_entry['name']:
            raise IntegrityError('Inconsistent dataset %s passed to get_block(%s)' % (dataset.name, name))

    block = self._create_block(block_entry, dataset)

    if with_files and 'file' in block_entry:
        # _create_block sets size and num_files; just need to update the files list
        # Directly creating the _files set
        # This set will persist (unlike the weak proxy version loaded from inventory), but the returned block
        # from this function is only used temporarily anyway
        block._files = set()
        for file_entry in block_entry['file']:
            block._files.add(self._create_file(file_entry, block))

    if link_dataset:
        existing = dataset.find_block(block.name)
        if existing is None:
            dataset.blocks.add(block)
            dataset.size += block.size
            dataset.num_files += block.num_files
        else:
            dataset.blocks.remove(existing)
            # replace the stale copy with the freshly loaded block
            dataset.blocks.add(block)
            dataset.size += block.size - existing.size
            dataset.num_files += block.num_files - existing.num_files

    return block
def _fill_dataset_details(self, dataset, dbs_data=None):
    if dbs_data is None:
        dbs_data = {}

        if dataset.name.startswith('/') and dataset.name.count('/') == 3:
            dbs_data['datasets'] = self._dbs.make_request('datasets',
                ['dataset=' + dataset.name, 'dataset_access_type=*', 'detail=True'])
        else:
            dbs_data['datasets'] = []

        dbs_data['releaseversions'] = self._dbs.make_request('releaseversions', ['dataset=' + dataset.name])

    # 1. status and PD type
    if len(dbs_data['datasets']) != 0:
        dbs_entry = dbs_data['datasets'][0]
        dataset.status = Dataset.status_val(dbs_entry['dataset_access_type'])
        dataset.data_type = Dataset.data_type_val(dbs_entry['primary_ds_type'])
    else:
        dataset.status = Dataset.STAT_UNKNOWN
        dataset.data_type = Dataset.TYPE_UNKNOWN

    # 2. software version
    if len(dbs_data['releaseversions']) != 0:
        try:
            version = dbs_data['releaseversions'][0]['release_version'][0]
        except KeyError:
            pass
        else:
            matches = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)', version)
            if matches:
                cycle, major, minor = map(int, [matches.group(i) for i in range(1, 4)])

                if matches.group(4):
                    suffix = matches.group(4)[1:]
                else:
                    suffix = ''

                dataset.software_version = (cycle, major, minor, suffix)
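# Standalone, verifiable example of the CMSSW version parsing performed above.
import re

m = re.match('CMSSW_([0-9]+)_([0-9]+)_([0-9]+)(|_.*)', 'CMSSW_10_2_5_patch1')
cycle, major, minor = map(int, [m.group(i) for i in range(1, 4)])
suffix = m.group(4)[1:] if m.group(4) else ''
print((cycle, major, minor, suffix))  # (10, 2, 5, 'patch1')
# 'CMSSW_9_4_0' also matches, with group(4) == '', giving suffix ''.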
def get_file(self, name):
    ## Get the file data from PhEDEx
    result = self._phedex.make_request('data', ['file=' + name, 'level=file'])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
        file_entry = block_entry['file'][0]
    except (IndexError, KeyError):
        return None

    if not self.check_allowed_dataset(dataset_entry['name']):
        return None

    bname = block_entry['name']
    block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

    # Just need a named object
    dataset = Dataset(dataset_entry['name'])
    block = Block(block_name, dataset)

    lfile = self._create_file(file_entry, block)

    return lfile
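# Worked example of the block-name split used above: a PhEDEx block name is
# '<dataset>#<uuid>', and only the part after '#' is handed to
# Block.to_internal_name. (The names here are made up for illustration.)
bname = '/SingleMuon/Run2016B-v1/MINIAOD#01234567-89ab-cdef-0123-456789abcdef'
print(bname[:bname.find('#')])      # /SingleMuon/Run2016B-v1/MINIAOD
print(bname[bname.find('#') + 1:])  # 01234567-89ab-cdef-0123-456789abcdef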
def _create_dataset(self, dataset_entry):
    """
    Create a dataset object with blocks and files from a PhEDEx dataset entry
    """
    dataset = Dataset(dataset_entry['name'], is_open=(dataset_entry['is_open'] == 'y'))

    if 'time_update' in dataset_entry and dataset_entry['time_update'] is not None:
        dataset.last_update = int(dataset_entry['time_update'])
    else:
        dataset.last_update = int(dataset_entry['time_create'])

    ## Get other details of the dataset from DBS
    self._fill_dataset_details(dataset)

    return dataset
def customize_stats(categories):
    categories.categories = collections.OrderedDict([
        ('campaign', ('Production campaign', Dataset, campaign_name)),
        ('data_tier', ('Data tier', Dataset, lambda d: d.name[d.name.rfind('/') + 1:])),
        ('dataset_status', ('Dataset status', Dataset, lambda d: Dataset.status_name(d.status))),
        ('dataset', ('Dataset name', Dataset, lambda d: d.name)),
        ('site', ('Site name', Site, lambda s: s.name)),
        ('group', ('Group name', Group, lambda g: g.name))
    ])
class InventoryStatCategories(object):
    """
    Just a holder for available data categorization.
    Specify (category_name, (category_title, target, mapping)) where target is either
    Dataset, Site, or Group and mapping is a function that takes an instance of the
    target class and returns a value to be used for categorization.
    Categories can be made specific to the Dynamo instance using _customize.customize_stats.
    """

    categories = collections.OrderedDict([
        ('data_type', ('Dataset type', Dataset, lambda d: Dataset.data_type_name(d.data_type))),
        ('dataset_status', ('Dataset status', Dataset, lambda d: Dataset.status_name(d.status))),
        ('dataset_software_version', ('Dataset software version', Dataset, lambda d: d.software_version)),
        ('dataset', ('Dataset name', Dataset, lambda d: d.name)),
        ('site', ('Site name', Site, lambda s: s.name)),
        ('site_status', ('Site status', Site, lambda s: Site.status_name(s.status))),
        ('group', ('Group name', Group, lambda g: g.name))
    ])
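# Hypothetical consumer of the categories table; `categorize` is a sketch, not
# code from this module. Each entry unpacks into a display title, a target
# class, and a mapping callable used as the grouping key.
def categorize(objects, category_name):
    title, target, mapping = InventoryStatCategories.categories[category_name]
    counts = {}
    for obj in objects:
        if isinstance(obj, target):
            key = mapping(obj)
            counts[key] = counts.get(key, 0) + 1
    return title, counts

# e.g. categorize(inventory.datasets.values(), 'dataset_status') would count
# datasets per status name.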
def make_block_replicas(dataset_entries, replica_maker):
    """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

    block_replicas = []

    for dataset_entry in dataset_entries:
        dataset = Dataset(dataset_entry['name'])

        for block_entry in dataset_entry['block']:
            name = block_entry['name']
            try:
                block_name = Block.to_internal_name(name[name.find('#') + 1:])
            except ValueError:
                # invalid name
                continue

            block = Block(block_name, dataset, block_entry['bytes'])

            block_replicas.extend(replica_maker(block, block_entry))

    return block_replicas
def get_file(self, name, block=None):
    ## Get the file data from PhEDEx
    result = self._phedex.make_request('data', ['file=' + name, 'level=file'])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
        file_entry = block_entry['file'][0]
    except (IndexError, KeyError):
        return None

    bname = block_entry['name']
    block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

    if block is None:
        link_block = False
        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
        block = Block(block_name, dataset)
    else:
        link_block = True
        if block.name != block_name:
            raise IntegrityError('Inconsistent block %s passed to get_file(%s)' % (block.full_name(), name))

    lfile = self._create_file(file_entry, block)

    if link_block:
        # Caution - by adding this file we edit the block properties too
        existing = block.find_file(lfile.fid())
        if existing is None:
            block.add_file(lfile)
        else:
            block.remove_file(existing)
            block.add_file(lfile)

    return lfile
def make_block_replicas(block_entries, replica_maker, site_check=None, dataset_check=None):
    """Return a list of block replicas linked to Dataset, Block, Site, and Group"""

    dataset = None
    block_replicas = []

    for block_entry in block_entries:
        try:
            dataset_name, block_name = Block.from_full_name(block_entry['name'])
        except ObjectError:
            # invalid name
            continue

        if dataset is None or dataset.name != dataset_name:
            if dataset_check and not dataset_check(dataset_name):
                continue

            try:
                dataset = Dataset(dataset_name)
            except ObjectError:
                # invalid name
                dataset = None

        if dataset is None:
            continue

        block = Block(block_name, dataset, block_entry['bytes'])
        if block.size is None:
            block.size = 0

        block_replicas.extend(replica_maker(block, block_entry, site_check=site_check))

    return block_replicas
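# Sketch of a `replica_maker` callback compatible with the call above; the
# 'replica'/'node' keys assume the usual PhEDEx blockreplicas layout, and the
# placeholder tuples stand in for real BlockReplica objects.
def dummy_replica_maker(block, block_entry, site_check=None):
    replicas = []
    for replica_entry in block_entry.get('replica', []):
        if site_check and not site_check(replica_entry['node']):
            continue
        replicas.append((block, replica_entry['node']))
    return replicas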
def get_block(self, name, with_files=False): #override
    ## Get the full block-file data from PhEDEx
    if not name.startswith('/') or name.count('/') != 3 or '#' not in name:
        return None

    if not self.check_allowed_dataset(name[:name.find('#')]):
        return None

    if with_files:
        level = 'file'
    else:
        level = 'block'

    result = self._phedex.make_request('data', ['block=' + name, 'level=' + level])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
    except (IndexError, KeyError):
        return None

    # Just need a named object
    dataset = Dataset(dataset_entry['name'])

    block = self._create_block(block_entry, dataset)

    if with_files and 'file' in block_entry:
        # _create_block sets size and num_files; just need to update the files list
        # Directly creating the _files set
        # This set will persist (unlike the weak proxy version loaded from inventory), but the returned block
        # from this function is only used temporarily anyway
        block._files = set()
        for file_entry in block_entry['file']:
            block._files.add(self._create_file(file_entry, block))

    return block