def get_file(self, name):
    """Fetch single-file data from PhEDEx and return a File object, or None.

    Queries the PhEDEx 'data' API at file level for *name*. Returns None when
    the response does not contain the expected dataset/block/file nesting or
    when the containing dataset is not in the allowed list.
    """
    ## Get the file data from PhEDEx
    result = self._phedex.make_request('data', ['file=' + name, 'level=file'])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
        file_entry = block_entry['file'][0]
    except (TypeError, IndexError, KeyError):
        # Empty or malformed response -> file unknown to PhEDEx.
        # (Narrowed from a bare except, which also hid real bugs like typos below.)
        return None

    # BUGFIX: was self.check_allowed_deataset (typo) -> AttributeError at runtime.
    # Spelling now matches check_allowed_dataset used elsewhere in this class.
    if not self.check_allowed_dataset(dataset_entry['name']):
        return None

    bname = block_entry['name']
    # Block names look like '/prim/proc/tier#suffix'; internal name is the suffix.
    block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

    # Just need a named object
    dataset = Dataset(dataset_entry['name'])
    block = Block(block_name, dataset)

    lfile = self._create_file(file_entry, block)

    return lfile
def _load_datasets(self, inventory, dataset_names, id_dataset_map):
    """Load dataset rows from MySQL into *inventory*.

    Fills inventory.datasets (keyed by dataset name) and id_dataset_map
    (keyed by the numeric DB id) with Dataset objects.  When dataset_names
    is not None, only those datasets are loaded; otherwise all rows of the
    `datasets` table are read.
    """
    # `status`+0 / `data_type`+0 force MySQL ENUM columns to their numeric index
    # so they can be cast with int() below.
    sql = 'SELECT d.`id`, d.`name`, d.`size`, d.`num_files`, d.`status`+0, d.`data_type`+0, s.`cycle`, s.`major`, s.`minor`, s.`suffix`, UNIX_TIMESTAMP(d.`last_update`), d.`is_open`'
    sql += ' FROM `datasets` AS d'
    # LEFT JOIN: datasets without a software version yield NULL sw columns.
    sql += ' LEFT JOIN `software_versions` AS s ON s.`id` = d.`software_version_id`'

    if dataset_names is not None:
        # first dump the dataset ids into a temporary table, then constrain the original table
        # NOTE(review): `datasets_load_tmp` is not dropped in this function —
        # presumably cleaned up by the caller or a later step; confirm.
        self._mysql.query('CREATE TABLE `datasets_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))')
        sqlbase = 'INSERT INTO `datasets_load_tmp` SELECT `id` FROM `datasets`'
        # execute_many batches the INSERT..SELECT constrained on `name` over dataset_names.
        self._mysql.execute_many(sqlbase, 'name', dataset_names)

        sql += ' INNER JOIN `datasets_load_tmp` AS t ON t.`id` = d.`id`'

    for dataset_id, name, size, num_files, status, data_type, sw_cycle, sw_major, sw_minor, sw_suffix, last_update, is_open in self._mysql.xquery(sql):
        # size and num_files are reset when loading blocks
        dataset = Dataset(name, size=size, num_files=num_files, status=int(status), data_type=int(data_type), last_update=last_update, is_open=(is_open == 1))
        if sw_cycle is None:
            # No matching software_versions row (LEFT JOIN produced NULLs).
            dataset.software_version = None
        else:
            dataset.software_version = (sw_cycle, sw_major, sw_minor, sw_suffix)

        inventory.datasets[name] = dataset
        id_dataset_map[dataset_id] = dataset
def get_block(self, name, dataset=None, with_files=False): #override
    """Fetch full block (optionally block-file) data from PhEDEx.

    If *dataset* is given, the block is linked into it (replacing any existing
    block of the same name and adjusting size / num_files accounting); the
    dataset name must match the one PhEDEx reports or IntegrityError is raised.
    Returns the Block, or None when PhEDEx has no data for *name*.
    """
    ## Get the full block-file data from PhEDEx
    if with_files:
        level = 'file'
    else:
        level = 'block'

    result = self._phedex.make_request('data', ['block=' + name, 'level=' + level])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
    except (TypeError, IndexError, KeyError):
        # Narrowed from a bare except: empty/malformed response -> block unknown.
        return None

    if dataset is None:
        link_dataset = False
        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
    else:
        link_dataset = True
        if dataset.name != dataset_entry['name']:
            raise IntegrityError('Inconsistent dataset %s passed to get_block(%s)', dataset.name, name)

    block = self._create_block(block_entry, dataset)

    if with_files and 'file' in block_entry:
        # _create_block sets size and num_files; just need to update the files list
        # Directly creating the _files set
        # This list will persist (unlike the weak proxy version loaded from inventory), but the returned block
        # from this function is only used temporarily anyway
        block._files = set()
        for file_entry in block_entry['file']:
            block._files.add(self._create_file(file_entry, block))

    if link_dataset:
        existing = dataset.find_block(block.name)
        if existing is None:
            dataset.blocks.add(block)
            dataset.size += block.size
            dataset.num_files += block.num_files
        else:
            # NOTE(review): the replaced block is removed but the new one is not
            # added to dataset.blocks here, unlike the analogous file-replacement
            # logic in get_file — verify whether blocks.add(block) is missing or
            # handled by the Block constructor.
            dataset.blocks.remove(existing)
            dataset.size += block.size - existing.size
            dataset.num_files += block.num_files - existing.num_files

    return block
def _create_dataset(self, dataset_entry):
    """
    Build a Dataset object from a PhEDEx dataset entry, then complete its
    metadata from DBS.  last_update falls back to the creation time when no
    update time is present.
    """
    is_open = (dataset_entry['is_open'] == 'y')
    dataset = Dataset(dataset_entry['name'], is_open=is_open)

    time_update = dataset_entry.get('time_update')
    if time_update is not None:
        dataset.last_update = int(time_update)
    else:
        dataset.last_update = int(dataset_entry['time_create'])

    ## Get other details of the dataset from DBS
    self._fill_dataset_details(dataset)

    return dataset
def make_block_replicas(dataset_entries, replica_maker):
    """Return a list of block replicas linked to Dataset, Block, Site, and Group"""
    replicas = []

    for dataset_entry in dataset_entries:
        dataset = Dataset(dataset_entry['name'])

        for block_entry in dataset_entry['block']:
            full_name = block_entry['name']
            suffix = full_name[full_name.find('#') + 1:]

            try:
                internal_name = Block.to_internal_name(suffix)
            except ValueError:
                # invalid name
                continue

            block = Block(internal_name, dataset, block_entry['bytes'])
            replicas += replica_maker(block, block_entry)

    return replicas
def get_file(self, name, block=None):
    """Fetch single-file data from PhEDEx and return a File object, or None.

    If *block* is given, the file is linked into it (replacing any existing
    file with the same id); the block name must match the one PhEDEx reports
    or IntegrityError is raised.
    """
    ## Get the file data from PhEDEx
    result = self._phedex.make_request('data', ['file=' + name, 'level=file'])

    try:
        # BUGFIX: dataset_entry was never bound in the original, but is used
        # below when block is None -> NameError. Capture it here as get_file
        # without a block argument does elsewhere in this module.
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
        file_entry = block_entry['file'][0]
    except (TypeError, IndexError, KeyError):
        # Narrowed from a bare except: empty/malformed response -> file unknown.
        return None

    bname = block_entry['name']
    block_name = Block.to_internal_name(bname[bname.find('#') + 1:])

    if block is None:
        link_block = False
        # Just need a named object
        dataset = Dataset(dataset_entry['name'])
        block = Block(block_name, dataset)
    else:
        link_block = True
        if block.name != block_name:
            raise IntegrityError('Inconsistent block %s passed to get_file(%s)', block.full_name(), name)

    lfile = self._create_file(file_entry, block)

    if link_block:
        # Caution - by adding this file we edit the block properties too
        existing = block.find_file(lfile.fid())
        if existing is None:
            block.add_file(lfile)
        else:
            block.remove_file(existing)
            block.add_file(lfile)

    return lfile
def make_block_replicas(block_entries, replica_maker, site_check=None, dataset_check=None):
    """Return a list of block replicas linked to Dataset, Block, Site, and Group"""
    replicas = []
    # Cache the last Dataset object: consecutive entries typically belong to
    # the same dataset, so we avoid re-constructing (and re-checking) it.
    current_dataset = None

    for block_entry in block_entries:
        try:
            dataset_name, block_name = Block.from_full_name(block_entry['name'])
        except ObjectError:
            # invalid name
            continue

        if current_dataset is None or current_dataset.name != dataset_name:
            if dataset_check and not dataset_check(dataset_name):
                continue

            try:
                current_dataset = Dataset(dataset_name)
            except ObjectError:
                # invalid name
                current_dataset = None

        if current_dataset is None:
            continue

        block = Block(block_name, current_dataset, block_entry['bytes'])
        if block.size is None:
            block.size = 0

        replicas += replica_maker(block, block_entry, site_check=site_check)

    return replicas
def get_block(self, name, with_files=False): #override
    """Fetch full block (optionally block-file) data from PhEDEx.

    *name* must be a full block name of the form '/prim/proc/tier#suffix'.
    Returns the Block, or None for malformed names, disallowed datasets, or
    names unknown to PhEDEx.
    """
    ## Get the full block-file data from PhEDEx
    # BUGFIX: the original guard rejected names that CONTAIN '#'
    # ("'#' in name: return None"), which rejects every valid block name and
    # let names without '#' through to name[:name.find('#')] == name[:-1],
    # silently truncating the dataset name. A block name must contain '#'.
    if not name.startswith('/') or name.count('/') != 3 or '#' not in name:
        return None

    # The dataset part is everything before the '#'.
    if not self.check_allowed_dataset(name[:name.find('#')]):
        return None

    if with_files:
        level = 'file'
    else:
        level = 'block'

    result = self._phedex.make_request('data', ['block=' + name, 'level=' + level])

    try:
        dataset_entry = result[0]['dataset'][0]
        block_entry = dataset_entry['block'][0]
    except (TypeError, IndexError, KeyError):
        # Narrowed from a bare except: empty/malformed response -> block unknown.
        return None

    # Just need a named object
    dataset = Dataset(dataset_entry['name'])

    block = self._create_block(block_entry, dataset)

    if with_files and 'file' in block_entry:
        # _create_block sets size and num_files; just need to update the files list
        # Directly creating the _files set
        # This list will persist (unlike the weak proxy version loaded from inventory), but the returned block
        # from this function is only used temporarily anyway
        block._files = set()
        for file_entry in block_entry['file']:
            block._files.add(self._create_file(file_entry, block))

    return block