def create_dbs3_proto_blocks(opts, dataset_blocks):
    for dataset in dataset_blocks:
        missing_info_blocks = []
        dataset_types = set()
        for block in dataset_blocks[dataset]:
            block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
            (block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
            dataset_types.update(block_dataset_types)  # collect dataset types in this dataset for blocks with missing type information
            if len(block_dataset_types) > 1:
                raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
            elif len(block_dataset_types) == 1:
                yield (block, block_dump, block_size, block_dataset_types.pop())
            else:
                missing_info_blocks.append((block, block_dump, block_size))

        # determine the dataset type for blocks that did not provide type information themselves
        if missing_info_blocks:
            if len(dataset_types) > 1:
                raise Exception('Data and MC files are mixed in dataset %s! Unable to determine dataset type for blocks without type info' % dataset)
            elif len(dataset_types) == 0:
                if not opts.datatype:
                    raise Exception('Please supply dataset type via --datatype!')
                dataset_type = opts.datatype
            else:
                dataset_type = dataset_types.pop()
            for (block, block_dump, block_size) in missing_info_blocks:
                yield (block, block_dump, block_size, dataset_type)
def create_dbs3_json_blocks(opts, dataset_blocks):
    for (block, block_dump, block_size, dataset_type) in create_dbs3_proto_blocks(opts, dataset_blocks):
        dataset = block[DataProvider.Dataset]
        try:
            primary_dataset, processed_dataset, data_tier = dataset[1:].split('/')
        except Exception:
            raise DatasetError('Dataset name %s is not a valid DBS name!' % dataset)

        # add primary dataset information
        block_dump['primds'] = {'primary_ds_type': dataset_type, 'primary_ds_name': primary_dataset}

        # add dataset information
        block_dump['dataset'] = {
            'dataset': dataset,
            'processed_ds_name': processed_dataset,
            'data_tier_name': data_tier,
            'physics_group_name': None,
            'dataset_access_type': 'VALID',
            'xtcrosssection': None,  # TODO: Add to meta data from FrameWorkJobReport, if possible!
        }

        # add block information
        site_db = SiteDB()
        try:
            origin_site_name = site_db.se_to_cms_name(block[DataProvider.Locations][0])[0]
        except IndexError:
            origin_site_name = 'UNKNOWN'
        block_dump['block'] = {'block_name': DataProvider.bName(block), 'block_size': block_size,
            'file_count': len(block[DataProvider.FileList]), 'origin_site_name': origin_site_name}
        if opts.do_close_blocks:
            block_dump['block']['open_for_writing'] = 0
        else:
            block_dump['block']['open_for_writing'] = 1

        # add acquisition_era, CRAB is important because of checks within DBS 3
        block_dump['acquisition_era'] = {'acquisition_era_name': 'CRAB', 'start_date': 0}
        # add processing_era
        block_dump['processing_era'] = {'processing_version': 1, 'description': 'grid-control'}

        yield validate_dbs3_json('blockBulk', block_dump)
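# A minimal driver sketch (not part of the functions above): it assumes that 'opts' carries the
# attributes referenced there (opts.datatype, opts.do_close_blocks), that 'dataset_blocks' maps
# dataset paths to lists of DataProvider blocks, and that the validated block dumps are uploaded
# through the DBS 3 bulk insertion call of the dbs-client API ('dbs3_api' is a DbsApi instance
# created by the caller).
def upload_dbs3_blocks(opts, dataset_blocks, dbs3_api):
    for block_dump in create_dbs3_json_blocks(opts, dataset_blocks):
        dbs3_api.insertBulkBlock(block_dump)  # bulk insertion of one fully assembled block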
def processBlock(self, block):
    if self._lumi_filter.empty() and ((self._lumi_keep == LumiKeep.RunLumi) or (DataProvider.Metadata not in block)):
        return block

    def getMetadataIdx(key):
        if key in block.get(DataProvider.Metadata, []):
            return block[DataProvider.Metadata].index(key)
    idxRuns = getMetadataIdx('Runs')
    idxLumi = getMetadataIdx('Lumi')

    if not self._lumi_filter.empty():
        lumi_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector=False)
        if lumi_filter and (self._lumi_strict == LumiMode.strict) and ((idxRuns is None) or (idxLumi is None)):
            raise DatasetError('Strict lumi filter active but dataset %s does not provide lumi information!' % DataProvider.bName(block))
        elif lumi_filter and (self._lumi_strict == LumiMode.weak) and (idxRuns is None):
            raise DatasetError('Weak lumi filter active but dataset %s does not provide run information!' % DataProvider.bName(block))

    block[DataProvider.FileList] = list(self._processFI(block, idxRuns, idxLumi))
    if not block[DataProvider.FileList]:
        return
    block[DataProvider.NEntries] = sum(imap(lambda fi: fi[DataProvider.NEntries], block[DataProvider.FileList]))

    # Prune metadata
    if self._lumi_keep == LumiKeep.RunLumi:
        return block
    elif self._lumi_keep == LumiKeep.Run:
        idxRuns = None
    removeRunLumi(block[DataProvider.Metadata], idxRuns, idxLumi)
    return block
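# The removeRunLumi helper called above is defined elsewhere in the module; the following is a
# minimal sketch of the behavior processBlock relies on, under the assumption that the metadata
# key list (and the per-file metadata value lists) are positional, so the 'Runs' / 'Lumi' columns
# are dropped by index. A None index means the corresponding column is kept, which is why
# processBlock sets idxRuns = None for LumiKeep.Run before calling it.
def _remove_run_lumi_sketch(value, idxRuns, idxLumi):
    if (idxRuns is not None) and (idxLumi is not None):
        value.pop(max(idxRuns, idxLumi))  # pop the higher index first so the lower index stays valid
        value.pop(min(idxRuns, idxLumi))
    elif idxLumi is not None:
        value.pop(idxLumi)
    elif idxRuns is not None:
        value.pop(idxRuns)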