def _check_lumi_filter(self, block, idx_runs, idx_lumi):
    lumi_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector=False)
    if not lumi_filter:
        return
    if (self._lumi_strict == LumiMode.strict) and ((idx_runs is None) or (idx_lumi is None)):
        raise DatasetError('Strict lumi filter active but ' +
            'dataset %s does not provide lumi information!' % DataProvider.get_block_id(block))
    elif (self._lumi_strict == LumiMode.weak) and (idx_runs is None):
        raise DatasetError('Weak lumi filter active but ' +
            'dataset %s does not provide run information!' % DataProvider.get_block_id(block))

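# Context: _check_lumi_filter only verifies that the metadata needed by the
# active lumi filter is present; idx_runs / idx_lumi are the positions of the
# run and lumi columns in the block metadata. Below is a minimal sketch of how
# a caller might derive them; the helper name and the 'Lumi' key are
# illustrative assumptions (only 'Runs' appears elsewhere in this section).
def _find_metadata_indices(block):
    metadata_keys = block.get(DataProvider.Metadata, [])
    idx_runs = metadata_keys.index('Runs') if 'Runs' in metadata_keys else None
    idx_lumi = metadata_keys.index('Lumi') if 'Lumi' in metadata_keys else None
    return (idx_runs, idx_lumi)
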
def create_dbs3_json_blocks(opts, dataset_blocks):
    dbs3_proto_block_iter = create_dbs3_proto_blocks(opts, dataset_blocks)
    for (block, block_dump, block_size, dataset_type) in dbs3_proto_block_iter:
        dataset = block[DataProvider.Dataset]
        try:
            primary_dataset, processed_dataset, data_tier = dataset[1:].split('/')
        except Exception:
            raise DatasetError('Dataset name %s is not a valid DBS name!' % dataset)
        # add primary dataset information
        block_dump['primds'] = {'primary_ds_type': dataset_type,
            'primary_ds_name': primary_dataset}
        # add dataset information
        block_dump['dataset'] = {
            'dataset': dataset,
            'processed_ds_name': processed_dataset,
            'data_tier_name': data_tier,
            'physics_group_name': None,
            'dataset_access_type': 'VALID',
            'xtcrosssection': None,  # TODO: Add to metadata from FrameworkJobReport, if possible!
        }
        # add block information
        site_db = CRIC()
        try:
            origin_site_name = site_db.se_to_cms_name(block[DataProvider.Locations][0])[0]
        except IndexError:
            clear_current_exception()
            origin_site_name = 'UNKNOWN'
        block_dump['block'] = {'block_name': DataProvider.get_block_id(block),
            'block_size': block_size,
            'file_count': len(block[DataProvider.FileList]),
            'origin_site_name': origin_site_name}
        if opts.do_close_blocks:
            block_dump['block']['open_for_writing'] = 0
        else:
            block_dump['block']['open_for_writing'] = 1
        # add acquisition_era; the name 'CRAB' is important because of checks within DBS 3
        block_dump['acquisition_era'] = {'acquisition_era_name': 'CRAB', 'start_date': 0}
        # add processing_era
        block_dump['processing_era'] = {'processing_version': 1, 'description': 'grid-control'}
        yield validate_dbs3_json('blockBulk', block_dump)

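# Illustration of the split above: a valid DBS dataset name has exactly three
# slash-separated parts, /<primary>/<processed>/<tier> (example name made up).
_example = '/SingleMuon/Run2016B-v1/AOD'
assert tuple(_example[1:].split('/')) == ('SingleMuon', 'Run2016B-v1', 'AOD')
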
def create_dbs3_proto_blocks(opts, dataset_blocks):
    for dataset in dataset_blocks:
        missing_info_blocks = []
        dataset_types = set()
        for block in dataset_blocks[dataset]:
            block_dump = {'dataset_conf_list': [], 'files': [],
                'file_conf_list': [], 'file_parent_list': []}
            (block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
            if len(block_dataset_types) > 1:
                raise Exception('Data and MC files are mixed in block %s' %
                    DataProvider.get_block_id(block))
            elif len(block_dataset_types) == 1:
                yield (block, block_dump, block_size, block_dataset_types.pop())
            else:
                missing_info_blocks.append((block, block_dump, block_size))
            # collect dataset types in this dataset for blocks with missing type information
            dataset_types.update(block_dataset_types)
        if missing_info_blocks:
            if len(dataset_types) > 1:
                raise Exception(('Data and MC files are mixed in dataset %s! ' +
                    'Unable to determine dataset type for blocks without type info') % dataset)
            elif len(dataset_types) == 0:
                if not opts.datatype:
                    raise Exception('Please supply dataset type via --datatype!')
                dataset_type = opts.datatype
            else:
                dataset_type = dataset_types.pop()
            for (block, block_dump, block_size) in missing_info_blocks:
                yield (block, block_dump, block_size, dataset_type)

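# Hypothetical driver showing how the two generators above chain together. The
# opts fields mirror the attributes used above (datatype, do_close_blocks); the
# dbs3_client argument and its insertBulkBlock call stand in for whichever
# DBS 3 API client performs the actual insertion; both are assumptions, not
# part of this section.
import argparse

def upload_dataset_blocks(dbs3_client, dataset_blocks):
    opts = argparse.Namespace(datatype='mc', do_close_blocks=True)
    for block_dump in create_dbs3_json_blocks(opts, dataset_blocks):
        # each dump has already passed validate_dbs3_json('blockBulk', ...)
        dbs3_client.insertBulkBlock(block_dump)
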
def _get_fi_class(self, fi, block):
    run_range = self._run_range.lookup(DataProvider.get_block_id(block))
    metadata_idx = block[DataProvider.Metadata].index('Runs')
    return tuple(imap(lambda r: int(r / run_range), fi[DataProvider.Metadata][metadata_idx]))

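# Illustration of the classification above: with a run_range of 1000, runs in
# the same 1000-run window map to the same class, so files get grouped by run
# window. Plain map stands in for the imap shim; the run values are made up.
_run_range = 1000
assert tuple(map(lambda r: int(r / _run_range), [2345, 2999, 3001])) == (2, 2, 3)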