Example #1
0
def create_dbs3_proto_blocks(opts, dataset_blocks):
	"""Yield ``(block, block_dump, block_size, dataset_type)`` for every block.

	``dataset_blocks`` is indexed by dataset and each entry is iterated to
	obtain the blocks of that dataset. For each block a fresh ``block_dump``
	dictionary is filled by ``create_dbs3_json_files``, which also returns the
	block size and the collection of dataset types found in the block.

	Blocks with exactly one dataset type are yielded immediately. Blocks
	without any type information are deferred until the whole dataset was
	scanned and then receive either the single type found in the other blocks
	of the same dataset or, if no block provided one, ``opts.datatype``.

	Raises ``Exception`` if a block (or the typed blocks of a dataset that
	also contains untyped blocks) mixes several dataset types, or if no type
	can be determined and ``opts.datatype`` is not set.
	"""
	for dataset in dataset_blocks:
		missing_info_blocks = []
		dataset_types = set()
		for block in dataset_blocks[dataset]:
			block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
			(block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
			if len(block_dataset_types) > 1:
				raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
			# Collect the types *before* pop() below empties the per-block set -
			# otherwise blocks with missing type information could never inherit them
			dataset_types.update(block_dataset_types)
			if len(block_dataset_types) == 1:
				yield (block, block_dump, block_size, block_dataset_types.pop())
			else:
				missing_info_blocks.append((block, block_dump, block_size))

		if missing_info_blocks:
			if len(dataset_types) > 1:
				# The tuple keeps the formatting valid even if the dataset key is itself a tuple
				raise Exception('Data and MC files are mixed in dataset %s! Unable to determine dataset type for blocks without type info' % (dataset,))
			elif len(dataset_types) == 0:
				if not opts.datatype:
					raise Exception('Please supply dataset type via --datatype!')
				dataset_type = opts.datatype
			else:
				dataset_type = dataset_types.pop()
			for (block, block_dump, block_size) in missing_info_blocks:
				yield (block, block_dump, block_size, dataset_type)
Example #2
0
def create_dbs3_proto_blocks(opts, dataset_blocks):
	"""Generate ``(block, block_dump, block_size, dataset_type)`` tuples.

	Every dataset in ``dataset_blocks`` is processed on its own: each of its
	blocks gets a new ``block_dump`` dictionary which is populated by
	``create_dbs3_json_files`` (that call also reports the block size and the
	dataset types present in the block).

	A block with a single dataset type is yielded right away. A block without
	type information is held back until all blocks of the dataset were seen;
	it is then yielded with the unique type of the dataset's typed blocks, or
	with ``opts.datatype`` when none of the blocks carried a type.

	Raises ``Exception`` when several dataset types are mixed within a block,
	when they are mixed within a dataset that contains untyped blocks, or when
	no type is available and ``opts.datatype`` is unset.
	"""
	for dataset in dataset_blocks:
		missing_info_blocks = []
		dataset_types = set()
		for block in dataset_blocks[dataset]:
			block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
			(block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
			if len(block_dataset_types) > 1:
				raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
			# Must happen before pop() consumes the only element of the per-block set,
			# so that the dataset-wide type is known for blocks lacking type information
			dataset_types.update(block_dataset_types)
			if len(block_dataset_types) == 1:
				yield (block, block_dump, block_size, block_dataset_types.pop())
			else:
				missing_info_blocks.append((block, block_dump, block_size))

		if missing_info_blocks:
			if len(dataset_types) > 1:
				# One-element tuple: safe %-formatting regardless of the key's type
				raise Exception('Data and MC files are mixed in dataset %s! Unable to determine dataset type for blocks without type info' % (dataset,))
			elif len(dataset_types) == 0:
				if not opts.datatype:
					raise Exception('Please supply dataset type via --datatype!')
				dataset_type = opts.datatype
			else:
				dataset_type = dataset_types.pop()
			for (block, block_dump, block_size) in missing_info_blocks:
				yield (block, block_dump, block_size, dataset_type)
Example #3
0
def create_dbs3_json_blocks(opts, dataset_blocks):
    """Yield one validated DBS3 'blockBulk' structure per block.

    The proto blocks produced by ``create_dbs3_proto_blocks`` are completed
    with the 'primds', 'dataset', 'block', 'acquisition_era' and
    'processing_era' sections and passed through ``validate_dbs3_json``.

    Raises ``DatasetError`` when the dataset name cannot be split into the
    three '/'-separated parts following its first character.
    """
    proto_blocks = create_dbs3_proto_blocks(opts, dataset_blocks)
    for (block, block_dump, block_size, dataset_type) in proto_blocks:
        dataset_path = block[DataProvider.Dataset]
        try:
            # drop the first character, then expect exactly three components
            name_parts = dataset_path[1:].split('/')
            (primds_name, procds_name, tier_name) = name_parts
        except Exception:
            raise DatasetError(
                'Dataset name %s is not a valid DBS name!' % dataset_path)

        # primary dataset section
        primds_info = {}
        primds_info['primary_ds_type'] = dataset_type
        primds_info['primary_ds_name'] = primds_name
        block_dump['primds'] = primds_info

        # dataset section
        dataset_info = {}
        dataset_info['dataset'] = dataset_path
        dataset_info['processed_ds_name'] = procds_name
        dataset_info['data_tier_name'] = tier_name
        dataset_info['physics_group_name'] = None
        dataset_info['dataset_access_type'] = 'VALID'
        # TODO: Add to meta data from FrameWorkJobReport, if possible!
        dataset_info['xtcrosssection'] = None
        block_dump['dataset'] = dataset_info

        # block section - the origin site is derived from the first location
        site_db = SiteDB()
        try:
            first_location = block[DataProvider.Locations][0]
            origin_site = site_db.se_to_cms_name(first_location)[0]
        except IndexError:
            # no location listed or no site name returned for it
            origin_site = 'UNKNOWN'

        block_info = {
            'block_name': DataProvider.bName(block),
            'block_size': block_size,
            'file_count': len(block[DataProvider.FileList]),
            'origin_site_name': origin_site,
        }
        block_dump['block'] = block_info
        # closed blocks are flagged as not open for writing
        block_info['open_for_writing'] = 0 if opts.do_close_blocks else 1

        # acquisition era - CRAB is important because of checks within DBS 3
        block_dump['acquisition_era'] = dict([
            ('acquisition_era_name', 'CRAB'),
            ('start_date', 0),
        ])
        # processing era
        block_dump['processing_era'] = dict([
            ('processing_version', 1),
            ('description', 'grid-control'),
        ])

        yield validate_dbs3_json('blockBulk', block_dump)
Example #4
0
def create_dbs3_json_blocks(opts, dataset_blocks):
	"""Turn proto blocks into validated DBS3 'blockBulk' dictionaries.

	Each tuple coming from ``create_dbs3_proto_blocks`` has its ``block_dump``
	extended by the 'primds', 'dataset', 'block', 'acquisition_era' and
	'processing_era' entries; the result of ``validate_dbs3_json`` for that
	dictionary is yielded.

	Raises ``DatasetError`` if the dataset name (without its first character)
	does not consist of exactly three '/'-separated parts.
	"""
	proto_blocks = create_dbs3_proto_blocks(opts, dataset_blocks)
	for (block, block_dump, block_size, dataset_type) in proto_blocks:
		dataset_path = block[DataProvider.Dataset]
		try:
			(primds_name, procds_name, tier_name) = dataset_path[1:].split('/')
		except Exception:
			raise DatasetError('Dataset name %s is not a valid DBS name!' % dataset_path)

		# primary dataset information
		block_dump['primds'] = {
			'primary_ds_type': dataset_type,
			'primary_ds_name': primds_name,
		}

		# dataset information
		block_dump['dataset'] = {
			'dataset': dataset_path,
			'processed_ds_name': procds_name,
			'data_tier_name': tier_name,
			'physics_group_name': None,
			'dataset_access_type': 'VALID',
			'xtcrosssection': None, # TODO: Add to meta data from FrameWorkJobReport, if possible!
		}

		# block information - origin site is looked up from the first listed location
		site_db = SiteDB()
		try:
			first_location = block[DataProvider.Locations][0]
			origin_site = site_db.se_to_cms_name(first_location)[0]
		except IndexError:
			origin_site = 'UNKNOWN' # no location or no matching site name
		block_info = {
			'block_name': DataProvider.bName(block),
			'block_size': block_size,
			'file_count': len(block[DataProvider.FileList]),
			'origin_site_name': origin_site,
		}
		block_dump['block'] = block_info
		# closed blocks are marked as not open for writing
		block_info['open_for_writing'] = 0 if opts.do_close_blocks else 1

		# acquisition era - CRAB is important because of checks within DBS 3
		block_dump['acquisition_era'] = {
			'acquisition_era_name': 'CRAB',
			'start_date': 0,
		}
		# processing era
		block_dump['processing_era'] = {
			'processing_version': 1,
			'description': 'grid-control',
		}

		yield validate_dbs3_json('blockBulk', block_dump)
Example #5
0
	def processBlock(self, block):
		"""Apply the lumi filter to the files of *block* and prune run/lumi metadata.

		The block is returned untouched when the lumi filter is empty and
		either run+lumi metadata should be kept or the block has no metadata.
		Otherwise the file list is replaced by the output of ``_processFI``,
		the entry count is recomputed from the remaining files and - depending
		on ``_lumi_keep`` - the 'Runs' / 'Lumi' metadata is removed via
		``removeRunLumi``.

		Returns the (modified in place) block, or ``None`` if no files remain.
		Raises ``DatasetError`` when a strict (weak) filter applies to the
		block but the lumi or run (run) metadata is not available.
		"""
		if self._lumi_filter.empty():
			# without a filter there is nothing to do unless metadata has to be pruned
			if (self._lumi_keep == LumiKeep.RunLumi) or (DataProvider.Metadata not in block):
				return block

		metadata_keys = block.get(DataProvider.Metadata, [])

		def find_metadata_idx(key):
			# position of the metadata key or None if the block does not provide it
			if key in metadata_keys:
				return metadata_keys.index(key)
			return None

		run_idx = find_metadata_idx('Runs')
		lumi_idx = find_metadata_idx('Lumi')

		if not self._lumi_filter.empty():
			active_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector = False)
			if active_filter:
				# strict mode needs both run and lumi information, weak mode only the runs
				if (self._lumi_strict == LumiMode.strict) and ((run_idx is None) or (lumi_idx is None)):
					raise DatasetError('Strict lumi filter active but dataset %s does not provide lumi information!' % DataProvider.bName(block))
				if (self._lumi_strict == LumiMode.weak) and (run_idx is None):
					raise DatasetError('Weak lumi filter active but dataset %s does not provide run information!' % DataProvider.bName(block))

		selected_files = list(self._processFI(block, run_idx, lumi_idx))
		block[DataProvider.FileList] = selected_files
		if not selected_files:
			return None
		block[DataProvider.NEntries] = sum(fi[DataProvider.NEntries] for fi in selected_files)

		# Prune metadata
		if self._lumi_keep == LumiKeep.RunLumi:
			return block
		if self._lumi_keep == LumiKeep.Run:
			run_idx = None # keep the run information, only the lumi column is removed
		removeRunLumi(block[DataProvider.Metadata], run_idx, lumi_idx)
		return block