def files(self, number, **attrs):
    """Generate Phedex file meta-data.

    Decorates the records produced by the parent provider with a Phedex-style
    LFN, a random size in the 1-2 GB range and a cksum/adler32 checksum pair.
    """
    prim = attrs.get('prim', 'prim')
    proc = attrs.get('proc', 'proc')
    tier = attrs.get('tier', 'tier')
    tags = attrs.get('tags', '')
    output = super(PhedexDataProvider, self).files(number, **attrs) if tags \
            else super(PhedexDataProvider, self).files(number)
    # LFN layout:
    # /store/data/acq_era/prim_dataset/data_tier/proc_version/lfn_counter/f.root
    idx = 0
    gbyte = 1024 * 1024 * 1024
    # proc has the form "<acq_era>-<proc_ds_name>-v<version>"; the middle part
    # is not needed here, and the split is loop-invariant so do it once
    era, _, ver = proc.split('-')
    ver = ver[1:]  # remove v from v4711
    for row in output:
        counter = str(idx).zfill(9)
        prefix = '/store/data/%s/%s/%s/%s/%s/' % (era, prim, tier, ver, counter)
        name = prefix + row['file']['name']
        checksum = 'cksum:%s,adler32:%s' \
                % (generate_uid(4, '1234567890'),
                   generate_uid(4, '1234567890'))
        size = random.randint(1 * gbyte, 2 * gbyte)
        doc = {'checksum': checksum, 'bytes': size, 'name': name}
        row['file'].update(doc)
        # bug fix: the counter was never advanced, so every file got the
        # same '000000000' prefix and LFNs could collide
        idx += 1
    return output
def files(self, number, **attrs):
    """Generate Phedex file meta-data.

    Each record from the parent provider gets a Phedex-style LFN, a random
    1-2 GB size and a combined cksum/adler32 checksum string.
    """
    prim = attrs.get('prim', 'prim')
    proc = attrs.get('proc', 'proc')
    tier = attrs.get('tier', 'tier')
    tags = attrs.get('tags', '')
    if tags:
        output = super(PhedexDataProvider, self).files(number, **attrs)
    else:
        output = super(PhedexDataProvider, self).files(number)
    # LFN layout:
    # /store/data/acq_era/prim_dataset/data_tier/proc_version/lfn_counter/f.root
    gbyte = 1024 * 1024 * 1024
    # proc looks like "<acq_era>-<proc_ds_name>-v<version>"; drop the
    # leading 'v' from the version (v4711 -> 4711)
    era, _, ver = proc.split('-')
    ver = ver[1:]
    # bug fix: enumerate advances the per-file counter, which previously
    # stayed at zero and produced colliding LFN prefixes
    for idx, row in enumerate(output):
        counter = str(idx).zfill(9)
        prefix = '/store/data/%s/%s/%s/%s/%s/' % (era, prim, tier, ver, counter)
        name = prefix + row['file']['name']
        checksum = 'cksum:%s,adler32:%s' \
                % (generate_uid(4, '1234567890'),
                   generate_uid(4, '1234567890'))
        size = random.randint(1 * gbyte, 2 * gbyte)
        row['file'].update({'checksum': checksum, 'bytes': size, 'name': name})
    return output
def prim_ds(self, number, **attrs):
    "Generate DBS primary dataset meta-data"
    records = []
    for _ in range(number):
        # an explicit 'prim' attribute wins over a generated name
        name = attrs.get('prim', generate_uid(3, self._seed, self._fixed))
        dtype = generate_uid(1, ['mc', 'data'], self._fixed)
        records.append({'prim_ds': {'primary_ds_name': name,
                                    'primary_ds_type': dtype}})
    return records
def files(self, number, **attrs):
    """Generate DBS files meta-data.

    Consumes the prim/proc/tier/output_configs attributes, delegates the base
    records to the parent provider and fills in DBS file fields (LFN, size,
    type, checksums, lumi list, ...).
    """
    # pop() replaces the Python-2-only attrs.has_key()/del combination:
    # same read-default-and-remove behavior, portable to Python 3
    prim = attrs.pop('prim', 'prim')
    proc = attrs.pop('proc', 'proc')
    tier = attrs.pop('tier', 'tier')
    oconfig = attrs.pop('output_configs',
                        {'release_version': 'CMSSW_TEST',
                         'pset_hash': 'NO_PSET_HASH',
                         'app_name': 'Generator',
                         'output_module_label': 'TEST',
                         'global_tag': 'TAG'})
    path = '/%s/%s/%s' % (prim, proc, tier)
    output = super(DBSDataProvider, self).files(number, **attrs)
    # LFN layout:
    # /store/data/acq_era/prim_dataset/data_tier/proc_version/lfn_counter/f.root
    idx = 0
    for row in output:
        ver = '%s-v1' % proc
        counter = str(idx).zfill(9)
        prefix = '/store/data/era/%s/%s/%s/%s/' % (prim, tier, ver, counter)
        name = prefix + row['file']['name']
        size = random.randint(1000, 1000000)
        ftype = generate_uid(1, ['EDM', 'ROOT'], self._fixed)
        doc = {'logical_file_name': name,
               'file_size': size,
               'file_type': ftype,
               'check_sum': generate_uid(8),
               'adler32': generate_uid(8),
               'file_output_config_list': [oconfig],
               'file_lumi_list': self.file_lumi_list(),
               'file_parent_list': [],
               'auto_cross_section': 0.0,
               'event_count': random.randint(10, 10000),
               'dataset': path,
               'file_type_id': 1,
               'md5': 'NOTSET'}
        row['file'].update(doc)
        # the parent's short 'name' is superseded by logical_file_name
        del row['file']['name']
        idx += 1
    return output
def __init__(self, fixed=False, runs=5, lumis=5):
    "Initialize run/lumi bookkeeping on top of the base DataProvider"
    DataProvider.__init__(self, fixed)
    self.runs_per_file, self.lumis_per_run = runs, lumis
    # starting points for run and lumi number generation; run numbers get a
    # leading '1' so they are always six digits and never zero-padded away
    seed_digits = generate_uid(5, '1234567890', self._fixed)
    self._run_num = int('1%s' % seed_digits)
    self._lumi_num = random.randint(1, 100)
def datasets(self, number, **attrs):
    """Generate DBS datasets meta-data.

    Expands each parent record's '/prim/proc/tier' name into the full DBS
    dataset document and replaces the 'name' key with 'dataset'.
    """
    output = super(DBSDataProvider, self).datasets(number, **attrs)
    for row in output:
        name = row['dataset']['name']
        proc_ver = row['dataset'].get('processing_version', 123)
        acq_era = row['dataset'].get('acquisition_era_name', 'test')
        prim_type = row['dataset'].get('primary_ds_type', 'mc')
        _, prim, proc, tier = name.split('/')
        group = generate_uid(1, ['Top', 'QCD', 'RelVal'], self._fixed)
        def_config = [{'release_version': 'CMSSW_TEST',
                       'pset_hash': 'NO_PSET_HASH',
                       'global_tag': 'TAG',
                       'app_name': 'Generator',
                       'output_module_label': 'TEST'}]
        oconfig = row['dataset'].get('output_configs', def_config)
        doc = {'primary_ds_name': prim,
               'processing_ds_name': proc,
               'data_tier_name': tier,
               'physics_group_name': group,
               'acquisition_era_name': acq_era,
               'processing_version': proc_ver,
               'xtcrosssection': 0.1,
               'output_configs': oconfig,
               # bug fix: honor the row's primary_ds_type; the fetched value
               # was previously ignored in favor of a hard-coded 'mc'
               # (the default is still 'mc', so unset rows are unchanged)
               'primary_ds_type': prim_type,
               'dataset_access_type': 'valid',
               'prep_id': 1,
               'dataset': name}
        row['dataset'].update(doc)
        del row['dataset']['name']
    return output
def tiers(self, number, **attrs):
    "Generate DBS data tier meta-data"
    # an explicit 'tier' attribute wins over a generated one; note the
    # generated default is evaluated per record, matching the original flow
    return [{'tier': {'data_tier_name':
                      attrs.get('tier',
                                generate_uid(1, self._tiers, self._fixed))}}
            for _ in range(number)]
def block_dump(self, number_of_files=1):
    "Generate block with multiple files in it"
    # shared output-configuration record for the dataset and each file
    config = {'release_version': 'CMSSW_1_2_3',
              'pset_hash': generate_uid(32),
              'app_name': 'cmsRun',
              'output_module_label': 'Merge',
              'global_tag': 'TAG'}
    # one record apiece of the supporting entities
    prim = self.prim_ds(1)[0]
    proc_era = self.proc_eras(1)[0]
    acq_era = self.acq_eras(1)[0]
    tier = self.tiers(1)[0]
    proc_ver = proc_era['processing_era']['processing_version']
    dataset = self.datasets(1, **{
        'prim': prim['prim_ds']['primary_ds_name'],
        'processing_version': proc_ver,
        'acquisition_era_name':
            acq_era['acquisition_era']['acquisition_era_name'],
        'proc': 'proc-%s' % proc_ver,
        'tier': tier['tier']['data_tier_name']})[0]
    block = self.blocks(1)[0]
    files = self.files(number_of_files)
    # one copy of the config per file, keyed by that file's LFN
    file_info = [dict(config, lfn=row['file']['logical_file_name'])
                 for row in files]
    rec = {'dataset_conf_list': [config],
           'file_conf_list': file_info,
           'dataset': dataset['dataset'],
           'block': block['block'],
           'primds': prim['prim_ds'],
           'processing_era': proc_era['processing_era'],
           'acquisition_era': acq_era['acquisition_era'],
           'files': files}
    return {'blockDump': rec}
def acq_eras(self, number, **attrs):
    "Generate DBS acquisition era meta-data"
    description = 'Test_acquisition_era'
    result = []
    for _ in range(number):
        era_name = generate_uid(4, self._seed, self._fixed)
        result.append({'acquisition_era':
                       {'acquisition_era_name': era_name,
                        'description': description}})
    return result
def proc_eras(self, number, **attrs):
    "Generate DBS processing era meta-data"
    description = 'Test_proc_era'
    # processing versions are random 4-digit integers (no zeros in the seed)
    return [{'processing_era':
             {'processing_version':
                  int(generate_uid(4, '123456789', self._fixed)),
              'description': description}}
            for _ in range(number)]
def files(self, number, **attrs):
    """Generate DBS files meta-data.

    Pulls prim/proc/tier/output_configs out of the attributes, gets base
    records from the parent provider and decorates each with the DBS file
    fields (LFN, size, type, checksums, lumi list, ...).
    """
    # dict.pop(key, default) reads the value and removes the key in one step,
    # replacing the Python-2-only attrs.has_key()/del combination
    prim = attrs.pop('prim', 'prim')
    proc = attrs.pop('proc', 'proc')
    tier = attrs.pop('tier', 'tier')
    oconfig = attrs.pop('output_configs',
                        {'release_version': 'CMSSW_TEST',
                         'pset_hash': 'NO_PSET_HASH',
                         'app_name': 'Generator',
                         'output_module_label': 'TEST',
                         'global_tag': 'TAG'})
    path = '/%s/%s/%s' % (prim, proc, tier)
    output = super(DBSDataProvider, self).files(number, **attrs)
    # LFN layout:
    # /store/data/acq_era/prim_dataset/data_tier/proc_version/lfn_counter/f.root
    idx = 0
    for row in output:
        ver = '%s-v1' % proc
        counter = str(idx).zfill(9)
        prefix = '/store/data/era/%s/%s/%s/%s/' % (prim, tier, ver, counter)
        name = prefix + row['file']['name']
        size = random.randint(1000, 1000000)
        ftype = generate_uid(1, ['EDM', 'ROOT'], self._fixed)
        row['file'].update({'logical_file_name': name,
                            'file_size': size,
                            'file_type': ftype,
                            'check_sum': generate_uid(8),
                            'adler32': generate_uid(8),
                            'file_output_config_list': [oconfig],
                            'file_lumi_list': self.file_lumi_list(),
                            'file_parent_list': [],
                            'auto_cross_section': 0.0,
                            'event_count': random.randint(10, 10000),
                            'dataset': path,
                            'file_type_id': 1,
                            'md5': 'NOTSET'})
        # the parent's short 'name' is superseded by logical_file_name
        del row['file']['name']
        idx += 1
    return output
def configs(self, number, **attrs):
    "Generate DBS output config meta-data"
    result = []
    for _ in range(number):
        cfg = {'release_version': 'CMSSW_1_2_3',
               'pset_hash': generate_uid(32),
               'app_name': 'cmsRun',
               # NOTE(review): 'Ouput' looks like a typo ('Output'), but the
               # string is runtime data and is preserved as-is
               'output_module_label': 'Ouput_module_label',
               'global_tag': 'TAG'}
        result.append({'configs': cfg})
    return result
def block_dump(self, number_of_files=1):
    "Generate block with multiple files in it"
    # output-configuration record shared by the dataset and every file
    info = dict(release_version='CMSSW_1_2_3',
                pset_hash=generate_uid(32),
                app_name='cmsRun',
                output_module_label='Merge',
                global_tag='TAG')
    # supporting meta-data, one record apiece
    prim = self.prim_ds(1)[0]
    proc_era = self.proc_eras(1)[0]
    acq_era = self.acq_eras(1)[0]
    tier = self.tiers(1)[0]
    version = proc_era['processing_era']['processing_version']
    attrs = dict(prim=prim['prim_ds']['primary_ds_name'],
                 processing_version=version,
                 acquisition_era_name=
                     acq_era['acquisition_era']['acquisition_era_name'],
                 proc='proc-%s' % version,
                 tier=tier['tier']['data_tier_name'])
    dataset = self.datasets(1, **attrs)[0]
    block = self.blocks(1)[0]
    files = self.files(number_of_files)
    # per-file copy of the config record, tagged with that file's LFN
    file_info = []
    for entry in files:
        conf = dict(info)
        conf['lfn'] = entry['file']['logical_file_name']
        file_info.append(conf)
    return dict(blockDump=dict(dataset_conf_list=[info],
                               file_conf_list=file_info,
                               dataset=dataset['dataset'],
                               block=block['block'],
                               primds=prim['prim_ds'],
                               processing_era=proc_era['processing_era'],
                               acquisition_era=acq_era['acquisition_era'],
                               files=files))
def datasets(self, number, **attrs):
    """Generate DBS datasets meta-data.

    Splits each parent record's '/prim/proc/tier' name into its components,
    fills in the full DBS dataset document and renames 'name' to 'dataset'.
    """
    output = super(DBSDataProvider, self).datasets(number, **attrs)
    for row in output:
        dsrec = row['dataset']
        name = dsrec['name']
        proc_ver = dsrec.get('processing_version', 123)
        acq_era = dsrec.get('acquisition_era_name', 'test')
        prim_type = dsrec.get('primary_ds_type', 'mc')
        _, prim, proc, tier = name.split('/')
        group = generate_uid(1, ['Top', 'QCD', 'RelVal'], self._fixed)
        def_config = [{'release_version': 'CMSSW_TEST',
                       'pset_hash': 'NO_PSET_HASH',
                       'global_tag': 'TAG',
                       'app_name': 'Generator',
                       'output_module_label': 'TEST'}]
        oconfig = dsrec.get('output_configs', def_config)
        dsrec.update({'primary_ds_name': prim,
                      'processing_ds_name': proc,
                      'data_tier_name': tier,
                      'physics_group_name': group,
                      'acquisition_era_name': acq_era,
                      'processing_version': proc_ver,
                      'xtcrosssection': 0.1,
                      'output_configs': oconfig,
                      # bug fix: use the row's primary_ds_type instead of a
                      # hard-coded 'mc' (default stays 'mc' when unset)
                      'primary_ds_type': prim_type,
                      'dataset_access_type': 'valid',
                      'prep_id': 1,
                      'dataset': name})
        del dsrec['name']
    return output
def pset_hash(self):
    "return parameter set hash"
    # lazily generate the hash on first access and memoize it
    try:
        return self._pset_hash
    except AttributeError:
        self._pset_hash = generate_uid(32)
        return self._pset_hash
def primary_ds_type(self):
    "return primary dataset type"
    # pick 'mc' or 'data' once and memoize the choice on the instance
    cached = getattr(self, '_primary_ds_type', None)
    if cached is None:
        cached = generate_uid(1, ['mc', 'data'], self._fixed)
        self._primary_ds_type = cached
    return cached
def block_dump(block):
    """Convert a Phedex-style block record into a DBS blockDump document.

    ``block`` is read for its 'name' ('/prim/proc/tier#block'), 'files'
    (each carrying 'checksum', 'bytes' and 'name'), 'is-open' ('y'/'n') and
    'nfiles' keys -- NOTE(review): schema inferred from the lookups below;
    confirm against the caller.
    """
    # fixed configuration values reused in the per-file config records
    rel = 'CMSSW_1_2_3'
    app = 'cmsRun'
    tag = 'TAG'
    label = 'Merged'
    phash = generate_uid(32)
    phys_group = 'Tracker'
    # NOTE(review): 'info' is assigned but never used below; dataset_conf_list
    # rebuilds the same dict from the individual variables
    info = dict(release_version=rel, pset_hash=phash, app_name=app,
                output_module_label=label, global_tag=tag)
    block_name = block['name']
    # the dataset name is the portion before the '#' block separator
    dataset_name = block_name.split('#')[0]
    _, primary_ds_name, processed_ds_name, tier = dataset_name.split('/')
    # processed dataset names look like "<acq_era>-<name>-v<version>"
    acquisition_era_name, _, processing_version = processed_ds_name.split("-")
    proc_era = {"processing_version": processing_version[1:],  # remove v from v4711
                "description": "Test_proc_era"}
    acq_era = {"acquisition_era_name": acquisition_era_name,
               'start_date': 1234567890,
               "description": "Test_acquisition_era"}
    primds = dbs_data_provider.prim_ds(1)[0].get('prim_ds')
    primds.update({"primary_ds_name": primary_ds_name})
    files = []
    file_conf_list = []
    block_size = 0  # accumulated size of all files in the block
    for this_file in block['files']:
        this_file = this_file['file']
        # checksum string has the form "cksum:<val>,adler32:<val>"
        cksum = this_file['checksum']
        block_size += this_file['bytes']
        files.append({'check_sum': cksum.split(',')[0].split(':')[1],
                      'file_lumi_list': dbs_data_provider.file_lumi_list(),
                      'adler32': cksum.split(',')[1].split(':')[1],
                      'event_count': random.randint(10, 10000),
                      'file_type': 'EDM',
                      'logical_file_name': this_file['name'],
                      'md5': None,
                      'auto_cross_section': 0.0})
        file_conf_list.append({'release_version': rel,
                               'pset_hash': phash,
                               'lfn': this_file['name'],
                               'app_name': app,
                               'output_module_label': label,
                               'global_tag': tag})
    # NOTE(review): this local shadows the function name; harmless here since
    # the function is not called recursively, but potentially confusing
    block_dump = {'dataset_conf_list': [{'release_version': rel,
                                         'pset_hash': phash,
                                         'app_name': app,
                                         'output_module_label': label,
                                         'global_tag': tag}],
                  'file_conf_list': file_conf_list,
                  'files': files,
                  'processing_era': proc_era,
                  'primds': primds,
                  'dataset': {'physics_group_name': phys_group,
                              'dataset_access_type': 'VALID',
                              'data_tier_name': tier,
                              'processed_ds_name': processed_ds_name,
                              'xtcrosssection': 123.0,
                              'dataset': dataset_name},
                  'acquisition_era': acq_era,
                  'block': {'open_for_writing': block['is-open'] == 'y',
                            'block_name': block_name,
                            'file_count': block['nfiles'],
                            'origin_site_name': 'grid-srm.physik.rwth-aachen.de',
                            'block_size': block_size},
                  'file_parent_list': []}
    return block_dump