Example #1
    def files(self, number, **attrs):
        "Generate Phedex file meta-data"
        prim = attrs.get('prim', 'prim')
        proc = attrs.get('proc', 'proc')
        tier = attrs.get('tier', 'tier')
        tags = attrs.get('tags', '')

        if tags:
            output = super(PhedexDataProvider, self).files(number, **attrs)
        else:
            output = super(PhedexDataProvider, self).files(number)

        # /store/data/acq_era/prim_dataset/data_tier/proc_version/lfn_counter/f.root
        idx = 0
        gbyte = 1024 * 1024 * 1024
        for row in output:
            era, proc_ds_name, ver = proc.split('-')
            ver = ver[1:]  #remove v from v4711
            counter = str(idx).zfill(9)
            prefix = '/store/data/%s/%s/%s/%s/%s/' % (era, prim, tier, ver,
                                                      counter)
            name = prefix + row['file']['name']
            checksum = 'cksum:%s,adler32:%s' \
                    % (generate_uid(4, '1234567890'), \
                        generate_uid(4, '1234567890'))
            size = random.randint(1 * gbyte, 2 * gbyte)
            doc = {'checksum': checksum, 'bytes': size, 'name': name}
            row['file'].update(doc)
            idx += 1
        return output
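A brief usage sketch may help. The constructor call and argument values below are assumptions for illustration; note that `proc` must follow the `era-name-vNNN` convention, or the `proc.split('-')` unpacking above raises a ValueError.

    # Hypothetical usage; argument values are invented for illustration.
    provider = PhedexDataProvider()
    rows = provider.files(2, prim='SingleMu', tier='RAW',
                          proc='Run2011A-PromptReco-v4711')
    for row in rows:
        # e.g. /store/data/Run2011A/SingleMu/RAW/4711/000000000/<uid>.root
        print(row['file']['name'])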
Example #2
 def prim_ds(self, number, **attrs):
     "Generate DBS primary dataset meta-data"
     output = []
     for _ in range(0, number):
         prim = attrs.get('prim', generate_uid(3, self._seed, self._fixed))
         data_type = generate_uid(1, ['mc', 'data'], self._fixed)
         rec = dict(prim_ds=\
                 dict(primary_ds_name=prim, primary_ds_type=data_type))
         output.append(rec)
     return output
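`generate_uid`, used throughout these examples, is not shown. A plausible reading of its call sites (`generate_uid(length)`, `generate_uid(length, seed)`, and `generate_uid(1, some_list)`, each with an optional `fixed` flag for reproducible output) is sketched below; this is an assumption, not the project's actual implementation.

    import random

    def generate_uid(length, seed='0123456789abcdef', fixed=False):
        "Sketch: draw `length` characters from `seed`, or one element of a list"
        if isinstance(seed, (list, tuple)):
            # calls such as generate_uid(1, ['mc', 'data']) pick one element
            return seed[0] if fixed else random.choice(seed)
        if fixed:
            # deterministic output for repeatable tests (assumed semantics)
            return (seed * length)[:length]
        return ''.join(random.choice(seed) for _ in range(length))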
Example #3
 def files(self, number, **attrs):
     "Generate DBS files meta-data"
     prim = attrs.get('prim', 'prim')
     proc = attrs.get('proc', 'proc')
     tier = attrs.get('tier', 'tier')
     oconfig = attrs.get(
         'output_configs', {
             'release_version': 'CMSSW_TEST',
             'pset_hash': 'NO_PSET_HASH',
             'app_name': 'Generator',
             'output_module_label': 'TEST',
             'global_tag': 'TAG'
         })
     for key in ['prim', 'proc', 'tier', 'output_configs']:
         if key in attrs:
             del attrs[key]
     path = '/%s/%s/%s' % (prim, proc, tier)
     output = super(DBSDataProvider, self).files(number, **attrs)
     # /store/data/acq_era/prim_dataset/data_tier/proc_version/lfn_counter/f.root
     idx = 0
     for row in output:
         ver = '%s-v1' % proc
         counter = str(idx).zfill(9)
         prefix = '/store/data/era/%s/%s/%s/%s/' % (prim, tier, ver,
                                                    counter)
         name = prefix + row['file']['name']
         size = random.randint(1000, 1000000)
         ftype = generate_uid(1, ['EDM', 'ROOT'], self._fixed)
         doc = {
             'logical_file_name': name,
             'file_size': size,
             'file_type': ftype,
             'check_sum': generate_uid(8),
             'adler32': generate_uid(8),
             'file_output_config_list': [oconfig],
             'file_lumi_list': self.file_lumi_list(),
             'file_parent_list': [],
             'auto_cross_section': 0.0,
             'event_count': random.randint(10, 10000),
             'dataset': path,
             'file_type_id': 1,
             'md5': 'NOTSET'
         }
         row['file'].update(doc)
         for att in ['name']:
             del row['file'][att]
         idx += 1
     return output
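The rows returned here are DBS-ready file dictionaries. A usage sketch, with invented attribute values and a constructor signature assumed from the `__init__` in the next example:

    provider = DBSDataProvider()
    rows = provider.files(1, prim='SingleMu', proc='PromptReco', tier='AOD')
    lfn = rows[0]['file']['logical_file_name']
    # e.g. /store/data/era/SingleMu/AOD/PromptReco-v1/000000000/<uid>.root
    assert rows[0]['file']['dataset'] == '/SingleMu/PromptReco/AOD'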
Example #4
 def __init__(self, fixed=False, runs=5, lumis=5):
     DataProvider.__init__(self, fixed)
     self.runs_per_file = runs
     self.lumis_per_run = lumis
     #initial start values for run and lumi generation
     self._run_num = int('1' + generate_uid(5, '1234567890', self._fixed))
     self._lumi_num = random.randint(1, 100)
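`file_lumi_list`, called by the `files` method above, is not among these examples. A hypothetical implementation consistent with the counters initialized here might look like this:

    def file_lumi_list(self):
        "Sketch: advance the run/lumi counters, one dict per lumi section"
        output = []
        for _ in range(self.runs_per_file):
            self._run_num += 1
            for _ in range(self.lumis_per_run):
                self._lumi_num += 1
                output.append({'run_num': self._run_num,
                               'lumi_section_num': self._lumi_num})
        return output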
Example #5
 def datasets(self, number, **attrs):
     "Generate DBS datasets meta-data"
     output = super(DBSDataProvider, self).datasets(number, **attrs)
     for row in output:
         name = row['dataset']['name']
         proc_ver = row['dataset'].get('processing_version', 123)
         acq_era = row['dataset'].get('acquisition_era_name', 'test')
         prim_type = row['dataset'].get('primary_ds_type', 'mc')
         _, prim, proc, tier = name.split('/')
         group = generate_uid(1, ['Top', 'QCD', 'RelVal'], self._fixed)
         def_config = [{
             'release_version': 'CMSSW_TEST',
             'pset_hash': 'NO_PSET_HASH',
             'global_tag': 'TAG',
             'app_name': 'Generator',
             'output_module_label': 'TEST'
         }]
         oconfig = row['dataset'].get('output_configs', def_config)
         doc = {
             'primary_ds_name': prim,
             'processing_ds_name': proc,
             'data_tier_name': tier,
             'physics_group_name': group,
             'acquisition_era_name': acq_era,
             'processing_version': proc_ver,
             'xtcrosssection': 0.1,
             'output_configs': oconfig,
              'primary_ds_type': prim_type,
             'dataset_access_type': 'valid',
             'prep_id': 1,
             'dataset': name
         }
         row['dataset'].update(doc)
         del row['dataset']['name']
     return output
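The `name.split('/')` unpacking relies on the standard `/primary/processed/tier` dataset path convention, for example:

    name = '/SingleMu/Run2011A-PromptReco-v4711/RAW'
    _, prim, proc, tier = name.split('/')
    # prim == 'SingleMu', proc == 'Run2011A-PromptReco-v4711', tier == 'RAW'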
Example #6
 def tiers(self, number, **attrs):
     "Generate DBS data tier meta-data"
     output = []
     for _ in range(0, number):
         tier = attrs.get('tier', generate_uid(1, self._tiers, self._fixed))
         rec = dict(tier=dict(data_tier_name=tier))
         output.append(rec)
     return output
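Passing `tier` pins the value for every record; otherwise one entry of `self._tiers` (a tier-name list set elsewhere in the class) is drawn per record. A small sketch, with `provider` standing for any instance of this class:

    rows = provider.tiers(2, tier='RAW')
    assert rows == [{'tier': {'data_tier_name': 'RAW'}},
                    {'tier': {'data_tier_name': 'RAW'}}]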
Example #7
    def block_dump(self, number_of_files=1):
        "Generate block with multiple files in it"

        # generate dataset configuration info
        rel = 'CMSSW_1_2_3'
        app = 'cmsRun'
        tag = 'TAG'
        label = 'Merge'
        phash = generate_uid(32)
        info = dict(release_version=rel,
                    pset_hash=phash,
                    app_name=app,
                    output_module_label=label,
                    global_tag=tag)

        # generate prim/proc/era
        prim = self.prim_ds(1)[0]
        proc_era = self.proc_eras(1)[0]
        acq_era = self.acq_eras(1)[0]
        tier = self.tiers(1)[0]

        # generate datasets
        proc = 'proc-%s' % proc_era['processing_era']['processing_version']
        attrs = {
            'prim':
            prim['prim_ds']['primary_ds_name'],
            'processing_version':
            proc_era['processing_era']['processing_version'],
            'acquisition_era_name':
            acq_era['acquisition_era']['acquisition_era_name'],
            'proc':
            proc,
            'tier':
            tier['tier']['data_tier_name']
        }
        dataset = self.datasets(1, **attrs)[0]

        # generate blocks
        block = self.blocks(1)[0]

        # generate files
        files = self.files(number_of_files)

        # generate file config info
        file_info = []
        for lfn in files:
            doc = dict(info)
            doc.update({'lfn': lfn['file']['logical_file_name']})
            file_info.append(doc)
        rec = dict(dataset_conf_list=[info],
                   file_conf_list=file_info,
                   dataset=dataset['dataset'],
                   block=block['block'],
                   primds=prim['prim_ds'],
                   processing_era=proc_era['processing_era'],
                   acquisition_era=acq_era['acquisition_era'],
                   files=files)
        return dict(blockDump=rec)
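A usage sketch; the provider class name is taken from the `super` calls in the sibling methods and the constructor defaults from the `__init__` shown earlier:

    provider = DBSDataProvider()
    payload = provider.block_dump(number_of_files=3)['blockDump']
    assert len(payload['files']) == 3
    assert len(payload['file_conf_list']) == 3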
Example #8
 def acq_eras(self, number, **attrs):
     "Generate DBS acquisition era meta-data"
     output = []
     desc = 'Test_acquisition_era'
     for _ in range(0, number):
         ver = generate_uid(4, self._seed, self._fixed)
         rec  = dict(acquisition_era=\
                 dict(acquisition_era_name=ver, description=desc))
         output.append(rec)
     return output
Example #9
 def proc_eras(self, number, **attrs):
     "Generate DBS processing era meta-data"
     output = []
     desc = 'Test_proc_era'
     for _ in range(0, number):
         ver  = int(generate_uid(4, '123456789', self._fixed))
         rec  = dict(processing_era=\
                 dict(processing_version=ver, description=desc))
         output.append(rec)
     return output
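Both era generators mirror the shape of the corresponding DBS API payloads, e.g.:

    rec = provider.proc_eras(1)[0]   # provider: any instance of this class
    # {'processing_era': {'processing_version': 4711,
    #                     'description': 'Test_proc_era'}}
    assert isinstance(rec['processing_era']['processing_version'], int)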
Example #10
 def configs(self, number, **attrs):
     "Generate DBS output config meta-data"
     output = []
     app = 'cmsRun'
     rel = 'CMSSW_1_2_3'
     tag = 'TAG'
     lab = 'Output_module_label'
     for _ in range(0, number):
         phash = generate_uid(32)
         rec  = dict(configs=\
                 dict(release_version=rel, pset_hash=phash, app_name=app,
                     output_module_label=lab, global_tag=tag))
         output.append(rec)
     return output
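Each generated record then has this shape, with a fresh `pset_hash` per record:

    rec = provider.configs(1)[0]   # provider: any instance of this class
    # {'configs': {'release_version': 'CMSSW_1_2_3', 'app_name': 'cmsRun',
    #              'pset_hash': '<32 characters>', 'global_tag': 'TAG',
    #              'output_module_label': 'Output_module_label'}}
    assert rec['configs']['app_name'] == 'cmsRun'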
Example #11
 def pset_hash(self):
     "return parameter set hash"
     if not hasattr(self, '_pset_hash'):
         self._pset_hash = generate_uid(32)
     return self._pset_hash
Example #12
 def primary_ds_type(self):
     "return primary dataset type"
     if not hasattr(self, '_primary_ds_type'):
         self._primary_ds_type = generate_uid(1, ['mc', 'data'],
                                              self._fixed)
     return self._primary_ds_type
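Both cached accessors use the same lazy-initialization idiom: compute once on first call, then cache on the instance. On Python 3.8+ the same caching can be written with `functools.cached_property`, though that turns the method into an attribute, so call sites would drop the parentheses. A sketch under that assumption, not the project's code:

    from functools import cached_property

    class CachedMetaMixin:   # hypothetical mixin name
        @cached_property
        def pset_hash(self):
            "return parameter set hash (computed once, then cached)"
            return generate_uid(32)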
Example #13
def block_dump(block):
    "Convert a PhEDEx block record into a DBS block dump"
    # generate dataset configuration info
    rel   = 'CMSSW_1_2_3'
    app   = 'cmsRun'
    tag   = 'TAG'
    label = 'Merged'
    phash = generate_uid(32)
    phys_group = 'Tracker'
    info  = dict(release_version=rel, pset_hash=phash, app_name=app,
                 output_module_label=label, global_tag=tag)

    block_name = block['name']
    dataset_name = block_name.split('#')[0]
    _, primary_ds_name, processed_ds_name, tier = dataset_name.split('/')
    acquisition_era_name, _, processing_version = processed_ds_name.split("-")
    proc_era = {"processing_version": processing_version[1:], #remove v from v4711
                "description": "Test_proc_era"}
    acq_era = {"acquisition_era_name": acquisition_era_name,
               'start_date': 1234567890,
               "description": "Test_acquisition_era"}
    primds = dbs_data_provider.prim_ds(1)[0].get('prim_ds')
    primds.update({"primary_ds_name":primary_ds_name})

    files = []
    file_conf_list = []
    block_size = 0
    
    for this_file in block['files']:
        this_file = this_file['file']
        cksum = this_file['checksum']
        block_size += this_file['bytes']
        files.append({'check_sum': cksum.split(',')[0].split(':')[1],
                      'file_lumi_list': dbs_data_provider.file_lumi_list(),
                      'adler32': cksum.split(',')[1].split(':')[1],
                      'event_count': random.randint(10, 10000),
                      'file_type': 'EDM',
                      'logical_file_name': this_file['name'],
                      'md5': None,
                      'auto_cross_section': 0.0})
        file_conf_list.append({'release_version': rel,
                               'pset_hash': phash,
                               'lfn': this_file['name'],
                               'app_name': app,
                               'output_module_label': label,
                               'global_tag': tag})
    
    block_dump = {'dataset_conf_list': [{'release_version' : rel,
                                         'pset_hash' : phash,
                                         'app_name' : app,
                                         'output_module_label' : label,
                                         'global_tag' : tag}],
                  'file_conf_list' : file_conf_list,
                  'files' : files,
                  'processing_era' : proc_era,
                  'primds' : primds,
                  'dataset':{'physics_group_name': phys_group,
                             'dataset_access_type': 'VALID',
                             'data_tier_name': tier,
                             'processed_ds_name': processed_ds_name,
                             'xtcrosssection': 123.0,
                             'dataset': dataset_name},
                  'acquisition_era': acq_era,
                  'block': {'open_for_writing': block['is-open']=='y',
                            'block_name': block_name,
                            'file_count': block['nfiles'],
                            'origin_site_name': 'grid-srm.physik.rwth-aachen.de',
                            'block_size': block_size},
                  'file_parent_list': []
                  }

    return block_dump
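A usage sketch with a hand-built PhEDEx-style block record; the field names (`name`, `is-open`, `nfiles`, `files` and the nested `file` dicts) are inferred from the accesses above, and `dbs_data_provider` must already exist at module level:

    block = {'name': '/SingleMu/Run2011A-PromptReco-v4711/RAW#123abc',
             'is-open': 'y',
             'nfiles': 1,
             'files': [{'file': {'name': '/store/data/Run2011A/SingleMu/RAW/4711/000000000/f.root',
                                 'bytes': 1543503872,
                                 'checksum': 'cksum:1234,adler32:abcd1234'}}]}
    dump = block_dump(block)
    assert dump['block']['block_size'] == 1543503872
    assert dump['dataset']['dataset'] == '/SingleMu/Run2011A-PromptReco-v4711/RAW'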