Example #1
 def add_datasets(self, input_prim_proc_acq_tier_config, number=1):
     "Add blocks to a given primary dataset"
     idict    = deepcopy(input_prim_proc_acq_tier_config)
     prim_val = idict['prim_ds']
     proc_ver = idict['processing_era']
     acq_era  = idict['acquisition_era']
     tier     = idict['tier']
     config   = idict['configs']
     func     = lambda x: [x] if isinstance(x, dict) else x
     prim_val = func(prim_val)
     proc_ver = func(proc_ver)
     acq_era  = func(acq_era)
     tier     = func(tier)
     config   = func(config)
     output   = []
     for item_prim, item_proc, item_acq, item_tier, item_config \
     in zip(prim_val, proc_ver, acq_era, tier, config):
         prim = item_prim['primary_ds_name']
         acq  = item_acq['acquisition_era_name']
         tier = item_tier['data_tier_name']
         proc_ver = item_proc['processing_version']
         attrs = {'prim':prim,
                  'processing_version':proc_ver,
                  'acquisition_era_name':acq,
                  'tier':tier,
                  'output_configs':[item_config]}
         res  = self.datasets(number, **attrs)
         for row in res:
             output.append(row['dataset'])
     idict['dataset'] = output
     return idict
Example #2
 def add_datasets(self, input_prim_proc_acq_tier_config, number=1):
     "Add blocks to a given primary dataset"
     idict = deepcopy(input_prim_proc_acq_tier_config)
     prim_val = idict['prim_ds']
     proc_ver = idict['processing_era']
     acq_era = idict['acquisition_era']
     tier = idict['tier']
     config = idict['configs']
     func = lambda x: [x] if isinstance(x, dict) else x
     prim_val = func(prim_val)
     proc_ver = func(proc_ver)
     acq_era = func(acq_era)
     tier = func(tier)
     config = func(config)
     output = []
     for item_prim, item_proc, item_acq, item_tier, item_config \
     in zip(prim_val, proc_ver, acq_era, tier, config):
         prim = item_prim['primary_ds_name']
         acq = item_acq['acquisition_era_name']
         tier = item_tier['data_tier_name']
         proc_ver = item_proc['processing_version']
         attrs = {
             'prim': prim,
             'processing_version': proc_ver,
             'acquisition_era_name': acq,
             'tier': tier,
             'output_configs': [item_config]
         }
         res = self.datasets(number, **attrs)
         for row in res:
             output.append(row['dataset'])
     idict['dataset'] = output
     return idict
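
For orientation, the record handed to add_datasets is expected to carry each component either as a single dict or as a list of dicts; the lambda merely normalizes both forms before they are zipped together. A minimal, self-contained sketch of that normalization with made-up field values (the key names mirror what the method reads, everything else is hypothetical):

    # hypothetical input record; key names match what add_datasets reads
    idict = {'prim_ds': {'primary_ds_name': 'RelValMinBias'},          # a single dict ...
             'processing_era': [{'processing_version': 1}],            # ... or a list of dicts
             'acquisition_era': {'acquisition_era_name': 'DMWM_TEST'},
             'tier': {'data_tier_name': 'GEN-SIM'},
             'configs': {'app_name': 'cmsRun'}}

    # same normalization idiom as above: wrap a bare dict into a one-item list
    func = lambda x: [x] if isinstance(x, dict) else x
    keys = ('prim_ds', 'processing_era', 'acquisition_era', 'tier', 'configs')
    columns = [func(idict[key]) for key in keys]
    for prim, proc, acq, tier, cfg in zip(*columns):
        print('%s %s %s %s' % (prim['primary_ds_name'], proc['processing_version'],
                               acq['acquisition_era_name'], tier['data_tier_name']))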
Example #3
 def add_files(self, input_dataset, number_of_files=1):
     "Add files to a given dataset"
     record  = deepcopy(input_dataset)
     block   = record['block']
     if  not isinstance(block, list):
         block = [block]
     res = []   # collect files from every block, not just the last one
     for rec in block:
         _, prim, proc, tier = rec['block_name'].split('#')[0].split('/')
         attrs = {'prim':prim, 'proc':proc, 'tier':tier, 'block_name':rec['block_name'], 'output_configs':record['configs']}
         res += self.files(number_of_files, **attrs)
     return res
Example #4
 def add_blocks(self, input_dataset, number_of_blocks=1):
     "Add blocks to a given dataset"
     dataset = deepcopy(input_dataset)
     name = dataset['dataset']['name']
     res = self.blocks(number_of_blocks)
     for row in res:
         buid = generate_block_uid()
         row['block']['name'] = '%s#%s' % (name, buid)
     if dataset['dataset']['is-open'] == 'y':
         blocks = dataset['dataset']['blocks']
         blocks += res
     return dataset
Example #5
 def add_blocks(self, input_dataset, number_of_blocks=1):
     "Add blocks to a given dataset"
     dataset = deepcopy(input_dataset)
     name = dataset['dataset']['name']
     res  = self.blocks(number_of_blocks)
     for row in res:
         buid = generate_block_uid()
         row['block']['name'] = '%s#%s' % (name, buid)
     if  dataset['dataset']['is-open'] == 'y':
         blocks = dataset['dataset']['blocks']
         blocks += res
     return dataset
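
generate_block_uid() is a project helper not shown in these examples; a plausible stand-in based on uuid is sketched below to illustrate the naming convention the loop relies on, namely dataset name plus '#' plus a unique suffix. The dataset name is made up:

    import uuid

    def generate_block_uid():
        # stand-in for the project's helper; the real implementation may differ
        return str(uuid.uuid4())

    name = '/RelValMinBias/DMWM_TEST-v1/GEN-SIM'      # hypothetical dataset name
    block_name = '%s#%s' % (name, generate_block_uid())
    # yields something like '/RelValMinBias/DMWM_TEST-v1/GEN-SIM#<uuid>'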
Example #6
 def add_blocks(self, input_dataset, number=1):
     "Add blocks to a given dataset"
     idict = deepcopy(input_dataset)
     datasets = idict['dataset']
     if isinstance(datasets, dict):
         datasets = [datasets]
     output = []
     for item in datasets:
         _, prim, proc, tier = item['dataset'].split('/')
         attrs = {'prim': prim, 'proc': proc, 'tier': tier}
         res = self.blocks(number, **attrs)
         for row in res:
             output.append(row['block'])
     idict['block'] = output
     return idict
Example #7
 def add_blocks(self, input_dataset, number=1):
     "Add blocks to a given dataset"
     idict = deepcopy(input_dataset)
     datasets = idict['dataset']
     if  isinstance(datasets, dict):
         datasets = [datasets]
     output = []
     for item in datasets:
         _, prim, proc, tier = item['dataset'].split('/')
         attrs = {'prim':prim, 'proc':proc, 'tier':tier}
         res   = self.blocks(number, **attrs)
         for row in res:
             output.append(row['block'])
     idict['block'] = output
     return idict
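
Both add_blocks variants lean on the naming convention that a dataset name reads '/<primary>/<processed>/<tier>' and a block name appends '#<uid>'. A stand-alone sketch with a hypothetical name shows why the split calls recover the three fields:

    dataset_path = '/RelValMinBias/DMWM_TEST-v1/GEN-SIM'        # hypothetical name
    _, prim, proc, tier = dataset_path.split('/')               # leading '/' gives an empty first field

    block_name = dataset_path + '#123e4567-e89b-12d3-a456-426655440000'
    _, prim, proc, tier = block_name.split('#')[0].split('/')   # same fields once '#<uid>' is dropped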
Example #8
    def dataset(self):
        "return dataset object"
        if not hasattr(self, '_dataset'):
            self.generate_dataset()

        phedex_data = {'dbs_name': self.dbs_name}
        dataset = deepcopy(self._dataset)
        dataset.update({'is-open': self.dataset_is_open})

        for block in dataset['blocks']:
            #update block information
            size = sum([f['file']['bytes'] for f in block['block']['files']])
            block['block'].update({"nfiles": len(block['block']['files']),
                                   "size": size})

        phedex_data.update(dict(dataset=dataset))
        return phedex_data
Example #9
 def add_files(self, input_dataset, number_of_files=1):
     "Add files to a given dataset"
     record = deepcopy(input_dataset)
     block = record['block']
     if not isinstance(block, list):
         block = [block]
     res = []  # collect files from every block, not just the last one
     for rec in block:
         _, prim, proc, tier = rec['block_name'].split('#')[0].split('/')
         attrs = {
             'prim': prim,
             'proc': proc,
             'tier': tier,
             'block_name': rec['block_name'],
             'output_configs': record['configs']
         }
         res += self.files(number_of_files, **attrs)
     return res
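
For reference, the record passed to this add_files variant only needs a 'block' entry (a single dict or a list of dicts, each carrying a 'block_name') and a 'configs' entry; a hypothetical minimal shape:

    # made-up record with the minimal shape add_files reads
    record = {'configs': [{'app_name': 'cmsRun'}],
              'block': {'block_name': '/RelValMinBias/DMWM_TEST-v1/GEN-SIM#1a2b3c'}}
    # 'block' may equally be a list of such dicts; the method wraps a bare dict itself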
Example #10
 def gen_runs(self, file_record, number_of_runs=1):
     "Generate run/lumis for a given file record"
     if  not isinstance(file_record, dict) or \
         'file' not in file_record or \
         'logical_file_name' not in file_record['file']:
         msg = 'To generate run/lumis please provide valid file record/JSON file'
         raise Exception(msg)
     row = deepcopy(file_record)
     records = []
     for _ in range(0, number_of_runs):
         run = random.randint(100000, 200000)
         for _ in range(0, random.randint(1, 10)):
             lumi = random.randint(1, 100)
             rec = {'run_num': str(run), 'lumi_section_num': str(lumi)}
             records.append(rec)
     row['file']['file_lumi_list'] = records
     return row
Example #11
 def gen_runs(self, file_record, number_of_runs=1):
     "Generate run/lumis for a given file record"
     if  not isinstance(file_record, dict) or \
         'file' not in file_record or \
         'logical_file_name' not in file_record['file']:
         msg = 'To generate run/lumis please provide valid file record/JSON file'
         raise Exception(msg)
     row = deepcopy(file_record)
     records = []
     for _ in range(0, number_of_runs):
         run  = random.randint(100000, 200000)
         for _ in range(0, random.randint(1, 10)):
             lumi = random.randint(1, 100)
             rec = {'run_num': str(run), 'lumi_section_num': str(lumi)}
             records.append(rec)
     row['file']['file_lumi_list'] = records
     return row
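
The validation at the top of gen_runs only requires a dict with a 'file' entry that carries a 'logical_file_name'. A minimal record that would pass it (values are made up):

    # hypothetical file record accepted by gen_runs
    file_record = {'file': {'logical_file_name': '/store/data/test.root',
                            'bytes': 1024}}
    # gen_runs returns a deep copy whose 'file' dict gains a 'file_lumi_list',
    # a list of {'run_num': ..., 'lumi_section_num': ...} string pairs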
Example #12
    def dataset(self):
        "return dataset object"
        if not hasattr(self, '_dataset'):
            self.generate_dataset()

        phedex_data = {'dbs_name': self.dbs_name}
        dataset = deepcopy(self._dataset)
        dataset.update({'is-open': self.dataset_is_open})

        for block in dataset['blocks']:
            #update block information
            size = sum([f['file']['bytes'] for f in block['block']['files']])
            block['block'].update({
                "nfiles": len(block['block']['files']),
                "size": size
            })

        phedex_data.update(dict(dataset=dataset))
        return phedex_data
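
The loop above assumes each entry of dataset['blocks'] wraps a 'block' dict whose 'files' list wraps 'file' dicts carrying 'bytes'. A small self-contained sketch of that shape and of how nfiles and size are derived:

    # hypothetical in-memory dataset, shaped the way the loop expects
    dataset = {'blocks': [{'block': {'files': [{'file': {'bytes': 1000}},
                                               {'file': {'bytes': 2500}}]}}]}
    for block in dataset['blocks']:
        files = block['block']['files']
        block['block'].update({'nfiles': len(files),
                               'size': sum(f['file']['bytes'] for f in files)})
    # the single block now carries nfiles == 2 and size == 3500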
Example #13
 def add_files(self, input_dataset, number_of_files=1):
     "Add files to a given dataset"
     dataset = deepcopy(input_dataset)
     for block in dataset['dataset']['blocks']:
         if  block['block']['is-open'] != 'y':
             continue
         block_name = block['block']['name']
         _, prim, proc, tier = block_name.split('#')[0].split('/')
         attrs = {'prim':prim, 'proc':proc, 'tier':tier}
         res  = self.files(number_of_files, **attrs)
         size = 0
         for row in res:
             size += row['file']['bytes']
         if  block['block']['files']:
             block['block']['files'] += res
             block['block']['size'] += size
             block['block']['nfiles'] += len(res)
         else:
             block['block']['files'] = res
             block['block']['size'] = size
             block['block']['nfiles'] = len(res)
     return dataset
Example #14
 def add_files(self, input_dataset, number_of_files=1):
     "Add files to a given dataset"
     dataset = deepcopy(input_dataset)
     for block in dataset['dataset']['blocks']:
         if block['block']['is-open'] != 'y':
             continue
         block_name = block['block']['name']
         _, prim, proc, tier = block_name.split('#')[0].split('/')
         attrs = {'prim': prim, 'proc': proc, 'tier': tier}
         if "tags" in dataset['dataset']:
             attrs['tags'] = dataset['dataset']['tags']
         res = self.files(number_of_files, **attrs)
         size = 0
         for row in res:
             size += row['file']['bytes']
         if block['block']['files']:
             block['block']['files'] += res
             block['block']['size'] += size
             block['block']['nfiles'] += len(res)
         else:
             block['block']['files'] = res
             block['block']['size'] = size
             block['block']['nfiles'] = len(res)
     return dataset
Example #15
def workflow(fin, fout, verbose=None):
    "LifeCycle workflow"

    initial_payload = None # initial payload, should be provided by LifeCycle
    new_payload = [] # newly created payloads will be returned by LifeCycle

    with open(fin, 'r') as source:
        initial_payload = json.load(source)

    if  verbose:
        print "\n### input workflow"
        print pprint.pformat(initial_payload)

    ### read inputs from payload
    workflow = initial_payload['workflow']
    # check whether inputs are read from a configuration file
    try:
        cfg   = workflow['DataProviderCfg']
    except KeyError:
        # No configuration given; fall back to values provided directly in the
        # workflow for backward compatibility. Values read via get() are optional.
        cdict = { 'process' :
                  {'NumberOfDatasets' : workflow['NumberOfDatasets'],
                   'NumberOfBlocks' : workflow['NumberOfBlocks'],
                   'NumberOfFiles' : workflow['NumberOfFiles'],
                   'NumberOfRuns' : workflow['NumberOfRuns'],
                   'NumberOfLumis' : workflow['NumberOfLumis']},
                  'dbs' :
                  {'DBSSkipFileFail': workflow.get('DBSSkipFileFail', None),
                   'DBSChangeCksumFail': workflow.get('DBSChangeCksumFail', None),
                   'DBSChangeSizeFail': workflow.get('DBSChangeSizeFail', None)},
                  'phedex' :
                  {'PhedexSkipFileFail' : workflow.get('PhedexSkipFileFail', None),
                   'PhedexChangeCksumFail' : workflow.get('PhedexChangeCksumFail', None),
                   'PhedexChangeSizeFail' : workflow.get('PhedexChangeSizeFail', None),
                   'PhedexDBSName' : workflow['PhedexDBSName']}
                  }
    else:
        cdict = read_configparser(cfg)

    process_cfg = cdict['process']
    dbs_cfg = cdict['dbs']
    phedex_cfg = cdict['phedex']

    phedex_dbs_name = phedex_cfg.get('PhedexDBSName')
    number_of_datasets = int(process_cfg.get('NumberOfDatasets'))
    number_of_blocks = int(process_cfg.get('NumberOfBlocks'))
    number_of_files = int(process_cfg.get('NumberOfFiles'))
    number_of_runs = int(process_cfg.get('NumberOfRuns'))
    number_of_lumis = int(process_cfg.get('NumberOfLumis'))

    try:
        phedex_file  = float(phedex_cfg.get('PhedexSkipFileFail'))
        phedex_cksum = float(phedex_cfg.get('PhedexChangeCksumFail'))
        phedex_size  = float(phedex_cfg.get('PhedexChangeSizeFail'))

        dbs_file  = float(dbs_cfg.get('DBSSkipFileFail'))
        dbs_cksum = float(dbs_cfg.get('DBSChangeCksumFail'))
        dbs_size  = float(dbs_cfg.get('DBSChangeSizeFail'))
    # if any value is None, the float() cast raises TypeError, meaning no failures are injected
    except TypeError:
        failure_rates = None
    else:
        failure_rates = dict(PhedexSkipFileFail=phedex_file)
        failure_rates.update(PhedexChangeCksumFail=phedex_cksum)
        failure_rates.update(PhedexChangeSizeFail=phedex_size)
        failure_rates.update(DBSSkipFileFail=dbs_file)
        failure_rates.update(DBSChangeCksumFail=dbs_cksum)
        failure_rates.update(DBSChangeSizeFail=dbs_size)
    print failure_rates
    phedex_provider = PhedexProvider(dbs_name=phedex_dbs_name, failure_rates=failure_rates)
    dbs_provider = DBSProvider(failure_rates=failure_rates)

    for _ in xrange(number_of_datasets):
        #clone initial payload
        payload = deepcopy(initial_payload)
        phedex_provider.generate_dataset()
        phedex_provider.add_blocks(number_of_blocks)
        phedex_provider.add_files(number_of_files)
        payload['workflow']['Phedex'] = [phedex_provider.dataset()]
        payload['workflow']['DBS'] = dbs_provider.block_dump(number_of_runs,
                                                             number_of_lumis)
        phedex_provider.reset()
        new_payload.append(payload)

    with open(fout, 'w') as output:
        json.dump(new_payload, output)

    if  verbose:
        print "\n### output workflow"
        print pprint.pformat(new_payload)
Example #16
def workflow(fin, fout, verbose=None):
    "LifeCycle workflow"

    initial_payload = None  # initial payload, should be provided by LifeCycle
    new_payload = []  # newly created payloads will be returned by LifeCycle

    with open(fin, 'r') as source:
        initial_payload = json.load(source)

    if verbose:
        print "\n### input workflow"
        print pprint.pformat(initial_payload)

    ### read inputs from payload
    workflow = initial_payload['workflow']
    # check whether inputs are read from a configuration file
    try:
        cfg = workflow['DataProviderCfg']
    except KeyError:
        # No configuration given; fall back to values provided directly in the
        # workflow for backward compatibility. Values read via get() are optional.
        cdict = {
            'process': {
                'NumberOfDatasets': workflow['NumberOfDatasets'],
                'NumberOfBlocks': workflow['NumberOfBlocks'],
                'NumberOfFiles': workflow['NumberOfFiles'],
                'NumberOfRuns': workflow['NumberOfRuns'],
                'NumberOfLumis': workflow['NumberOfLumis']
            },
            'dbs': {
                'DBSSkipFileFail': workflow.get('DBSSkipFileFail', None),
                'DBSChangeCksumFail': workflow.get('DBSChangeCksumFail', None),
                'DBSChangeSizeFail': workflow.get('DBSChangeSizeFail', None)
            },
            'phedex': {
                'PhedexSkipFileFail':
                workflow.get('PhedexSkipFileFail', None),
                'PhedexChangeCksumFail':
                workflow.get('PhedexChangeCksumFail', None),
                'PhedexChangeSizeFail':
                workflow.get('PhedexChangeSizeFail', None),
                'PhedexDBSName':
                workflow['PhedexDBSName']
            }
        }
    else:
        cdict = read_configparser(cfg)

    process_cfg = cdict['process']
    dbs_cfg = cdict['dbs']
    phedex_cfg = cdict['phedex']

    phedex_dbs_name = phedex_cfg.get('PhedexDBSName')
    number_of_datasets = int(process_cfg.get('NumberOfDatasets'))
    number_of_blocks = int(process_cfg.get('NumberOfBlocks'))
    number_of_files = int(process_cfg.get('NumberOfFiles'))
    number_of_runs = int(process_cfg.get('NumberOfRuns'))
    number_of_lumis = int(process_cfg.get('NumberOfLumis'))

    try:
        phedex_file = float(phedex_cfg.get('PhedexSkipFileFail'))
        phedex_cksum = float(phedex_cfg.get('PhedexChangeCksumFail'))
        phedex_size = float(phedex_cfg.get('PhedexChangeSizeFail'))

        dbs_file = float(dbs_cfg.get('DBSSkipFileFail'))
        dbs_cksum = float(dbs_cfg.get('DBSChangeCksumFail'))
        dbs_size = float(dbs_cfg.get('DBSChangeSizeFail'))
    # if any value is None, the float() cast raises TypeError, meaning no failures are injected
    except TypeError:
        failure_rates = None
    else:
        failure_rates = dict(PhedexSkipFileFail=phedex_file)
        failure_rates.update(PhedexChangeCksumFail=phedex_cksum)
        failure_rates.update(PhedexChangeSizeFail=phedex_size)
        failure_rates.update(DBSSkipFileFail=dbs_file)
        failure_rates.update(DBSChangeCksumFail=dbs_cksum)
        failure_rates.update(DBSChangeSizeFail=dbs_size)
    print failure_rates
    phedex_provider = PhedexProvider(dbs_name=phedex_dbs_name,
                                     failure_rates=failure_rates)
    dbs_provider = DBSProvider(failure_rates=failure_rates)

    for _ in xrange(number_of_datasets):
        #clone initial payload
        payload = deepcopy(initial_payload)
        phedex_provider.generate_dataset()
        phedex_provider.add_blocks(number_of_blocks)
        phedex_provider.add_files(number_of_files)
        payload['workflow']['Phedex'] = [phedex_provider.dataset()]
        payload['workflow']['DBS'] = dbs_provider.block_dump(
            number_of_runs, number_of_lumis)
        phedex_provider.reset()
        new_payload.append(payload)

    with open(fout, 'w') as output:
        json.dump(new_payload, output)

    if verbose:
        print "\n### output workflow"
        print pprint.pformat(new_payload)
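
For completeness, a minimal input payload in the backward-compatible form (no 'DataProviderCfg') that workflow() reads; the key names mirror the lookups above, while the values and file names are made up:

    import json

    payload = {'workflow': {'NumberOfDatasets': 1,
                            'NumberOfBlocks': 2,
                            'NumberOfFiles': 5,
                            'NumberOfRuns': 1,
                            'NumberOfLumis': 10,
                            'PhedexDBSName': 'global'}}
    # the optional *Fail rates are omitted, so the float() casts raise TypeError
    # and failure_rates stays None (no failures injected)

    with open('lifecycle_in.json', 'w') as fds:
        json.dump(payload, fds)
    # workflow('lifecycle_in.json', 'lifecycle_out.json', verbose=1)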