Example #1
def main():
    "Main function"
    optmgr = GeneratorOptionParser()
    opts, _args = optmgr.get_opt()

    if opts.system == 'dbs':
        mgr = DBSDataProvider(opts.fixed, opts.runs, opts.lumis)
    elif opts.system == 'phedex':
        mgr = PhedexDataProvider(opts.fixed)
    else:
        mgr = DataProvider(opts.fixed)
    if opts.system and not opts.action and not opts.generate:
        members = [n for n, _ in inspect.getmembers(mgr) if n[0] != '_']
        actions = [
            m for m in members if m.find('gen_') != -1 or m.find('add_') != -1
        ]
        generators = set(members) - set(actions)
        print opts.system, 'actions    :', ', '.join(actions)
        print opts.system, 'generators :', ', '.join(generators)
        sys.exit(0)
    number = opts.number  # number of entities to generate/add
    infile = opts.input  # input JSON file
    action = opts.action  # action to apply, e.g. add_blocks
    what = opts.generate  # method to generate, e.g. datasets
    outdata = []  # output data going to output JSON
    attrs = {}  # additional attributes
    if opts.prim:
        attrs.update({'prim': opts.prim})
    if opts.proc:
        attrs.update({'proc': opts.proc})
    if opts.tier:
        attrs.update({'tier': opts.tier})
    if infile and what:
        msg = 'You cannot mix --generate and --input options, '
        msg += 'they are mutually exclusive'
        print msg
        sys.exit(1)
    if what:
        outdata = getattr(mgr, what)(number, **attrs)
    if infile:
        if not action:
            msg = 'Please provide action to use'
            print msg
            sys.exit(1)
        with open(infile, 'r') as data_file:
            indata = json.load(data_file)
            for row in indata:
                res = getattr(mgr, action)(row, number)
                if isinstance(res, list):
                    outdata += res
                else:
                    outdata.append(res)
    outfile = what + '.json' if what else action + '.json'
    outfile = outfile.replace('add_', '').replace('gen_', '')
    if outdata:
        fname = opts.output if opts.output else outfile
        if infile and infile == fname:
            print "Input and output file names are identical, exit 1"
            sys.exit(1)
        with open(fname, 'w') as json_file:
            json_file.write(json.dumps(outdata))
    if opts.toprint:
        pprint.pprint(outdata)
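
The listing branch above (the case where only --system is given) uses inspect.getmembers to split a provider's public methods into the gen_/add_ "actions" and the remaining "generators". A minimal, self-contained sketch of that introspection follows; DummyProvider is a hypothetical stand-in for the DBSDataProvider/PhedexDataProvider/DataProvider classes, and main() itself additionally assumes module-level imports of inspect, sys, json and pprint together with GeneratorOptionParser and the provider classes defined elsewhere in the package.

import inspect

class DummyProvider(object):
    "Hypothetical stand-in for the *DataProvider classes used above"
    def gen_datasets(self, number):
        return []
    def add_blocks(self, row, number):
        return []
    def datasets(self, number):
        return []
    def _internal(self):
        pass

mgr = DummyProvider()
members = [n for n, _ in inspect.getmembers(mgr) if n[0] != '_']
actions = [m for m in members if m.find('gen_') != -1 or m.find('add_') != -1]
generators = set(members) - set(actions)
print 'actions    :', ', '.join(actions)      # add_blocks, gen_datasets
print 'generators :', ', '.join(generators)   # datasets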
Example #2
def main():
    "Main function"
    optmgr = GeneratorOptionParser()
    opts, _args = optmgr.get_opt()

    if  opts.system == 'dbs':
        mgr = DBSDataProvider(opts.fixed, opts.runs, opts.lumis)
    elif opts.system == 'phedex':
        mgr = PhedexDataProvider(opts.fixed)
    else:
        mgr = DataProvider(opts.fixed)
    if  opts.system and not opts.action and not opts.generate:
        members = [n for n, _ in inspect.getmembers(mgr) if n[0] != '_']
        actions = [m for m in members if m.find('gen_') != -1 or m.find('add_') != -1]
        generators = set(members) - set(actions)
        print opts.system, 'actions    :', ', '.join(actions)
        print opts.system, 'generators :', ', '.join(generators)
        sys.exit(0)
    number  = opts.number   # number of entities to generate/add
    infile  = opts.input    # input JSON file
    action  = opts.action   # action to apply, e.g. add_blocks
    what    = opts.generate # method to generate, e.g. datasets
    outdata = []            # output data going to output JSON
    attrs   = {}            # additional attributes
    if  opts.prim:
        attrs.update({'prim': opts.prim})
    if  opts.proc:
        attrs.update({'proc': opts.proc})
    if  opts.tier:
        attrs.update({'tier': opts.tier})
    if  infile and what:
        msg  = 'You cannot mix --generate and --input options, '
        msg += 'they are mutually exclusive'
        print msg
        sys.exit(1)
    if  what:
        outdata = getattr(mgr, what)(number, **attrs)
    if  infile:
        if  not action:
            msg = 'Please provide action to use'
            print msg
            sys.exit(1)
        with open(infile, 'r') as data_file:
            indata  = json.load(data_file)
            for row in indata:
                res = getattr(mgr, action)(row, number)
                if  isinstance(res, list):
                    outdata += res
                else:
                    outdata.append(res)
    outfile = what + '.json' if what else action + '.json'
    outfile = outfile.replace('add_', '').replace('gen_', '')
    if  outdata:
        fname = opts.output if opts.output else outfile
        if  infile and infile == fname:
            print "Input and output file names are identical, exit 1"
            sys.exit(1)
        with open(fname, 'w') as json_file:
            json_file.write(json.dumps(outdata))
    if  opts.toprint:
        pprint.pprint(outdata)
def workflow(fin, fout, verbose=None):
    "LifeCycle workflow"

    initial_payload = None # initial payload, should be provided by LifeCycle
    new_payload = [] # newly created payloads will be returned by LifeCycle

    with open(fin, 'r') as source:
        initial_payload = json.load(source)

    if  verbose:
        print "\n### input workflow"
        print pprint.pformat(initial_payload)

    ### read inputs from payload
    workflow = initial_payload['workflow']
    # check if inputs are read from a configuration file
    try:
        cfg   = workflow['DataProviderCfg']
    except KeyError:
        # no configuration; fall back to values provided in the workflow
        # for backward compatibility (values read via get() are optional)
        cdict = { 'process' :
                  {'NumberOfDatasets' : workflow['NumberOfDatasets'],
                   'NumberOfBlocks' : workflow['NumberOfBlocks'],
                   'NumberOfFiles' : workflow['NumberOfFiles'],
                   'NumberOfRuns' : workflow['NumberOfRuns'],
                   'NumberOfLumis' : workflow['NumberOfLumis']},
                  'dbs' :
                  {'DBSSkipFileFail': workflow.get('DBSSkipFileFail', None),
                   'DBSChangeCksumFail': workflow.get('DBSChangeCksumFail', None),
                   'DBSChangeSizeFail': workflow.get('DBSChangeSizeFail', None)},
                  'phedex' :
                  {'PhedexSkipFileFail' : workflow.get('PhedexSkipFileFail', None),
                   'PhedexChangeCksumFail' : workflow.get('PhedexChangeCksumFail', None),
                   'PhedexChangeSizeFail' : workflow.get('PhedexChangeSizeFail', None),
                   'PhedexDBSName' : workflow['PhedexDBSName']}
                  }
    else:
        cdict = read_configparser(cfg)

    process_cfg = cdict['process']
    dbs_cfg = cdict['dbs']
    phedex_cfg = cdict['phedex']

    phedex_dbs_name = phedex_cfg.get('PhedexDBSName')
    number_of_datasets = int(process_cfg.get('NumberOfDatasets'))
    number_of_blocks = int(process_cfg.get('NumberOfBlocks'))
    number_of_files = int(process_cfg.get('NumberOfFiles'))
    number_of_runs = int(process_cfg.get('NumberOfRuns'))
    number_of_lumis = int(process_cfg.get('NumberOfLumis'))

    try:
        phedex_file  = float(phedex_cfg.get('PhedexSkipFileFail'))
        phedex_cksum = float(phedex_cfg.get('PhedexChangeCksumFail'))
        phedex_size  = float(phedex_cfg.get('PhedexChangeSizeFail'))

        dbs_file  = float(dbs_cfg.get('DBSSkipFileFail'))
        dbs_cksum = float(dbs_cfg.get('DBSChangeCksumFail'))
        dbs_size  = float(dbs_cfg.get('DBSChangeSizeFail'))
    # if value is None, the cast will fail, which means no failures are used
    except TypeError:
        failure_rates = None
    else:
        failure_rates = dict(PhedexSkipFileFail=phedex_file)
        failure_rates.update(PhedexChangeCksumFail=phedex_cksum)
        failure_rates.update(PhedexChangeSizeFail=phedex_size)
        failure_rates.update(DBSSkipFileFail=dbs_file)
        failure_rates.update(DBSChangeCksumFail=dbs_cksum)
        failure_rates.update(DBSChangeSizeFail=dbs_size)
    print failure_rates
    phedex_provider = PhedexProvider(dbs_name=phedex_dbs_name, failure_rates=failure_rates)
    dbs_provider = DBSProvider(failure_rates=failure_rates)

    for _ in xrange(number_of_datasets):
        #clone initial payload
        payload = deepcopy(initial_payload)
        phedex_provider.generate_dataset()
        phedex_provider.add_blocks(number_of_blocks)
        phedex_provider.add_files(number_of_files)
        payload['workflow']['Phedex'] = [phedex_provider.dataset()]
        payload['workflow']['DBS'] = dbs_provider.block_dump(number_of_runs,
                                                             number_of_lumis)
        phedex_provider.reset()
        new_payload.append(payload)

    with open(fout, 'w') as output:
        json.dump(new_payload, output)

    if  verbose:
        print "\n### output workflow"
        print pprint.pformat(new_payload)
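
When the payload carries no 'DataProviderCfg' entry, workflow() falls back to the backward-compatible branch and reads the counts and the PhEDEx DBS name directly from the payload's workflow section. The sketch below builds such a minimal input file and runs the workflow; the file names and numeric values are illustrative, and the optional *SkipFileFail/*ChangeCksumFail/*ChangeSizeFail keys are left out, so the float(None) cast raises TypeError and failure_rates stays None (no injected failures).

import json

payload = {
    'workflow': {
        'NumberOfDatasets': 1,      # one output payload is produced per dataset
        'NumberOfBlocks': 2,
        'NumberOfFiles': 5,
        'NumberOfRuns': 3,
        'NumberOfLumis': 10,
        'PhedexDBSName': 'global',  # illustrative DBS instance name
    }
}

with open('payload_in.json', 'w') as fds:
    json.dump(payload, fds)

# assumes workflow() and the PhedexProvider/DBSProvider classes are importable
workflow('payload_in.json', 'payload_out.json', verbose=True)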
Example #4
def workflow(fin, fout, verbose=None):
    "LifeCycle workflow"

    initial_payload = None  # initial payload, should be provided by LifeCycle
    new_payload = []  # newly created payloads will be returned by LifeCycle

    with open(fin, 'r') as source:
        initial_payload = json.load(source)

    if verbose:
        print "\n### input workflow"
        print pprint.pformat(initial_payload)

    ### read inputs from payload
    workflow = initial_payload['workflow']
    # check if inputs are read from a configuration file
    try:
        cfg = workflow['DataProviderCfg']
    except KeyError:
        # no configuration; fall back to values provided in the workflow
        # for backward compatibility (values read via get() are optional)
        cdict = {
            'process': {
                'NumberOfDatasets': workflow['NumberOfDatasets'],
                'NumberOfBlocks': workflow['NumberOfBlocks'],
                'NumberOfFiles': workflow['NumberOfFiles'],
                'NumberOfRuns': workflow['NumberOfRuns'],
                'NumberOfLumis': workflow['NumberOfLumis']
            },
            'dbs': {
                'DBSSkipFileFail': workflow.get('DBSSkipFileFail', None),
                'DBSChangeCksumFail': workflow.get('DBSChangeCksumFail', None),
                'DBSChangeSizeFail': workflow.get('DBSChangeSizeFail', None)
            },
            'phedex': {
                'PhedexSkipFileFail':
                workflow.get('PhedexSkipFileFail', None),
                'PhedexChangeCksumFail':
                workflow.get('PhedexChangeCksumFail', None),
                'PhedexChangeSizeFail':
                workflow.get('PhedexChangeSizeFail', None),
                'PhedexDBSName':
                workflow['PhedexDBSName']
            }
        }
    else:
        cdict = read_configparser(cfg)

    process_cfg = cdict['process']
    dbs_cfg = cdict['dbs']
    phedex_cfg = cdict['phedex']

    phedex_dbs_name = phedex_cfg.get('PhedexDBSName')
    number_of_datasets = int(process_cfg.get('NumberOfDatasets'))
    number_of_blocks = int(process_cfg.get('NumberOfBlocks'))
    number_of_files = int(process_cfg.get('NumberOfFiles'))
    number_of_runs = int(process_cfg.get('NumberOfRuns'))
    number_of_lumis = int(process_cfg.get('NumberOfLumis'))

    try:
        phedex_file = float(phedex_cfg.get('PhedexSkipFileFail'))
        phedex_cksum = float(phedex_cfg.get('PhedexChangeCksumFail'))
        phedex_size = float(phedex_cfg.get('PhedexChangeSizeFail'))

        dbs_file = float(dbs_cfg.get('DBSSkipFileFail'))
        dbs_cksum = float(dbs_cfg.get('DBSChangeCksumFail'))
        dbs_size = float(dbs_cfg.get('DBSChangeSizeFail'))
    # if value is None, the cast will fail, which means no failures are used
    except TypeError:
        failure_rates = None
    else:
        failure_rates = dict(PhedexSkipFileFail=phedex_file)
        failure_rates.update(PhedexChangeCksumFail=phedex_cksum)
        failure_rates.update(PhedexChangeSizeFail=phedex_size)
        failure_rates.update(DBSSkipFileFail=dbs_file)
        failure_rates.update(DBSChangeCksumFail=dbs_cksum)
        failure_rates.update(DBSChangeSizeFail=dbs_size)
    print failure_rates
    phedex_provider = PhedexProvider(dbs_name=phedex_dbs_name,
                                     failure_rates=failure_rates)
    dbs_provider = DBSProvider(failure_rates=failure_rates)

    for _ in xrange(number_of_datasets):
        #clone initial payload
        payload = deepcopy(initial_payload)
        phedex_provider.generate_dataset()
        phedex_provider.add_blocks(number_of_blocks)
        phedex_provider.add_files(number_of_files)
        payload['workflow']['Phedex'] = [phedex_provider.dataset()]
        payload['workflow']['DBS'] = dbs_provider.block_dump(
            number_of_runs, number_of_lumis)
        phedex_provider.reset()
        new_payload.append(payload)

    with open(fout, 'w') as output:
        json.dump(new_payload, output)

    if verbose:
        print "\n### output workflow"
        print pprint.pformat(new_payload)
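
When 'DataProviderCfg' is present, the file name is passed to read_configparser (defined elsewhere in the package). Assuming that helper is backed by the standard ConfigParser module and mirrors the process/dbs/phedex layout of the cdict built above, a matching configuration file could be generated as sketched below; the section and option names come from the code, the values are illustrative, and the actual on-disk format expected by read_configparser may differ.

import ConfigParser

cfg = ConfigParser.RawConfigParser()
cfg.add_section('process')
for opt, val in [('NumberOfDatasets', '1'), ('NumberOfBlocks', '2'),
                 ('NumberOfFiles', '5'), ('NumberOfRuns', '3'),
                 ('NumberOfLumis', '10')]:
    cfg.set('process', opt, val)
cfg.add_section('dbs')
for opt in ['DBSSkipFileFail', 'DBSChangeCksumFail', 'DBSChangeSizeFail']:
    cfg.set('dbs', opt, '0.05')     # failure rates are optional
cfg.add_section('phedex')
for opt in ['PhedexSkipFileFail', 'PhedexChangeCksumFail',
            'PhedexChangeSizeFail']:
    cfg.set('phedex', opt, '0.05')
cfg.set('phedex', 'PhedexDBSName', 'global')  # illustrative DBS instance name

with open('dataprovider.cfg', 'w') as fds:
    cfg.write(fds)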