Example #1
    # "root://deepthought.crc.nd.edu//store/user/khurtado/lobster_mc_" + version,
    # "chirp://eddie.crc.nd.edu:9094/store/user/khurtado/lobster_test_" + version,
    "gsiftp://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version,
    # "srm://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version,
])

workflows = []

lhe = Workflow(label='lhe_step',
               pset='mc_gen/HIG-RunIIWinter15wmLHE-00196_1_cfg.py',
               sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_16_patch1'),
               merge_size='10M',
               dataset=ProductionDataset(events_per_task=50,
                                         events_per_lumi=5,
                                         number_of_tasks=10),
               category=Category(name='lhe', cores=1, memory=1000))
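# (Editor's note, hedged) With the ProductionDataset settings above, this step
# produces 10 tasks x 50 events = 500 events in total, written out as
# 50 / 5 = 10 lumi sections per task.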

gs = Workflow(label='gs_step',
              pset='mc_gen/HIG-RunIISummer15GS-00177_1_cfg.py',
              sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_18'),
              merge_size='100M',
              dataset=ParentDataset(parent=lhe, units_per_task=1),
              category=Category(name='gs',
                                cores=1,
                                memory=2000,
                                runtime=45 * 60))
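# (Editor's note, hedged) ParentDataset(parent=lhe, units_per_task=1) chains
# 'gs_step' to the output of 'lhe_step' above: each gs task processes the units
# produced by one lhe task, and the same pattern repeats for 'digi_step' below.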

digi = Workflow(label='digi_step',
                pset='mc_gen/HIG-RunIIFall15DR76-00243_1_cfg.py',
                sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_6_1'),
                merge_size='200M',
Example #2
        relpath = os.path.relpath(path, input_path_full)
        lhe_dirs.append(os.path.join(relpath, fd))


#################################################################
# Worker Res.:
#   Cores:  12    | 4
#   Memory: 16000 | 8000
#   Disk:   13000 | 6500
#################################################################
# Be careful when using the 'runtime' setting, as it can cause us to exceed the worker's resources (see the packing sketch after digi_resources below)
gs_resources = Category(
    name='gs',
    cores=6,
    memory=3000,
    disk=3000,
    tasks_min=12,
    #runtime=3600,
    mode='fixed'
)

digi_resources = Category(
    name='digi',
    cores=6,
    memory=7800,
    disk=6000,
    #runtime=3600,
    mode='fixed'
)
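# (Editor's sketch, not part of the original config) Rough arithmetic behind the
# 'runtime' warning above: with mode='fixed' each task claims exactly the
# Category's resources, so the number of concurrent tasks a worker can host is
# limited by the tightest of cores, memory, and disk. Worker sizes below are the
# ones quoted in the comment block (12 cores / 16000 MB RAM / 13000 MB disk).
def tasks_per_worker(cat_cores, cat_memory, cat_disk,
                     worker_cores=12, worker_memory=16000, worker_disk=13000):
    return min(worker_cores // cat_cores,
               worker_memory // cat_memory,
               worker_disk // cat_disk)

# gs:   min(12 // 6, 16000 // 3000, 13000 // 3000) = 2 tasks per worker
# digi: min(12 // 6, 16000 // 7800, 13000 // 6000) = 2 tasks per worker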

reco_resources = Category(
Example #3
storage = StorageConfiguration(
    output=[
        "hdfs://eddie.crc.nd.edu:19000/store/user/$USER/lobster_test_" + version,
        "file:///hadoop/store/user/$USER/lobster_test_" + version,
        # ND is not in the XrootD redirector, thus hardcode server.
        # Note the double-slash after the hostname!
        "root://deepthought.crc.nd.edu//store/user/$USER/lobster_test_" + version,
        "chirp://eddie.crc.nd.edu:9094/store/user/$USER/lobster_test_" + version,
        "gsiftp://T3_US_NotreDame/store/user/$USER/lobster_test_" + version,
        "srm://T3_US_NotreDame/store/user/$USER/lobster_test_" + version
    ]
)
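# (Editor's sketch, hypothetical helper, not part of the original config) The
# double slash in the root:// URL above comes from joining the hostname and an
# absolute /store/... path with a single separator:
def xrootd_url(server, lfn):
    # lfn already starts with '/', e.g. "/store/user/...", so this yields
    # "root://<server>//store/..."
    return "root://{0}/{1}".format(server, lfn)

# xrootd_url("deepthought.crc.nd.edu", "/store/user/$USER/lobster_test_" + version)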

processing = Category(
    name='processing',
    cores=1,
    runtime=900,
    memory=1000
)

workflows = []

ttH = Workflow(
    label='ttH',
    dataset=cmssw.Dataset(
        dataset='/ttHToNonbb_M125_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM',
        lumis_per_task=20,
        file_based=True
    ),
    category=processing,
    command='root -b -q -l script_macro.C @outputfiles @inputfiles',
    extra_inputs=['script_macro.C'],
Example #4
for dset, tasksize, nevents in zip(datasets, tasksizes, events):
    tasks = int(nevents / tasksize)
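    # (Editor's note, not in the original) int() truncates, so events that do not
    # fill a complete task are silently dropped; rounding up would cover them:
    #   tasks = -(-nevents // tasksize)   # integer ceiling division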

    lhe = Workflow(
        label=dset + '_lhe',
        pset='configs/' + dset + '_lhe.py',
        merge_size='2000M',
        dataset=ProductionDataset(
            events_per_task=tasksize,
            events_per_lumi=200,
            number_of_tasks=tasks
        ),
        category=Category(
            name='lhe',
            cores=2,
            disk=2000,
            memory=2000
        ),
        sandbox=[
            cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1/CMSSW_8_0_21'),
            cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1_rh7/CMSSW_8_0_21')
        ]
    )

    aod = Workflow(
        label=dset + '_aod',
        pset='configs/' + dset + '_aod.py',
        dataset=ParentDataset(
            parent=lhe,
            units_per_task=4
        ),
Example #5
        "file:///hadoop" + output_path,
    ],
    disable_input_streaming=False,
)

########## Resources for each step ##########

# Worker Res.:
#   Cores:  12    | 4
#   Memory: 16000 | 8000
#   Disk:   13000 | 6500

gen_resources = Category(name='gen',
                         cores=1,
                         memory=2000,
                         disk=1000,
                         tasks_min=12,
                         tasks_max=3000,
                         mode='fixed')
sim_resources = Category(name='sim',
                         cores=6,
                         memory=3000,
                         disk=3000,
                         tasks_min=12,
                         mode='fixed')
digi_resources = Category(name='digi',
                          cores=6,
                          memory=7800,
                          disk=6000,
                          mode='fixed')
hlt_resources = Category(name='hlt',
Example #6

storage = StorageConfiguration(
    output=[
        "hdfs://eddie.crc.nd.edu:19000" + output_path,
        "file:///hadoop" + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" +
        output_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
    ], )

processing = Category(
    name='processing',
    #mode='fixed',
    cores=1,
    memory=1200,
    #disk=1000
    #disk=2900
    disk=6000)

wf = []

ds_helper = DatasetHelper()
ds_helper.load(os.path.join(GIT_REPO_DIR, "GenReader/data/JSON/datasets.json"))

width = 1
samples = [
    #'central_tZq',
    #'central_tZq_new_pmx_v2',
    #'central_ttH',
    #'central_ttW',
        "srm://T3_US_NotreDame" + input_path,
    ],
    output=[
        "hdfs://eddie.crc.nd.edu:19000" + output_path,
        "file:///hadoop" + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" +
        output_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
    ],
)

processing = Category(name='processing',
                      cores=1,
                      memory=1200,
                      disk=1000
                      #mode='fixed'
                      )

maod_dirs = []
for path in dir_list:
    for f in os.listdir(path):
        # Check the subdirectory f itself, not its parent path
        full_path = os.path.join(path, f)
        if not os.path.isdir(full_path):
            continue
        arr = f.split('_')
        if arr[0] != 'mAOD':
            continue
        elif len(os.listdir(full_path)) == 0:
            print "[WARNING] Skipping empty directory, %s" % (f)
            continue
        p, c, r = arr[2], arr[3], arr[4]
Example #8
    'lhe_cfg': 'python_cfgs/central/TTWJetsToLNu_TuneCP5_PSweights_13TeV-amcatnloFXFX-madspin-pythia8/LHE-00000_1_cfg.py',
    'gen_cfg': 'python_cfgs/central/TTWJetsToLNu_TuneCP5_PSweights_13TeV-amcatnloFXFX-madspin-pythia8/GEN-00000_1_cfg.py',
    'lhe_release': 'CMSSW_9_3_4',
    'gen_release': 'CMSSW_9_3_4',
}

gridpacks = [
    tZq_info,
]

# Note: The tllq4fMatchedNoSchanW gridpacks seem to require ~2600 MB disk

lhe_resources = Category(
    name='lhe',
    mode='fixed',
    cores=1,
    memory=1200,
    disk=2900
)
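# (Editor's sketch, hypothetical, not part of the original config) If only a few
# processes need the larger allocation noted above, a per-gridpack override map
# could keep the default lean; the names and numbers here are illustrative only:
lhe_disk_overrides = {
    'tllq4fMatchedNoSchanW': 2900,  # see the ~2600 MB note above
}

def lhe_disk_for(process, default=1200):
    return lhe_disk_overrides.get(process, default)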

gen_resources = Category(
    name='gen',
    mode='fixed',
    cores=1,
    memory=1200,
    disk=2900
)

wf = []

print "Generating workflows:"
Example #9
wf = []

print "Generating workflows:"
for idx, gridpack in enumerate(gridpack_list):
    head, tail = os.path.split(gridpack)
    arr = tail.split('_')
    p, c, r = arr[0], arr[1], arr[2]
    c = c.replace('-', '')  # Lobster doesn't like labels with dashes in them

    label = 'lhe_step_{p}_{c}_{r}'.format(p=p, c=c, r=r)
    cat_name = 'lhe_{p}'.format(p=p)
    print "Label and cat name:", label, cat_name
    if cat_name not in cat_dict:
        cat_dict[cat_name] = Category(
            name=cat_name,
            #mode='fixed',
            cores=1,
            memory=1200,
            disk=2900)
    cat = cat_dict[cat_name]

    wf_fragments = {}
    for step in wf_steps:
        if p in fragment_map and step in fragment_map[p]:
            wf_fragments[step] = fragment_map[p][step]
        else:
            wf_fragments[step] = fragment_map['default'][step]
    multiplier = event_multiplier['default']
    if p in event_multiplier:
        multiplier = event_multiplier[p]
    nevents = int(multiplier * events_per_gridpack)
    print "\t[{0}/{1}] Gridpack: {gp} (nevts {events})".format(
Example #10
        "srm://T3_US_NotreDame" + output_path,
        "file:///hadoop" + output_path,
    ],
    disable_input_streaming=False,
)

########## Resources for each step ##########

# Worker Res.:
#   Cores:  12    | 4
#   Memory: 16000 | 8000
#   Disk:   13000 | 6500

naod_resources = Category(name='naod',
                          cores=2,
                          memory=3500,
                          disk=2000,
                          mode='fixed')

########## Set up dictionary for cfg files ##########

wf_steps = ['naod']
ul_base = 'ul_cfgs'

ul_cfg_map = {
    'UL16': {
        'all_procs': {
            'naod': os.path.join(ul_base, 'UL16_NAOD_cfg.py'),
        }
    },
    'UL16APV': {
Example #11
## Specify LHE dirs by hand:
#lhe_dirs = [
#    #"kmohrman/FullProduction/Round6/Batch8/LHE_step/v1/lhe_step_ttHJet_HanV4ttXJetStartPtChecks_run0",
#]

#################################################################
# Worker Res.:
#   Cores:  12    | 4
#   Memory: 16000 | 8000
#   Disk:   13000 | 6500
#################################################################
# Be careful when using the 'runtime' setting, as it can cause us to exceed the worker's resources
gen_resources = Category(name='gen',
                         cores=1,
                         memory=1200,
                         disk=1000,
                         tasks_min=12,
                         tasks_max=3000,
                         mode='fixed')
#################################################################

wf_steps = ['gen']
# Some example NLO configs we used while testing NLO samples for the pheno paper. We did not get to the point of fully understanding the NLO samples, so these configs may not be fully correct or trustworthy.
fragment_map_NLO = {
    'ttH': { # Reza's sample with this name does not have an extra jet explicitly
        'gen': 'python_cfgs/GEN/NLO/HIG-RunIIFall17wmLHEGS-00054_1_cfg.py',
    },
    'ttW': { # Reza's sample with this name does not have an extra jet explicitly
        'gen': 'python_cfgs/GEN/NLO/TOP-RunIIFall17wmLHEGS-00076_1_cfg.py', # No matching
    },
    'ttZ': { # Reza's sample with this name does not have an extra jet explicitly
Example #12
        "root://deepthought.crc.nd.edu/" +
        output_path,  # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
        "file:///hadoop" + output_path,
    ],
    disable_input_streaming=True,
)

#################################################################
# Worker Res.:
#   Cores:  12    | 4
#   Memory: 16000 | 8000
#   Disk:   13000 | 6500
#################################################################
gs_resources = Category(name='gs', cores=1, memory=1500, disk=2000)
#################################################################
#samples["Tt_Pu200_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_Tt_Pu200_110D49"],    "10000",    "pion"]
#samples["SingleMuFlatPt1p5To8_Pu200_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleMuFlatPt1p5To8_Pu200_110D49"],    "100000",    "mu"]
#samples["SingleMuFlatPt1p5To8_Pu0_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleMuFlatPt1p5To8_Pu0_110D49"],    "100000",    "mu"]
#samples["SingleEFlatPt1p5To8_Pu200_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleEFlatPt1p5To8_Pu200_110D49"],    "100000",    "ele"]
#samples["SingleEFlatPt1p5To8_Pu0_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleEFlatPt1p5To8_Pu0_110D49"],    "100000",    "ele"]
#samples["DisplacedMuPt1p5To8_Pu200_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_DisplacedMuPt1p5To8_Pu200_110D49"],    "100000",    "mu"]
#samples["DisplacedMuPt1p5To8_Pu0_110D49"]=[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_DisplacedMuPt1p5To8_Pu0_110D49"],    "100000",    "mu"]

#samples["FE_TightTune_Tt_Pu200_110D49"]                    =[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_Tt_Pu200_110D49"],    "10000",    "pion"]
#samples["FE_TightTune_SingleMuFlatPt1p5To8_Pu200_110D49"]  =[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleMuFlatPt1p5To8_Pu200_110D49"],    "100000",    "mu"]
#samples["FE_TightTune_SingleMuFlatPt1p5To8_Pu0_110D49"]    =[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleMuFlatPt1p5To8_Pu0_110D49"],    "100000",    "mu"]
#samples["FE_TightTune_SingleEFlatPt1p5To8_Pu200_110D49"]   =[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleEFlatPt1p5To8_Pu200_110D49"],    "100000",    "ele"]
#samples["FE_TightTune_SingleEFlatPt1p5To8_Pu0_110D49"]     =[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleEFlatPt1p5To8_Pu0_110D49"],    "100000",    "ele"]
#samples["FE_TightTune_DisplacedMuPt1p5To8_Pu200_110D49"]   =[    ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_DisplacedMuPt1p5To8_Pu200_110D49"],    "100000",    "mu"]