# "root://deepthought.crc.nd.edu//store/user/khurtado/lobster_mc_" + version, # "chirp://eddie.crc.nd.edu:9094/store/user/khurtado/lobster_test_" + version, "gsiftp://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version, # "srm://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version, ]) workflows = [] lhe = Workflow(label='lhe_step', pset='mc_gen/HIG-RunIIWinter15wmLHE-00196_1_cfg.py', sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_16_patch1'), merge_size='10M', dataset=ProductionDataset(events_per_task=50, events_per_lumi=5, number_of_tasks=10), category=Category(name='lhe', cores=1, memory=1000)) gs = Workflow(label='gs_step', pset='mc_gen/HIG-RunIISummer15GS-00177_1_cfg.py', sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_18'), merge_size='100M', dataset=ParentDataset(parent=lhe, units_per_task=1), category=Category(name='gs', cores=1, memory=2000, runtime=45 * 60)) digi = Workflow(label='digi_step', pset='mc_gen/HIG-RunIIFall15DR76-00243_1_cfg.py', sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_6_1'), merge_size='200M',
relpath = os.path.relpath(path,input_path_full)
# Store the entry as a path relative to input_path_full, not as an absolute path.
lhe_dirs.append(os.path.join(relpath,fd))

#################################################################
# Worker Res.:
# Cores: 12 | 4
# Memory: 16000 | 8000
# Disk: 13000 | 6500
#################################################################
# Need to be careful with using 'runtime' setting, as it can cause us to exceed the workers resources

# Per-task resource request for the 'gs' category; compare with the worker
# sizes in the table above.
# NOTE(review): memory/disk are presumably in MB like the table -- confirm.
gs_resources = Category(
    name='gs',
    cores=6,
    memory=3000,
    disk=3000,
    tasks_min=12,
    #runtime=3600,
    mode='fixed'
)

# Per-task resource request for the 'digi' category. It has the same core
# count as 'gs' but a larger memory and disk request.
digi_resources = Category(
    name='digi',
    cores=6,
    memory=7800,
    disk=6000,
    #runtime=3600,
    mode='fixed'
)

reco_resources = Category(
# Output locations. Every entry ends in the same
# /store/user/$USER/lobster_test_<version> path and differs only in the
# access protocol / host prefix.
# NOTE(review): Lobster presumably tries these in the listed order -- confirm.
storage = StorageConfiguration(
    output=[
        "hdfs://eddie.crc.nd.edu:19000/store/user/$USER/lobster_test_" + version,
        "file:///hadoop/store/user/$USER/lobster_test_" + version,
        # ND is not in the XrootD redirector, thus hardcode server.
        # Note the double-slash after the hostname!
        "root://deepthought.crc.nd.edu//store/user/$USER/lobster_test_" + version,
        "chirp://eddie.crc.nd.edu:9094/store/user/$USER/lobster_test_" + version,
        "gsiftp://T3_US_NotreDame/store/user/$USER/lobster_test_" + version,
        "srm://T3_US_NotreDame/store/user/$USER/lobster_test_" + version
    ]
)

# Single-core resource category, used by the ttH workflow below.
# NOTE(review): runtime=900 is presumably in seconds -- confirm.
processing = Category(
    name='processing',
    cores=1,
    runtime=900,
    memory=1000
)

workflows = []

# Runs the ROOT macro script_macro.C in batch mode over a MINIAODSIM dataset,
# 20 lumis per task. The macro is listed in extra_inputs, presumably so that
# it gets shipped along with each task (verify against the Lobster docs).
ttH = Workflow(
    label='ttH',
    dataset=cmssw.Dataset(
        dataset='/ttHToNonbb_M125_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM',
        lumis_per_task=20,
        file_based=True
    ),
    category=processing,
    command='root -b -q -l script_macro.C @outputfiles @inputfiles',
    extra_inputs=['script_macro.C'],
# Build one chain of workflows per dataset; datasets, tasksizes and events are
# walked in lockstep.
# NOTE(review): the loop variable `events` rebinds the name of the list being
# zipped. This works because zip() is evaluated once before the first
# iteration, but afterwards `events` is a single per-dataset value rather than
# the list. Consider renaming the loop variable.
for dset, tasksize, events in zip(datasets, tasksizes, events):
    # Number of tasks needed to produce `events` events at `tasksize` per task
    # (truncated towards zero by int()).
    tasks = int(events / tasksize)

    # LHE step: label and pset path are derived from the dataset name.
    lhe = Workflow(
        label=dset + '_lhe',
        pset='configs/' + dset + '_lhe.py',
        merge_size='2000M',
        dataset=ProductionDataset(
            events_per_task=tasksize,
            events_per_lumi=200,
            number_of_tasks=tasks
        ),
        category=Category(
            name='lhe',
            cores=2,
            disk=2000,
            memory=2000
        ),
        # Two sandboxes of the same release; the second path carries an
        # `_rh7` suffix, presumably a build for a different OS -- confirm.
        sandbox=[
            cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1/CMSSW_8_0_21'),
            cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1_rh7/CMSSW_8_0_21')
        ]
    )

    # AOD step: consumes the output of `lhe`, four parent units per task.
    aod = Workflow(
        label=dset + '_aod',
        pset='configs/' + dset + '_aod.py',
        dataset=ParentDataset(
            parent=lhe,
            units_per_task=4
        ),
"file:///hadoop" + output_path,
],
disable_input_streaming=False,
)

########## Resources for each step ##########
# Worker Res.:
# Cores: 12 | 4
# Memory: 16000 | 8000
# Disk: 13000 | 6500

# One resource category per processing step; compare each request with the
# worker sizes in the table above.
# NOTE(review): memory/disk are presumably in MB like the table -- confirm.

# 'gen' is the only single-core category here and the only one with tasks_max.
gen_resources = Category(
    name='gen',
    cores=1,
    memory=2000,
    disk=1000,
    tasks_min=12,
    tasks_max=3000,
    mode='fixed')

sim_resources = Category(
    name='sim',
    cores=6,
    memory=3000,
    disk=3000,
    tasks_min=12,
    mode='fixed')

digi_resources = Category(
    name='digi',
    cores=6,
    memory=7800,
    disk=6000,
    mode='fixed')

hlt_resources = Category(name='hlt',
# Output locations: the same output_path behind several protocol/host prefixes.
storage = StorageConfiguration(
    output=[
        "hdfs://eddie.crc.nd.edu:19000" + output_path,
        "file:///hadoop" + output_path,
        # ND is not in the XrootD redirector, thus hardcode server.
        "root://deepthought.crc.nd.edu/" + output_path, # Note the extra slash after the hostname!
        "gsiftp://T3_US_NotreDame" + output_path,
        "srm://T3_US_NotreDame" + output_path,
    ],
)

# Single-core resource category. The commented-out disk values are earlier
# settings kept for reference; the active request is disk=6000.
processing = Category(
    name='processing',
    #mode='fixed',
    cores=1,
    memory=1200,
    #disk=1000
    #disk=2900
    disk=6000)

wf = []

# Load the dataset definitions from the JSON file inside the git repository.
ds_helper = DatasetHelper()
ds_helper.load(os.path.join(GIT_REPO_DIR, "GenReader/data/JSON/datasets.json"))

width = 1
# Sample names; entries are enabled by uncommenting them.
samples = [
    #'central_tZq',
    #'central_tZq_new_pmx_v2',
    #'central_ttH',
    #'central_ttW',
"srm://T3_US_NotreDame" + input_path, ], output=[ "hdfs://eddie.crc.nd.edu:19000" + output_path, "file:///hadoop" + output_path, # ND is not in the XrootD redirector, thus hardcode server. "root://deepthought.crc.nd.edu/" + output_path, # Note the extra slash after the hostname! "gsiftp://T3_US_NotreDame" + output_path, "srm://T3_US_NotreDame" + output_path, ], ) processing = Category(name='processing', cores=1, memory=1200, disk=1000 #mode='fixed' ) maod_dirs = [] for path in dir_list: for f in os.listdir(path): if not os.path.isdir(path): continue arr = f.split('_') if arr[0] != 'mAOD': continue elif len(os.listdir(path)) == 0: print "[WARNING] Skipping empty directory, %s" % (f) continue p, c, r = arr[2], arr[3], arr[4]
'lhe_cfg': 'python_cfgs/central/TTWJetsToLNu_TuneCP5_PSweights_13TeV-amcatnloFXFX-madspin-pythia8/LHE-00000_1_cfg.py',
'gen_cfg': 'python_cfgs/central/TTWJetsToLNu_TuneCP5_PSweights_13TeV-amcatnloFXFX-madspin-pythia8/GEN-00000_1_cfg.py',
'lhe_release': 'CMSSW_9_3_4',
'gen_release': 'CMSSW_9_3_4',
}

# Info dicts to process; only tZq_info is listed here.
gridpacks = [
    tZq_info,
]

# Note: The tllq4fMatchedNoSchanW gridpacks seem to require ~2600 MB disk
# (hence disk=2900 below, which leaves some headroom over that figure).
lhe_resources = Category(
    name='lhe',
    mode='fixed',
    cores=1,
    memory=1200,
    disk=2900
)

# Same request as the 'lhe' category, under its own category name.
gen_resources = Category(
    name='gen',
    mode='fixed',
    cores=1,
    memory=1200,
    disk=2900
)

wf = []

print "Generating workflows:"
wf = [] print "Generating workflows:" for idx, gridpack in enumerate(gridpack_list): head, tail = os.path.split(gridpack) arr = tail.split('_') p, c, r = arr[0], arr[1], arr[2] c = c.replace('-', '') # Lobster doesn't like labels with dashes in them label = 'lhe_step_{p}_{c}_{r}'.format(p=p, c=c, r=r) cat_name = 'lhe_{p}'.format(p=p) print "Label and cat name:", label, cat_name if not cat_name in cat_dict: cat_dict[cat_name] = Category( name=cat_name, #mode='fixed', cores=1, memory=1200, disk=2900) cat = cat_dict[cat_name] wf_fragments = {} for step in wf_steps: if fragment_map.has_key(p) and fragment_map[p].has_key(step): wf_fragments[step] = fragment_map[p][step] else: wf_fragments[step] = fragment_map['default'][step] multiplier = event_multiplier['default'] if event_multiplier.has_key(p): multiplier = event_multiplier[p] nevents = int(multiplier * events_per_gridpack) print "\t[{0}/{1}] Gridpack: {gp} (nevts {events})".format(
"srm://T3_US_NotreDame" + output_path,
"file:///hadoop" + output_path,
],
disable_input_streaming=False,
)

########## Resources for each step ##########
# Worker Res.:
# Cores: 12 | 4
# Memory: 16000 | 8000
# Disk: 13000 | 6500

# Resource request for the single 'naod' step; compare with the worker sizes
# in the table above.
# NOTE(review): memory/disk are presumably in MB like the table -- confirm.
naod_resources = Category(
    name='naod',
    cores=2,
    memory=3500,
    disk=2000,
    mode='fixed')

########## Set up dictionary for cfg files ##########
wf_steps = ['naod']
# Directory prefix joined onto every cfg file name below.
ul_base = 'ul_cfgs'
# Nested lookup: top-level key (e.g. 'UL16') -> process key (e.g. 'all_procs')
# -> step name from wf_steps -> path of the cfg file.
ul_cfg_map = {
    'UL16': {
        'all_procs': {
            'naod': os.path.join(ul_base, 'UL16_NAOD_cfg.py'),
        }
    },
    'UL16APV': {
## Specify LHE dirs by hand: #lhe_dirs = [ # #"kmohrman/FullProduction/Round6/Batch8/LHE_step/v1/lhe_step_ttHJet_HanV4ttXJetStartPtChecks_run0", #] ################################################################# # Worker Res.: # Cores: 12 | 4 # Memory: 16000 | 8000 # Disk: 13000 | 6500 ################################################################# # Need to be careful with using 'runetime' setting, as it can cause us to exceed the workers resources gen_resources = Category(name='gen', cores=1, memory=1200, disk=1000, tasks_min=12, tasks_max=3000, mode='fixed') ################################################################# wf_steps = ['gen'] # Some example NLO configs we used while testing NLO samples for pheno paper. We did not get to the point of fully understanding the NLO samples, so these configs may not be fully correct or trustworthy. fragment_map_NLO = { 'ttH': { # Reza's sample with this name does not have an extra jet explicitly 'gen': 'python_cfgs/GEN/NLO/HIG-RunIIFall17wmLHEGS-00054_1_cfg.py', }, 'ttW': { # Reza's sample with this name does not have an extra jet explicitly 'gen': 'python_cfgs/GEN/NLO/TOP-RunIIFall17wmLHEGS-00076_1_cfg.py', # No matching }, 'ttZ': { # Reza's sample with this name does not have an extra jet explicitly
"root://deepthought.crc.nd.edu/" + output_path, # Note the extra slash after the hostname! "gsiftp://T3_US_NotreDame" + output_path, "srm://T3_US_NotreDame" + output_path, "file:///hadoop" + output_path, ], disable_input_streaming=True, ) ################################################################# # Worker Res.: # Cores: 12 | 4 # Memory: 16000 | 8000 # Disk: 13000 | 6500 ################################################################# gs_resources = Category(name='gs', cores=1, memory=1500, disk=2000) ################################################################# #samples["Tt_Pu200_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_Tt_Pu200_110D49"], "10000", "pion"] #samples["SingleMuFlatPt1p5To8_Pu200_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleMuFlatPt1p5To8_Pu200_110D49"], "100000", "mu"] #samples["SingleMuFlatPt1p5To8_Pu0_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleMuFlatPt1p5To8_Pu0_110D49"], "100000", "mu"] #samples["SingleEFlatPt1p5To8_Pu200_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleEFlatPt1p5To8_Pu200_110D49"], "100000", "ele"] #samples["SingleEFlatPt1p5To8_Pu0_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_SingleEFlatPt1p5To8_Pu0_110D49"], "100000", "ele"] #samples["DisplacedMuPt1p5To8_Pu200_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_DisplacedMuPt1p5To8_Pu200_110D49"], "100000", "mu"] #samples["DisplacedMuPt1p5To8_Pu0_110D49"]=[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/L1Stub_DisplacedMuPt1p5To8_Pu0_110D49"], "100000", "mu"] #samples["FE_TightTune_Tt_Pu200_110D49"] =[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_Tt_Pu200_110D49"], "10000", "pion"] #samples["FE_TightTune_SingleMuFlatPt1p5To8_Pu200_110D49"] =[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleMuFlatPt1p5To8_Pu200_110D49"], "100000", "mu"] 
#samples["FE_TightTune_SingleMuFlatPt1p5To8_Pu0_110D49"] =[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleMuFlatPt1p5To8_Pu0_110D49"], "100000", "mu"] #samples["FE_TightTune_SingleEFlatPt1p5To8_Pu200_110D49"] =[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleEFlatPt1p5To8_Pu200_110D49"], "100000", "ele"] #samples["FE_TightTune_SingleEFlatPt1p5To8_Pu0_110D49"] =[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_SingleEFlatPt1p5To8_Pu0_110D49"], "100000", "ele"] #samples["FE_TightTune_DisplacedMuPt1p5To8_Pu200_110D49"] =[ ["rgoldouz/FullProduction/L1tracker_DAS_110pre8/FE_TightTune_L1Stub_DisplacedMuPt1p5To8_Pu200_110D49"], "100000", "mu"]