# Imports assumed by the snippets below (a sketch; the exact module layout
# may differ between Lobster versions):
import os
import shutil
from lobster import cmssw
from lobster.core import (AdvancedOptions, Category, Config, Dataset,
                          ParentDataset, ProductionDataset,
                          StorageConfiguration, Workflow)
# MultiProductionDataset (used in later snippets) is assumed to come from the
# EFT generation tools; it is not part of the imports above.

arr = tail.split('_')
p, c, r = arr[2], arr[3], arr[4]
cms_cmd = ['cmsRun', 'EFTLHEReader_cfg.py']
cms_cmd.extend(['datatier=MINIAODSIM'])
print "\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx + 1, tot=len(maod_dirs), dir=maod_dir)
print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
output = Workflow(
    label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
    command=' '.join(cms_cmd),
    # This file should be in CMSSW_10_6_8/src/EFTGenReader/LHEReader/test/lobster.
    # TODO: Specify path in a better way.
    sandbox=cmssw.Sandbox(release='../../../../../CMSSW_10_6_8'),
    merge_size='1.0G',
    cleanup_input=False,
    dataset=Dataset(files=maod_dir, files_per_task=5, patterns=["*.root"]),
    category=processing)
wf.extend([output])
config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
        bad_exit_codes=[127, 160],
        log_level=1,
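
# A possible way to address the TODO above (a sketch, not Lobster API): derive
# the release path from this config's own location instead of hard-coding the
# relative "../../../../../CMSSW_10_6_8". Assumes the config really lives in
# CMSSW_10_6_8/src/EFTGenReader/LHEReader/test/lobster.
_here = os.path.dirname(os.path.abspath(__file__))
# Five levels up: lobster -> test -> LHEReader -> EFTGenReader -> src -> release root
CMSSW_BASE = os.path.normpath(os.path.join(_here, *(['..'] * 5)))
# Then: sandbox=cmssw.Sandbox(release=CMSSW_BASE)
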
        continue
    elif len(runs_whitelist) > 0 and r not in runs_whitelist:
        continue
    lhe_dirs.append(f)

wf = []
print "Generating workflows:"
for idx, lhe_dir in enumerate(lhe_dirs):
    arr = lhe_dir.split('_')
    p, c, r = arr[2], arr[3], arr[4]
    print "\t[%d/%d] LHE Input: %s" % (idx + 1, len(lhe_dirs), lhe_dir)
    output = Workflow(
        label='output_%s_%s_%s' % (p, c, r),
        command='cmsRun EFTLHEReader_cfg.py',
        merge_size='1.0G',
        cleanup_input=False,
        dataset=Dataset(files=lhe_dir, files_per_task=10, patterns=["*.root"]),
        category=processing)
    wf.extend([output])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
        bad_exit_codes=[127, 160],
        log_level=1,
    ))
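
# The truncated whitelist filter above, rewritten as a self-contained sketch
# (the function and argument names are assumptions, not from the original):
def filter_lhe_dirs(candidates, procs_whitelist, runs_whitelist):
    """Keep directories whose process (p) and run (r) pass the whitelists."""
    kept = []
    for f in candidates:
        arr = os.path.split(f)[1].split('_')
        p, r = arr[2], arr[4]  # process and run tags
        if len(procs_whitelist) > 0 and p not in procs_whitelist:
            continue
        elif len(runs_whitelist) > 0 and r not in runs_whitelist:
            continue
        kept.append(f)
    return kept
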
else:
    raise ValueError("can't find dataset associated with {}".format(path))
part = counter.get(dset, 1)
counter[dset] = part + 1
aod = Workflow(
    label='{}_aod_p{}'.format(dset, part),
    pset='configs/' + dset + '_aod.py',
    dataset=cmssw.Dataset(
        dataset=path,
        dbs_instance='phys03',
        lumis_per_task=4
    ),
    category=Category(
        name='aod',
        cores=2,
        disk=1000,
        memory=3000,
        runtime=120 * 60
    ),
    sandbox=[
        cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1/CMSSW_8_0_21'),
        cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1_rh7/CMSSW_8_0_21')
    ]
)
maod = Workflow(
    label='{}_maod_p{}'.format(dset, part),
    pset='configs/' + dset + '_maod.py',
    merge_size='2000M',
    cleanup_input=True,
    'minPtJet=30.0',
    'maxEtaJet=2.5',
    'maxEtaLep=2.5'
])
if not is_eft:
    cms_cmd.extend(['iseft=False'])
print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
# The workflow label can't have any dashes (-) in it, so remove them
safe_label_name = sample_name.replace('-', '')
output = Workflow(
    label='output_{label}'.format(label=safe_label_name),
    command=' '.join(cms_cmd),
    # This file should be in CMSSW_10_6_8/src/EFTGenReader/GenReader/test/lobster.
    # TODO: Specify path in a better way.
    sandbox=cmssw.Sandbox(release='../../../../../CMSSW_10_6_8/'),
    cleanup_input=False,
    outputs=['output_tree.root'],
    # Note: Lobster takes a very long time trying to merge large numbers of small files
    merge_size=merge_size,
    dataset=ds,
    merge_command='hadd @outputfiles @inputfiles',
    category=processing)
wf.extend([output])
config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    name='processing',
    cores=1,
    runtime=900,
    memory=1000
)
workflows = []
ttH = Workflow(
    label='ttH',
    dataset=cmssw.Dataset(
        dataset='/ttHToNonbb_M125_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM',
        lumis_per_task=20,
        file_based=True
    ),
    category=processing,
    command='root -b -q -l script_macro.C @outputfiles @inputfiles',
    extra_inputs=['script_macro.C'],
    publish_label='test',
    merge_command='hadd @outputfiles @inputfiles',
    merge_size='3.5G',
    outputs=['output.root']
)
workflows.append(ttH)
config = Config(
    workdir='/tmpscratch/users/$USER/lobster_test_' + version,
    plotdir='~/www/lobster/test_' + version,
    storage=storage,
    workflows=workflows,
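
# The '@outputfiles' / '@inputfiles' placeholders above are expanded by
# Lobster per task. A minimal sketch of what the substitution looks like
# (an illustration of the behavior, not Lobster's actual implementation):
def expand_placeholders(cmd, inputs, outputs):
    return (cmd.replace('@outputfiles', ' '.join(outputs))
               .replace('@inputfiles', ' '.join(inputs)))

# expand_placeholders('hadd @outputfiles @inputfiles',
#                     ['tree_1.root', 'tree_2.root'], ['merged.root'])
# -> 'hadd merged.root tree_1.root tree_2.root'
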
for key, value in samples.items():
    if 'DisplacedMu' not in key:
        continue
    print key
    Analysis = Workflow(
        label='FE_L1Analysis_%s' % key,
        sandbox=cmssw.Sandbox(release='/afs/crc.nd.edu/user/r/rgoldouz/CMSSW_10_4_0'),
        dataset=Dataset(files=value[0], files_per_task=50),
        globaltag=False,
        command='python Lobster_check.py ' + value[1] + ' ' + value[2] + ' @inputfiles',
        extra_inputs=[
            'Lobster_check.py',
            '../lib/main.so',
            '../include/MyAnalysis.h',
        ],
        outputs=['ANoutput.root'],
        # dataset=Dataset(
        #     files=value[0],
        #     files_per_task=50,
        #     patterns=["*.root"]
        # ),
        # merge_command='hadd @outputfiles @inputfiles',
        # merge_size='3.5G',
        category=gs_resources)
    wf.append(Analysis)

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
processing = Category(
    name='processing',
    cores=1,
    runtime=900,
    memory=1000
)
workflows = []
ttH = Workflow(
    label='ttH',
    dataset=cmssw.Dataset(
        dataset='/ttHToNonbb_M125_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM',
        events_per_task=50000
    ),
    category=processing,
    command='cmsRun simple_pset.py',
    publish_label='test',
    merge_size='3.5G',
    outputs=['output.root']
)
workflows.append(ttH)
config = Config(
    workdir='/tmpscratch/users/$USER/lobster_test_' + version,
    plotdir='~/www/lobster/test_' + version,
    storage=storage,
    workflows=workflows,
    advanced=AdvancedOptions(
        bad_exit_codes=[127, 160],
        shutil.copy(template_loc, mod_loc)
        for sed_str in sed_str_list:
            if sed_str:
                run_process(['sed', '-i', '-e', sed_str, mod_loc])
    else:
        mod_loc = template_loc
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''
label_tag = "{p}_{c}{mod}_{r}".format(p=p, c=c, r=r, mod=mod_tag)
gen = Workflow(
    label='gen_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gen']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['gen']),
    merge_size=-1,        # Don't merge files we don't plan to keep
    cleanup_input=False,  # Do not accidentally clean up the LHE files!!!
    globaltag=False,
    outputs=['GEN-00000.root'],
    dataset=Dataset(files=lhe_dir, files_per_task=1, patterns=["*.root"]),
    category=gen_resources)
sim = Workflow(
    label='sim_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['sim']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['sim']),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=True,
    #cleanup_input=False,
    globaltag=False,
    outputs=['SIM-00000.root'],
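
# The gen/sim pair above is chained by writing GEN files to disk and pointing
# the sim Dataset at that directory. A minimal sketch of the alternative used
# by other snippets in this collection (ParentDataset, cf. the lhe/gs steps
# near the end), assuming each sim task should consume one gen task's output:
sim_via_parent = Workflow(
    label='sim_step_{tag}_parent'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['sim']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['sim']),
    merge_size=-1,
    globaltag=False,
    outputs=['SIM-00000.root'],
    # units_per_task=1 follows the ParentDataset usage in the later snippets
    dataset=ParentDataset(parent=gen, units_per_task=1),
    category=gen_resources)
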
wf = []
print "Generating workflows:"
for idx, gen_dir in enumerate(gen_dirs):
    #arr = gen_dir.split('_')
    head, tail = os.path.split(gen_dir)
    arr = tail.split('_')
    p, c, r = arr[2], arr[3], arr[4]
    print "\t[{n}/{tot}] GEN Input: {dir}".format(n=idx + 1, tot=len(gen_dirs), dir=gen_dir)
    output = Workflow(
        label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
        command='cmsRun EFTLHEReader_cfg.py',
        # This file should be in CMSSW_10_6_8/src/EFTGenReader/LHEReader/test/lobster.
        # TODO: Specify path in a better way.
        sandbox=cmssw.Sandbox(release='../../../../../CMSSW_10_6_8'),
        merge_size='1.0G',
        cleanup_input=False,
        dataset=Dataset(
            files=gen_dir,
            files_per_task=5,  # Remember that the GEN step already does 5-10 files per task
            patterns=["*.root"]
        ),
        category=processing
    )
    wf.extend([output])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
    'minPtJet=30.0',
    'maxEtaJet=2.5',
    'maxEtaLep=2.5'
])
if not is_eft:
    cms_cmd.extend(['iseft=False'])
print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
# The workflow label can't have any dashes (-) in it, so remove them
safe_label_name = sample_name.replace('-', '')
output = Workflow(
    label='output_{label}'.format(label=safe_label_name),
    command=' '.join(cms_cmd),
    cleanup_input=False,
    outputs=['output_tree.root'],
    # Note: Lobster takes a very long time trying to merge large numbers of small files
    merge_size=merge_size,
    dataset=ds,
    merge_command='hadd @outputfiles @inputfiles',
    category=processing
)
wf.extend([output])
config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
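
# A rough way to quantify the merge note above (hypothetical helper; the
# per-task output size, avg_output_mb, is an assumption you would measure):
def merge_fan_in(merge_size_mb, avg_output_mb):
    """Approximate number of files combined into one merged output."""
    return max(1, int(merge_size_mb / float(avg_output_mb)))

# e.g. merge_fan_in(1000, 2) -> ~500 inputs per merge, the regime where the
# merge stage starts to dominate; making each task's output larger (more
# events or files per task) keeps the fan-in small.
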
p, c, r = arr[2], arr[3], arr[4]
#print("p c r:",p,c,r)
wf_fragments = {}
for step in wf_steps:
    template_loc = fragment_map["all_procs"][step]
    wf_fragments[step] = template_loc
label_tag = "{p}_{c}_{r}".format(p=p, c=c, r=r)
naod = Workflow(
    label='nAOD_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['naod']),
    sandbox=cmssw.Sandbox(release=PATH_TO_NAOD_CMSSW),
    #merge_size='256M',
    merge_size='1000M',
    merge_command='python haddnano.py @outputfiles @inputfiles',
    extra_inputs=[
        os.path.join(PATH_TO_NAOD_CMSSW, 'src/PhysicsTools/NanoAODTools/scripts/haddnano.py')
    ],
    cleanup_input=False,  # Leave the MAOD files
    outputs=['NAOD-00000.root'],
    dataset=Dataset(files=maod_dir, files_per_task=1, patterns=["*.root"]),
    category=naod_resources)
wf.extend([naod])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    else:
        mod_loc = template_loc
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''
label_tag = "{p}_{c}{mod}_{r}".format(p=p, c=c, r=r, mod=mod_tag)
print "\t\tLabel: {label}".format(label=label_tag)
print "\nThis is the wf_fragments:", wf_fragments, "\n"
gen = Workflow(
    label='gen_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gen']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['GEN-00000.root'],
    dataset=Dataset(
        files=lhe_dir,
        #files_per_task=2,
        files_per_task=1,
        patterns=["*.root"]),
    category=gen_resources)
sim = Workflow(
    label='sim_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['sim']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['sim']),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['SIM-00000.root'],
    tail = tail.replace("cfg.py", "{tag}_cfg.py".format(tag=cfg_tag))
    mod_loc = os.path.join(MODIFIED_CFG_DIR, tail)
    shutil.copy(template_loc, mod_loc)
    for sed_str in sed_str_list:
        if sed_str:
            run_process(['sed', '-i', '-e', sed_str, mod_loc])
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''
gen = Workflow(
    label='gen_step_{p}_{c}{mod}_{r}'.format(p=p, c=c, mod=mod_tag, r=r),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gen']),
    #sandbox=cmssw.Sandbox(release='CMSSW_9_3_1'),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['GEN-00000.root'],
    dataset=Dataset(files=lhe_dir, files_per_task=5, patterns=["*.root"]),
    category=gen_resources)
wf.extend([gen])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
head, tail = os.path.split(maod_dir)
arr = tail.split('_')
p, c, r = arr[2], arr[3], arr[4]
cms_cmd = ['cmsRun', 'EFTLHEReader_cfg.py']
cms_cmd.extend(['datatier=MINIAODSIM'])
print "\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx + 1, tot=len(maod_dirs), dir=maod_dir)
print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
output = Workflow(
    label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
    command=' '.join(cms_cmd),
    merge_size='1.0G',
    cleanup_input=False,
    dataset=Dataset(files=maod_dir, files_per_task=5, patterns=["*.root"]),
    category=processing)
wf.extend([output])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
        bad_exit_codes=[127, 160],
        log_level=1,
    ))
else:
    wf_fragments[step] = fragment_map['default'][step]
multiplier = event_multiplier['default']
if p in event_multiplier:  # 'in' instead of the deprecated dict.has_key()
    multiplier = event_multiplier[p]
nevents = int(multiplier * events_per_gridpack)
print "\t[{0}/{1}] Gridpack: {gp} (nevts {events})".format(idx + 1, len(gridpack_list), gp=gridpack, events=nevents)
lhe = Workflow(
    label=label,
    command='cmsRun {cfg}'.format(cfg=wf_fragments['lhe']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_1'),
    merge_size=-1,  # Don't merge the output files, to keep individuals as small as possible
    cleanup_input=False,
    globaltag=False,
    outputs=['HIG-RunIIFall17wmLHE-00000ND.root'],
    dataset=MultiProductionDataset(
        gridpacks=gridpack,
        events_per_gridpack=nevents,
        events_per_lumi=events_per_lumi,
        lumis_per_task=1,
        randomize_seeds=True),
    category=cat)
wf.extend([lhe])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
workflows = []
# Loop variable renamed from 'events' to 'nevents' so it no longer shadows
# the zipped 'events' list.
for dset, tasksize, nevents in zip(datasets, tasksizes, events):
    tasks = int(nevents / tasksize)
    lhe = Workflow(
        label=dset + '_lhe',
        pset='configs/' + dset + '_lhe.py',
        merge_size='2000M',
        dataset=ProductionDataset(
            events_per_task=tasksize,
            events_per_lumi=200,
            number_of_tasks=tasks
        ),
        category=Category(
            name='lhe',
            cores=2,
            disk=2000,
            memory=2000
        ),
        sandbox=[
            cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1/CMSSW_8_0_21'),
            cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1_rh7/CMSSW_8_0_21')
        ]
    )
    aod = Workflow(
        label=dset + '_aod',
        pset='configs/' + dset + '_aod.py',
        dataset=ParentDataset(
            parent=lhe,
cmd = ['cmsRun']
cmd.append(gp_info['lhe_cfg'])
label = 'lhe_step_{tag}'.format(tag=name)
print "\tLHE Step: {label}".format(label=label)
print "\tLHE cfg: {cfg}".format(cfg=gp_info['lhe_cfg'])
lhe = Workflow(
    label=label,
    command=' '.join(cmd),
    sandbox=cmssw.Sandbox(release=gp_info['lhe_release']),
    merge_size=-1,
    cleanup_input=False,
    globaltag=False,
    outputs=['LHE-00000.root'],
    dataset=MultiProductionDataset(
        gridpacks=gp_loc,
        events_per_gridpack=nevents,
        events_per_lumi=events_per_lumi,
        lumis_per_task=1,
        randomize_seeds=True
    ),
    category=lhe_resources
)
cmd = ['cmsRun']
cmd.append(gp_info['gen_cfg'])
label = 'gen_step_{tag}'.format(tag=name)
print "\tGEN Step: {label}".format(label=label)
print "\tGEN cfg: {cfg}".format(cfg=gp_info['gen_cfg'])
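
# Rough task bookkeeping for the MultiProductionDataset above (inferred from
# the parameter names, not a documented formula): each task processes
# lumis_per_task lumis of events_per_lumi events, so per gridpack roughly:
def tasks_per_gridpack(events_per_gridpack, events_per_lumi, lumis_per_task=1):
    lumis = -(-events_per_gridpack // events_per_lumi)  # ceiling division
    return -(-lumis // lumis_per_task)

# e.g. tasks_per_gridpack(50000, 500) -> 100 tasks of one 500-event lumi each
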
            if sed_str:
                run_process(['sed', '-i', '-e', sed_str, mod_loc])
    else:
        mod_loc = template_loc
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''
label_tag = "{p}_{c}{mod}_{r}".format(p=p, c=c, r=r, mod=mod_tag)
print "\t\tLabel: {label}".format(label=label_tag)
gs = Workflow(
    label='gs_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gs']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['HIG-RunIIFall17wmLHEGS-00040ND.root'],
    dataset=Dataset(
        files=lhe_dir,
        files_per_task=1,
        patterns=["*.root"]
    ),
    category=gs_resources
)
digi = Workflow(
    label='digi_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['digi']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_4_0_patch1'),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=True,  # Save the GEN-SIM step
    outputs=['HIG-RunIIFall17DRPremix-00823ND_step1.root'],
print "Generating workflows:" for idx, gen_dir in enumerate(gen_dirs): #arr = gen_dir.split('_') head, tail = os.path.split(gen_dir) arr = tail.split('_') p, c, r = arr[2], arr[3], arr[4] print "\t[{n}/{tot}] GEN Input: {dir}".format(n=idx + 1, tot=len(gen_dirs), dir=gen_dir) output = Workflow( label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r), command='cmsRun EFTLHEReader_cfg.py', merge_size='1.0G', cleanup_input=False, dataset=Dataset( files=gen_dir, files_per_task= 5, # Remember that the GEN step already does 5-10 files per task patterns=["*.root"]), category=processing) wf.extend([output]) config = Config(label=master_label, workdir=workdir_path, plotdir=plotdir_path, storage=storage, workflows=wf, advanced=AdvancedOptions( dashboard=False, bad_exit_codes=[127, 160],
version = "v5" storage = StorageConfiguration(output=[ "hdfs://eddie.crc.nd.edu:19000/store/user/khurtado/lobster_mc_" + version, # "file:///hadoop/store/user/khurtado/lobster_mc_" + version, # "root://deepthought.crc.nd.edu//store/user/khurtado/lobster_mc_" + version, # "chirp://eddie.crc.nd.edu:9094/store/user/khurtado/lobster_test_" + version, "gsiftp://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version, # "srm://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version, ]) workflows = [] lhe = Workflow(label='lhe_step', pset='mc_gen/HIG-RunIIWinter15wmLHE-00196_1_cfg.py', sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_16_patch1'), merge_size='10M', dataset=ProductionDataset(events_per_task=50, events_per_lumi=5, number_of_tasks=10), category=Category(name='lhe', cores=1, memory=1000)) gs = Workflow(label='gs_step', pset='mc_gen/HIG-RunIISummer15GS-00177_1_cfg.py', sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_18'), merge_size='100M', dataset=ParentDataset(parent=lhe, units_per_task=1), category=Category(name='gs', cores=1, memory=2000, runtime=45 * 60)) digi = Workflow(label='digi_step',
        continue
    sample_loc = ds_helper.getData(sample_name, 'loc')
    ds = cmssw.Dataset(dataset=sample_loc, events_per_task=30000)
    cms_cmd = ['cmsRun', 'EFTLHEReader_cfg.py']
    cms_cmd.extend(['datatier=MINIAODSIM'])
    print "\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx + 1, tot=len(samples), dir=sample_name)
    print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
    safe_label_name = sample_name.replace('-', '')
    output = Workflow(
        label='output_{label}'.format(label=safe_label_name),
        command=' '.join(cms_cmd),
        cleanup_input=False,
        merge_size=-1,
        dataset=ds,
        category=processing)
    wf.extend([output])

config = Config(
    label=master_label,
    workdir=workdir_path,
    plotdir=plotdir_path,
    storage=storage,
    workflows=wf,
    advanced=AdvancedOptions(
        dashboard=False,
        bad_exit_codes=[127, 160],
        log_level=1,
        xrootd_servers=[
            'ndcms.crc.nd.edu',