# --- Fragment: copy a cfg template, apply sed edits, then build GS/DIGI workflows ---
# NOTE(review): this excerpt starts mid-`if` — the condition matching the `else:`
# below, and the enclosing `for step ...` loop header, lie outside the excerpt,
# so the indentation here is reconstructed and should be confirmed against the
# full file.
        # Rename the template cfg with the per-sample tag and copy it aside.
        tail = tail.replace("cfg.py","{tag}_cfg.py".format(tag=cfg_tag))
        mod_loc = os.path.join(MODIFIED_CFG_DIR,tail)
        shutil.copy(template_loc,mod_loc)
        # Apply each non-empty sed expression in-place to the copied cfg.
        for sed_str in sed_str_list:
            if sed_str:
                run_process(['sed','-i','-e',sed_str,mod_loc])
    else:
        # No modified copy needed -> use the unmodified template directly.
        mod_loc = template_loc
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''  # the 'base' modifier is encoded as an empty tag in labels
label_tag = "{p}_{c}{mod}_{r}".format(p=p,c=c,r=r,mod=mod_tag)
print "\t\tLabel: {label}".format(label=label_tag)
# GS step: outputs are kept unmerged and the LHE inputs are not cleaned up.
gs = Workflow(
    label='gs_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gs']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['HIG-RunIIFall17wmLHEGS-00040ND.root'],
    dataset=Dataset(
        files=lhe_dir,
        files_per_task=1,
        patterns=["*.root"]
    ),
    category=gs_resources
)
# DIGI step — the Workflow(...) call continues past the end of this excerpt.
digi = Workflow(
    label='digi_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['digi']),
# Output-area version tag; bump to write results to a fresh directory.
version = "v5"
# Storage endpoints tried in order; commented entries are alternative protocols
# for the same Notre Dame storage.
storage = StorageConfiguration(output=[
    "hdfs://eddie.crc.nd.edu:19000/store/user/khurtado/lobster_mc_" + version,
    # "file:///hadoop/store/user/khurtado/lobster_mc_" + version,
    # "root://deepthought.crc.nd.edu//store/user/khurtado/lobster_mc_" + version,
    # "chirp://eddie.crc.nd.edu:9094/store/user/khurtado/lobster_test_" + version,
    "gsiftp://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version,
    # "srm://T3_US_NotreDame/store/user/khurtado/lobster_mc_" + version,
])
workflows = []
# LHE step: produces events from scratch (ProductionDataset).
lhe = Workflow(label='lhe_step',
               pset='mc_gen/HIG-RunIIWinter15wmLHE-00196_1_cfg.py',
               sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_16_patch1'),
               merge_size='10M',
               dataset=ProductionDataset(events_per_task=50,
                                         events_per_lumi=5,
                                         number_of_tasks=10),
               category=Category(name='lhe', cores=1, memory=1000))
# GS step: consumes the LHE output (ParentDataset), one unit per task.
gs = Workflow(label='gs_step',
              pset='mc_gen/HIG-RunIISummer15GS-00177_1_cfg.py',
              sandbox=cmssw.Sandbox(release='mc_gen/CMSSW_7_1_18'),
              merge_size='100M',
              dataset=ParentDataset(parent=lhe, units_per_task=1),
              category=Category(name='gs', cores=1, memory=2000, runtime=45 * 60))
# NOTE(review): this excerpt starts mid-call — the `lhe = Workflow(` opener for
# these keyword arguments lies outside the excerpt; layout reconstructed.
    label=dset + '_lhe',
    pset='configs/' + dset + '_lhe.py',
    merge_size='2000M',
    dataset=ProductionDataset(
        events_per_task=tasksize,
        events_per_lumi=200,
        number_of_tasks=tasks
    ),
    category=Category(
        name='lhe',
        cores=2,
        disk=2000,
        memory=2000
    ),
    # Two sandboxes: the same CMSSW release built in two locations
    # (plain and rh7 variants) — presumably for two OS flavors; TODO confirm.
    sandbox=[
        cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1/CMSSW_8_0_21'),
        cmssw.Sandbox(release='/afs/crc.nd.edu/user/m/mwolf3/work/ttH/mcgen/moriond17_part1_rh7/CMSSW_8_0_21')
    ]
)
# AOD step: consumes the LHE workflow output, four units per task.
# The Workflow(...) call continues past the end of this excerpt.
aod = Workflow(
    label=dset + '_aod',
    pset='configs/' + dset + '_aod.py',
    dataset=ParentDataset(
        parent=lhe,
        units_per_task=4
    ),
    category=Category(
        name='aod',
        cores=2,
        disk=1000,
# NOTE(review): this excerpt starts mid-expression — a preceding
# `tail = tail.replace("cfg.py",` (cf. the parallel fragment elsewhere in this
# dump) is cut off before the first line; indentation reconstructed.
                            "{tag}_cfg.py".format(tag=cfg_tag))
        mod_loc = os.path.join(MODIFIED_CFG_DIR, tail)
        shutil.copy(template_loc, mod_loc)
        # Apply each non-empty sed expression in-place to the copied cfg.
        for sed_str in sed_str_list:
            if sed_str:
                run_process(['sed', '-i', '-e', sed_str, mod_loc])
    else:
        # No modified copy needed -> use the unmodified template directly.
        mod_loc = template_loc
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''  # the 'base' modifier is encoded as an empty tag in labels
label_tag = "{p}_{c}{mod}_{r}".format(p=p, c=c, r=r, mod=mod_tag)
# GEN step: outputs kept unmerged; LHE inputs preserved.
gen = Workflow(
    label='gen_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gen']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['gen']),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,  # Do not accidently clean up the LHE files!!!
    globaltag=False,
    outputs=['GEN-00000.root'],
    dataset=Dataset(files=lhe_dir, files_per_task=1, patterns=["*.root"]),
    category=gen_resources)
# SIM step — the Workflow(...) call continues past the end of this excerpt.
sim = Workflow(
    label='sim_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['sim']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['sim']),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=True,
# Files shipped alongside every gridpack-generation task.
gridpack_inputs = [
    os.path.join(base, madgraph),
    os.path.join(base, np_model),
    '{}/gridpack.py'.format(base),
    cards
]
workflows = []
# One gridpack workflow per (process, combination of `dimension` coefficients).
for process in processes:
    for coefficient_group in itertools.combinations(coefficients, dimension):
        tag = '_'.join(coefficient_group)
        gridpacks = Workflow(
            label='{}_gridpacks_{}'.format(process, tag),
            dataset=MultiGridpackDataset(events_per_gridpack=26000, events_per_lumi=13000),
            category=gridpack_resources,
            sandbox=cmssw.Sandbox(release=release),
            # Use the command and extra_inputs below to constrain coefficient values with an input scan and
            # scale value rather than an interval. You can obtain the input scan by running 'cross_sections.py'
            # and then (example assuming scan='final_pass.total.npz' and cross_sections_version='ttV/cross_sections/1')
            # merge_scans final_pass.total.npz /hadoop/store/user/$USER/ttV/cross_sections/1/final_pass_*/*npz
            # mv final_pass.total.npz /hadoop/store/user/$USER/ttV/cross_sections/1/
            # command='python gridpack.py {np} {cores} {coefficients} {events} {mg} {model} {pp} {cards} {pcard} --constraints {constraints} --scale {scale} --scan {scan}'.format(
            #     np=numvalues,
            #     cores=cores,
            #     coefficients=','.join(coefficient_group),
            #     events=events,
            #     mg=madgraph,
            #     model='HEL_UFO.third_gen.tar.gz',
            #     pp=np_param_path,
            #     cards=os.path.split(cards)[-1],
            #     pcard='{}.dat'.format(process),
# NOTE(review): this excerpt presumably sits inside a loop over `maod_dirs`
# (it reads `idx`, `maod_dir`, `tail` from the enclosing scope) — confirm the
# indentation against the full file.
# Parse process / coefficient / run tags out of the mAOD directory name.
arr = tail.split('_')
p, c, r = arr[2], arr[3], arr[4]
cms_cmd = ['cmsRun', 'EFTLHEReader_cfg.py']
cms_cmd.extend(['datatier=MINIAODSIM'])
print "\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx + 1, tot=len(maod_dirs), dir=maod_dir)
print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
# Processing workflow: runs EFTLHEReader over the mAOD files, 5 files/task.
output = Workflow(
    label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
    command=' '.join(cms_cmd),
    sandbox=cmssw.Sandbox(
        release='../../../../../CMSSW_10_6_8'
    ),  # This file should be in CMSSW_10_6_8/src/EFTGenReader/LHEReader/test/lobster. TODO: Specify path in a better way.
    merge_size='1.0G',
    cleanup_input=False,
    dataset=Dataset(files=maod_dir, files_per_task=5, patterns=["*.root"]),
    category=processing)
wf.extend([output])
# Top-level Lobster config — the Config(...) call continues past this excerpt.
config = Config(label=master_label,
                workdir=workdir_path,
                plotdir=plotdir_path,
                storage=storage,
                workflows=wf,
                advanced=AdvancedOptions(
                    dashboard=False,
                    bad_exit_codes=[127, 160],
# NOTE(review): this excerpt reads `gp_info`, `idx`, `gridpacks`, and `nevents`
# from an enclosing scope (likely a loop over gridpacks) — confirm indentation
# against the full file.
name = gp_info['name']
gp_loc = os.path.join(gp_info['location'],gp_info['tarball'])
print "[{0}/{1}] Gridpack: {gp} (nevts {events})".format(idx+1,len(gridpacks),gp=name,events=nevents)
print "\tGridpack: {path}".format(path=gp_loc)
cmd = ['cmsRun']
cmd.append(gp_info['lhe_cfg'])
label = 'lhe_step_{tag}'.format(tag=name)
print "\tLHE Step: {label}".format(label=label)
print "\tLHE cfg: {cfg}".format(cfg=gp_info['lhe_cfg'])
# LHE step: produce events from the gridpack; outputs kept unmerged and the
# gridpack input is not cleaned up.
lhe = Workflow(
    label=label,
    command=' '.join(cmd),
    sandbox=cmssw.Sandbox(release=gp_info['lhe_release']),
    merge_size=-1,
    cleanup_input=False,
    globaltag=False,
    outputs=['LHE-00000.root'],
    dataset=MultiProductionDataset(
        gridpacks=gp_loc,
        events_per_gridpack=nevents,
        events_per_lumi=events_per_lumi,
        lumis_per_task=1,
        randomize_seeds=True
    ),
    category=lhe_resources
)
# Start building the command for the next step (excerpt ends here).
cmd = ['cmsRun']
wf_fragments = {} for step in wf_steps: if fragment_map.has_key(p) and fragment_map[p].has_key(step): wf_fragments[step] = fragment_map[p][step] else: wf_fragments[step] = fragment_map['default'][step] multiplier = event_multiplier['default'] if event_multiplier.has_key(p): multiplier = event_multiplier[p] nevents = int(multiplier * events_per_gridpack) print "\t[{0}/{1}] Gridpack: {gp} (nevts {events})".format( idx + 1, len(gridpack_list), gp=gridpack, events=nevents) lhe = Workflow( label=label, command='cmsRun {cfg}'.format(cfg=wf_fragments['lhe']), sandbox=cmssw.Sandbox(release='CMSSW_9_3_1'), merge_size= -1, # Don't merge the output files, to keep individuals as small as possible cleanup_input=False, globaltag=False, outputs=['HIG-RunIIFall17wmLHE-00000ND.root'], dataset=MultiProductionDataset(gridpacks=gridpack, events_per_gridpack=nevents, events_per_lumi=events_per_lumi, lumis_per_task=1, randomize_seeds=True), category=cat) wf.extend([lhe]) config = Config(label=master_label, workdir=workdir_path,
# NOTE(review): this excerpt starts mid-call — these arguments continue a
# `print "...".format(` statement whose opening lies outside the excerpt;
# indentation reconstructed.
                                                 len(maod_dirs), dir=maod_dir)
# Parse process / coefficient / run tags out of the mAOD directory name.
head, tail = os.path.split(maod_dir)
arr = tail.split('_')
p, c, r = arr[2], arr[3], arr[4]
#print("p c r:",p,c,r)
# All processes share the same cfg fragment per step.
wf_fragments = {}
for step in wf_steps:
    template_loc = fragment_map["all_procs"][step]
    wf_fragments[step] = template_loc
label_tag = "{p}_{c}_{r}".format(p=p, c=c, r=r)
# NanoAOD step: merge outputs with haddnano.py instead of the default merger.
naod = Workflow(
    label='nAOD_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['naod']),
    sandbox=cmssw.Sandbox(release=PATH_TO_NAOD_CMSSW),
    #merge_size='256M',
    merge_size='1000M',
    merge_command='python haddnano.py @outputfiles @inputfiles',
    extra_inputs=[
        os.path.join(PATH_TO_NAOD_CMSSW,
                     'src/PhysicsTools/NanoAODTools/scripts/haddnano.py')
    ],
    cleanup_input=False,  # Leave the MAOD files
    outputs=['NAOD-00000.root'],
    dataset=Dataset(files=maod_dir, files_per_task=1, patterns=["*.root"]),
    category=naod_resources)
wf.extend([naod])
# Top-level Lobster config — the Config(...) call continues past this excerpt.
config = Config(label=master_label,
# NOTE(review): this excerpt starts mid-`if` — the condition matching the
# `else:` below, and the enclosing `for step ...` loop header, lie outside the
# excerpt; indentation reconstructed and should be confirmed.
        # Apply each non-empty sed expression in-place to the copied cfg.
        for sed_str in sed_str_list:
            if sed_str:
                run_process(['sed', '-i', '-e', sed_str, mod_loc])
    else:
        # No modified copy needed -> use the unmodified template directly.
        mod_loc = template_loc
    wf_fragments[step] = mod_loc
if mod_tag == 'base':
    mod_tag = ''  # the 'base' modifier is encoded as an empty tag in labels
label_tag = "{p}_{c}{mod}_{r}".format(p=p, c=c, r=r, mod=mod_tag)
print "\t\tLabel: {label}".format(label=label_tag)
# Debug dump of the resolved per-step cfg fragments.
print "\nThis is the wf_fragments:", wf_fragments, "\n"
# GEN step: outputs kept unmerged; LHE inputs preserved.
gen = Workflow(
    label='gen_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gen']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
    merge_size=-1,  # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['GEN-00000.root'],
    dataset=Dataset(
        files=lhe_dir,
        #files_per_task=2,
        files_per_task=1,
        patterns=["*.root"]),
    category=gen_resources)
# SIM step — the Workflow(...) call continues past the end of this excerpt.
sim = Workflow(
    label='sim_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['sim']),
    sandbox=cmssw.Sandbox(release=rel_map[UL_YEAR]['sim']),
# NOTE(review): this excerpt starts mid-dict — the leading `],` closes the
# previous sample's value list; earlier entries of `samples` lie outside the
# excerpt. Each value appears to be [dataset path, events arg, channel tag]
# judging by how value[0..2] are used below — TODO confirm.
    ],
    'L1Stub_SingleElectron_D49_SW6p0': [
        'rgoldouz/FullProduction/L1tracker_DAS_CMSSW112pre5/L1Stub_SingleElectron_D49_SW6p0',
        '100000',
        'ele'
    ]
}
wf = []
for key, value in samples.items():
    # Only process the displaced-muon samples; skip everything else.
    if 'DisplacedMu' not in key:
        continue
    print key
    # Analysis workflow — the Workflow(...) call continues past this excerpt.
    Analysis = Workflow(
        label='FE_L1Analysis_%s' % (key),
        sandbox=cmssw.Sandbox(
            release='/afs/crc.nd.edu/user/r/rgoldouz/CMSSW_10_4_0'),
        dataset=Dataset(files=value[0], files_per_task=50),
        globaltag=False,
        command='python Lobster_check.py ' + value[1] + ' ' + value[2] + ' @inputfiles',
        extra_inputs=[
            'Lobster_check.py',
            '../lib/main.so',
            '../include/MyAnalysis.h',
        ],
        outputs=['ANoutput.root'],
        # dataset=Dataset(
        #     files=value[0],
        #     files_per_task=50,
        #     patterns=["*.root"]
        # ),