def main():
    channels     = args.channels
    years        = args.years
    batchSystem  = 'psibatch_runner.sh'
    chunkpattern = re.compile(r".*_(\d+)_[a-z]+\.root")

    for year in years:

        # GET LIST
        samplelist = []
        outdir = "output_%s/" % (year)
        for directory in sorted(os.listdir(outdir)):
            if not os.path.isdir(outdir + directory): continue
            if args.samples and not matchSampleToPattern(directory, args.samples): continue
            if args.veto and not matchSampleToPattern(directory, args.veto): continue
            if args.type == 'mc' and any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau']): continue
            if args.type == 'data' and not any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau']): continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (outdir)
        if args.verbose:
            print samplelist

        # RESUBMIT samples
        for channel in channels:
            print header(year, channel)
            for directory in samplelist:
                #if directory.find('W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8__ytakahas-NanoTest_20180507_W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8-a7a5b67d3e3590e4899e147be08660be__USER')==-1: continue
                outdir = "output_%s/%s" % (year, directory)
                outfilelist = glob.glob(outdir + '/*_' + channel + '.root')
                nFilesPerJob = args.nFilesPerJob
                jobName = getSampleShortName(directory)[1]
                if not outfilelist: continue

                # GET INPUT FILES
                if 'LQ' in directory:
                    infiles = getFileListPNFS(directory)
                else:
                    infiles = getFileListDAS('/' + directory.replace('__', '/'))

                # NFILESPERJOBS CHECKS
                # Diboson (WW, WZ, ZZ) have very large files and acceptance,
                # and the jet-binned DY and WJ files need to be run separately because of a bug affecting LHE_Njets
                if nFilesPerJob > 1 and any(vv in jobName[:8] for vv in ['WW', 'WZ', 'ZZ', 'DY', 'WJ', 'W1J', 'W2J', 'W3J', 'W4J', 'Single', 'Tau']):
                    print bcolors.BOLD + bcolors.WARNING + "[WN] setting number of files per job from %s to 1 for %s" % (nFilesPerJob, jobName) + bcolors.ENDC
                    nFilesPerJob = 1
                infilelists = list(split_seq(infiles, nFilesPerJob))

                # JOB LIST
                badchunks  = []
                misschunks = range(0, len(infilelists))
                jobList = 'joblist/joblist%s_%s_retry.txt' % (directory, channel)
                with open(jobList, 'w') as jobslog:
                    for filename in outfilelist:
                        match = chunkpattern.search(filename)
                        if match:
                            chunk = int(match.group(1))
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[NG] did not recognize output file %s !' % (filename) + bcolors.ENDC
                            exit(1)
                        if chunk in misschunks:
                            misschunks.remove(chunk)
                        elif chunk >= len(infilelists):
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found chunk %s >= total number of chunks %s ! Please make sure you have chosen the correct number of files per job (-n=%s) !' % (filename, chunk, len(infilelists), nFilesPerJob) + bcolors.ENDC
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found weird chunk %s ! Please check if there is any overcounting !' % (filename, chunk) + bcolors.ENDC
                        file = TFile(filename, 'READ')
                        if not file.IsZombie() and file.GetListOfKeys().Contains('tree') and file.GetListOfKeys().Contains('cutflow'):
                            continue
                        infiles = infilelists[chunk]
                        createJobs(jobslog, infiles, outdir, directory, chunk, channel, year=year)
                        badchunks.append(chunk)

                    # BAD CHUNKS
                    if len(badchunks) > 0:
                        badchunks.sort()
                        chunktext = ('chunks ' if len(badchunks) > 1 else 'chunk ') + ', '.join(str(ch) for ch in badchunks)
                        print bcolors.BOLD + bcolors.WARNING + '[NG] %s, %d/%d failed! Resubmitting %s...' % (directory, len(badchunks), len(outfilelist), chunktext) + bcolors.ENDC

                    # MISSING CHUNKS
                    if len(misschunks) > 0:
                        chunktext = ('chunks ' if len(misschunks) > 1 else 'chunk ') + ', '.join(str(i) for i in misschunks)
                        print bcolors.BOLD + bcolors.WARNING + "[WN] %s missing %d/%d files ! Resubmitting %s..." % (directory, len(misschunks), len(outfilelist), chunktext) + bcolors.ENDC
                        for chunk in misschunks:
                            infiles = infilelists[chunk]
                            createJobs(jobslog, infiles, outdir, directory, chunk, channel, year=year)

                # RESUBMIT
                jobName += "_%s_%s" % (channel, year)
                nChunks = len(badchunks) + len(misschunks)
                if nChunks == 0:
                    print bcolors.BOLD + bcolors.OKBLUE + '[OK] ' + directory + bcolors.ENDC
                elif args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                    else:
                        print "Not submitting jobs"
                print
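The resubmission logic above groups the input files into chunks of nFilesPerJob via a split_seq helper defined elsewhere in the repository. A minimal sketch of what such a helper is expected to do (an assumption, not the repository's actual implementation):

# Hypothetical sketch of split_seq as used above: yield consecutive chunks of
# at most n items from a sequence. The real helper may differ in detail.
def split_seq(seq, n):
    for i in xrange(0, len(seq), n):
        yield seq[i:i + n]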
def main():
    channels    = args.channels
    years       = args.years
    tes         = args.tes
    ltf         = args.ltf
    jtf         = args.jtf
    Zmass       = args.Zmass
    prefetch    = args.prefetch
    batchscript = 'submit_SGE.sh'
    tag         = args.tag
    if tag and tag[0] != '_': tag = '_' + tag
    if tes != 1.: tag += "_TES%.3f" % (tes)
    if ltf != 1.: tag += "_LTF%.3f" % (ltf)
    if jtf != 1.: tag += "_JTF%.3f" % (jtf)
    if Zmass: tag += "_Zmass"
    tag = tag.replace('.', 'p')

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        with open(samplelist, 'r') as file:
            for line in file:
                line = line.rstrip().lstrip().split(' ')[0].rstrip('/')
                if line[:2].count('#') > 0: continue
                if line == '': continue
                if line.count('/') < 2: continue
                sample = '/'.join(line.split('/')[-3:])
                if args.samples and not matchSampleToPattern(sample, args.samples): continue
                if args.vetoes and matchSampleToPattern(sample, args.vetoes): continue
                if args.type == 'mc' and any(s in sample[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
                if args.type == 'data' and not any(s in sample[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
                directories.append(line)
        if args.testrun:
            directories = directories[:1]
        #print directories
        blacklist = getBlackList("filelist/blacklist.txt")

        tasks = []
        for channel in channels:
            print header(year, channel, tag)

            # SUBMIT SAMPLES
            for directory in directories:
                if args.verbose:
                    print "\ndirectory =", directory

                # FILTER
                if 'SingleMuon' in directory and channel not in ['mutau', 'mumu', 'elemu']: continue
                if ('SingleElectron' in directory or 'EGamma' in directory) and channel != 'eletau': continue
                if 'Tau' in directory[:5] and channel != 'tautau': continue
                if 'LQ3' in directory[:5] and channel not in ['mutau', 'eletau', 'tautau']: continue

                # GET SKIMMED
                if args.useSkim and isSkimmed(directory, year):
                    directory = isSkimmed(directory, year)
                    if args.verbose:
                        print "skimmed:", directory
                print bcolors.BOLD + bcolors.OKGREEN + directory + bcolors.ENDC

                # FILE LIST
                files = []
                if not args.useDAS:
                    files = getFileListLocal(directory, blacklist=blacklist)
                if not files:
                    if not args.useDAS:
                        print "Getting file list from DAS/PNFS..."
                    files = getFileList(directory, blacklist=blacklist)
                    if files:
                        saveFileListLocal(directory, files, blacklist=blacklist)
                if not files:
                    print bcolors.BOLD + bcolors.WARNING + "Warning! EMPTY filelist for " + directory + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + files[0]
                    for file in files[1:]:
                        print "           " + file
                if args.testrun:
                    files = files[:1]

                # JOB LIST
                sample = '__'.join(directory.split('/')[-3:])
                ensureDirectory('joblist')
                jobList = 'joblist/joblist_%s_%s%s.txt' % (sample, channel, tag)
                print "Creating job file %s..." % (jobList)
                jobName = getSampleShortName(directory)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                jobs = open(jobList, 'w')
                #outdir = ensureDirectory("output_%s/%s"%(year,sample))
                outdir = "$PWD"
                ensureDirectory(outdir + '/logs/')

                # NFILESPERJOBS
                nFilesPerJob = args.nFilesPerJob
                if nFilesPerJob < 1:
                    for default, patterns in nFilesPerJob_defaults:
                        if matchSampleToPattern(sample, patterns):
                            nFilesPerJob = default
                            break
                    else:
                        nFilesPerJob = nFilesPerJob_default
                if args.verbose:
                    print "nFilesPerJob = %s" % nFilesPerJob
                filelists = chunkify(files, nFilesPerJob)

                # CREATE JOBS
                nChunks = 0
                checkExistingFiles(outdir, channel, len(filelists))
                #filelists = chunkify(files,1)
                for file in filelists:
                    #print "FILES = ",f
                    createJobs(jobs, file, outdir, sample, nChunks, channel, year=year, tes=tes, ltf=ltf, jtf=jtf, Zmass=Zmass, tag=tag, prefetch=prefetch)
                    nChunks = nChunks + 1
                jobs.close()

                # SUBMIT
                if args.force:
                    submitJobs_gc(jobName, jobList, nChunks, outdir, batchscript, args.workdir, year, channel)
                else:
                    submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs_gc(jobName, jobList, nChunks, outdir, batchscript, args.workdir, year, channel)
                    else:
                        print "Not submitting jobs"
                print
                tasks.append("_".join([jobName, str(year), channel]))

        # Assemble while script to start every gc task.
        while_temp = open(os.environ["CMSSW_BASE"] + "/src/NanoTreeProducer/while_temp.sh", "r").read()
        task_list = ["go.py {}gc_conf/{}.conf".format(args.workdir, task) for task in tasks]
        while_temp = while_temp.format(TASK_COMMANDS="\n".join(task_list))
        with open(os.path.join(args.workdir, "while.sh"), "w") as out:
            out.write(while_temp)
        print 'Submit samples with: "bash {}"'.format(os.path.join(args.workdir, "while.sh"))
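Sample selection and vetoing throughout these scripts go through matchSampleToPattern. A possible sketch, assuming simple glob-style matching of a sample name against one or more patterns (the repository's actual helper may use regular expressions or different semantics):

# Hypothetical sketch of matchSampleToPattern as used above: return True if
# the sample name matches any of the given glob-style patterns.
import fnmatch

def matchSampleToPattern(sample, patterns):
    if isinstance(patterns, str):
        patterns = patterns.split(',')
    sample = sample.lstrip('/')
    return any(fnmatch.fnmatch(sample, '*%s*' % pattern.strip('*')) for pattern in patterns)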
def main():
    channels     = args.channels
    years        = args.years
    tes          = args.tes
    ltf          = args.ltf
    jtf          = args.jtf
    batchSystem  = 'slurm_runner.sh'
    chunkpattern = re.compile(r".*_(\d+)_[a-z]+(?:_[A-Z]+\dp\d+)?\.root")
    tag = ""
    if tes != 1.: tag += "_TES%.3f" % (tes)
    if ltf != 1.: tag += "_LTF%.3f" % (ltf)
    if jtf != 1.: tag += "_JTF%.3f" % (jtf)
    tag = tag.replace('.', 'p')

    for year in years:

        # GET LIST
        samplelist = []
        outdir = "/work/pbaertsc/heavy_resonance/output_%s/" % (year)
        for directory in sorted(os.listdir(outdir)):
            if not os.path.isdir(outdir + directory): continue
            if args.samples and not matchSampleToPattern(directory, args.samples): continue
            if args.vetos and matchSampleToPattern(directory, args.vetos): continue
            if args.type == 'mc' and any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'EGamma', 'MET', 'SinglePhoton']): continue
            if args.type == 'data' and not any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'EGamma', 'MET', 'SinglePhoton']): continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (outdir)
        if args.verbose:
            print samplelist

        # RESUBMIT samples
        for channel in channels:
            print header(year, channel, tag)
            for directory in samplelist:
                #if directory.find('W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8__ytakahas-NanoTest_20180507_W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8-a7a5b67d3e3590e4899e147be08660be__USER')==-1: continue
                outdir = "/work/pbaertsc/heavy_resonance/output_%s/%s" % (year, directory)
                outfilelist = glob.glob("%s/*_%s%s.root" % (outdir, channel, tag))
                nFilesPerJob = args.nFilesPerJob
                jobName = getSampleShortName(directory, year)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                #if not outfilelist: continue

                # FILE LIST
                infiles = []
                if not args.useDAS:
                    infiles = getFileListLocal(directory)
                if not infiles:
                    if not args.useDAS:
                        print "Getting file list from DAS..."
                    infiles = getFileListDAS('/' + directory)
                    if infiles:
                        saveFileListLocal(directory, infiles)
                if not infiles:
                    print bcolors.BOLD + bcolors.WARNING + "Warning!!! FILELIST empty" + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + infiles[0]
                    for file in infiles[1:]:
                        print "           " + file

                # NFILESPERJOBS
                nFilesPerJob = 1
                if args.verbose:
                    print "nFilesPerJob = %s" % nFilesPerJob
                infilelists = list(split_seq(infiles, nFilesPerJob))

                # JOB LIST
                badchunks  = []
                misschunks = range(0, len(infilelists))
                jobList = '/work/pbaertsc/heavy_resonance/NanoTreeProducer/joblist/joblist_%s_%s%s_retry.txt' % (directory, channel, tag)
                with open(jobList, 'w') as jobslog:
                    for filename in outfilelist:
                        match = chunkpattern.search(filename)
                        if match:
                            chunk = int(match.group(1))
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[NG] did not recognize output file %s !' % (filename) + bcolors.ENDC
                            exit(1)
                        if chunk in misschunks:
                            misschunks.remove(chunk)
                        elif chunk >= len(infilelists):
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found chunk %s >= total number of chunks %s ! Please make sure you have chosen the correct number of files per job (-n=%s), check DAS, or resubmit everything!' % (filename, chunk, len(infilelists), nFilesPerJob) + bcolors.ENDC
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found weird chunk %s ! Please check if there is any overcounting !' % (filename, chunk) + bcolors.ENDC
                        file = TFile(filename, 'READ')
                        if not file.IsZombie() and (file.GetListOfKeys().Contains('tree') and file.GetListOfKeys().Contains('pileup') and file.GetListOfKeys().Contains('Events')):
                            continue
                        infiles = infilelists[chunk]
                        createJobs(jobslog, infiles, outdir, directory, chunk, channel, year=year, tes=tes, ltf=ltf, jtf=jtf)
                        badchunks.append(chunk)

                    # BAD CHUNKS
                    if len(badchunks) > 0:
                        badchunks.sort()
                        chunktext = ('chunks ' if len(badchunks) > 1 else 'chunk ') + ', '.join(str(ch) for ch in badchunks)
                        print bcolors.BOLD + bcolors.WARNING + '[NG] %s, %d/%d failed! Resubmitting %s...' % (directory, len(badchunks), len(outfilelist), chunktext) + bcolors.ENDC

                    # MISSING CHUNKS
                    if len(misschunks) > 0:
                        chunktext = ('chunks ' if len(misschunks) > 1 else 'chunk ') + ', '.join(str(i) for i in misschunks)
                        print bcolors.BOLD + bcolors.WARNING + "[WN] %s missing %d/%d files ! Resubmitting %s..." % (directory, len(misschunks), len(outfilelist), chunktext) + bcolors.ENDC
                        for chunk in misschunks:
                            infiles = infilelists[chunk]
                            createJobs(jobslog, infiles, outdir, directory, chunk, channel, year=year, tes=tes, ltf=ltf, jtf=jtf)

                # RESUBMIT
                nChunks = len(badchunks) + len(misschunks)
                if nChunks == 0:
                    print bcolors.BOLD + bcolors.OKBLUE + '[OK] ' + directory + bcolors.ENDC
                elif args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                    else:
                        print "Not submitting jobs"
                print
def main():
    channels    = args.channels
    years       = args.years
    tes         = args.tes
    ltf         = args.ltf
    jtf         = args.jtf
    batchSystem = 'psibatch_runner.sh'
    tag = ""
    if tes != 1.: tag += "_TES%.3f" % (tes)
    if ltf != 1.: tag += "_LTF%.3f" % (ltf)
    if jtf != 1.: tag += "_JTF%.3f" % (jtf)
    tag = tag.replace('.', 'p')

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        with open(samplelist, 'r') as file:
            for line in file:
                line = line.rstrip().lstrip().split(' ')[0].rstrip('/')
                if line[:2].count('#') > 0: continue
                if line == '': continue
                if args.samples and not matchSampleToPattern(line, args.samples): continue
                if args.vetos and matchSampleToPattern(line, args.vetos): continue
                if args.type == 'mc' and any(s in line[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'EGamma', 'MET']): continue
                if args.type == 'data' and not any(s in line[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'EGamma', 'MET']): continue
                directories.append(line)
        #print directories

        for channel in channels:
            print header(year, channel, tag)

            # SUBMIT SAMPLES
            for directory in directories:
                if args.verbose:
                    print "\ndirectory =", directory

                # FILTER
                print bcolors.BOLD + bcolors.OKGREEN + directory + bcolors.ENDC

                # FILE LIST
                files = []
                name = directory.split('/')[-3].replace('/', '') + '__' + directory.split('/')[-2].replace('/', '') + '__' + directory.split('/')[-1].replace('/', '')
                if not args.useDAS:
                    files = getFileListLocal(directory)
                if not files:
                    if not args.useDAS:
                        print "Getting file list from DAS..."
                    if 'pnfs' in directory:
                        files = getFileListPNFS(directory)
                    else:
                        files = getFileListDAS(directory)
                    if files:
                        saveFileListLocal(name, files)
                if not files:
                    print bcolors.BOLD + bcolors.WARNING + "Warning!!! FILELIST empty" + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + files[0]
                    for file in files[1:]:
                        print "           " + file

                # JOB LIST
                ensureDirectory('joblist_skim')
                jobList = 'joblist_skim/joblist_%s_%s%s.txt' % (name, channel, tag)
                print "Creating job file %s..." % (jobList)
                jobName = getSampleShortName(directory, year)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                jobs = open(jobList, 'w')
                outdir = ensureDirectory("/work/pbaertsc/heavy_resonance/output_skim_%s/%s" % (year, name))
                ensureDirectory(outdir + '/logs/')

                # NFILESPERJOBS
                nFilesPerJob = 1
                if nFilesPerJob < 1:
                    for default, patterns in nFilesPerJob_defaults:
                        if matchSampleToPattern(directory, patterns):
                            nFilesPerJob = default
                            break
                    else:
                        nFilesPerJob = 4  # default
                if args.verbose:
                    print "nFilesPerJob = %s" % nFilesPerJob
                filelists = list(split_seq(files, nFilesPerJob))

                # CREATE JOBS
                nChunks = 0
                checkExistingFiles(outdir, channel, len(filelists))
                #filelists = list(split_seq(files,1))
                for file in filelists:
                    #print "FILES = ",f
                    createJobs(jobs, file, outdir, name, nChunks, channel, year=year, tes=tes, ltf=ltf, jtf=jtf)
                    nChunks = nChunks + 1
                jobs.close()

                # SUBMIT
                if args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                    else:
                        print "Not submitting jobs"
                print
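Job lists, output directories, and log directories in these scripts are created through ensureDirectory, which is also used for its return value. A minimal sketch of the expected behaviour (an assumption, not taken from the repository):

# Hypothetical sketch of ensureDirectory as used above: create the directory
# if it does not exist yet and return the path so calls can be chained.
import os

def ensureDirectory(dirname):
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    return dirname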
def main(args):
    years       = args.years
    tag         = args.tag
    checkEvents = args.checkEvents
    outbasedir  = "/scratch/ineuteli"
    batchscript = 'submit_SGE.sh'
    director    = "root://t3dcachedb.psi.ch:1094/"  #"root://xrootd-cms.infn.it/"

    for year in years:
        samplesdir = args.outdir if args.outdir else "/pnfs/psi.ch/cms/trivcat/store/user/ineuteli/samples/NANOAOD_%s" % (year)

        # GET LIST
        samplelist = []
        if args.instance is None:
            directories = glob.glob(samplesdir + "/*/*/NANOAOD*") + glob.glob(samplesdir + "/*/*/USER*")
        else:
            directories = glob.glob(samplesdir + "/*/*/" + args.instance)
        for directory in sorted(directories):
            sample = '/'.join(directory.split('/')[-3:])
            if args.samples and not matchSampleToPattern(sample, args.samples): continue
            if args.vetos and matchSampleToPattern(sample, args.vetos): continue
            if args.type == 'mc' and any(s in sample[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
            if args.type == 'data' and not any(s in sample[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
            if not os.path.isdir(directory): continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (samplesdir)
        if args.verbose:
            print 'samplelist = %s\n' % (samplelist)

        # CHECK samples
        print header(year, tag)
        for directory in samplelist:
            sample   = '__'.join(directory.split('/')[-3:])
            filelist = '%s/*_skim%s*.root' % (directory, tag)
            if args.verbose:
                print "directory = %s" % (directory)
                print "filelist  = %s" % (filelist)
                print "sample    = %s" % (sample)

            # FILE LIST ON SE
            filelist = [director + d for d in sorted(glob.glob(filelist), key=naturalSort)]
            if not filelist:
                print bcolors.BOLD + bcolors.WARNING + "[WN] %s empty filelist" % directory + bcolors.ENDC
            elif args.verbose:
                print "filelist = %s" % (filelist[0])
                for file in filelist[1:]:
                    print "           " + file

            # FILE LIST ON DAS
            infilelist = []
            if args.useLocal:
                infilelist = getFileListLocal(sample)
            if not infilelist:
                if args.useLocal:
                    print "Getting file list from DAS/PNFS..."
                infilelist = getFileListDAS(sample)
            if not infilelist:
                print bcolors.BOLD + bcolors.WARNING + "Warning! EMPTY filelist for " + directory + bcolors.ENDC
            elif args.verbose:
                print "infilelist = %s" % (infilelist[0])
                for file in infilelist[1:]:
                    print "             " + file

            # FILE LIST FOR RESUBMISSION
            nevents, resubmitfiles = checkFiles(filelist, infilelist, directory, clean=args.removeBadFiles, force=args.force, cleanBug=args.removeBuggedFiles, checkEvents=checkEvents)
            if len(resubmitfiles) == 0:
                print bcolors.BOLD + bcolors.OKGREEN + '[OK] %s is complete ! ' % sample + bcolors.ENDC
            elif len(resubmitfiles) > len(infilelist):
                print bcolors.BOLD + bcolors.FAIL + 'WARNING! %s has more output files %d than %d input files from DAS!' % (sample, len(resubmitfiles), len(infilelist)) + bcolors.ENDC
            else:
                print bcolors.BOLD + bcolors.WARNING + '[WN] %d / %d of %s need to be resubmitted...' % (len(resubmitfiles), len(infilelist), sample) + bcolors.ENDC
            if checkEvents and not any(s in directory for s in ['LQ3']):
                compareEventsToDAS(nevents, sample, treename='Events')
            if len(resubmitfiles) == 0:
                print
                continue

            # JOB LIST
            ensureDirectory('joblist')
            jobList = 'joblist/joblist_%s_skim%s_retry.txt' % (sample, tag)
            print "Creating job file %s..." % (jobList)
            jobName = getSampleShortName(directory)[1]
            jobName += "_%s_skim" % (year) + tag
            outdir = "%s/output_%s/%s" % (outbasedir, year, sample)
            logdir = ensureDirectory("skim_logs_%s/%s" % (year, sample))

            # NFILESPERJOBS
            nFilesPerJob = args.nFilesPerJob
            if nFilesPerJob < 1:
                for default, patterns in nFilesPerJob_defaults:
                    if matchSampleToPattern(directory, patterns):
                        nFilesPerJob = default
                        break
                else:
                    nFilesPerJob = 1  # default
            if args.verbose:
                print "nFilesPerJob = %s" % nFilesPerJob
            filelists = chunkify(resubmitfiles, nFilesPerJob)

            # CREATE JOBS
            with open(jobList, 'w') as jobs:
                nChunks = 0
                for filelist in filelists:
                    createSkimJobs(jobs, year, sample, filelist, outdir, prefetch=args.prefetch)
                    nChunks = nChunks + 1

            # SUBMIT
            if args.force:
                submitSkimJobs(jobName, jobList, nChunks, logdir, batchscript)
            else:
                submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                if submit.lower() == 'force':
                    submit = 'y'
                    args.force = True
                if submit.lower() == 'quit':
                    exit(0)
                if submit.lower() == 'y':
                    submitSkimJobs(jobName, jobList, nChunks, logdir, batchscript)
                else:
                    print "Not submitting jobs"
            print
def main(args):
    years    = args.years
    channels = args.channels
    njobs    = args.njobs
    #getFilesOfRunningJobs()
    #exit(0)
    if args.running:
        getSubmittedJobs()
        return

    for year in years:
        indir = "output_%s/" % (year)
        os.chdir(indir)

        # GET LIST
        samplelist = []
        for directory in sorted(os.listdir('./')):
            if not os.path.isdir(directory): continue
            if args.samples and not matchSampleToPattern(directory, args.samples): continue
            if args.veto and matchSampleToPattern(directory, args.veto): continue
            if args.type == 'mc' and any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau']): continue
            if args.type == 'data' and not any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau']): continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (indir)
        if args.verbose:
            print 'samplelist = %s\n' % (samplelist)

        # CHECK samples
        for channel in channels:
            print header(year, channel)
            for directory in samplelist:
                print ">>> %s" % (directory)
                infiles  = "%s/logs/*%s_%d*.o*.*" % (directory, channel, year)
                filelist = glob.glob(infiles)
                if not filelist: continue

                jobids = []
                for filename in filelist:
                    jobid, taskid = getJobID(filename)
                    if jobid not in jobids:
                        jobids.append(jobid)
                jobids.sort(reverse=True)
                jobids_max = jobids[:njobs]

                jobs    = {id: [] for id in jobids_max}
                stuck   = {id: [] for id in jobids_max}
                failed  = {id: [] for id in jobids_max}
                running = {id: [] for id in jobids_max}
                done    = {id: [] for id in jobids_max}
                for filename in filelist:
                    if not any(".o%d." % (id) in filename for id in jobids_max): continue
                    job = Job(filename)
                    jobs[job.jobid].append(job)
                    if job.stuck:   stuck[job.jobid].append(job)
                    if job.running: running[job.jobid].append(job)
                    if job.failed:  failed[job.jobid].append(job)
                    if job.done:    done[job.jobid].append(job)

                for jobid, joblist in sorted(jobs.iteritems()):
                    ntot = len(joblist)
                    jobs[jobid].sort()
                    stuck[jobid].sort()
                    failed[jobid].sort()
                    running[jobid].sort()
                    done[jobid].sort()
                    print ">>>   %d" % (jobid)
                    if running[jobid]:
                        print ">>>   running: %4d /%4d, %12s" % (len(running[jobid]), ntot, average(running[jobid]))
                    if failed[jobid]:
                        print ">>>   failed:  %4d /%4d" % (len(failed[jobid]), ntot)  #+ ', '.join([str(j) for j in failed[jobid]])
                    if stuck[jobid]:
                        print ">>>   stuck:   %4d /%4d" % (len(stuck[jobid]), ntot)
                    print ">>>   done:    %4d /%4d, %12s" % (len(done[jobid]), ntot, average(done[jobid]) if done[jobid] else "")  #+ ', '.join([str(j) for j in done[jobid]])
                print ">>>"
        os.chdir('..')
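The log monitoring above extracts job and task IDs from log file names matched by the "*.o*.*" glob, which suggests SGE-style names ending in ".o&lt;jobid&gt;.&lt;taskid&gt;". A hedged sketch of a getJobID parser under that naming assumption (the repository's own helper may parse more formats):

# Hypothetical sketch of getJobID for log names ending in .o<jobid>.<taskid>.
import re

jobidpattern = re.compile(r"\.o(\d+)\.(\d+)$")

def getJobID(filename):
    match = jobidpattern.search(filename)
    if not match:
        return -1, -1
    return int(match.group(1)), int(match.group(2))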
def main():
    channels    = args.channels
    years       = args.years
    tes         = args.tes
    batchSystem = 'psibatch_runner.sh'

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        for line in open(samplelist, 'r'):
            line = line.rstrip().lstrip().split(' ')[0]
            if line[:2].count('#') > 0: continue
            if line == '': continue
            if args.samples and not matchSampleToPattern(line, args.samples): continue
            if args.veto and matchSampleToPattern(line, args.veto): continue
            if args.type == 'mc' and any(s in line[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau']): continue
            if args.type == 'data' and not any(s in line[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau']): continue
            directories.append(line)
        #print directories

        for channel in channels:
            print header(year, channel)

            # SUBMIT SAMPLES
            for directory in directories:
                if args.verbose:
                    print "\ndirectory =", directory

                # FILTER
                if 'SingleMuon' in directory and channel not in ['mutau', 'mumu']: continue
                if 'SingleElectron' in directory and channel != 'eletau': continue
                if 'Tau' in directory[:5] and channel != 'tautau': continue
                if 'LQ3' in directory[:5] and channel not in ['mutau', 'eletau', 'tautau']: continue
                print bcolors.BOLD + bcolors.OKGREEN + directory + bcolors.ENDC

                # FILE LIST
                files = None
                name  = None
                if 'pnfs' in directory:
                    name = directory.split('/')[8].replace('/', '') + '__' + directory.split('/')[9].replace('/', '') + '__' + directory.split('/')[10].replace('/', '')
                    #files = getFileListPNFS(directory)
                    files = getFileListPNFS(name)
                else:
                    files = getFileListDAS(directory)
                    name = directory.split('/')[1].replace('/', '') + '__' + directory.split('/')[2].replace('/', '') + '__' + directory.split('/')[3].replace('/', '')
                if not files:
                    print bcolors.BOLD + bcolors.WARNING + "Warning!!! FILELIST empty" + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + files[0]
                    for file in files[1:]:
                        print "           " + file

                # JOBLIST
                ensureDirectory('joblist')
                jobList = 'joblist/joblist%s_%s.txt' % (name, channel)
                print "Creating job file %s..." % (jobList)
                jobName = getSampleShortName(directory)[1]
                jobs = open(jobList, 'w')
                nFilesPerJob = args.nFilesPerJob
                outdir = ensureDirectory("output_%s/%s" % (year, name))

                # NFILESPERJOBS CHECKS
                # Diboson (WW, WZ, ZZ) have very large files and acceptance,
                # and the jet-binned DY and WJ files need to be run separately because of a bug affecting LHE_Njets
                if nFilesPerJob > 1 and any(vv in jobName[:8] for vv in ['WW', 'WZ', 'ZZ', 'DY', 'WJ', 'W1J', 'W2J', 'W3J', 'W4J', 'Single', 'Tau']):
                    print bcolors.BOLD + bcolors.WARNING + "[WN] setting number of files per job from %s to 1 for %s" % (nFilesPerJob, jobName) + bcolors.ENDC
                    nFilesPerJob = 1
                try:
                    os.stat(outdir)
                except:
                    os.mkdir(outdir)
                try:
                    os.stat(outdir + '/logs/')
                except:
                    os.mkdir(outdir + '/logs/')

                # CREATE JOBS
                nChunks = 0
                filelists = list(split_seq(files, nFilesPerJob))
                checkExistingFiles(outdir, channel, len(filelists))
                #filelists = list(split_seq(files,1))
                for file in filelists:
                    #print "FILES = ",f
                    createJobs(jobs, file, outdir, name, nChunks, channel, year=year)
                    nChunks = nChunks + 1
                jobs.close()

                # SUBMIT
                jobName += "_%s_%s" % (channel, year)
                if args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                    else:
                        print "Not submitting jobs"
                print
def main():
    channels    = args.channels
    years       = args.years
    tes         = args.tes
    ltf         = args.ltf
    jtf         = args.jtf
    Zmass       = args.Zmass
    prefetch    = args.prefetch
    batchSystem = 'submit_SGE.sh'
    tag         = args.tag
    if tag and tag[0] != '_': tag = '_' + tag
    if tes != 1.: tag += "_TES%.3f" % (tes)
    if ltf != 1.: tag += "_LTF%.3f" % (ltf)
    if jtf != 1.: tag += "_JTF%.3f" % (jtf)
    if Zmass: tag += "_Zmass"
    tag = tag.replace('.', 'p')
    chunkpattern = re.compile(r".*_(\d+)_[a-z]+%s\.root" % tag)

    for year in years:

        # GET LIST
        samplelist = []
        outdir = "output_%s/" % (year)
        for directory in sorted(os.listdir(outdir)):
            if not os.path.isdir(outdir + directory): continue
            if args.samples and not matchSampleToPattern(directory, args.samples): continue
            if args.vetoes and matchSampleToPattern(directory, args.vetoes): continue
            if args.type == 'mc' and any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
            if args.type == 'data' and not any(s in directory[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (outdir)
        if args.verbose:
            print samplelist
        blacklist = getBlackList("filelist/blacklist.txt")

        # RESUBMIT samples
        for channel in channels:
            print header(year, channel, tag)
            for directory in samplelist:
                #if directory.find('W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8__ytakahas-NanoTest_20180507_W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8-a7a5b67d3e3590e4899e147be08660be__USER')==-1: continue
                outdir = "output_%s/%s" % (year, directory)
                outfilelist = glob.glob("%s/*_%s%s.root" % (outdir, channel, tag))
                nFilesPerJob = args.nFilesPerJob
                jobName = getSampleShortName(directory)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                if not outfilelist: continue

                # GET SKIMMED
                if args.useSkim and isSkimmed(directory, year):
                    directory = isSkimmed(directory, year)

                # FILE LIST
                infiles = []
                if not args.useDAS:
                    infiles = getFileListLocal(directory)
                if not infiles:
                    if not args.useDAS:
                        print "Getting file list from DAS..."
                    infiles = getFileList(directory)
                    if infiles:
                        saveFileListLocal(directory, infiles)
                if not infiles:
                    print bcolors.BOLD + bcolors.WARNING + "Warning! EMPTY filelist for " + directory + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + infiles[0]
                    for file in infiles[1:]:
                        print "           " + file

                # NFILESPERJOBS
                sample = '__'.join(directory.split('/')[-3:])
                nFilesPerJob = args.nFilesPerJob
                if nFilesPerJob < 1:
                    for default, patterns in nFilesPerJob_defaults:
                        if matchSampleToPattern(sample, patterns):
                            nFilesPerJob = default
                            break
                    else:
                        nFilesPerJob = nFilesPerJob_default
                if args.verbose:
                    print "nFilesPerJob = %s" % nFilesPerJob
                infilelists = chunkify(infiles, nFilesPerJob)

                # JOB LIST
                badchunks  = []
                misschunks = range(0, len(infilelists))
                jobList = 'joblist/joblist_%s_%s%s_retry.txt' % (sample, channel, tag)
                with open(jobList, 'w') as jobslog:
                    for filename in outfilelist:
                        match = chunkpattern.search(filename)
                        if match:
                            chunk = int(match.group(1))
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[NG] did not recognize output file %s !' % (filename) + bcolors.ENDC
                            exit(1)
                        if chunk in misschunks:
                            misschunks.remove(chunk)
                        elif chunk >= len(infilelists):
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found chunk %s >= total number of chunks %s ! Please make sure you have chosen the correct number of files per job (-n=%s), check DAS, or resubmit everything!' % (filename, chunk, len(infilelists), nFilesPerJob) + bcolors.ENDC
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found weird chunk %s ! Please check if there is any overcounting !' % (filename, chunk) + bcolors.ENDC
                        file = TFile(filename, 'READ')
                        if not file.IsZombie() and file.GetListOfKeys().Contains('tree') and file.GetListOfKeys().Contains('cutflow'):
                            continue
                        infiles = infilelists[chunk]
                        for badfile in blacklist:
                            if badfile in infiles:
                                print ">>> removing blacklisted %s" % badfile
                                infiles.remove(badfile)
                        createJobs(jobslog, infiles, outdir, sample, chunk, channel, year=year, tes=tes, ltf=ltf, jtf=jtf, Zmass=Zmass, tag=tag, prefetch=prefetch)
                        badchunks.append(chunk)

                    # BAD CHUNKS
                    if len(badchunks) > 0:
                        badchunks.sort()
                        chunktext = ('chunks ' if len(badchunks) > 1 else 'chunk ') + ', '.join(str(ch) for ch in badchunks)
                        print bcolors.BOLD + bcolors.WARNING + '[NG] %s, %d/%d jobs failed!\n  Resubmitting %s...' % (directory, len(badchunks), len(outfilelist), chunktext) + bcolors.ENDC

                    # MISSING CHUNKS
                    if len(misschunks) > 0:
                        chunktext = ('chunks ' if len(misschunks) > 1 else 'chunk ') + ', '.join(str(i) for i in misschunks)
                        print bcolors.BOLD + bcolors.WARNING + "[WN] %s missing %d/%d files !\n  Resubmitting %s..." % (directory, len(misschunks), len(outfilelist), chunktext) + bcolors.ENDC
                        for chunk in misschunks:
                            infiles = infilelists[chunk]
                            createJobs(jobslog, infiles, outdir, sample, chunk, channel, year=year, tes=tes, ltf=ltf, jtf=jtf, Zmass=Zmass, tag=tag, prefetch=prefetch)

                # RESUBMIT
                nChunks = len(badchunks) + len(misschunks)
                if nChunks == 0:
                    print bcolors.BOLD + bcolors.OKGREEN + '[OK] %s' % directory + bcolors.ENDC
                elif args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                    else:
                        print "Not submitting jobs"
                print
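The blacklist used above to drop known-bad input files is read with getBlackList. A minimal sketch, assuming a plain text file with one filename per line and '#' comments (the repository's actual reader may differ):

# Hypothetical sketch of getBlackList as used above: collect the non-empty,
# non-comment lines of a text file into a list of filenames.
import os

def getBlackList(filename):
    blacklist = []
    if not os.path.isfile(filename):
        return blacklist
    with open(filename, 'r') as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith('#'):
                blacklist.append(line)
    return blacklist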
def main():
    years       = args.years
    outbasedir  = "/scratch/ineuteli"
    batchscript = 'submit_SGE.sh'
    tag         = ""

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        with open(samplelist, 'r') as file:
            for line in file:
                line = line.rstrip().lstrip().split(' ')[0].rstrip('/')
                if line[:2].count('#') > 0: continue
                if line == '': continue
                if '/pnfs/' in line: continue
                if args.samples and not matchSampleToPattern(line, args.samples): continue
                if args.vetos and matchSampleToPattern(line, args.vetos): continue
                if args.type == 'mc' and any(s in line[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
                if args.type == 'data' and not any(s in line[:len(s) + 2] for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']): continue
                directories.append(line)
        if args.testrun:
            directories = directories[:1]
        blacklist = []  #getBlackList("filelist/blacklist.txt")
        print header(year, tag)

        # SUBMIT SAMPLES
        for directory in directories:
            if args.verbose:
                print "\ndirectory =", directory
            print bcolors.BOLD + bcolors.OKGREEN + directory + bcolors.ENDC

            # FILE LIST
            files  = []
            name   = '__'.join(directory.split('/')[-3:])
            sample = '/' + name.replace('__', '/')
            ###if not args.useDAS:
            ###  files = getFileListLocal(directory,blacklist=blacklist)
            ###if not files:
            ###  if not args.useDAS:
            ###    print "Getting file list from DAS..."
            files = getFileListDAS(directory, blacklist=blacklist)
            ###  if files:
            ###    saveFileListLocal(name,files,blacklist=blacklist)
            if not files:
                print bcolors.BOLD + bcolors.WARNING + "Warning! EMPTY filelist for " + directory + bcolors.ENDC
                continue
            elif args.verbose:
                print "FILELIST = " + files[0]
                for file in files[1:]:
                    print "           " + file
            if args.testrun:
                files = files[:1]

            # JOB LIST
            ensureDirectory('joblist')
            jobList = 'joblist/joblist_%s_skim%s.txt' % (name, tag)
            print "Creating job file %s..." % (jobList)
            jobName = getSampleShortName(directory)[1]
            jobName += "_%s_skim" % (year) + tag
            outdir = "%s/output_%s/%s" % (outbasedir, year, name)
            logdir = ensureDirectory("skim_logs_%s/%s" % (year, name))

            # NFILESPERJOBS
            nFilesPerJob = args.nFilesPerJob
            if nFilesPerJob < 1:
                for default, patterns in nFilesPerJob_defaults:
                    if matchSampleToPattern(directory, patterns):
                        nFilesPerJob = default
                        break
                else:
                    nFilesPerJob = nFilesPerJob_default  # default
            if args.verbose:
                print "nFilesPerJob = %s" % nFilesPerJob
            filelists = chunkify(files, nFilesPerJob)

            # CREATE JOBS
            with open(jobList, 'w') as jobs:
                nChunks = 0
                for filelist in filelists:
                    createSkimJobs(jobs, year, name, filelist, outdir, prefetch=args.prefetch)
                    nChunks = nChunks + 1

            # SUBMIT
            if args.force:
                submitSkimJobs(jobName, jobList, nChunks, logdir, batchscript)
            else:
                submit = raw_input("Do you also want to submit %d jobs to the batch system? [y/n] " % (nChunks))
                if submit.lower() == 'force':
                    submit = 'y'
                    args.force = True
                if submit.lower() == 'quit':
                    exit(0)
                if submit.lower() == 'y':
                    submitSkimJobs(jobName, jobList, nChunks, logdir, batchscript)
                else:
                    print "Not submitting jobs"
            print