def main(opts):
    """Collect SignalSelectionFlowVsVertices histograms from each crab task
    in the given multicrab directories and plot them.

    Fixes the typo "in ile" -> "in file" in the missing-histogram message.
    """
    tdrstyle.TDRStyle()
    # Loop over datasets, gathering one histogram and one label per task.
    histos = []
    labels = []
    for multicrabDir in opts.multicrabdir:
        crabDirs = multicrab.getTaskDirectories(None, os.path.join(multicrabDir, "multicrab.cfg"))
        for crabDir in crabDirs:
            taskName = os.path.split(crabDir)[1]
            rootFile = ROOT.TFile.Open(os.path.join(crabDir, "res", "histograms-%s.root" % taskName))
            if rootFile.IsZombie():
                raise Exception("Error: File 'histograms-%s.root' not found!" % taskName)
            # Get histogram; an explicit --variation overrides the default directory.
            histoName = "signalAnalysis/SignalSelectionFlowVsVertices"
            if opts.variation is not None:
                histoName = "%s/SignalSelectionFlowVsVertices" % opts.variation[0]
            h = rootFile.Get(histoName)
            # NOTE: keep '== None' -- PyROOT's null TObject proxy compares
            # equal to None but is not the None singleton.
            if h == None:
                raise Exception("Error: histogram '%s' not found in file 'histograms-%s.root'!" % (histoName, taskName))
            histos.append(h)
            labels.append(taskName)
    # We have the histograms and names; make both plot variants.
    makePlots(histos, labels, False)
    makePlots(histos, labels, True)
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) logfile = "publish_%s.log" % time.strftime("%y%m%d_%H%M%S") log = open(logfile, "w") for task in taskDirs: if not os.path.exists(task): print "Skipping task %s, directory doesnt exist" % task continue ret = publish(task, log) if ret != 0: print "Publish error (%d) with task %s, see %s for details" % ( ret, task, logfile) log.close() return 1 if opts.report: ret = report(task, log) if ret != 0: print "Report error (%d) with task %s, see %s for details" % ( ret, task, logfile) log.close() return 1 if opts.move: shutil.move(task, task + "_published") log.close() return 0
def main(opts): if len(opts.dirs) == 0: print "No directories given" return 1 # (tmph, tmp) = tempfile.mkstemp(suffix=".patch") # print tmp # src = open("CMSSW_sh.patch") # dst = open(tmp, "w") # for line in src: # dst.write(line.replace("%%INPUT%%", opts.input)) # src.close() # dst.close() # os.remove(tmp) patch = "" src = open(os.path.join(os.environ["CMSSW_BASE"], "src/HiggsAnalysis/HeavyChHiggsToTauNu/test/tauEmbedding/CMSSW_sh.patch")) for line in src: patch += line.replace("%%INPUT%%", opts.input) src.close() # print patch taskDirs = multicrab.getTaskDirectories(opts) for d in taskDirs: cmd = ["patch", "-p0", os.path.join(d, "job", "CMSSW.sh")] p = subprocess.Popen(cmd, stdin=subprocess.PIPE) p.communicate(patch) if p.returncode != 0: return p.returncode return 0
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) exe_re = re.compile("ExeTime=(?P<time>\d+)") user_re = re.compile("CrabUserCpuTime=(?P<time>\d+(\.\d+)?)") sys_re = re.compile("CrabSysCpuTime=(?P<time>\d+(\.\d+)?)") for task in taskDirs: files = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout")) if len(files) == 0: continue exe_times = [] user_times = [] sys_times = [] for name in files: f = open(name) for line in f: m = exe_re.search(line) if m: exe_times.append(float(m.group("time"))) continue m = user_re.search(line) if m: user_times.append(float(m.group("time"))) continue m = sys_re.search(line) if m: sys_times.append(float(m.group("time"))) f.close() print "Task %s, %d jobs" % (task, len(files)) print times("Exe", exe_times) print times("User", user_times) print times("Sys", sys_times)
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) logfile = "publish_%s.log" % time.strftime("%y%m%d_%H%M%S") log = open(logfile, "w") for task in taskDirs: if not os.path.exists(task): print "Skipping task %s, directory doesnt exist" % task continue ret = publish(task, log) if ret != 0: print "Publish error (%d) with task %s, see %s for details" % (ret, task, logfile) log.close() return 1 if opts.report: ret = report(task, log) if ret != 0: print "Report error (%d) with task %s, see %s for details" % (ret, task, logfile) log.close() return 1 if opts.move: shutil.move(task, task+"_published") log.close() return 0
def main(opts, args): workdir = os.getcwd() crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: print "Making export pack of %s" % d # Create the tar archive filename = "%s_export.tgz" % d taroptions = "w:gz" if opts.fullStatus: filename = "%s_export_unmerged.tar" % d taroptions = "w:" tar = tarfile.open(filename, mode="%s" % taroptions) # Check if everything is ok if not os.path.exists(d + "/lumi.json"): print "... Could not find lumi.json, if you wish to include it, run hplusLumiCalc.py" else: print " adding file %s/lumi.json" % d tar.add("%s/lumi.json" % d) if not os.path.exists(d + "/timeReport.txt"): print "... Could not find timeReport.txt, if you wish to include it run hplusMultiCrabAnalysis --time >! timeReport.txt" else: print " adding timeReport.txt" tar.add("%s/timeReport.txt" % d) if os.path.exists(d + "/codeDiff.txt"): tar.add("%s/codeDiff.txt" % d) if os.path.exists(d + "/codeStatus.txt"): tar.add("%s/codeStatus.txt" % d) if os.path.exists(d + "/codeVersion.txt"): tar.add("%s/codeVersion.txt" % d) for jobdir in glob.glob(d + "/*/job"): tar.add(jobdir) if opts.fullStatus: for f in [ "%s/*py" % d, "%s/*cfg" % d, "%s/*/res/histograms_*root" % d ]: list = glob.glob(f) for i in list: print " adding file ", i tar.add(i) else: for f in [ "%s/*py" % d, "%s/*cfg" % d, "%s/*/res/histograms-*root" % d ]: list = glob.glob(f) for i in list: print " adding file ", i tar.add(i) tar.close() print "Written file %s" % filename
def readDatasetDirs(opts, crossSections, datasets):
    """Read one counter dataset per task directory, appending to *datasets*.

    Raises if the input pattern matches zero or more than one file.
    Fix: removed the unreachable 'return 1' that followed the raise.
    """
    taskdirs = multicrab.getTaskDirectories(opts)
    for d in taskdirs:
        files = glob.glob(os.path.join(d, "res", opts.input))
        if len(files) > 1:
            raise Exception("Only one file should match the input (%d matched) for task %s" % (len(files), d))
        elif len(files) == 0:
            raise Exception("No files matched to input for task %s" % d)
        # Mutates the caller-supplied list in place.
        datasets.append(counter.readDataset(files[0], opts.counterdir, d, crossSections))
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) analyses = [] watchdogAnalyses = [] if opts.time: analyses.append(TimeAnalysis()) if opts.size: analyses.append(SizeAnalysis(opts.sizeFile)) if opts.memory: watchdogAnalyses.append(MemoryAnalysis()) if opts.host: analyses.append(HostAnalysis()) if opts.watchdog: analyses.append(WatchdogAnalysis()) if len(analyses) + len(watchdogAnalyses) == 0: return 1 for task in taskDirs: files = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout")) files = excludeInclude(files, opts.include, opts.exclude) wfiles = [] if len(watchdogAnalyses) > 0: wfiles = glob.glob(os.path.join(task, "res", "Watchdog_*.log.gz")) wfiles = excludeInclude(wfiles, opts.include, opts.exclude) if opts.byStatus: try: jobs = multicrab.crabStatusToJobs(task, opts.printCrab) except Exception: if not opts.allowFails: raise print "%s: crab -status failed" % task continue print "Task %s" % task # Ignore running jobs for status in multicrabStatus.order_run: if status in jobs: del jobs[status] stats = jobs.keys() stats.sort() for status in stats: ids = ",".join(["%d" % j.id for j in jobs[status]]) f = excludeInclude(files, ids) wf = excludeInclude(wfiles, ids) print " %s, %d jobs" % (status, len(files)) analyseTask(f, wf, analyses, watchdogAnalyses, prefix=" ") else: print "Task %s, %d jobs" % (task, len(files)) analyseTask(files, wfiles, analyses, watchdogAnalyses) return 0
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) analyses = [] watchdogAnalyses = [] if opts.time: analyses.append(TimeAnalysis()) if opts.size: analyses.append(SizeAnalysis(opts.sizeFile)) if opts.memory: watchdogAnalyses.append(MemoryAnalysis()) if opts.host: analyses.append(HostAnalysis()) if opts.watchdog: analyses.append(WatchdogAnalysis()) if len(analyses)+len(watchdogAnalyses) == 0: return 1 for task in taskDirs: files = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout")) files = excludeInclude(files, opts.include, opts.exclude) wfiles = [] if len(watchdogAnalyses) > 0: wfiles = glob.glob(os.path.join(task, "res", "Watchdog_*.log.gz")) wfiles = excludeInclude(wfiles, opts.include, opts.exclude) if opts.byStatus: try: jobs = multicrab.crabStatusToJobs(task, opts.printCrab) except Exception: if not opts.allowFails: raise print "%s: crab -status failed" % task continue print "Task %s" % task # Ignore running jobs for status in multicrabStatus.order_run: if status in jobs: del jobs[status] stats = jobs.keys() stats.sort() for status in stats: ids = ",".join(["%d"%j.id for j in jobs[status]]) f = excludeInclude(files, ids) wf = excludeInclude(wfiles, ids) print " %s, %d jobs" % (status, len(files)) analyseTask(f, wf, analyses, watchdogAnalyses, prefix=" ") else: print "Task %s, %d jobs" % (task, len(files)) analyseTask(files, wfiles, analyses, watchdogAnalyses) return 0
def readDatasetDirs(opts, crossSections, datasets):
    """Append one counter dataset per task directory to *datasets* (in place).

    Raises if opts.input matches zero or multiple files in a task's res/ dir.
    Fix: removed the unreachable 'return 1' after the raise.
    """
    taskdirs = multicrab.getTaskDirectories(opts)
    for d in taskdirs:
        files = glob.glob(os.path.join(d, "res", opts.input))
        if len(files) > 1:
            raise Exception("Only one file should match the input (%d matched) for task %s" % (len(files), d))
        elif len(files) == 0:
            raise Exception("No files matched to input for task %s" % d)
        datasets.append(counter.readDataset(files[0], opts.counterdir, d, crossSections))
def main(opts, args): workdir = os.getcwd() crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: print "Making export pack of %s"%d # Create the tar archive filename = "%s_export.tgz"%d taroptions = "w:gz" if opts.fullStatus: filename = "%s_export_unmerged.tar"%d taroptions = "w:" tar = tarfile.open(filename, mode="%s"%taroptions) # Check if everything is ok if not os.path.exists(d+"/lumi.json"): print "... Could not find lumi.json, if you wish to include it, run hplusLumiCalc.py" else: print " adding file %s/lumi.json"%d tar.add("%s/lumi.json"%d) if not os.path.exists(d+"/timeReport.txt"): print "... Could not find timeReport.txt, if you wish to include it run hplusMultiCrabAnalysis --time >! timeReport.txt" else: print " adding timeReport.txt" tar.add("%s/timeReport.txt"%d) if os.path.exists(d+"/codeDiff.txt"): tar.add("%s/codeDiff.txt"%d) if os.path.exists(d+"/codeStatus.txt"): tar.add("%s/codeStatus.txt"%d) if os.path.exists(d+"/codeVersion.txt"): tar.add("%s/codeVersion.txt"%d) for jobdir in glob.glob(d+"/*/job"): tar.add(jobdir) if opts.fullStatus: for f in ["%s/*py"%d, "%s/*cfg"%d, "%s/*/res/histograms_*root"%d]: list = glob.glob(f) for i in list: print " adding file ",i tar.add(i) else: for f in ["%s/*py"%d, "%s/*cfg"%d, "%s/*/res/histograms-*root"%d]: list = glob.glob(f) for i in list: print " adding file ",i tar.add(i) tar.close() print "Written file %s"%filename
def main(opts, args): taskDirs = multicrab.getTaskDirectories(opts) multicrab.checkCrabInPath() allJobs = [] for task in taskDirs: if not os.path.exists(task): print "%s: Task directory missing" % task continue jobs = multicrab.crabStatusToJobs(task) if not "Created" in jobs: print "%s: no 'Created' jobs to submit" % task continue allJobs.extend(filter(lambda j: isInRange(opts, j), jobs["Created"])) maxJobs = len(allJobs) if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs): maxJobs = opts.maxJobs njobsSubmitted = 0 while njobsSubmitted < maxJobs: njobsToSubmit = min(opts.jobs, maxJobs - njobsSubmitted, len(allJobs)) njobsSubmitted += njobsToSubmit jobsToSubmit = {} for n in xrange(0, njobsToSubmit): job = allJobs.pop(0) multicrab._addToDictList(jobsToSubmit, job.task, job.id) for task, jobs in jobsToSubmit.iteritems(): pretty = multicrab.prettyJobnums(jobs) command = ["crab", "-c", task, "-submit", pretty] + args print "Submitting %d jobs from task %s" % (len(jobs), task) print "Command", " ".join(command) if not opts.test: ret = subprocess.call(command) if ret != 0: message = "Command '%s' failed with exit code %d" % ( " ".join(command), ret) if opts.allowFails: print message else: raise Exception() if njobsSubmitted < maxJobs: print "Submitted, sleeping %f seconds" % opts.sleep time.sleep(opts.sleep) else: print "Submitted" return 0
def main(opts, args): taskDirs = multicrab.getTaskDirectories(opts) multicrab.checkCrabInPath() allJobs = [] for task in taskDirs: if not os.path.exists(task): print "%s: Task directory missing" % task continue jobs = multicrab.crabStatusToJobs(task) if not "Created" in jobs: print "%s: no 'Created' jobs to submit" % task continue allJobs.extend(filter(lambda j: isInRange(opts, j), jobs["Created"])) maxJobs = len(allJobs) if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs): maxJobs = opts.maxJobs njobsSubmitted = 0 while njobsSubmitted < maxJobs: njobsToSubmit = min(opts.jobs, maxJobs-njobsSubmitted, len(allJobs)) njobsSubmitted += njobsToSubmit jobsToSubmit = {} for n in xrange(0, njobsToSubmit): job = allJobs.pop(0) multicrab._addToDictList(jobsToSubmit, job.task, job.id) for task, jobs in jobsToSubmit.iteritems(): pretty = multicrab.prettyJobnums(jobs) command = ["crab", "-c", task, "-submit", pretty] + args print "Submitting %d jobs from task %s" % (len(jobs), task) print "Command", " ".join(command) if not opts.test: ret = subprocess.call(command) if ret != 0: message = "Command '%s' failed with exit code %d" % (" ".join(command), ret) if opts.allowFails: print message else: raise Exception() if njobsSubmitted < maxJobs: print "Submitted, sleeping %f seconds" % opts.sleep time.sleep(opts.sleep) else: print "Submitted" return 0
def main(opts, args): crabdirs = multicrab.getTaskDirectories(opts) global re_histo re_histo = re.compile("^output files:.*?(?P<file>%s)" % opts.input) mergedFiles = [] for d in crabdirs: d = d.replace("/", "") stdoutFiles = glob.glob(os.path.join(d, "res", "CMSSW_*.stdout")) files = [] for f in stdoutFiles: try: histoFile = getHistogramFile(f) if histoFile != None: files.append(os.path.join(os.path.dirname(f), histoFile)) else: print "Skipping task %s, job %s: input root file not found" % ( d, f) except multicrab.ExitCodeException, e: print "Skipping task %s, job %s: %s" % (d, f, str(e)) if len(files) == 0: print "Task %s, skipping, no files to merge" % d continue print "Task %s, merging %d file(s)" % (d, len(files)) mergeName = os.path.join(d, "res", opts.output % d) #cmd = "mergeTFileServiceHistograms -o %s -i %s" % ("histograms-"+d+".root", " ".join(files)) #print files #ret = subprocess.call(["mergeTFileServiceHistograms", # "-o", mergeName, # "-i"]+files) if os.path.exists(mergeName): shutil.move(mergeName, mergeName + ".backup") p = subprocess.Popen(["hadd", mergeName] + files, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print output print "Merging failed with exit code %d" % ret return 1 mergedFiles.append((mergeName, len(files)))
def main(opts, args): workdir = os.getcwd() crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: # Go to task directory (in order to get the paths in the archive correctly easily) os.chdir(d) # Remove default.tgz tmp = os.path.join("share", "default.tgz") if os.path.exists(tmp): os.remove(tmp) # Create the tar archive tar = tarfile.open("task.tar.gz", mode="w:gz") files = [] for f in ["CMSSW_*.std*", "crab_fjr_*.xml", "Submission_*"]: files.extend(glob.glob(os.path.join("res", f))) files.extend(["job", "log", "share"]) #print "\n".join(files) for f in files: tar.add(f) tar.close() # Keep share/crab.cfg files.remove("share") sharefiles = glob.glob(os.path.join("share", "*")) sharefiles = filter(lambda x: not "crab.cfg" in x, sharefiles) files.extend(sharefiles) # Delete the files just added to the archive for f in files: if os.path.isfile(f): #print "rm "+f os.remove(f) elif os.path.isdir(f): #print "rm -fR "+f shutil.rmtree(f) else: print "Not removing "+f print "Compacted", d os.chdir(workdir)
def main(opts, args): crabdirs = multicrab.getTaskDirectories(opts) global re_histo re_histo = re.compile("^output files:.*?(?P<file>%s)" % opts.input) mergedFiles = [] for d in crabdirs: d = d.replace("/", "") stdoutFiles = glob.glob(os.path.join(d, "res", "CMSSW_*.stdout")) files = [] for f in stdoutFiles: try: histoFile = getHistogramFile(f) if histoFile != None: files.append(os.path.join(os.path.dirname(f), histoFile)) else: print "Skipping task %s, job %s: input root file not found" % (d, f) except multicrab.ExitCodeException, e: print "Skipping task %s, job %s: %s" % (d, f, str(e)) if len(files) == 0: print "Task %s, skipping, no files to merge" % d continue print "Task %s, merging %d file(s)" % (d, len(files)) mergeName = os.path.join(d, "res", opts.output % d) #cmd = "mergeTFileServiceHistograms -o %s -i %s" % ("histograms-"+d+".root", " ".join(files)) #print files #ret = subprocess.call(["mergeTFileServiceHistograms", # "-o", mergeName, # "-i"]+files) if os.path.exists(mergeName): shutil.move(mergeName, mergeName+".backup") p = subprocess.Popen(["hadd", mergeName]+files, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print output print "Merging failed with exit code %d" % ret return 1 mergedFiles.append((mergeName, len(files)))
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) logfile = "publish_%s.log" % time.strftime("%y%m%d_%H%M%S") log = open(logfile, "w") for task in taskDirs: if not os.path.exists(task): print "Skipping task %s, directory doesnt exist" % task continue ret = publish(task, log) if ret != 0: print "Publish error (%d) with task %s, see %s for details" % (ret, task, logfile) log.close() return 1 if opts.report: ret = report(task, log) if ret != 0: print "Report error (%d) with task %s, see %s for details" % (ret, task, logfile) log.close() return 1 log.close() # See if publication is complete, report if not and possibly move if is log = open(logfile) tasks = {} for d in taskDirs: tasks[d] = printPublished.Task(d) printPublished.addInputPublishToTasks(tasks) printPublished.parseLog(logfile, tasks) for key, task in tasks.iteritems(): if task.jobs_still_to_publish > 0: print "%s publication not complete, not moving (published %d, failed %d, still_to_publish %d)" % (key, task.jobs_published, task.jobs_failed, task.jobs_still_to_publish) elif opts.move: shutil.move(key, key+"_published") log.close() return 0
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) multicrab.checkCrabInPath() for task in taskDirs: if not os.path.exists(task): print "%s: Task directory missing" % task continue jobsList = [] if opts.getoutput is None: jobs = multicrab.crabStatusToJobs(task, printCrab=False) for key in jobs.keys(): if "Done" in key: jobsList.extend([j.id for j in jobs[key]]) else: jobsList.extend(multicrab.prettyToJobList(opts.getoutput)) if len(jobsList) == 0: print "%s: no jobs to retrieve" % task continue # Getoutput loop maxJobs = len(jobsList) if opts.jobs > 0: maxJobs = opts.jobs for i in xrange(0, int(math.ceil(float(len(jobsList))/maxJobs))): jobsToGet = jobsList[i*maxJobs:(i+1)*maxJobs] jobsStr = ",".join([str(j) for j in jobsToGet]) command = ["crab", "-c", task, "-getoutput", jobsStr] print "Getting %d jobs from task %s" % (len(jobsToGet), task) print "Command", " ".join(command) ret = subprocess.call(command) if ret != 0: print "Command '%s' failed with exit code %s" % (" ".join(command), ret) if not opts.allowFails: return 1 return 0
def main(opts): if len(opts.dirs) == 0: print "No directories given" return 1 # (tmph, tmp) = tempfile.mkstemp(suffix=".patch") # print tmp # src = open("CMSSW_sh.patch") # dst = open(tmp, "w") # for line in src: # dst.write(line.replace("%%INPUT%%", opts.input)) # src.close() # dst.close() # os.remove(tmp) patch = "" src = open( os.path.join( os.environ["CMSSW_BASE"], "src/HiggsAnalysis/HeavyChHiggsToTauNu/test/tauEmbedding/CMSSW_sh.patch" )) for line in src: patch += line.replace("%%INPUT%%", opts.input) src.close() # print patch taskDirs = multicrab.getTaskDirectories(opts) for d in taskDirs: cmd = ["patch", "-p0", os.path.join(d, "job", "CMSSW.sh")] p = subprocess.Popen(cmd, stdin=subprocess.PIPE) p.communicate(patch) if p.returncode != 0: return p.returncode return 0
def main(opts):
    """Plot SignalSelectionFlowVsVertices histograms gathered from every
    crab task of the given multicrab directories.

    Fixes the "in ile" typo in the missing-histogram error message.
    """
    tdrstyle.TDRStyle()
    # loop over datasets
    histos = []
    labels = []
    for multicrabDir in opts.multicrabdir:
        crabDirs = multicrab.getTaskDirectories(None, os.path.join(multicrabDir, "multicrab.cfg"))
        for crabDir in crabDirs:
            taskName = os.path.split(crabDir)[1]
            rootFile = ROOT.TFile.Open(os.path.join(crabDir, "res", "histograms-%s.root" % taskName))
            if rootFile.IsZombie():
                raise Exception("Error: File 'histograms-%s.root' not found!" % taskName)
            # Get histogram; --variation selects an alternative directory.
            histoName = "signalAnalysis/SignalSelectionFlowVsVertices"
            if opts.variation is not None:
                histoName = "%s/SignalSelectionFlowVsVertices" % opts.variation[0]
            h = rootFile.Get(histoName)
            # Keep '== None': PyROOT's null proxy equals None but is not None.
            if h == None:
                raise Exception("Error: histogram '%s' not found in file 'histograms-%s.root'!" % (histoName, taskName))
            histos.append(h)
            labels.append(taskName)
    # We have the histograms and names, lets loop over the selection steps
    makePlots(histos, labels, False)
    makePlots(histos, labels, True)
def main(opts, args): workdir = os.getcwd() crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: # Run crab -report if opts.report: multicrab.checkCrabInPath() cmd = ["crab", "-report", "-c", d] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print "Call to 'crab -report -d %s' failed with return value %d" % ( d, ret) print output return 1 if os.path.exists(os.path.join(d, "task.tar.gz")): print "Skipping %s, task.tar.gz already exists" % d continue # Go to task directory (in order to get the paths in the archive correctly easily) os.chdir(d) # Remove default.tgz tmp = os.path.join("share", "default.tgz") if os.path.exists(tmp): os.remove(tmp) # Create the tar archive tar = tarfile.open("task.tar.gz", mode="w:gz") files = [] for f in [ "CMSSW_*.std*", "crab_fjr_*.xml", "Submission_*", "Watchdog_*.log*" ]: files.extend(glob.glob(os.path.join("res", f))) files.extend(["job", "log", "share"]) #print "\n".join(files) for f in files: if os.path.exists(f): tar.add(f) tar.close() # Keep share/crab.cfg files.remove("share") sharefiles = glob.glob(os.path.join("share", "*")) sharefiles = filter(lambda x: not "crab.cfg" in x, sharefiles) files.extend(sharefiles) # Delete the files just added to the archive for f in files: if os.path.isfile(f): #print "rm "+f try: os.remove(f) except OSError, e: print "Warning: failed to remove %s: %s" % (f, str(e)) elif os.path.isdir(f): #print "rm -fR "+f try: shutil.rmtree(f) except OSError, e: print "Warning: failed to remove %s: %s" % (f, str(e))
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) multicrab.checkCrabInPath() resubmitJobs = {} failedJobs = {} stats = {} allJobs = 0 if opts.byHost: global status_format status_format = status_format.replace("18s", "40s") if opts.save: out = open(opts.saveFile, "w") for task in taskDirs: if not os.path.exists(task): if opts.showMissing: print >> sys.stderr, "%s: Task directory missing" % task continue try: jobs = multicrab.crabStatusToJobs(task, opts.printCrab) except Exception: if not opts.allowFails: raise print "%s: crab -status failed" % task continue jobSummaries = {} njobs = 0 for key, item in jobs.iteritems(): hosts = {} for job in item: if job.host != None: aux.addToDictList(hosts, job.host, job) if opts.byHost: for host, joblist in hosts.iteritems(): jobSummaries[key + " " + host] = JobSummary( joblist, [host]) else: jobSummaries[key] = JobSummary(item, hosts) l = len(item) njobs += l allJobs += l if key in stats: stats[key] += l else: stats[key] = l # First the succesfully done line = "%s (%d jobs):" % (task, njobs) for s in order_done: if s in jobSummaries: line = formatSummaries(opts, line, s, jobSummaries[s]) del jobSummaries[s] # Then the aborted-submitted to the end of the line line_end = "" for s in order_run: if s in jobSummaries: line_end = formatSummaries(opts, line_end, s, jobSummaries[s]) del jobSummaries[s] # Then the failed ones to the middle keys = jobSummaries.keys() keys.sort() for key in keys: line = formatSummaries(opts, line, key, jobSummaries[key]) line += line_end if line[-1] == ",": line = line[0:-1] if opts.save: out.write(line) out.write("\n") print line # Infer the jobs to be resubmitted failed = [] for key, joblist in jobs.iteritems(): for job in joblist: if job.failed(opts.resubmit): failed.append((job.id, job.jobExitCode)) if len(failed) > 0: failed.sort() pretty = multicrab.prettyJobnums([x[0] for x in failed]) resubmitJobs[task] = pretty for jobId, jobCode in failed: aux.addToDictList(failedJobs, jobCode, 
"%s/res/CMSSW_%d.stdout" % (task, jobId)) summary = StringIO.StringIO() summary.write("----------------------------------------\n") print "Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs) for s in order_done: if s in stats: summary.write(status_format % (s + ":", stats[s])) summary.write("\n") del stats[s] b = [] for s in order_run: if s in stats: b.append(status_format % (s + ":", stats[s])) del stats[s] keys = stats.keys() keys.sort() for key in keys: summary.write(status_format % (key + ":", stats[key])) summary.write("\n") for line in b: summary.write(line) summary.write("\n") summary.write("----------------------------------------\n") if len(resubmitJobs) == 0: summary.write("No failed/aborted jobs to resubmit\n") else: summary.write( "Following jobs failed/aborted, and can be resubmitted\n\n") for task in taskDirs: if task in resubmitJobs: summary.write("crab -c %s -resubmit %s\n" % (task, resubmitJobs[task])) summary.write("\n") if opts.failedLogs: summary.write("----------------------------------------\n") summary.write("Log files of failed jobs\n") keys = failedJobs.keys() keys.sort() for code in keys: summary.write("\nJob exit code %d:\n" % code) summary.write("\n".join(failedJobs[code])) summary.write("\n") if opts.save: out.write(summary.getvalue()) out.close() print summary.getvalue() return 0
def main(opts, args): workdir = os.getcwd() crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: # Run crab -report if opts.report: multicrab.checkCrabInPath() cmd = ["crab", "-report", "-c", d] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print "Call to 'crab -report -d %s' failed with return value %d" % (d, ret) print output return 1 if os.path.exists(os.path.join(d, "task.tar.gz")): print "Skipping %s, task.tar.gz already exists" % d continue # Go to task directory (in order to get the paths in the archive correctly easily) os.chdir(d) # Remove default.tgz tmp = os.path.join("share", "default.tgz") if os.path.exists(tmp): os.remove(tmp) # Create the tar archive tar = tarfile.open("task.tar.gz", mode="w:gz") files = [] for f in ["CMSSW_*.std*", "crab_fjr_*.xml", "Submission_*", "Watchdog_*.log*"]: files.extend(glob.glob(os.path.join("res", f))) files.extend(["job", "log", "share"]) #print "\n".join(files) for f in files: if os.path.exists(f): tar.add(f) tar.close() # Keep share/crab.cfg files.remove("share") sharefiles = glob.glob(os.path.join("share", "*")) sharefiles = filter(lambda x: not "crab.cfg" in x, sharefiles) files.extend(sharefiles) # Delete the files just added to the archive for f in files: if os.path.isfile(f): #print "rm "+f try: os.remove(f) except OSError, e: print "Warning: failed to remove %s: %s" % (f, str(e)) elif os.path.isdir(f): #print "rm -fR "+f try: shutil.rmtree(f) except OSError, e: print "Warning: failed to remove %s: %s" % (f, str(e))
def main(opts, args): crabdirs = multicrab.getTaskDirectories(opts) global re_histos re_histos.append(re.compile("^output files:.*?(?P<file>%s)" % opts.input)) re_histos.append(re.compile("^\s+file\s+=\s+(?P<file>%s)" % opts.input)) mergedFiles = [] for d in crabdirs: d = d.replace("/", "") stdoutFiles = glob.glob(os.path.join(d, "res", "CMSSW_*.stdout")) files = [] for f in stdoutFiles: try: if opts.filesInSE: histoFile = getHistogramFileSE(f, opts) if histoFile != None: files.append(histoFile) else: print "Task %s, skipping job %s: input root file not found from stdout" % (d, f) else: histoFile = getHistogramFile(f, opts) if histoFile != None: path = os.path.join(os.path.dirname(f), histoFile) if os.path.exists(path): files.append(path) else: print "Task %s, skipping job %s: input root file found from stdout, but does not exist" % (d, f) else: print "Task %s, skipping job %s: input root file not found from stdout" % (d, f) except multicrab.ExitCodeException, e: print "Task %s, skipping job %s: %s" % (d, f, str(e)) if len(files) == 0: print "Task %s, skipping, no files to merge" % d continue for f in files: if not os.path.isfile(f): raise Exception("File %s is marked as output file in the CMSSW_N.stdout, but does not exist" % f) filesSplit = splitFiles(files, opts.filesPerMerge) if len(filesSplit) == 1: print "Task %s, merging %d file(s)" % (d, len(files)) else: print "Task %s, merging %d file(s) to %d files" % (d, len(files), len(filesSplit)) for index, inputFiles in filesSplit: tmp = d if len(filesSplit) > 1: tmp += "-%d" % index mergeName = os.path.join(d, "res", opts.output % tmp) if os.path.exists(mergeName) and not opts.test: if opts.verbose: print "mv %s %s" % (mergeName, mergeName+".backup") shutil.move(mergeName, mergeName+".backup") # FIXME: add here reading of first xrootd file, finding all TTrees, and writing the TList to mergeName file if opts.filesInSE: raise Exception("--filesInSE feature is not fully implemented") if len(inputFiles) == 1: if 
opts.verbose: print "cp %s %s" % (inputFiles[0], mergeName) if not opts.test: shutil.copy(inputFiles[0], mergeName) else: if opts.fast: ret = hplusHadd(opts, mergeName, inputFiles) if ret != 0: return ret else: ret = hadd(opts, mergeName, inputFiles) if ret != 0: return ret if len(filesSplit) > 1: print " done %d" % index mergedFiles.append((mergeName, inputFiles)) try: sanityCheck(mergeName, inputFiles) except SanityCheckException, e: print "Task %s: %s; disabling input file deletion" % (d, str(e)) opts.deleteImmediately = False opts.delete = False if opts.deleteImmediately: for srcFile in inputFiles: if opts.verbose: print "rm %s" % srcFile if not opts.test: os.remove(srcFile)
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) multicrab.checkCrabInPath() resubmitJobs = {} failedJobs = {} stats = {} allJobs = 0 for task in taskDirs: if not os.path.exists(task): if opts.showMissing: print "%s: Task directory missing" % task continue jobs = multicrab.crabStatusToJobs(task) jobSummaries = {} njobs = 0 for key, item in jobs.iteritems(): hosts = {} for job in item: if job.host != None: hosts[job.host] = 1 l = len(item) jobSummaries[key] = JobSummary(item, hosts) njobs += l allJobs += l if key in stats: stats[key] += l else: stats[key] = l # First the succesfully done line = "%s (%d jobs):" % (task, njobs) for s in order_done: if s in jobSummaries: line = formatSummaries(opts, line, s, jobSummaries[s]) del jobSummaries[s] # Then the aborted-submitted to the end of the line line_end = "" for s in order_run: if s in jobSummaries: line_end = formatSummaries(opts, line_end, s, jobSummaries[s]) del jobSummaries[s] # Then the failed ones to the middle keys = jobSummaries.keys() keys.sort() for key in keys: line = formatSummaries(opts, line, key, jobSummaries[key]) line += line_end if line[-1] == ",": line = line[0:-1] print line # Infer the jobs to be resubmitted failed = [] for key, joblist in jobs.iteritems(): for job in joblist: if job.failed(opts.resubmit): failed.append((job.id, job.jobExitCode)) if len(failed) > 0: failed.sort() pretty = multicrab.prettyJobnums([x[0] for x in failed]) resubmitJobs[task] = pretty for jobId, jobCode in failed: multicrab._addToDictList( failedJobs, jobCode, "%s/res/CMSSW_%d.stdout" % (task, jobId)) print "----------------------------------------" print "Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs) for s in order_done: if s in stats: print status_format % (s + ":", stats[s]) del stats[s] b = [] for s in order_run: if s in stats: b.append(status_format % (s + ":", stats[s])) del stats[s] keys = stats.keys() keys.sort() for key in keys: print status_format % (key + ":", stats[key]) 
for line in b: print line print "----------------------------------------" if len(resubmitJobs) == 0: print "No failed/aborted jobs to resubmit" else: print "Following jobs failed/aborted, and can be resubmitted" print for task in taskDirs: if task in resubmitJobs: print "crab -c %s -resubmit %s" % (task, resubmitJobs[task]) print if opts.failedLogs: print "----------------------------------------" print "Log files of failed jobs" keys = failedJobs.keys() keys.sort() for code in keys: print print "Job exit code %d:" % code print "\n".join(failedJobs[code]) return 0
def main(opts, args): if opts.report: multicrab.checkCrabInPath() cell = "\|\s+(?P<%s>\S+)\s+" lumi_re = re.compile((cell % "deliveredls") + (cell % "delivered") + (cell % "selectedls") + (cell % "recorded") + "\|") #lumi_re = re.compile("\|\s(?P<recorded>\S+)\s") unit_re = re.compile("Recorded\(/(?P<unit>.*)\)") if not opts.truncate and os.path.exists(opts.output): f = open(opts.output, "r") data = json.load(f) f.close() files = [] # only if no explicit files, or some directories explicitly given if len(opts.files) == 0 or len(opts.dirs) > 0: crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: if isMCTask(d): print " Ignoring task directory '%s', it looks like MC" % d continue if opts.report: cmd = ["crab", "-report", "-c", d] if opts.verbose: print " ".join(cmd) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print "Call to 'crab -report -d %s' failed with return value %d" % ( d, ret) print output return 1 if opts.verbose: print output files.append((d, os.path.join(d, "res", "lumiSummary.json"))) files.extend([(None, f) for f in opts.files]) data = {} for task, jsonfile in files: #print #print "================================================================================" #print "Dataset %s:" % d cmd = [ "lumiCalc2.py", "-i", jsonfile, "--nowarning", "overview", "-b", "stable" ] if opts.lumicalc1: cmd = [ "lumiCalc.py", "-i", jsonfile, "--with-correction", "--nowarning", "overview", "-b", "stable" ] #cmd = ["lumiCalc.py", "-c", "frontier://LumiCalc/CMS_LUMI_PROD", "-r", "132440", "--nowarning", "overview"] #ret = subprocess.call(cmd) if opts.verbose: print " ".join(cmd) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print "Call to lumiCalc.py failed with return value %d with command" % ret print " ".join(cmd) print output return 1 if opts.verbose: print output lines = 
output.split("\n") lines.reverse() lumi = -1.0 unit = None for line in lines: m = unit_re.search(line) if m: unit = m.group("unit") break m = lumi_re.search(line) if m: lumi = float(m.group("recorded")) # lumiCalc2.py returns pb^-1 # if opts.lumicalc1: # lumi = lumi/1e6 # ub^-1 -> pb^-1, lumiCalc.py returns ub^-1 continue if unit == None: raise Exception( "Didn't find unit information from lumiCalc output, command was %s" % " ".join(cmd)) lumi = convertLumi(lumi, unit) if task == None: print "File %s recorded luminosity %f pb^-1" % (jsonfile, lumi) else: print "Task %s recorded luminosity %f pb^-1" % (task, lumi) data[task] = lumi # Save the json file after each data task in case of future errors if len(data) > 0: f = open(opts.output, "wb") json.dump(data, f, sort_keys=True, indent=2) f.close() if len(data) > 0: f = open(opts.output, "wb") json.dump(data, f, sort_keys=True, indent=2) f.close() return 0
def main(opts):
    """Summarize multicrab job states per task and overall.

    Prints one line per task with job counts grouped by crab state,
    an aggregate summary over all tasks, ``crab -resubmit`` commands
    for failed/aborted jobs, and with --failedLogs the stdout log file
    paths of failed jobs grouped by exit code.

    Uses module-level names defined elsewhere in this file: JobSummary,
    formatSummaries, order_done, order_run, status_format.

    Returns 0.

    NOTE(review): whitespace was collapsed in the incoming file; the
    indentation below is a reconstruction — verify against history.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}   # task -> pretty-printed failed job numbers
    failedJobs = {}     # job exit code -> stdout log paths
    stats = {}          # crab state -> job count over all tasks
    allJobs = 0
    for task in taskDirs:
        if not os.path.exists(task):
            if opts.showMissing:
                print "%s: Task directory missing" % task
            continue

        jobs = multicrab.crabStatusToJobs(task)

        # Per-state summaries for this task; also track submission hosts
        jobSummaries = {}
        njobs = 0
        for key, item in jobs.iteritems():
            hosts = {}
            for job in item:
                if job.host != None:
                    hosts[job.host] = 1
            l = len(item)
            jobSummaries[key] = JobSummary(item, hosts)
            njobs += l
            allJobs += l
            if key in stats:
                stats[key] += l
            else:
                stats[key] = l

        # First the succesfully done
        line = "%s (%d jobs):" % (task, njobs)
        for s in order_done:
            if s in jobSummaries:
                line = formatSummaries(opts, line, s, jobSummaries[s])
                del jobSummaries[s]
        # Then the aborted-submitted to the end of the line
        line_end = ""
        for s in order_run:
            if s in jobSummaries:
                line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                del jobSummaries[s]
        # Then the failed ones to the middle
        keys = jobSummaries.keys()
        keys.sort()
        for key in keys:
            line = formatSummaries(opts, line, key, jobSummaries[key])
        line += line_end
        # drop the trailing comma left by formatSummaries
        if line[-1] == ",":
            line = line[0:-1]
        print line

        # Infer the jobs to be resubmitted
        failed = []
        for key, joblist in jobs.iteritems():
            for job in joblist:
                if job.failed(opts.resubmit):
                    failed.append( (job.id, job.jobExitCode) )
        if len(failed) > 0:
            failed.sort()
            pretty = multicrab.prettyJobnums([x[0] for x in failed])
            resubmitJobs[task] = pretty
            for jobId, jobCode in failed:
                multicrab._addToDictList(failedJobs, jobCode, "%s/res/CMSSW_%d.stdout" % (task, jobId))

    # Aggregate counts: done states, then failed (sorted), then the
    # running-type states buffered in b
    print "----------------------------------------"
    print "Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs)
    for s in order_done:
        if s in stats:
            print status_format % (s+":", stats[s])
            del stats[s]
    b = []
    for s in order_run:
        if s in stats:
            b.append(status_format % (s+":", stats[s]))
            del stats[s]
    keys = stats.keys()
    keys.sort()
    for key in keys:
        print status_format % (key+":", stats[key])
    for line in b:
        print line

    print "----------------------------------------"
    if len(resubmitJobs) == 0:
        print "No failed/aborted jobs to resubmit"
    else:
        print "Following jobs failed/aborted, and can be resubmitted"
        print
        for task in taskDirs:
            if task in resubmitJobs:
                print "crab -c %s -resubmit %s" % (task, resubmitJobs[task])
        print

    if opts.failedLogs:
        print "----------------------------------------"
        print "Log files of failed jobs"
        keys = failedJobs.keys()
        keys.sort()
        for code in keys:
            print
            print "Job exit code %d:" % code
            print "\n".join(failedJobs[code])

    return 0
def main(opts):
    """Submit (or resubmit) multicrab jobs in throttled batches.

    Collects jobs to submit — either 'Created' jobs (normal mode), all
    failed/aborted jobs (--resubmit failed|aborted), or an explicit job
    id list (--resubmit <ids>, single task only) — then submits them in
    blocks of opts.jobs via ``crab -(re)submit``, optionally round-robining
    over --toSites, appending --addSeBlackList to each task's existing
    GRID.se_black_list, retrying failed crab calls, and sleeping between
    blocks.

    Returns 0 on success, 1 on invalid option combination.

    NOTE(review): the incoming file had its whitespace collapsed; the
    nesting below (especially the failure-retry handling) is a
    reconstruction — confirm against history.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitMode = (len(opts.resubmit) > 0)
    # Anything other than the keywords is treated as an explicit id list
    resubmitIdListMode = (opts.resubmit not in ["failed", "aborted"])
    if resubmitMode and resubmitIdListMode and len(taskDirs) != 1:
        print "Option '--resubmit job_id_list' can be used with only one task, trying to use with %d tasks" % len(taskDirs)
        return 1
    if resubmitMode and resubmitIdListMode:
        resubmitJobList = multicrab.prettyToJobList(opts.resubmit)

    # Obtain all jobs to be (re)submitted
    allJobs = []
    seBlackLists = {}   # task -> GRID.se_black_list from crab.cfg (or None)
    for task in taskDirs:
        if not os.path.exists(task):
            print "%s: Task directory missing" % task
            continue

        # Read the task's existing SE black list from its crab.cfg
        cfgparser = ConfigParser.ConfigParser()
        cfgparser.read(os.path.join(task, "share", "crab.cfg"))
        if cfgparser.has_section("GRID"):
            availableOptions = cfgparser.options("GRID")
            blacklist = None
            for ao in availableOptions:
                if ao.lower() == "se_black_list":
                    blacklist = cfgparser.get("GRID", ao)
                    break
            # NOTE(review): tasks without a [GRID] section get no entry
            # here; seBlackLists[task] below would then raise KeyError
            # when --addSeBlackList is used — confirm intended
            seBlackLists[task] = blacklist

        jobs = multicrab.crabStatusToJobs(task, printCrab=False)
        if not resubmitMode:
            # normal submission
            if not "Created" in jobs:
                print "%s: no 'Created' jobs to submit" % task
                continue
            allJobs.extend(filter(lambda j: isInRange(opts, j), jobs["Created"]))
        elif not resubmitIdListMode:
            # resubmit all failed jobs
            status = "all"
            if opts.resubmit == "aborted":
                status = "aborted"
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.failed(status):
                        allJobs.append(job)
        else:
            # resubmit explicit list of jobs
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.id in resubmitJobList:
                        allJobs.append(job)
                        resubmitJobList.remove(job.id)

    # Set the number of maximum jobs to submit
    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs):
        maxJobs = opts.maxJobs
    submitCommand = "-submit"
    if len(opts.resubmit) > 0:
        submitCommand = "-resubmit"

    sites = []
    siteSubmitIndex = 0
    if len(opts.toSites) > 0:
        sites = opts.toSites.split(",")

    # Submission loop
    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        # Construct list of jobs per task to submit
        njobsToSubmit = min(opts.jobs, maxJobs-njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit
        jobsToSubmit = {}
        for n in xrange(0, njobsToSubmit):
            job = allJobs.pop(0)
            aux.addToDictList(jobsToSubmit, job.task, job.id)

        # If explicit list of sites to submit was given, get the site to submit this time
        crabOptions = []
        if len(sites) > 0:
            site = sites[siteSubmitIndex]
            siteSubmitIndex = (siteSubmitIndex+1) % len(sites)
            crabOptions.append("-GRID.se_black_list= -GRID.se_white_list="+site)

        # Actual submission
        for task, jobs in jobsToSubmit.iteritems():
            pretty = multicrab.prettyJobnums(jobs)
            if len(jobs) == 1:
                pretty += "," # CRAB thinks one number is number of jobs, the comma translates it to job ID
            command = ["crab", "-c", task, submitCommand, pretty]
            if opts.crabArgs != "":
                command.extend(opts.crabArgs.split(" "))
            if len(crabOptions) > 0:
                command.extend(crabOptions)
            if opts.addSeBlackList != "":
                lst = seBlackLists[task]
                if lst is None:
                    lst = opts.addSeBlackList
                else:
                    lst += ","+opts.addSeBlackList
                command.extend(["-GRID.se_black_list="+lst])
            print "Submitting %d jobs from task %s" % (len(jobs), task)
            print "Command", " ".join(command)
            if not opts.test:
                # Retry the crab call up to opts.tryAgainTimes times
                timesLeft = 1
                if opts.tryAgainTimes > 0:
                    timesLeft = opts.tryAgainTimes
                while timesLeft > 0:
                    ret = subprocess.call(command)
                    if ret == 0:
                        break
                    else:
                        timesLeft -= 1
                        message = "Command '%s' failed with exit code %d" % (" ".join(command), ret)
                        if opts.allowFails:
                            print message
                        if opts.tryAgainTimes > 0:
                            print message
                            if timesLeft > 0:
                                print "Trying again after %d seconds (%d trials left)" % (opts.tryAgainSeconds, timesLeft)
                                time.sleep(opts.tryAgainSeconds)
                            else:
                                print "No trials left, continuing with next job block"
                        else:
                            # NOTE(review): as reconstructed, a failure with
                            # --allowFails but no --tryAgainTimes still ends
                            # up here and raises — confirm intended
                            raise Exception()

        # Sleep between submissions
        if njobsSubmitted < maxJobs:
            print "Submitted, sleeping %f seconds" % opts.sleep
            time.sleep(opts.sleep)
        else:
            print "Submitted"

    return 0
def main(opts): taskDirs = multicrab.getTaskDirectories(opts) # Find task directories tasks = OrderedDict.OrderedDict() for d in taskDirs: if os.path.exists(d): tasks[d] = Task(d) elif os.path.exists(d + "_published"): print "%s: using directory %s_published" % (d, d) tasks[d] = Task(d + "_published") else: print "%s: directory not found, skipping" % d print # Find publish name from crab.cfg's addInputPublishToTasks(tasks) #print # Read publish.log files produced by hplusMultiCrabPublish.py publishLogs = glob.glob("publish_*.log") if len(publishLogs) == 0: print "Did not find any publish_*.log files, are you sure you've run hplusMultiCrabPublish?" return 1 publishLogs.sort() for logFile in publishLogs: parseLog(logFile, tasks) #print # Check if publication is complete taskNames = tasks.keys() taskNames.sort() for name in taskNames: task = tasks[name] still = task.jobs_still_to_publish if still is not None and still > 0: print "%s publication not complete (published %d, failed %d, still_to_publish %d)" % ( name, task.jobs_published, task.jobs_failed, still) del tasks[name] # Read time and size information timeAnalysis = multicrabAnalysis.TimeAnalysis() sizeAnalysis = multicrabAnalysis.SizeAnalysis(opts.sizeFile) analyses = [timeAnalysis, sizeAnalysis] print for key, task in tasks.iteritems(): # For this we don't care if the jobs succeeded or not outFiles = glob.glob( os.path.join(task.directory, "res", "CMSSW_*.stdout")) if len(outFiles) == 0: print "%s: 0 CMSSW_*.stdout files, something is badly wrong!" 
% key sys.exit(1) multicrabAnalysis.analyseFiles(outFiles, analyses) task.jobs = len(outFiles) task.time = timeAnalysis.userTime() task.size = sizeAnalysis.size() npublished = task.jobs_published if npublished is not None and npublished != task.jobs: print "%s publication nto complete (published %d of %d jobs)" % ( npublished, task.jobs) # Print out # print # for key, task in tasks.iteritems(): # print "# %s events, %d jobs" % (task.events, task.jobs) print "# %d jobs" % (task.jobs) print "# %s" % task.time print "# %s" % task.size print '"%s": TaskDef("%s", dbs="phys03"),' % (key, task.dbsPath) return 0
def main(opts):
    """Compute pile-up (and optionally top-pT) reweighted event counts.

    Builds nominal/up/down PU weight histograms as data/MC ratios of the
    "pileup" histograms in opts.dataPU (+ its "up"/"down" variants) and
    opts.mcPU, runs the PileupWeightSelector over each task's
    pileupNtuple/tree via dataset.NtupleCache (a top-pT-weighting variant
    for TTJets tasks when --doTopPt), and prints a Python fragment of
    WeightedAllEvents entries to paste into
    python/tools/pileupReweightedAllEvents.py.

    Exits silently (sys.exit()) if any input ROOT file fails to open.

    NOTE(review): whitespace was collapsed in the incoming file; the
    indentation here is a reconstruction — confirm against history.
    """
    # open PU histograms
    fmc = ROOT.TFile.Open(opts.mcPU)
    if fmc.IsZombie():
        sys.exit()
    hmcoriginal = fmc.Get("pileup")
    hmc = hmcoriginal.Clone("hmc")
    hmc.Scale(1.0 / hmc.Integral())   # normalize to unit area

    fdata = ROOT.TFile.Open(opts.dataPU)
    if fdata.IsZombie():
        sys.exit()
    hdataoriginal = fdata.Get("pileup")
    hdata = hdataoriginal.Clone("hdata")
    hdata.Scale(1.0 / hdata.Integral())

    # Up/down variations live in files derived from the nominal name
    fdataup = ROOT.TFile.Open(opts.dataPU.replace(".root","up.root"))
    if fdataup.IsZombie():
        sys.exit()
    hdatauporiginal = fdataup.Get("pileup")
    hdataup = hdatauporiginal.Clone("hdataup")
    hdataup.Scale(1.0 / hdataup.Integral())

    fdatadown = ROOT.TFile.Open(opts.dataPU.replace(".root","down.root"))
    if fdatadown.IsZombie():
        sys.exit()
    hdatadownoriginal = fdatadown.Get("pileup")
    hdatadown = hdatadownoriginal.Clone("hdatadown")
    hdatadown.Scale(1.0 / hdatadown.Integral())

    # PU weights = normalized data / normalized MC
    hweight = hdata.Clone()
    hweight.Divide(hmc)
    hweightUp = hdataup.Clone()
    hweightUp.Divide(hmc)
    hweightDown = hdatadown.Clone()
    hweightDown.Divide(hmc)

    ntupleCache = dataset.NtupleCache("pileupNtuple/tree", "PileupWeightSelector",
                                      selectorArgs=[hweight, hweightUp, hweightDown],
                                      )

    # Collect the top-pT weighting schemes into std::vectors for the selector
    topPtNames = ROOT.std.vector("string")()
    topPtFormulasAllHadr = ROOT.std.vector("string")()
    topPtFormulasSemiLep = ROOT.std.vector("string")()
    topPtFormulasDiLep = ROOT.std.vector("string")()
    # NOTE(review): debug print of the (still empty) vector — consider removing
    print topPtNames
    for name, scheme in topPtWeightSchemes.schemes.iteritems():
        topPtNames.push_back(name)
        topPtFormulasAllHadr.push_back(scheme.allhadronic)
        topPtFormulasSemiLep.push_back(scheme.leptonjets)
        topPtFormulasDiLep.push_back(scheme.dilepton)
    ntupleCacheTTJets = dataset.NtupleCache("pileupNtuple/tree", "PileupWeightSelector",
                                            selectorArgs=[hweight, hweightUp, hweightDown, topPtNames, topPtFormulasAllHadr, topPtFormulasSemiLep, topPtFormulasDiLep],
                                            cacheFileName="histogramCacheTTJets.root"
                                            )

    # loop over datasets
    myoutput = ""
    for multicrabDir in opts.multicrabdir:
        crabDirs = multicrab.getTaskDirectories(None, os.path.join(multicrabDir, "multicrab.cfg"))
        for crabDir in crabDirs:
            taskName = os.path.split(crabDir)[1]
            rootFile = ROOT.TFile.Open(os.path.join(crabDir, "res", "histograms-%s.root"%taskName))
            if rootFile.IsZombie():
                sys.exit()

            # Create Dataset wrapper
            dset = DatasetWrapper(taskName, rootFile, multicrabDir)

            # Get tree for non-weighted number of events
            mytree = dset.getTree("pileupNtuple/tree")
            if mytree == 0:
                raise Exception("Did not find 'pileupNtuple/tree' from %s" % rootFile.GetName())
            nevents = mytree.GetEntries()

            # TTJets tasks get the top-pT-aware cache when requested
            nc = ntupleCache
            topPtWeighting = opts.doTopPt and "TTJets" in taskName
            if topPtWeighting:
                nc = ntupleCacheTTJets

            # Process tree
            nc.process(dset)

            # Get results
            def getResult(histo):
                # bin 1 of the named cached histogram holds the (weighted) count
                return nc.getRootHisto(dset, histo, None).GetBinContent(1)
            nevt = getResult("events")
            nevtup = getResult("eventsUp")
            nevtdown = getResult("eventsDown")
            rootFile.Close()

            # Write output line
            if topPtWeighting:
                taskPrefix = " "+'"'+taskName+'"'+": WeightedAllEventsTopPt("
                myline = taskPrefix+"unweighted = WeightedAllEvents(unweighted=%d, "%nevents+"weighted=%f, "%nevt+"up=%f, "%nevtup+"down=%f),\n"%nevtdown
                for name in topPtWeightSchemes.schemes.iterkeys():
                    # Build one WeightedAllEvents(...) line for the given
                    # variation of this top-pT scheme
                    def construct(prefix, histoPostfix, postfix):
                        top_nevt = getResult("events_topPt%s_%s"%(histoPostfix, name))
                        top_nevtup = getResult("eventsUp_topPt%s_%s"%(histoPostfix, name))
                        top_nevtdown = getResult("eventsDown_topPt%s_%s"%(histoPostfix, name))
                        return prefix + "=WeightedAllEvents(unweighted=%d, weighted=%f, up=%f, down=%f)" % (nevents, top_nevt, top_nevtup, top_nevtdown) + postfix + "\n"
                    firstPrefix = " "*len(taskPrefix) + name + " = WeightedAllEventsTopPt.Weighted("
                    myline += construct(firstPrefix+"weighted", "", ",")
                    myline += construct(" "*len(firstPrefix)+"up", "Up", ",")
                    myline += construct(" "*len(firstPrefix)+"down", "Down", "),")
                myline += " "*len(taskPrefix)+"),\n"
            else:
                myline = " "+'"'+taskName+'"'+": WeightedAllEvents(unweighted=%d, "%nevents+"weighted=%f, "%nevt+"up=%f, "%nevtup+"down=%f),\n"%nevtdown
            #print "\n"+myline
            myoutput += myline

    # Wrap the per-task lines into a pasteable dictionary fragment
    myresult = "_weightedAllEvents = {\n"
    myresult += " "+'"'+"myera"+'"'+": {\n"
    myresult += myoutput
    myresult += " },\n"
    myresult += "}\n\n"
    print ""
    print myresult
    print "Copy the above fragment to python/tools/pileupReweightedAllEvents.py and replace 'myera' with appropriate label, e.g. 2011A\n"
    print "Result was obtained with PU histograms:"
    print " data:",opts.dataPU
    print " dataup:",opts.dataPU.replace(".root","up.root")
    print " datadown:",opts.dataPU.replace(".root","down.root")
    print " MC:",opts.mcPU
    print ""
def main(opts, args): if opts.report: multicrab.checkCrabInPath() cell = "\|\s+(?P<%s>\S+)\s+" lumi_re = re.compile((cell % "deliveredls")+ (cell % "delivered")+ (cell % "selectedls")+ (cell % "recorded")+"\|") #lumi_re = re.compile("\|\s(?P<recorded>\S+)\s") unit_re = re.compile("Recorded\(/(?P<unit>.*)\)") if not opts.truncate and os.path.exists(opts.output): f = open(opts.output, "r") data = json.load(f) f.close() files = [] # only if no explicit files, or some directories explicitly given if len(opts.files) == 0 or len(opts.dirs) > 0: crabdirs = multicrab.getTaskDirectories(opts) for d in crabdirs: if isMCTask(d): print " Ignoring task directory '%s', it looks like MC" % d continue if opts.report: cmd = ["crab", "-report", "-c", d] if opts.verbose: print " ".join(cmd) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print "Call to 'crab -report -d %s' failed with return value %d" % (d, ret) print output return 1 if opts.verbose: print output files.append((d, os.path.join(d, "res", "lumiSummary.json"))) files.extend([(None, f) for f in opts.files]) data = {} for task, jsonfile in files: #print #print "================================================================================" #print "Dataset %s:" % d cmd = ["lumiCalc2.py", "-i", jsonfile, "--nowarning", "overview", "-b", "stable"] if opts.lumicalc1: cmd = ["lumiCalc.py", "-i", jsonfile, "--with-correction", "--nowarning", "overview", "-b", "stable"] #cmd = ["lumiCalc.py", "-c", "frontier://LumiCalc/CMS_LUMI_PROD", "-r", "132440", "--nowarning", "overview"] #ret = subprocess.call(cmd) if opts.verbose: print " ".join(cmd) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0] ret = p.returncode if ret != 0: print "Call to lumiCalc.py failed with return value %d with command" % ret print " ".join(cmd) print output return 1 if opts.verbose: print output lines = 
output.split("\n") lines.reverse() lumi = -1.0 unit = None for line in lines: m = unit_re.search(line) if m: unit = m.group("unit") break m = lumi_re.search(line) if m: lumi = float(m.group("recorded")) # lumiCalc2.py returns pb^-1 # if opts.lumicalc1: # lumi = lumi/1e6 # ub^-1 -> pb^-1, lumiCalc.py returns ub^-1 continue if unit == None: raise Exception("Didn't find unit information from lumiCalc output, command was %s" % " ".join(cmd)) lumi = convertLumi(lumi, unit) if task == None: print "File %s recorded luminosity %f pb^-1" % (jsonfile, lumi) else: print "Task %s recorded luminosity %f pb^-1" % (task, lumi) data[task] = lumi # Save the json file after each data task in case of future errors if len(data) > 0: f = open(opts.output, "wb") json.dump(data, f, sort_keys=True, indent=2) f.close() if len(data) > 0: f = open(opts.output, "wb") json.dump(data, f, sort_keys=True, indent=2) f.close() return 0
def main(opts):
    """Submit (or resubmit) multicrab jobs in throttled batches.

    Gathers the jobs to submit — 'Created' jobs in normal mode, all
    failed/aborted jobs for --resubmit failed|aborted, or an explicit id
    list for --resubmit <ids> (single task only) — then issues
    ``crab -(re)submit`` in blocks of opts.jobs, optionally cycling
    through --toSites, extending each task's GRID.se_black_list with
    --addSeBlackList, retrying failed crab calls, and sleeping between
    blocks.

    Returns 0 on success, 1 on invalid option combination.

    NOTE(review): whitespace was collapsed in the incoming file; the
    nesting below (notably the failure-retry handling) is a
    reconstruction — confirm against history.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitMode = (len(opts.resubmit) > 0)
    # Any value other than the keywords is treated as an explicit id list
    resubmitIdListMode = (opts.resubmit not in ["failed", "aborted"])
    if resubmitMode and resubmitIdListMode and len(taskDirs) != 1:
        print "Option '--resubmit job_id_list' can be used with only one task, trying to use with %d tasks" % len(
            taskDirs)
        return 1
    if resubmitMode and resubmitIdListMode:
        resubmitJobList = multicrab.prettyToJobList(opts.resubmit)

    # Obtain all jobs to be (re)submitted
    allJobs = []
    seBlackLists = {}   # task -> GRID.se_black_list from crab.cfg (or None)
    for task in taskDirs:
        if not os.path.exists(task):
            print "%s: Task directory missing" % task
            continue

        # Read the task's existing SE black list from its crab.cfg
        cfgparser = ConfigParser.ConfigParser()
        cfgparser.read(os.path.join(task, "share", "crab.cfg"))
        if cfgparser.has_section("GRID"):
            availableOptions = cfgparser.options("GRID")
            blacklist = None
            for ao in availableOptions:
                if ao.lower() == "se_black_list":
                    blacklist = cfgparser.get("GRID", ao)
                    break
            # NOTE(review): tasks lacking a [GRID] section get no entry;
            # the seBlackLists[task] lookup below would then raise
            # KeyError when --addSeBlackList is used — confirm intended
            seBlackLists[task] = blacklist

        jobs = multicrab.crabStatusToJobs(task, printCrab=False)
        if not resubmitMode:
            # normal submission
            if not "Created" in jobs:
                print "%s: no 'Created' jobs to submit" % task
                continue
            allJobs.extend(
                filter(lambda j: isInRange(opts, j), jobs["Created"]))
        elif not resubmitIdListMode:
            # resubmit all failed jobs
            status = "all"
            if opts.resubmit == "aborted":
                status = "aborted"
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.failed(status):
                        allJobs.append(job)
        else:
            # resubmit explicit list of jobs
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.id in resubmitJobList:
                        allJobs.append(job)
                        resubmitJobList.remove(job.id)

    # Set the number of maximum jobs to submit
    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs):
        maxJobs = opts.maxJobs
    submitCommand = "-submit"
    if len(opts.resubmit) > 0:
        submitCommand = "-resubmit"

    sites = []
    siteSubmitIndex = 0
    if len(opts.toSites) > 0:
        sites = opts.toSites.split(",")

    # Submission loop
    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        # Construct list of jobs per task to submit
        njobsToSubmit = min(opts.jobs, maxJobs - njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit
        jobsToSubmit = {}
        for n in xrange(0, njobsToSubmit):
            job = allJobs.pop(0)
            aux.addToDictList(jobsToSubmit, job.task, job.id)

        # If explicit list of sites to submit was given, get the site to submit this time
        crabOptions = []
        if len(sites) > 0:
            site = sites[siteSubmitIndex]
            siteSubmitIndex = (siteSubmitIndex + 1) % len(sites)
            crabOptions.append("-GRID.se_black_list= -GRID.se_white_list=" + site)

        # Actual submission
        for task, jobs in jobsToSubmit.iteritems():
            pretty = multicrab.prettyJobnums(jobs)
            if len(jobs) == 1:
                pretty += "," # CRAB thinks one number is number of jobs, the comma translates it to job ID
            command = ["crab", "-c", task, submitCommand, pretty]
            if opts.crabArgs != "":
                command.extend(opts.crabArgs.split(" "))
            if len(crabOptions) > 0:
                command.extend(crabOptions)
            if opts.addSeBlackList != "":
                lst = seBlackLists[task]
                if lst is None:
                    lst = opts.addSeBlackList
                else:
                    lst += "," + opts.addSeBlackList
                command.extend(["-GRID.se_black_list=" + lst])
            print "Submitting %d jobs from task %s" % (len(jobs), task)
            print "Command", " ".join(command)
            if not opts.test:
                # Retry the crab call up to opts.tryAgainTimes times
                timesLeft = 1
                if opts.tryAgainTimes > 0:
                    timesLeft = opts.tryAgainTimes
                while timesLeft > 0:
                    ret = subprocess.call(command)
                    if ret == 0:
                        break
                    else:
                        timesLeft -= 1
                        message = "Command '%s' failed with exit code %d" % (
                            " ".join(command), ret)
                        if opts.allowFails:
                            print message
                        if opts.tryAgainTimes > 0:
                            print message
                            if timesLeft > 0:
                                print "Trying again after %d seconds (%d trials left)" % (
                                    opts.tryAgainSeconds, timesLeft)
                                time.sleep(opts.tryAgainSeconds)
                            else:
                                print "No trials left, continuing with next job block"
                        else:
                            # NOTE(review): as reconstructed, a failure with
                            # --allowFails but no --tryAgainTimes still
                            # reaches here and raises — confirm intended
                            raise Exception()

        # Sleep between submissions
        if njobsSubmitted < maxJobs:
            print "Submitted, sleeping %f seconds" % opts.sleep
            time.sleep(opts.sleep)
        else:
            print "Submitted"

    return 0