def main(opts):
    """Plot the selection-flow histograms of all tasks.

    For every directory in ``opts.multicrabdir`` the task directories are
    taken from its ``multicrab.cfg``.  From each task the file
    ``res/histograms-<task>.root`` is opened and the histogram
    ``SignalSelectionFlowVsVertices`` is read from the ``signalAnalysis``
    directory, or from the directory named by ``opts.variation[0]`` when a
    variation is given.  The histograms and the task names are handed to
    ``makePlots`` twice, with the last argument ``False`` and ``True``.

    Raises:
        Exception: if a ROOT file cannot be opened or does not contain the
            histogram.
    """
    tdrstyle.TDRStyle()
    # loop over datasets
    histos = []
    labels = []
    for multicrabDir in opts.multicrabdir:
        crabDirs = multicrab.getTaskDirectories(
            None, os.path.join(multicrabDir, "multicrab.cfg"))
        for crabDir in crabDirs:
            taskName = os.path.split(crabDir)[1]
            rootFile = ROOT.TFile.Open(
                os.path.join(crabDir, "res", "histograms-%s.root" % taskName))
            # TFile.Open() may hand back a null pointer instead of a zombie
            # object, so test the handle itself before calling a method on it
            if not rootFile or rootFile.IsZombie():
                raise Exception("Error: File 'histograms-%s.root' not found!" %
                                taskName)
            # Get histogram
            histoName = "signalAnalysis/SignalSelectionFlowVsVertices"
            if opts.variation is not None:
                histoName = "%s/SignalSelectionFlowVsVertices" % opts.variation[
                    0]
            h = rootFile.Get(histoName)
            # NOTE(review): '== None' is kept on purpose; PyROOT presumably
            # returns a null proxy here that is not the None singleton
            if h == None:
                raise Exception(
                    "Error: histogram '%s' not found in file 'histograms-%s.root'!"
                    % (histoName, taskName))
            histos.append(h)
            labels.append(taskName)
    # We have the histograms and names, lets loop over the selection steps
    makePlots(histos, labels, False)
    makePlots(histos, labels, True)
def main(opts):
    """Publish every task, optionally reporting and moving it afterwards.

    The output of ``publish`` and ``report`` goes to a log file named
    ``publish_<timestamp>.log`` in the current directory.  Task directories
    that do not exist are skipped.  With ``opts.report`` set, ``report`` is
    run after a successful publish; with ``opts.move`` set, the task
    directory is renamed to ``<task>_published``.

    Returns:
        0 when all tasks were processed, 1 as soon as ``publish`` or
        ``report`` returns a non-zero code.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    logfile = "publish_%s.log" % time.strftime("%y%m%d_%H%M%S")
    log = open(logfile, "w")

    # try/finally closes the log on every exit path, including exceptions
    # raised by publish(), report() or shutil.move()
    try:
        for task in taskDirs:
            if not os.path.exists(task):
                print("Skipping task %s, directory doesnt exist" % task)
                continue
            ret = publish(task, log)
            if ret != 0:
                print("Publish error (%d) with task %s, see %s for details" % (
                    ret, task, logfile))
                return 1

            if opts.report:
                ret = report(task, log)
                if ret != 0:
                    print("Report error (%d) with task %s, see %s for details" % (
                        ret, task, logfile))
                    return 1

            if opts.move:
                shutil.move(task, task + "_published")
    finally:
        log.close()

    return 0
Example #3
0
def main(opts):
    """Patch ``job/CMSSW.sh`` of every task with the tau embedding patch.

    The patch text is read from ``CMSSW_sh.patch`` under ``$CMSSW_BASE``
    with every ``%%INPUT%%`` marker replaced by ``opts.input`` and is fed to
    ``patch -p0`` on standard input, once per task directory.

    Returns:
        1 if ``opts.dirs`` is empty, the exit code of the first failing
        ``patch`` call, otherwise 0.
    """
    if len(opts.dirs) == 0:
        print("No directories given")
        return 1

    patchPath = os.path.join(os.environ["CMSSW_BASE"], "src/HiggsAnalysis/HeavyChHiggsToTauNu/test/tauEmbedding/CMSSW_sh.patch")
    template = open(patchPath)
    patch = template.read().replace("%%INPUT%%", opts.input)
    template.close()

    for taskDir in multicrab.getTaskDirectories(opts):
        target = os.path.join(taskDir, "job", "CMSSW.sh")
        proc = subprocess.Popen(["patch", "-p0", target], stdin=subprocess.PIPE)
        proc.communicate(patch)
        status = proc.returncode
        if status != 0:
            return status

    return 0
def main(opts):
    """Print execution, user CPU and system CPU time summaries per task.

    The ``res/CMSSW_*.stdout`` files of each task are scanned line by line
    for ``ExeTime=``, ``CrabUserCpuTime=`` and ``CrabSysCpuTime=`` values.
    Tasks without any stdout file are skipped.  The collected numbers are
    formatted with ``times`` and printed.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    exe_re = re.compile(r"ExeTime=(?P<time>\d+)")
    user_re = re.compile(r"CrabUserCpuTime=(?P<time>\d+(\.\d+)?)")
    sys_re = re.compile(r"CrabSysCpuTime=(?P<time>\d+(\.\d+)?)")

    for task in taskDirs:
        stdoutFiles = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout"))
        if len(stdoutFiles) == 0:
            continue

        # (pattern, values) pairs in matching priority order; the first
        # pattern that matches a line claims it
        collectors = [(exe_re, []), (user_re, []), (sys_re, [])]
        for path in stdoutFiles:
            handle = open(path)
            for line in handle:
                for pattern, values in collectors:
                    found = pattern.search(line)
                    if found:
                        values.append(float(found.group("time")))
                        break
            handle.close()

        print("Task %s, %d jobs" % (task, len(stdoutFiles)))
        print(times("Exe", collectors[0][1]))
        print(times("User", collectors[1][1]))
        print(times("Sys", collectors[2][1]))
def main(opts):
    """Summarise the job timing information found in each task.

    Every ``res/CMSSW_*.stdout`` file of a task is searched for the
    ``ExeTime``, ``CrabUserCpuTime`` and ``CrabSysCpuTime`` values; a line
    contributes to at most one of the three lists.  Tasks with no stdout
    files produce no output.  The result of ``times`` is printed for each
    list.
    """
    exe_re = re.compile(r"ExeTime=(?P<time>\d+)")
    user_re = re.compile(r"CrabUserCpuTime=(?P<time>\d+(\.\d+)?)")
    sys_re = re.compile(r"CrabSysCpuTime=(?P<time>\d+(\.\d+)?)")

    for task in multicrab.getTaskDirectories(opts):
        logs = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout"))
        njobs = len(logs)
        if njobs == 0:
            continue

        exe_times = []
        user_times = []
        sys_times = []
        for logName in logs:
            logFile = open(logName)
            for text in logFile:
                # Try the patterns in order and stop at the first hit
                hit = exe_re.search(text)
                if hit:
                    exe_times.append(float(hit.group("time")))
                else:
                    hit = user_re.search(text)
                    if hit:
                        user_times.append(float(hit.group("time")))
                    else:
                        hit = sys_re.search(text)
                        if hit:
                            sys_times.append(float(hit.group("time")))
            logFile.close()

        print("Task %s, %d jobs" % (task, njobs))
        for label, values in [("Exe", exe_times), ("User", user_times), ("Sys", sys_times)]:
            print(times(label, values))
def main(opts):
    """Run ``publish`` (and optionally ``report``) for every task.

    A log file ``publish_<timestamp>.log`` is created in the current
    directory and passed to ``publish`` and ``report``.  Missing task
    directories are skipped with a message.  When ``opts.move`` is set a
    successfully handled task directory is renamed to ``<task>_published``.

    Returns:
        1 on the first non-zero return value of ``publish`` or ``report``,
        otherwise 0.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    logfile = "publish_%s.log" % time.strftime("%y%m%d_%H%M%S")
    log = open(logfile, "w")

    # The finally clause replaces the per-branch log.close() calls and also
    # covers exceptions, which previously left the file open
    try:
        for task in taskDirs:
            if not os.path.exists(task):
                print("Skipping task %s, directory doesnt exist" % task)
                continue
            ret = publish(task, log)
            if ret != 0:
                print("Publish error (%d) with task %s, see %s for details" % (ret, task, logfile))
                return 1

            if opts.report:
                ret = report(task, log)
                if ret != 0:
                    print("Report error (%d) with task %s, see %s for details" % (ret, task, logfile))
                    return 1

            if opts.move:
                shutil.move(task, task + "_published")
    finally:
        log.close()

    return 0
Example #7
0
def main(opts, args):
    """Write an export tar archive for every task directory.

    Without ``opts.fullStatus`` the archive is the gzip-compressed
    ``<dir>_export.tgz`` and includes the ``histograms-*root`` files; with
    it the archive is the uncompressed ``<dir>_export_unmerged.tar`` and
    includes the ``histograms_*root`` files.  In both cases ``lumi.json``,
    ``timeReport.txt``, the ``code*.txt`` files, the ``job`` directories and
    the ``*py`` / ``*cfg`` files are added when they exist.

    ``args`` is accepted for interface compatibility and is not used.
    """
    crabdirs = multicrab.getTaskDirectories(opts)
    for d in crabdirs:
        print("Making export pack of %s" % d)

        # Archive name, compression and histogram file pattern all depend
        # on whether the unmerged output is requested
        if opts.fullStatus:
            filename = "%s_export_unmerged.tar" % d
            taroptions = "w:"
            histoPattern = "%s/*/res/histograms_*root" % d
        else:
            filename = "%s_export.tgz" % d
            taroptions = "w:gz"
            histoPattern = "%s/*/res/histograms-*root" % d

        # Create the tar archive; it is closed even if adding a file fails
        tar = tarfile.open(filename, mode=taroptions)
        try:
            # Check if everything is ok
            if not os.path.exists(d + "/lumi.json"):
                print("... Could not find lumi.json, if you wish to include it, run hplusLumiCalc.py")
            else:
                print("  adding file %s/lumi.json" % d)
                tar.add("%s/lumi.json" % d)
            if not os.path.exists(d + "/timeReport.txt"):
                print("... Could not find timeReport.txt, if you wish to include it run hplusMultiCrabAnalysis --time >! timeReport.txt")
            else:
                print(" adding timeReport.txt")
                tar.add("%s/timeReport.txt" % d)

            # Optional bookkeeping files are added silently
            for name in ["codeDiff.txt", "codeStatus.txt", "codeVersion.txt"]:
                path = "%s/%s" % (d, name)
                if os.path.exists(path):
                    tar.add(path)
            for jobdir in glob.glob(d + "/*/job"):
                tar.add(jobdir)

            for pattern in ["%s/*py" % d, "%s/*cfg" % d, histoPattern]:
                for path in glob.glob(pattern):
                    print("  adding file  %s" % path)
                    tar.add(path)
        finally:
            tar.close()
        print("Written file %s" % filename)
Example #8
0
def readDatasetDirs(opts, crossSections, datasets):
    """Read one dataset from each task directory and append it to ``datasets``.

    In every task directory exactly one file must match ``opts.input``
    inside ``res``.  That file is read with ``counter.readDataset`` using
    ``opts.counterdir``, the task directory and ``crossSections``.

    Raises:
        Exception: if no file or more than one file matches in a task.
    """
    for d in multicrab.getTaskDirectories(opts):
        files = glob.glob(os.path.join(d, "res", opts.input))
        # The unreachable 'return 1' that followed this raise was removed
        if len(files) > 1:
            raise Exception("Only one file should match the input (%d matched) for task %s" % (len(files), d))
        if len(files) == 0:
            raise Exception("No files matched to input for task %s" % d)

        datasets.append(counter.readDataset(files[0], opts.counterdir, d, crossSections))
Example #9
0
def main(opts):
    """Run the selected analyses over the job output files of every task.

    ``opts.time``, ``opts.size``, ``opts.host`` and ``opts.watchdog`` select
    analyses that receive the ``res/CMSSW_*.stdout`` files;
    ``opts.memory`` selects an analysis that receives the
    ``res/Watchdog_*.log.gz`` files.  Both file lists are filtered with
    ``opts.include`` / ``opts.exclude``.  With ``opts.byStatus`` the jobs
    are grouped by their crab status (statuses listed in
    ``multicrabStatus.order_run`` are ignored) and analysed per group.

    Returns:
        1 if no analysis was selected, otherwise 0.

    Raises:
        Exception: re-raised from ``multicrab.crabStatusToJobs`` unless
            ``opts.allowFails`` is set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    analyses = []
    watchdogAnalyses = []
    if opts.time:
        analyses.append(TimeAnalysis())
    if opts.size:
        analyses.append(SizeAnalysis(opts.sizeFile))
    if opts.memory:
        watchdogAnalyses.append(MemoryAnalysis())
    if opts.host:
        analyses.append(HostAnalysis())
    if opts.watchdog:
        analyses.append(WatchdogAnalysis())

    if len(analyses) + len(watchdogAnalyses) == 0:
        return 1

    for task in taskDirs:
        files = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout"))
        files = excludeInclude(files, opts.include, opts.exclude)
        wfiles = []
        if len(watchdogAnalyses) > 0:
            wfiles = glob.glob(os.path.join(task, "res", "Watchdog_*.log.gz"))
            wfiles = excludeInclude(wfiles, opts.include, opts.exclude)

        if not opts.byStatus:
            print("Task %s, %d jobs" % (task, len(files)))
            analyseTask(files, wfiles, analyses, watchdogAnalyses)
            continue

        try:
            jobs = multicrab.crabStatusToJobs(task, opts.printCrab)
        except Exception:
            if not opts.allowFails:
                raise
            print("%s: crab -status failed" % task)
            continue
        print("Task %s" % task)
        # Ignore running jobs
        for status in multicrabStatus.order_run:
            if status in jobs:
                del jobs[status]
        for status in sorted(jobs.keys()):
            ids = ",".join(["%d" % j.id for j in jobs[status]])
            f = excludeInclude(files, ids)
            wf = excludeInclude(wfiles, ids)
            # Report the files selected for this status; the original
            # printed the total of the whole task on every status line
            print(" %s, %d jobs" % (status, len(f)))
            analyseTask(f, wf, analyses, watchdogAnalyses, prefix=" ")

    return 0
def main(opts):
    """Analyse the job output of each task with the requested analyses.

    Analyses enabled by ``opts.time``, ``opts.size``, ``opts.host`` and
    ``opts.watchdog`` work on ``res/CMSSW_*.stdout``; the one enabled by
    ``opts.memory`` works on ``res/Watchdog_*.log.gz``.  The file lists are
    restricted with ``opts.include`` and ``opts.exclude``.  When
    ``opts.byStatus`` is set the analysis is done separately for each crab
    job status, leaving out the statuses in ``multicrabStatus.order_run``.

    Returns:
        1 if no analysis was requested, otherwise 0.

    Raises:
        Exception: from ``multicrab.crabStatusToJobs`` when
            ``opts.allowFails`` is not set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    analyses = []
    watchdogAnalyses = []
    if opts.time:
        analyses.append(TimeAnalysis())
    if opts.size:
        analyses.append(SizeAnalysis(opts.sizeFile))
    if opts.memory:
        watchdogAnalyses.append(MemoryAnalysis())
    if opts.host:
        analyses.append(HostAnalysis())
    if opts.watchdog:
        analyses.append(WatchdogAnalysis())

    if len(analyses) + len(watchdogAnalyses) == 0:
        return 1

    for task in taskDirs:
        files = glob.glob(os.path.join(task, "res", "CMSSW_*.stdout"))
        files = excludeInclude(files, opts.include, opts.exclude)
        wfiles = []
        if len(watchdogAnalyses) > 0:
            wfiles = glob.glob(os.path.join(task, "res", "Watchdog_*.log.gz"))
            wfiles = excludeInclude(wfiles, opts.include, opts.exclude)

        if opts.byStatus:
            try:
                jobs = multicrab.crabStatusToJobs(task, opts.printCrab)
            except Exception:
                if not opts.allowFails:
                    raise
                print("%s: crab -status failed" % task)
                continue
            print("Task %s" % task)
            # Ignore running jobs
            for status in multicrabStatus.order_run:
                if status in jobs:
                    del jobs[status]
            stats = sorted(jobs.keys())
            for status in stats:
                ids = ",".join(["%d" % j.id for j in jobs[status]])
                f = excludeInclude(files, ids)
                wf = excludeInclude(wfiles, ids)
                # len(f), not len(files): the count belongs to this status
                # only, not to the whole task
                print(" %s, %d jobs" % (status, len(f)))
                analyseTask(f, wf, analyses, watchdogAnalyses, prefix=" ")
        else:
            print("Task %s, %d jobs" % (task, len(files)))
            analyseTask(files, wfiles, analyses, watchdogAnalyses)

    return 0
Example #11
0
def readDatasetDirs(opts, crossSections, datasets):
    """Append the dataset of every task directory to ``datasets``.

    The glob ``opts.input`` is evaluated inside the ``res`` directory of
    each task and must match exactly one file, which is then read with
    ``counter.readDataset``.

    Raises:
        Exception: if the pattern matches no file or several files.
    """
    taskdirs = multicrab.getTaskDirectories(opts)
    for d in taskdirs:
        files = glob.glob(os.path.join(d, "res", opts.input))
        nfiles = len(files)
        # A dead 'return 1' used to follow the first raise; it could never
        # be executed and has been dropped
        if nfiles > 1:
            raise Exception(
                "Only one file should match the input (%d matched) for task %s"
                % (nfiles, d))
        elif nfiles == 0:
            raise Exception("No files matched to input for task %s" % d)

        datasets.append(
            counter.readDataset(files[0], opts.counterdir, d, crossSections))
def main(opts, args):
    """Create the export archive of every task directory.

    The default archive is ``<dir>_export.tgz`` (gzip) with the
    ``histograms-*root`` files; with ``opts.fullStatus`` it is the
    uncompressed ``<dir>_export_unmerged.tar`` with the
    ``histograms_*root`` files.  ``lumi.json``, ``timeReport.txt``,
    ``codeDiff.txt``, ``codeStatus.txt``, ``codeVersion.txt``, the ``job``
    directories and the ``*py`` / ``*cfg`` files are added when present.

    ``args`` is not used.
    """
    crabdirs = multicrab.getTaskDirectories(opts)
    for d in crabdirs:
        print("Making export pack of %s" % d)

        # Create the tar archive
        filename = "%s_export.tgz" % d
        taroptions = "w:gz"
        histoGlob = "%s/*/res/histograms-*root" % d
        if opts.fullStatus:
            filename = "%s_export_unmerged.tar" % d
            taroptions = "w:"
            histoGlob = "%s/*/res/histograms_*root" % d
        tar = tarfile.open(filename, mode=taroptions)

        # The archive is closed in the finally clause so that a failing
        # tar.add() does not leave the file handle open
        try:
            # Check if everything is ok
            if not os.path.exists(d + "/lumi.json"):
                print("... Could not find lumi.json, if you wish to include it, run hplusLumiCalc.py")
            else:
                print("  adding file %s/lumi.json" % d)
                tar.add("%s/lumi.json" % d)
            if not os.path.exists(d + "/timeReport.txt"):
                print("... Could not find timeReport.txt, if you wish to include it run hplusMultiCrabAnalysis --time >! timeReport.txt")
            else:
                print(" adding timeReport.txt")
                tar.add("%s/timeReport.txt" % d)

            for optional in ["codeDiff.txt", "codeStatus.txt", "codeVersion.txt"]:
                optionalPath = "%s/%s" % (d, optional)
                if os.path.exists(optionalPath):
                    tar.add(optionalPath)
            for jobdir in glob.glob(d + "/*/job"):
                tar.add(jobdir)

            # Only the histogram pattern differs between the two modes
            for f in ["%s/*py" % d, "%s/*cfg" % d, histoGlob]:
                for i in glob.glob(f):
                    print("  adding file  %s" % i)
                    tar.add(i)
        finally:
            tar.close()
        print("Written file %s" % filename)
Example #13
0
def main(opts, args):
    """Submit the 'Created' jobs of all tasks in batches.

    Jobs in status ``Created`` that pass ``isInRange`` are collected from
    every existing task directory.  At most ``opts.maxJobs`` of them (all
    if it is negative) are submitted, ``opts.jobs`` at a time, with
    ``crab -c <task> -submit <jobs>`` followed by the extra ``args``.
    Between batches the function sleeps ``opts.sleep`` seconds.  With
    ``opts.test`` the commands are only printed.

    Returns:
        0

    Raises:
        Exception: if a crab command fails and ``opts.allowFails`` is not
            set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    allJobs = []
    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        jobs = multicrab.crabStatusToJobs(task)
        if "Created" not in jobs:
            print("%s: no 'Created' jobs to submit" % task)
            continue
        allJobs.extend([j for j in jobs["Created"] if isInRange(opts, j)])

    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs):
        # Convert here as well, so the arithmetic below always sees an int
        maxJobs = int(opts.maxJobs)

    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        njobsToSubmit = min(opts.jobs, maxJobs - njobsSubmitted, len(allJobs))
        if njobsToSubmit <= 0:
            # A non-positive batch size would never advance njobsSubmitted
            # and the loop would spin forever
            print("Batch size %s allows no submission, stopping" % opts.jobs)
            break
        njobsSubmitted += njobsToSubmit

        # Take the next batch from the front of the queue, grouped by task
        batch = allJobs[:njobsToSubmit]
        del allJobs[:njobsToSubmit]
        jobsToSubmit = {}
        for job in batch:
            multicrab._addToDictList(jobsToSubmit, job.task, job.id)

        for task, jobs in jobsToSubmit.iteritems():
            pretty = multicrab.prettyJobnums(jobs)
            command = ["crab", "-c", task, "-submit", pretty] + args
            print("Submitting %d jobs from task %s" % (len(jobs), task))
            print("Command " + " ".join(command))
            if not opts.test:
                ret = subprocess.call(command)
                if ret != 0:
                    message = "Command '%s' failed with exit code %d" % (
                        " ".join(command), ret)
                    if opts.allowFails:
                        print(message)
                    else:
                        # Pass the message along; it used to be dropped
                        raise Exception(message)
        if njobsSubmitted < maxJobs:
            print("Submitted, sleeping %f seconds" % opts.sleep)
            time.sleep(opts.sleep)
        else:
            print("Submitted")

    return 0
def main(opts, args):
    """Submit jobs in status 'Created', a limited number at a time.

    All ``Created`` jobs accepted by ``isInRange`` are gathered from the
    existing task directories.  They are submitted in batches of
    ``opts.jobs`` with ``crab -c <task> -submit <jobs>`` plus ``args``,
    up to ``opts.maxJobs`` jobs in total when that value is non-negative.
    The function sleeps ``opts.sleep`` seconds between batches and only
    prints the commands when ``opts.test`` is set.

    Returns:
        0

    Raises:
        Exception: when a crab command returns non-zero and
            ``opts.allowFails`` is not set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    allJobs = []
    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        jobs = multicrab.crabStatusToJobs(task)
        if "Created" not in jobs:
            print("%s: no 'Created' jobs to submit" % task)
            continue
        allJobs.extend([j for j in jobs["Created"] if isInRange(opts, j)])

    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs):
        # Keep maxJobs an int even if the option value is not one
        maxJobs = int(opts.maxJobs)

    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        njobsToSubmit = min(opts.jobs, maxJobs - njobsSubmitted, len(allJobs))
        if njobsToSubmit <= 0:
            # Without this guard a batch size <= 0 loops forever
            print("Batch size %s allows no submission, stopping" % opts.jobs)
            break
        njobsSubmitted += njobsToSubmit
        jobsToSubmit = {}
        for n in xrange(0, njobsToSubmit):
            job = allJobs.pop(0)
            multicrab._addToDictList(jobsToSubmit, job.task, job.id)

        for task, jobs in jobsToSubmit.iteritems():
            pretty = multicrab.prettyJobnums(jobs)
            command = ["crab", "-c", task, "-submit", pretty] + args
            commandStr = " ".join(command)
            print("Submitting %d jobs from task %s" % (len(jobs), task))
            print("Command " + commandStr)
            if opts.test:
                continue
            ret = subprocess.call(command)
            if ret != 0:
                message = "Command '%s' failed with exit code %d" % (commandStr, ret)
                if not opts.allowFails:
                    # The exception now carries the failure description
                    raise Exception(message)
                print(message)
        if njobsSubmitted < maxJobs:
            print("Submitted, sleeping %f seconds" % opts.sleep)
            time.sleep(opts.sleep)
        else:
            print("Submitted")

    return 0
def main(opts, args):
    crabdirs = multicrab.getTaskDirectories(opts)

    global re_histo
    re_histo = re.compile("^output files:.*?(?P<file>%s)" % opts.input)

    mergedFiles = []
    for d in crabdirs:
        d = d.replace("/", "")
        stdoutFiles = glob.glob(os.path.join(d, "res", "CMSSW_*.stdout"))

        files = []
        for f in stdoutFiles:
            try:
                histoFile = getHistogramFile(f)
                if histoFile != None:
                    files.append(os.path.join(os.path.dirname(f), histoFile))
                else:
                    print "Skipping task %s, job %s: input root file not found" % (
                        d, f)
            except multicrab.ExitCodeException, e:
                print "Skipping task %s, job %s: %s" % (d, f, str(e))

        if len(files) == 0:
            print "Task %s, skipping, no files to merge" % d
            continue
        print "Task %s, merging %d file(s)" % (d, len(files))

        mergeName = os.path.join(d, "res", opts.output % d)
        #cmd = "mergeTFileServiceHistograms -o %s -i %s" % ("histograms-"+d+".root", " ".join(files))
        #print files
        #ret = subprocess.call(["mergeTFileServiceHistograms",
        #                       "-o", mergeName,
        #                       "-i"]+files)
        if os.path.exists(mergeName):
            shutil.move(mergeName, mergeName + ".backup")

        p = subprocess.Popen(["hadd", mergeName] + files,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        ret = p.returncode
        if ret != 0:
            print output
            print "Merging failed with exit code %d" % ret
            return 1
        mergedFiles.append((mergeName, len(files)))
Example #16
0
def main(opts, args):
    """Compact every task directory into ``task.tar.gz``.

    Inside each task directory ``share/default.tgz`` is removed, then the
    job stdout/stderr files, ``crab_fjr_*.xml`` and ``Submission_*`` in
    ``res`` and the ``job``, ``log`` and ``share`` directories are written
    to ``task.tar.gz``.  Afterwards the archived files are deleted, except
    the entries in ``share`` whose path contains ``crab.cfg``.

    ``args`` is not used.
    """
    workdir = os.getcwd()

    crabdirs = multicrab.getTaskDirectories(opts)
    for d in crabdirs:
        # Go to task directory (in order to get the paths in the archive correctly easily)
        os.chdir(d)
        # The original directory is restored even if something below fails,
        # so a later relative task path is not resolved from the wrong place
        try:
            # Remove default.tgz
            tmp = os.path.join("share", "default.tgz")
            if os.path.exists(tmp):
                os.remove(tmp)

            files = []
            for f in ["CMSSW_*.std*", "crab_fjr_*.xml", "Submission_*"]:
                files.extend(glob.glob(os.path.join("res", f)))

            files.extend(["job", "log", "share"])

            # Create the tar archive
            tar = tarfile.open("task.tar.gz", mode="w:gz")
            try:
                for f in files:
                    # job/log/share are listed unconditionally; skip the
                    # ones that are missing instead of failing in tar.add()
                    if os.path.exists(f):
                        tar.add(f)
            finally:
                tar.close()

            # Keep share/crab.cfg
            files.remove("share")
            sharefiles = glob.glob(os.path.join("share", "*"))
            files.extend([x for x in sharefiles if "crab.cfg" not in x])

            # Delete the files just added to the archive
            for f in files:
                if os.path.isfile(f):
                    os.remove(f)
                elif os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    print("Not removing " + f)
            print("Compacted %s" % d)
        finally:
            os.chdir(workdir)
def main(opts, args):
    crabdirs = multicrab.getTaskDirectories(opts)

    global re_histo
    re_histo = re.compile("^output files:.*?(?P<file>%s)" % opts.input)

    mergedFiles = []
    for d in crabdirs:
        d = d.replace("/", "")
        stdoutFiles = glob.glob(os.path.join(d, "res", "CMSSW_*.stdout"))

        files = []
        for f in stdoutFiles:
            try:
                histoFile = getHistogramFile(f)
                if histoFile != None:
                    files.append(os.path.join(os.path.dirname(f), histoFile))
                else:
                    print "Skipping task %s, job %s: input root file not found" % (d, f)
            except multicrab.ExitCodeException, e:
                print "Skipping task %s, job %s: %s" % (d, f, str(e))
            
        if len(files) == 0:
            print "Task %s, skipping, no files to merge" % d
            continue
        print "Task %s, merging %d file(s)" % (d, len(files))

        mergeName = os.path.join(d, "res", opts.output % d)
        #cmd = "mergeTFileServiceHistograms -o %s -i %s" % ("histograms-"+d+".root", " ".join(files))
        #print files
        #ret = subprocess.call(["mergeTFileServiceHistograms",
        #                       "-o", mergeName,
        #                       "-i"]+files)
        if os.path.exists(mergeName):
            shutil.move(mergeName, mergeName+".backup")

        p = subprocess.Popen(["hadd", mergeName]+files, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        ret = p.returncode
        if ret != 0:
            print output
            print "Merging failed with exit code %d" % ret
            return 1
        mergedFiles.append((mergeName, len(files)))
Example #18
0
def main(opts):
    """Publish all tasks and move the completely published ones.

    ``publish`` (and ``report`` with ``opts.report``) is run for every
    existing task directory, logging to ``publish_<timestamp>.log``.  The
    log is then parsed with ``printPublished.parseLog``; a task that still
    has jobs to publish is reported, any other task is renamed to
    ``<task>_published`` when ``opts.move`` is set.

    Returns:
        1 as soon as ``publish`` or ``report`` returns a non-zero code,
        otherwise 0.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    logfile = "publish_%s.log" % time.strftime("%y%m%d_%H%M%S")
    log = open(logfile, "w")

    # The log is closed on every exit path, including exceptions
    try:
        for task in taskDirs:
            if not os.path.exists(task):
                print("Skipping task %s, directory doesnt exist" % task)
                continue
            ret = publish(task, log)
            if ret != 0:
                print("Publish error (%d) with task %s, see %s for details" % (ret, task, logfile))
                return 1

            if opts.report:
                ret = report(task, log)
                if ret != 0:
                    print("Report error (%d) with task %s, see %s for details" % (ret, task, logfile))
                    return 1
    finally:
        log.close()

    # See if publication is complete, report if not and possibly move if is.
    # parseLog() is given the log file name, so the log is not reopened here
    # (the original opened a second handle that was never read).
    tasks = {}
    for d in taskDirs:
        tasks[d] = printPublished.Task(d)

    printPublished.addInputPublishToTasks(tasks)
    printPublished.parseLog(logfile, tasks)

    for key, task in tasks.iteritems():
        if task.jobs_still_to_publish > 0:
            print("%s publication not complete, not moving (published %d, failed %d, still_to_publish %d)" % (key, task.jobs_published, task.jobs_failed, task.jobs_still_to_publish))
        elif opts.move:
            shutil.move(key, key + "_published")

    return 0
Example #19
0
def main(opts):
    """Retrieve the output of finished jobs with ``crab -getoutput``.

    Unless ``opts.getoutput`` lists the jobs explicitly, the jobs whose
    crab status contains ``Done`` are retrieved.  When ``opts.jobs`` is
    positive the jobs are fetched in groups of at most that many,
    otherwise all jobs of a task are fetched with one command.

    Returns:
        1 if a crab command fails and ``opts.allowFails`` is not set,
        otherwise 0.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        jobsList = []
        if opts.getoutput is not None:
            jobsList.extend(multicrab.prettyToJobList(opts.getoutput))
        else:
            jobs = multicrab.crabStatusToJobs(task, printCrab=False)
            for key in jobs.keys():
                if "Done" in key:
                    for j in jobs[key]:
                        jobsList.append(j.id)
        njobs = len(jobsList)
        if njobs == 0:
            print("%s: no jobs to retrieve" % task)
            continue

        # Getoutput loop: walk through the job list in slices of chunkSize
        chunkSize = njobs
        if opts.jobs > 0:
            chunkSize = opts.jobs

        for start in xrange(0, njobs, chunkSize):
            jobsToGet = jobsList[start:start + chunkSize]
            jobsStr = ",".join([str(j) for j in jobsToGet])
            command = ["crab", "-c", task, "-getoutput", jobsStr]
            commandStr = " ".join(command)
            print("Getting %d jobs from task %s" % (len(jobsToGet), task))
            print("Command " + commandStr)
            ret = subprocess.call(command)
            if ret == 0:
                continue
            print("Command '%s' failed with exit code %s" % (commandStr, ret))
            if not opts.allowFails:
                return 1

    return 0
def main(opts):
    """Apply ``CMSSW_sh.patch`` to the ``job/CMSSW.sh`` of every task.

    The patch file is read from the tau embedding test directory under
    ``$CMSSW_BASE``; each ``%%INPUT%%`` in it is replaced by ``opts.input``
    before the text is piped into ``patch -p0``.

    Returns:
        1 if ``opts.dirs`` is empty, the first non-zero exit code of
        ``patch``, otherwise 0.
    """
    if len(opts.dirs) == 0:
        print("No directories given")
        return 1

    src = open(
        os.path.join(
            os.environ["CMSSW_BASE"],
            "src/HiggsAnalysis/HeavyChHiggsToTauNu/test/tauEmbedding/CMSSW_sh.patch"
        ))
    # Substitute the placeholder line by line and glue the result together
    patch = "".join([line.replace("%%INPUT%%", opts.input) for line in src])
    src.close()

    taskDirs = multicrab.getTaskDirectories(opts)
    for d in taskDirs:
        script = os.path.join(d, "job", "CMSSW.sh")
        p = subprocess.Popen(["patch", "-p0", script], stdin=subprocess.PIPE)
        p.communicate(patch)
        if p.returncode != 0:
            return p.returncode

    return 0
def main(opts):
    """Read the selection-flow histogram of each task and make the plots.

    The task directories come from the ``multicrab.cfg`` of every entry in
    ``opts.multicrabdir``.  For each task ``res/histograms-<task>.root`` is
    opened and ``SignalSelectionFlowVsVertices`` is read from the
    ``signalAnalysis`` directory, or from ``opts.variation[0]`` if a
    variation is given.  ``makePlots`` is then called with the histograms
    and task names, once with ``False`` and once with ``True``.

    Raises:
        Exception: if a ROOT file cannot be opened or the histogram is
            missing.
    """
    tdrstyle.TDRStyle()
    # loop over datasets
    histos = []
    labels = []
    for multicrabDir in opts.multicrabdir:
        crabDirs = multicrab.getTaskDirectories(None, os.path.join(multicrabDir, "multicrab.cfg"))
        for crabDir in crabDirs:
            taskName = os.path.split(crabDir)[1]
            rootFile = ROOT.TFile.Open(os.path.join(crabDir, "res", "histograms-%s.root" % taskName))
            # A failed TFile.Open() may yield a null pointer rather than a
            # zombie file, so check the handle before calling IsZombie()
            if not rootFile or rootFile.IsZombie():
                raise Exception("Error: File 'histograms-%s.root' not found!" % taskName)
            # Get histogram
            histoName = "signalAnalysis/SignalSelectionFlowVsVertices"
            if opts.variation is not None:
                histoName = "%s/SignalSelectionFlowVsVertices" % opts.variation[0]
            h = rootFile.Get(histoName)
            # NOTE(review): '== None' kept deliberately; a missing object is
            # presumably a PyROOT null proxy, not the None singleton
            if h == None:
                raise Exception("Error: histogram '%s' not found in file 'histograms-%s.root'!" % (histoName, taskName))
            histos.append(h)
            labels.append(taskName)
    # We have the histograms and names, lets loop over the selection steps
    makePlots(histos, labels, False)
    makePlots(histos, labels, True)
def main(opts, args):
    workdir = os.getcwd()

    crabdirs = multicrab.getTaskDirectories(opts)
    for d in crabdirs:
        # Run crab -report
        if opts.report:
            multicrab.checkCrabInPath()
            cmd = ["crab", "-report", "-c", d]
            p = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
            output = p.communicate()[0]
            ret = p.returncode
            if ret != 0:
                print "Call to 'crab -report -d %s' failed with return value %d" % (
                    d, ret)
                print output
                return 1

        if os.path.exists(os.path.join(d, "task.tar.gz")):
            print "Skipping %s, task.tar.gz already exists" % d
            continue

        # Go to task directory (in order to get the paths in the archive correctly easily)
        os.chdir(d)

        # Remove default.tgz
        tmp = os.path.join("share", "default.tgz")
        if os.path.exists(tmp):
            os.remove(tmp)

        # Create the tar archive
        tar = tarfile.open("task.tar.gz", mode="w:gz")
        files = []
        for f in [
                "CMSSW_*.std*", "crab_fjr_*.xml", "Submission_*",
                "Watchdog_*.log*"
        ]:
            files.extend(glob.glob(os.path.join("res", f)))

        files.extend(["job", "log", "share"])

        #print "\n".join(files)

        for f in files:
            if os.path.exists(f):
                tar.add(f)
        tar.close()

        # Keep share/crab.cfg
        files.remove("share")
        sharefiles = glob.glob(os.path.join("share", "*"))
        sharefiles = filter(lambda x: not "crab.cfg" in x, sharefiles)
        files.extend(sharefiles)

        # Delete the files just added to the archive
        for f in files:
            if os.path.isfile(f):
                #print "rm "+f
                try:
                    os.remove(f)
                except OSError, e:
                    print "Warning: failed to remove %s: %s" % (f, str(e))
            elif os.path.isdir(f):
                #print "rm -fR "+f
                try:
                    shutil.rmtree(f)
                except OSError, e:
                    print "Warning: failed to remove %s: %s" % (f, str(e))
Example #23
0
def main(opts):
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}
    failedJobs = {}
    stats = {}
    allJobs = 0

    if opts.byHost:
        global status_format
        status_format = status_format.replace("18s", "40s")

    if opts.save:
        out = open(opts.saveFile, "w")

    for task in taskDirs:
        if not os.path.exists(task):
            if opts.showMissing:
                print >> sys.stderr, "%s: Task directory missing" % task
            continue

        try:
            jobs = multicrab.crabStatusToJobs(task, opts.printCrab)
        except Exception:
            if not opts.allowFails:
                raise
            print "%s: crab -status failed" % task
            continue

        jobSummaries = {}
        njobs = 0
        for key, item in jobs.iteritems():
            hosts = {}
            for job in item:
                if job.host != None:
                    aux.addToDictList(hosts, job.host, job)
            if opts.byHost:
                for host, joblist in hosts.iteritems():
                    jobSummaries[key + " " + host] = JobSummary(
                        joblist, [host])
            else:
                jobSummaries[key] = JobSummary(item, hosts)
            l = len(item)
            njobs += l
            allJobs += l
            if key in stats:
                stats[key] += l
            else:
                stats[key] = l

        # First the succesfully done
        line = "%s (%d jobs):" % (task, njobs)
        for s in order_done:
            if s in jobSummaries:
                line = formatSummaries(opts, line, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the aborted-submitted to the end of the line
        line_end = ""
        for s in order_run:
            if s in jobSummaries:
                line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the failed ones to the middle
        keys = jobSummaries.keys()
        keys.sort()
        for key in keys:
            line = formatSummaries(opts, line, key, jobSummaries[key])
        line += line_end
        if line[-1] == ",":
            line = line[0:-1]

        if opts.save:
            out.write(line)
            out.write("\n")
        print line

        # Infer the jobs to be resubmitted
        failed = []
        for key, joblist in jobs.iteritems():
            for job in joblist:
                if job.failed(opts.resubmit):
                    failed.append((job.id, job.jobExitCode))
        if len(failed) > 0:
            failed.sort()
            pretty = multicrab.prettyJobnums([x[0] for x in failed])
            resubmitJobs[task] = pretty
            for jobId, jobCode in failed:
                aux.addToDictList(failedJobs, jobCode,
                                  "%s/res/CMSSW_%d.stdout" % (task, jobId))

    summary = StringIO.StringIO()

    summary.write("----------------------------------------\n")
    print "Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs)
    for s in order_done:
        if s in stats:
            summary.write(status_format % (s + ":", stats[s]))
            summary.write("\n")
            del stats[s]
    b = []
    for s in order_run:
        if s in stats:
            b.append(status_format % (s + ":", stats[s]))
            del stats[s]
    keys = stats.keys()
    keys.sort()
    for key in keys:
        summary.write(status_format % (key + ":", stats[key]))
        summary.write("\n")
    for line in b:
        summary.write(line)
        summary.write("\n")

    summary.write("----------------------------------------\n")
    if len(resubmitJobs) == 0:
        summary.write("No failed/aborted jobs to resubmit\n")
    else:
        summary.write(
            "Following jobs failed/aborted, and can be resubmitted\n\n")
        for task in taskDirs:
            if task in resubmitJobs:
                summary.write("crab -c %s -resubmit %s\n" %
                              (task, resubmitJobs[task]))
        summary.write("\n")

    if opts.failedLogs:
        summary.write("----------------------------------------\n")
        summary.write("Log files of failed jobs\n")
        keys = failedJobs.keys()
        keys.sort()
        for code in keys:
            summary.write("\nJob exit code %d:\n" % code)
            summary.write("\n".join(failedJobs[code]))
            summary.write("\n")

    if opts.save:
        out.write(summary.getvalue())
        out.close()
    print summary.getvalue()

    return 0
def main(opts, args):
    workdir = os.getcwd()

    crabdirs = multicrab.getTaskDirectories(opts)
    for d in crabdirs:
        # Run crab -report
        if opts.report:
            multicrab.checkCrabInPath()
            cmd = ["crab", "-report", "-c", d]
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            output = p.communicate()[0]
            ret = p.returncode
            if ret != 0:
                print "Call to 'crab -report -d %s' failed with return value %d" % (d, ret)
                print output
                return 1

        if os.path.exists(os.path.join(d, "task.tar.gz")):
            print "Skipping %s, task.tar.gz already exists" % d
            continue

        # Go to task directory (in order to get the paths in the archive correctly easily)
        os.chdir(d)

        # Remove default.tgz
        tmp = os.path.join("share", "default.tgz")
        if os.path.exists(tmp):
            os.remove(tmp)

        # Create the tar archive
        tar = tarfile.open("task.tar.gz", mode="w:gz")
        files = []
        for f in ["CMSSW_*.std*", "crab_fjr_*.xml", "Submission_*", "Watchdog_*.log*"]:
            files.extend(glob.glob(os.path.join("res", f)))
        
        files.extend(["job", "log", "share"])

        #print "\n".join(files)

        for f in files:
            if os.path.exists(f):
                tar.add(f)
        tar.close()

        # Keep share/crab.cfg
        files.remove("share")
        sharefiles = glob.glob(os.path.join("share", "*"))
        sharefiles = filter(lambda x: not "crab.cfg" in x, sharefiles)
        files.extend(sharefiles)

        # Delete the files just added to the archive
        for f in files:
            if os.path.isfile(f):
                #print "rm "+f
                try:
                    os.remove(f)
                except OSError, e:
                    print "Warning: failed to remove %s: %s" % (f, str(e))
            elif os.path.isdir(f):
                #print "rm -fR "+f
                try:
                    shutil.rmtree(f)
                except OSError, e:
                    print "Warning: failed to remove %s: %s" % (f, str(e))
def main(opts, args):
    crabdirs = multicrab.getTaskDirectories(opts)

    global re_histos
    re_histos.append(re.compile("^output files:.*?(?P<file>%s)" % opts.input))
    re_histos.append(re.compile("^\s+file\s+=\s+(?P<file>%s)" % opts.input))

    mergedFiles = []
    for d in crabdirs:
        d = d.replace("/", "")
        stdoutFiles = glob.glob(os.path.join(d, "res", "CMSSW_*.stdout"))

        files = []
        for f in stdoutFiles:
            try:
                if opts.filesInSE:
                    histoFile = getHistogramFileSE(f, opts)
                    if histoFile != None:
                        files.append(histoFile)
                    else:
                        print "Task %s, skipping job %s: input root file not found from stdout" % (d, f)
                else:
                    histoFile = getHistogramFile(f, opts)
                    if histoFile != None:
                        path = os.path.join(os.path.dirname(f), histoFile)
                        if os.path.exists(path):
                            files.append(path)
                        else:
                            print "Task %s, skipping job %s: input root file found from stdout, but does not exist" % (d, f)
                    else:
                        print "Task %s, skipping job %s: input root file not found from stdout" % (d, f)
            except multicrab.ExitCodeException, e:
                print "Task %s, skipping job %s: %s" % (d, f, str(e))

        if len(files) == 0:
            print "Task %s, skipping, no files to merge" % d
            continue
        for f in files:
            if not os.path.isfile(f):
                raise Exception("File %s is marked as output file in the  CMSSW_N.stdout, but does not exist" % f)

        filesSplit = splitFiles(files, opts.filesPerMerge)
        if len(filesSplit) == 1:
            print "Task %s, merging %d file(s)" % (d, len(files))
        else:
            print "Task %s, merging %d file(s) to %d files" % (d, len(files), len(filesSplit))

        for index, inputFiles in filesSplit:
            tmp = d
            if len(filesSplit) > 1:
                tmp += "-%d" % index
            mergeName = os.path.join(d, "res", opts.output % tmp)
            if os.path.exists(mergeName) and not opts.test:
                if opts.verbose:
                    print "mv %s %s" % (mergeName, mergeName+".backup")
                shutil.move(mergeName, mergeName+".backup")

            # FIXME: add here reading of first xrootd file, finding all TTrees, and writing the TList to mergeName file
            if opts.filesInSE:
                raise Exception("--filesInSE feature is not fully implemented")

            if len(inputFiles) == 1:
                if opts.verbose:
                    print "cp %s %s" % (inputFiles[0], mergeName)
                if not opts.test:
                    shutil.copy(inputFiles[0], mergeName)
                
            else:
                if opts.fast:
                    ret = hplusHadd(opts, mergeName, inputFiles)
                    if ret != 0:
                        return ret
                else:
                    ret = hadd(opts, mergeName, inputFiles)
                    if ret != 0:
                        return ret
    
            if len(filesSplit) > 1:
                print "  done %d" % index
            mergedFiles.append((mergeName, inputFiles))
            try:
                sanityCheck(mergeName, inputFiles)
            except SanityCheckException, e:
                print "Task %s: %s; disabling input file deletion" % (d, str(e))
                opts.deleteImmediately = False
                opts.delete = False
            if opts.deleteImmediately:
                for srcFile in inputFiles:
                    if opts.verbose:
                        print "rm %s" % srcFile
                    if not opts.test:
                        os.remove(srcFile)
Example #26
0
def main(opts):
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}
    failedJobs = {}
    stats = {}
    allJobs = 0

    for task in taskDirs:
        if not os.path.exists(task):
            if opts.showMissing:
                print "%s: Task directory missing" % task
            continue

        jobs = multicrab.crabStatusToJobs(task)

        jobSummaries = {}
        njobs = 0
        for key, item in jobs.iteritems():
            hosts = {}
            for job in item:
                if job.host != None:
                    hosts[job.host] = 1
            l = len(item)
            jobSummaries[key] = JobSummary(item, hosts)
            njobs += l
            allJobs += l
            if key in stats:
                stats[key] += l
            else:
                stats[key] = l

        # First the succesfully done
        line = "%s (%d jobs):" % (task, njobs)
        for s in order_done:
            if s in jobSummaries:
                line = formatSummaries(opts, line, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the aborted-submitted to the end of the line
        line_end = ""
        for s in order_run:
            if s in jobSummaries:
                line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the failed ones to the middle
        keys = jobSummaries.keys()
        keys.sort()
        for key in keys:
            line = formatSummaries(opts, line, key, jobSummaries[key])
        line += line_end
        if line[-1] == ",":
            line = line[0:-1]

        print line

        # Infer the jobs to be resubmitted
        failed = []
        for key, joblist in jobs.iteritems():
            for job in joblist:
                if job.failed(opts.resubmit):
                    failed.append((job.id, job.jobExitCode))
        if len(failed) > 0:
            failed.sort()
            pretty = multicrab.prettyJobnums([x[0] for x in failed])
            resubmitJobs[task] = pretty
            for jobId, jobCode in failed:
                multicrab._addToDictList(
                    failedJobs, jobCode,
                    "%s/res/CMSSW_%d.stdout" % (task, jobId))

    print "----------------------------------------"
    print "Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs)
    for s in order_done:
        if s in stats:
            print status_format % (s + ":", stats[s])
            del stats[s]
    b = []
    for s in order_run:
        if s in stats:
            b.append(status_format % (s + ":", stats[s]))
            del stats[s]
    keys = stats.keys()
    keys.sort()
    for key in keys:
        print status_format % (key + ":", stats[key])
    for line in b:
        print line

    print "----------------------------------------"
    if len(resubmitJobs) == 0:
        print "No failed/aborted jobs to resubmit"
    else:
        print "Following jobs failed/aborted, and can be resubmitted"
        print
        for task in taskDirs:
            if task in resubmitJobs:
                print "crab -c %s -resubmit %s" % (task, resubmitJobs[task])
        print

    if opts.failedLogs:
        print "----------------------------------------"
        print "Log files of failed jobs"
        keys = failedJobs.keys()
        keys.sort()
        for code in keys:
            print
            print "Job exit code %d:" % code
            print "\n".join(failedJobs[code])

    return 0
Example #27
0
def main(opts, args):
    if opts.report:
        multicrab.checkCrabInPath()

    cell = "\|\s+(?P<%s>\S+)\s+"

    lumi_re = re.compile((cell % "deliveredls") + (cell % "delivered") +
                         (cell % "selectedls") + (cell % "recorded") + "\|")
    #lumi_re = re.compile("\|\s(?P<recorded>\S+)\s")
    unit_re = re.compile("Recorded\(/(?P<unit>.*)\)")

    if not opts.truncate and os.path.exists(opts.output):
        f = open(opts.output, "r")
        data = json.load(f)
        f.close()

    files = []
    # only if no explicit files, or some directories explicitly given
    if len(opts.files) == 0 or len(opts.dirs) > 0:
        crabdirs = multicrab.getTaskDirectories(opts)
        for d in crabdirs:
            if isMCTask(d):
                print "  Ignoring task directory '%s', it looks like MC" % d
                continue

            if opts.report:
                cmd = ["crab", "-report", "-c", d]
                if opts.verbose:
                    print " ".join(cmd)
                p = subprocess.Popen(cmd,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT)
                output = p.communicate()[0]
                ret = p.returncode
                if ret != 0:
                    print "Call to 'crab -report -d %s' failed with return value %d" % (
                        d, ret)
                    print output
                    return 1
                if opts.verbose:
                    print output

            files.append((d, os.path.join(d, "res", "lumiSummary.json")))
    files.extend([(None, f) for f in opts.files])

    data = {}
    for task, jsonfile in files:
        #print
        #print "================================================================================"
        #print "Dataset %s:" % d
        cmd = [
            "lumiCalc2.py", "-i", jsonfile, "--nowarning", "overview", "-b",
            "stable"
        ]
        if opts.lumicalc1:
            cmd = [
                "lumiCalc.py", "-i", jsonfile, "--with-correction",
                "--nowarning", "overview", "-b", "stable"
            ]
        #cmd = ["lumiCalc.py", "-c", "frontier://LumiCalc/CMS_LUMI_PROD", "-r", "132440", "--nowarning", "overview"]
        #ret = subprocess.call(cmd)
        if opts.verbose:
            print " ".join(cmd)
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        ret = p.returncode
        if ret != 0:
            print "Call to lumiCalc.py failed with return value %d with command" % ret
            print " ".join(cmd)
            print output
            return 1
        if opts.verbose:
            print output

        lines = output.split("\n")
        lines.reverse()
        lumi = -1.0
        unit = None
        for line in lines:
            m = unit_re.search(line)
            if m:
                unit = m.group("unit")
                break

            m = lumi_re.search(line)
            if m:
                lumi = float(m.group("recorded"))  # lumiCalc2.py returns pb^-1
                #                if opts.lumicalc1:
                #                    lumi = lumi/1e6 # ub^-1 -> pb^-1, lumiCalc.py returns ub^-1
                continue

        if unit == None:
            raise Exception(
                "Didn't find unit information from lumiCalc output, command was %s"
                % " ".join(cmd))
        lumi = convertLumi(lumi, unit)

        if task == None:
            print "File %s recorded luminosity %f pb^-1" % (jsonfile, lumi)
        else:
            print "Task %s recorded luminosity %f pb^-1" % (task, lumi)
            data[task] = lumi

        # Save the json file after each data task in case of future errors
        if len(data) > 0:
            f = open(opts.output, "wb")
            json.dump(data, f, sort_keys=True, indent=2)
            f.close()

    if len(data) > 0:
        f = open(opts.output, "wb")
        json.dump(data, f, sort_keys=True, indent=2)
        f.close()

    return 0
def main(opts):
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}
    failedJobs = {}
    stats = {}
    allJobs = 0

    for task in taskDirs:
        if not os.path.exists(task):
            if opts.showMissing:
                print "%s: Task directory missing" % task
            continue
        
        jobs = multicrab.crabStatusToJobs(task)

        jobSummaries = {}
        njobs = 0
        for key, item in jobs.iteritems():
            hosts = {}
            for job in item:
                if job.host != None:
                    hosts[job.host] = 1
            l = len(item)
            jobSummaries[key] = JobSummary(item, hosts)
            njobs += l
            allJobs += l
            if key in stats:
                stats[key] += l
            else:
                stats[key] = l

        # First the succesfully done
        line = "%s (%d jobs):" % (task, njobs)
        for s in order_done:
            if s in jobSummaries:
                line = formatSummaries(opts, line, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the aborted-submitted to the end of the line
        line_end = ""
        for s in order_run:
            if s in jobSummaries:
                line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the failed ones to the middle
        keys = jobSummaries.keys()
        keys.sort()
        for key in keys:
            line = formatSummaries(opts, line, key, jobSummaries[key])
        line += line_end
        if line[-1] == ",":
            line = line[0:-1]
        
        print line

        # Infer the jobs to be resubmitted
        failed = []
        for key, joblist in jobs.iteritems():
            for job in joblist:
                if job.failed(opts.resubmit):
                    failed.append( (job.id, job.jobExitCode) )
        if len(failed) > 0:
            failed.sort()
            pretty = multicrab.prettyJobnums([x[0] for x in failed])
            resubmitJobs[task] = pretty
            for jobId, jobCode in failed:
                multicrab._addToDictList(failedJobs, jobCode, "%s/res/CMSSW_%d.stdout" % (task, jobId))
    
    print "----------------------------------------"
    print "Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs)
    for s in order_done:
        if s in stats:
            print status_format % (s+":", stats[s])
            del stats[s]
    b = []
    for s in order_run:
        if s in stats:
            b.append(status_format % (s+":", stats[s]))
            del stats[s]
    keys = stats.keys()
    keys.sort()
    for key in keys:
        print status_format % (key+":", stats[key])
    for line in b:
        print line


    print "----------------------------------------"
    if len(resubmitJobs) == 0:
        print "No failed/aborted jobs to resubmit"
    else:
        print "Following jobs failed/aborted, and can be resubmitted"
        print
        for task in taskDirs:
            if task in resubmitJobs:
                print "crab -c %s -resubmit %s" % (task, resubmitJobs[task])
        print

    if opts.failedLogs:
        print "----------------------------------------"
        print "Log files of failed jobs"
        keys = failedJobs.keys()
        keys.sort()
        for code in keys:
            print
            print "Job exit code %d:" % code
            print "\n".join(failedJobs[code])

    return 0
def main(opts):
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitMode = (len(opts.resubmit) > 0)
    resubmitIdListMode = (opts.resubmit not in ["failed", "aborted"])

    if resubmitMode and resubmitIdListMode and len(taskDirs) != 1:
        print "Option '--resubmit job_id_list' can be used with only one task, trying to use with %d tasks" % len(taskDirs)
        return 1

    if resubmitMode and resubmitIdListMode:
        resubmitJobList = multicrab.prettyToJobList(opts.resubmit)

    # Obtain all jobs to be (re)submitted
    allJobs = []
    seBlackLists = {}
    for task in taskDirs:
        if not os.path.exists(task):
            print "%s: Task directory missing" % task
            continue

        cfgparser = ConfigParser.ConfigParser()
        cfgparser.read(os.path.join(task, "share", "crab.cfg"))
        if cfgparser.has_section("GRID"):
            availableOptions = cfgparser.options("GRID")
            blacklist = None
            for ao in availableOptions:
                if ao.lower() == "se_black_list":
                    blacklist = cfgparser.get("GRID", ao)
                    break
            seBlackLists[task] = blacklist

        jobs = multicrab.crabStatusToJobs(task, printCrab=False)
        if not resubmitMode: # normal submission
            if not "Created" in jobs:
                print "%s: no 'Created' jobs to submit" % task
                continue
            allJobs.extend(filter(lambda j: isInRange(opts, j), jobs["Created"]))
        elif not resubmitIdListMode: # resubmit all failed jobs
            status = "all"
            if opts.resubmit == "aborted":
                status = "aborted"
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.failed(status):
                        allJobs.append(job)
        else: # resubmit explicit list of jobs
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.id in resubmitJobList:
                        allJobs.append(job)
                        resubmitJobList.remove(job.id)

    # Set the number of maximum jobs to submit
    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs):
        maxJobs = opts.maxJobs

    submitCommand = "-submit"
    if len(opts.resubmit) > 0:
        submitCommand = "-resubmit"

    sites = []
    siteSubmitIndex = 0
    if len(opts.toSites) > 0:
        sites = opts.toSites.split(",")

    # Submission loop
    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        # Construct list of jobs per task to submit
        njobsToSubmit = min(opts.jobs, maxJobs-njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit    
        jobsToSubmit = {}
        for n in xrange(0, njobsToSubmit):
            job = allJobs.pop(0)
            aux.addToDictList(jobsToSubmit, job.task, job.id)

        # If explicit list of sites to submit was given, get the site to submit this time
        crabOptions = []
        if len(sites) > 0:
            site = sites[siteSubmitIndex]
            siteSubmitIndex = (siteSubmitIndex+1) % len(sites)
            crabOptions.append("-GRID.se_black_list= -GRID.se_white_list="+site)

        # Actual submission
        for task, jobs in jobsToSubmit.iteritems():
            pretty = multicrab.prettyJobnums(jobs)
            if len(jobs) == 1:
                pretty += "," # CRAB thinks one number is number of jobs, the comma translates it to job ID
            command = ["crab", "-c", task, submitCommand, pretty]
            if opts.crabArgs != "":
                command.extend(opts.crabArgs.split(" "))
            if len(crabOptions) > 0:
                command.extend(crabOptions)
            if opts.addSeBlackList != "":
                lst = seBlackLists[task]
                if lst is None:
                    lst = opts.addSeBlackList
                else:
                    lst += ","+opts.addSeBlackList
                command.extend(["-GRID.se_black_list="+lst])

            print "Submitting %d jobs from task %s" % (len(jobs), task)
            print "Command", " ".join(command)
            if not opts.test:
                timesLeft = 1
                if opts.tryAgainTimes > 0:
                    timesLeft = opts.tryAgainTimes
                while timesLeft > 0:
                    ret = subprocess.call(command)
                    if ret == 0:
                        break
                    else:
                        timesLeft -= 1
                        message = "Command '%s' failed with exit code %d" % (" ".join(command), ret)
                        if opts.allowFails:
                            print message
                        if opts.tryAgainTimes > 0:
                            print message
                            if timesLeft > 0:
                                print "Trying again after %d seconds (%d trials left)" % (opts.tryAgainSeconds, timesLeft)
                                time.sleep(opts.tryAgainSeconds)
                            else:
                                print "No trials left, continuing with next job block"
                        else:
                            raise Exception()

        # Sleep between submissions
        if njobsSubmitted < maxJobs:
            print "Submitted, sleeping %f seconds" % opts.sleep
            time.sleep(opts.sleep)
        else:
            print "Submitted"

    return 0
def main(opts):
    taskDirs = multicrab.getTaskDirectories(opts)

    # Find task directories
    tasks = OrderedDict.OrderedDict()
    for d in taskDirs:
        if os.path.exists(d):
            tasks[d] = Task(d)
        elif os.path.exists(d + "_published"):
            print "%s: using directory %s_published" % (d, d)
            tasks[d] = Task(d + "_published")
        else:
            print "%s: directory not found, skipping" % d

    print

    # Find publish name from crab.cfg's
    addInputPublishToTasks(tasks)
    #print

    # Read publish.log files produced by hplusMultiCrabPublish.py
    publishLogs = glob.glob("publish_*.log")
    if len(publishLogs) == 0:
        print "Did not find any publish_*.log files, are you sure you've run hplusMultiCrabPublish?"
        return 1
    publishLogs.sort()
    for logFile in publishLogs:
        parseLog(logFile, tasks)

    #print

    # Check if publication is complete
    taskNames = tasks.keys()
    taskNames.sort()
    for name in taskNames:
        task = tasks[name]
        still = task.jobs_still_to_publish
        if still is not None and still > 0:
            print "%s publication not complete (published %d, failed %d, still_to_publish %d)" % (
                name, task.jobs_published, task.jobs_failed, still)
            del tasks[name]

    # Read time and size information
    timeAnalysis = multicrabAnalysis.TimeAnalysis()
    sizeAnalysis = multicrabAnalysis.SizeAnalysis(opts.sizeFile)
    analyses = [timeAnalysis, sizeAnalysis]
    print
    for key, task in tasks.iteritems():
        # For this we don't care if the jobs succeeded or not
        outFiles = glob.glob(
            os.path.join(task.directory, "res", "CMSSW_*.stdout"))
        if len(outFiles) == 0:
            print "%s: 0 CMSSW_*.stdout files, something is badly wrong!" % key
            sys.exit(1)

        multicrabAnalysis.analyseFiles(outFiles, analyses)
        task.jobs = len(outFiles)
        task.time = timeAnalysis.userTime()
        task.size = sizeAnalysis.size()

        npublished = task.jobs_published
        if npublished is not None and npublished != task.jobs:
            print "%s publication nto complete (published %d of %d jobs)" % (
                npublished, task.jobs)

    # Print out


#    print
#    for key, task in tasks.iteritems():
#        print "# %s events, %d jobs" % (task.events, task.jobs)
        print "# %d jobs" % (task.jobs)
        print "# %s" % task.time
        print "# %s" % task.size
        print '"%s": TaskDef("%s", dbs="phys03"),' % (key, task.dbsPath)

    return 0
def main(opts):
    # open PU histograms
    fmc = ROOT.TFile.Open(opts.mcPU)
    if fmc.IsZombie():
        sys.exit()
    hmcoriginal = fmc.Get("pileup")
    hmc = hmcoriginal.Clone("hmc")
    hmc.Scale(1.0 / hmc.Integral())
    fdata = ROOT.TFile.Open(opts.dataPU)
    if fdata.IsZombie():
        sys.exit()
    hdataoriginal = fdata.Get("pileup")
    hdata = hdataoriginal.Clone("hdata")
    hdata.Scale(1.0 / hdata.Integral())
    fdataup = ROOT.TFile.Open(opts.dataPU.replace(".root","up.root"))
    if fdataup.IsZombie():
        sys.exit()
    hdatauporiginal = fdataup.Get("pileup")
    hdataup = hdatauporiginal.Clone("hdataup")
    hdataup.Scale(1.0 / hdataup.Integral())
    fdatadown = ROOT.TFile.Open(opts.dataPU.replace(".root","down.root"))
    if fdatadown.IsZombie():
        sys.exit()
    hdatadownoriginal = fdatadown.Get("pileup")
    hdatadown = hdatadownoriginal.Clone("hdatadown")
    hdatadown.Scale(1.0 / hdatadown.Integral())

    hweight = hdata.Clone()
    hweight.Divide(hmc)

    hweightUp = hdataup.Clone()
    hweightUp.Divide(hmc)

    hweightDown = hdatadown.Clone()
    hweightDown.Divide(hmc)

    ntupleCache = dataset.NtupleCache("pileupNtuple/tree", "PileupWeightSelector",
                                      selectorArgs=[hweight, hweightUp, hweightDown],
                                      )

    topPtNames = ROOT.std.vector("string")()
    topPtFormulasAllHadr = ROOT.std.vector("string")()
    topPtFormulasSemiLep = ROOT.std.vector("string")()
    topPtFormulasDiLep = ROOT.std.vector("string")()
    print topPtNames
    for name, scheme in topPtWeightSchemes.schemes.iteritems():
        topPtNames.push_back(name)
        topPtFormulasAllHadr.push_back(scheme.allhadronic)
        topPtFormulasSemiLep.push_back(scheme.leptonjets)
        topPtFormulasDiLep.push_back(scheme.dilepton)

    ntupleCacheTTJets = dataset.NtupleCache("pileupNtuple/tree", "PileupWeightSelector",
                                            selectorArgs=[hweight, hweightUp, hweightDown, topPtNames, topPtFormulasAllHadr, topPtFormulasSemiLep, topPtFormulasDiLep],
                                            cacheFileName="histogramCacheTTJets.root"
                                            )


    # loop over datasets
    myoutput = ""
    for multicrabDir in opts.multicrabdir:
        crabDirs = multicrab.getTaskDirectories(None, os.path.join(multicrabDir, "multicrab.cfg"))
        for crabDir in crabDirs:
            taskName = os.path.split(crabDir)[1]
            rootFile = ROOT.TFile.Open(os.path.join(crabDir, "res", "histograms-%s.root"%taskName))
            if rootFile.IsZombie():
                sys.exit()

            # Create Dataset wrapper
            dset = DatasetWrapper(taskName, rootFile, multicrabDir)

            # Get tree for non-weighted number of events
            mytree = dset.getTree("pileupNtuple/tree")
            if mytree == 0:
                raise Exception("Did not find 'pileupNtuple/tree' from %s" % rootFile.GetName())
            nevents = mytree.GetEntries()

            nc = ntupleCache
            topPtWeighting = opts.doTopPt and "TTJets" in taskName
            if topPtWeighting:
                nc = ntupleCacheTTJets

            # Process tree
            nc.process(dset)

            # Get results
            def getResult(histo):
                return nc.getRootHisto(dset, histo, None).GetBinContent(1)
            nevt = getResult("events")
            nevtup = getResult("eventsUp")
            nevtdown = getResult("eventsDown")

            rootFile.Close()
            # Write output line
            if topPtWeighting:
                taskPrefix = "        "+'"'+taskName+'"'+": WeightedAllEventsTopPt("
                myline = taskPrefix+"unweighted = WeightedAllEvents(unweighted=%d, "%nevents+"weighted=%f, "%nevt+"up=%f, "%nevtup+"down=%f),\n"%nevtdown
                for name in topPtWeightSchemes.schemes.iterkeys():
                    def construct(prefix, histoPostfix, postfix):
                        top_nevt = getResult("events_topPt%s_%s"%(histoPostfix, name))
                        top_nevtup = getResult("eventsUp_topPt%s_%s"%(histoPostfix, name))
                        top_nevtdown = getResult("eventsDown_topPt%s_%s"%(histoPostfix, name))
                        return prefix + "=WeightedAllEvents(unweighted=%d, weighted=%f, up=%f, down=%f)" % (nevents, top_nevt, top_nevtup, top_nevtdown) + postfix + "\n"

                    firstPrefix = " "*len(taskPrefix) + name + " = WeightedAllEventsTopPt.Weighted("
                    myline += construct(firstPrefix+"weighted", "", ",")
                    myline += construct(" "*len(firstPrefix)+"up", "Up", ",")
                    myline += construct(" "*len(firstPrefix)+"down", "Down", "),")
                myline += " "*len(taskPrefix)+"),\n"
            else:
                myline = "        "+'"'+taskName+'"'+": WeightedAllEvents(unweighted=%d, "%nevents+"weighted=%f, "%nevt+"up=%f, "%nevtup+"down=%f),\n"%nevtdown
            #print "\n"+myline
            myoutput += myline

    myresult = "_weightedAllEvents = {\n"
    myresult += "    "+'"'+"myera"+'"'+": {\n"
    myresult += myoutput
    myresult += "    },\n"
    myresult += "}\n\n"
    print ""
    print myresult
    print "Copy the above fragment to python/tools/pileupReweightedAllEvents.py and replace 'myera' with appropriate label, e.g. 2011A\n"
    print "Result was obtained with PU histograms:"
    print "  data:",opts.dataPU
    print "  dataup:",opts.dataPU.replace(".root","up.root")
    print "  datadown:",opts.dataPU.replace(".root","down.root")
    print "  MC:",opts.mcPU
    print ""
Example #32
0
def main(opts, args):
    if opts.report:
        multicrab.checkCrabInPath()

    cell = "\|\s+(?P<%s>\S+)\s+"

    lumi_re = re.compile((cell % "deliveredls")+
                         (cell % "delivered")+
                         (cell % "selectedls")+
                         (cell % "recorded")+"\|")
    #lumi_re = re.compile("\|\s(?P<recorded>\S+)\s")
    unit_re = re.compile("Recorded\(/(?P<unit>.*)\)")

    if not opts.truncate and os.path.exists(opts.output):
        f = open(opts.output, "r")
        data = json.load(f)
        f.close()
    
    files = []
    # only if no explicit files, or some directories explicitly given
    if len(opts.files) == 0 or len(opts.dirs) > 0:
        crabdirs = multicrab.getTaskDirectories(opts)
        for d in crabdirs:
            if isMCTask(d):
                print "  Ignoring task directory '%s', it looks like MC" % d
                continue
    
            if opts.report:
                cmd = ["crab", "-report", "-c", d]
                if opts.verbose:
                    print " ".join(cmd)
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                output = p.communicate()[0]
                ret = p.returncode
                if ret != 0:
                    print "Call to 'crab -report -d %s' failed with return value %d" % (d, ret)
                    print output
                    return 1
                if opts.verbose:
                    print output
        
            files.append((d, os.path.join(d, "res", "lumiSummary.json")))
    files.extend([(None, f) for f in opts.files])
    
    data = {}
    for task, jsonfile in files:
        #print
        #print "================================================================================"
        #print "Dataset %s:" % d
        cmd = ["lumiCalc2.py", "-i", jsonfile, "--nowarning", "overview", "-b", "stable"]
        if opts.lumicalc1:
            cmd = ["lumiCalc.py", "-i", jsonfile, "--with-correction", "--nowarning", "overview", "-b", "stable"]
        #cmd = ["lumiCalc.py", "-c", "frontier://LumiCalc/CMS_LUMI_PROD", "-r", "132440", "--nowarning", "overview"]
        #ret = subprocess.call(cmd)
        if opts.verbose:
            print " ".join(cmd)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        ret = p.returncode
        if ret != 0:
            print "Call to lumiCalc.py failed with return value %d with command" % ret
            print " ".join(cmd)
            print output
            return 1
        if opts.verbose:
            print output

        lines = output.split("\n")
        lines.reverse()
        lumi = -1.0
        unit = None
        for line in lines:
            m = unit_re.search(line)
            if m:
                unit = m.group("unit")
                break

            m = lumi_re.search(line)
            if m:
                lumi = float(m.group("recorded")) # lumiCalc2.py returns pb^-1
#                if opts.lumicalc1:
#                    lumi = lumi/1e6 # ub^-1 -> pb^-1, lumiCalc.py returns ub^-1
                continue

        if unit == None:
            raise Exception("Didn't find unit information from lumiCalc output, command was %s" % " ".join(cmd))
        lumi = convertLumi(lumi, unit)

        if task == None:
            print "File %s recorded luminosity %f pb^-1" % (jsonfile, lumi)
        else:
            print "Task %s recorded luminosity %f pb^-1" % (task, lumi)
            data[task] = lumi

        # Save the json file after each data task in case of future errors
        if len(data) > 0:
            f = open(opts.output, "wb")
            json.dump(data, f, sort_keys=True, indent=2)
            f.close()

    if len(data) > 0:
        f = open(opts.output, "wb")
        json.dump(data, f, sort_keys=True, indent=2)
        f.close()

    return 0
Example #33
0
def main(opts):
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitMode = (len(opts.resubmit) > 0)
    resubmitIdListMode = (opts.resubmit not in ["failed", "aborted"])

    if resubmitMode and resubmitIdListMode and len(taskDirs) != 1:
        print "Option '--resubmit job_id_list' can be used with only one task, trying to use with %d tasks" % len(
            taskDirs)
        return 1

    if resubmitMode and resubmitIdListMode:
        resubmitJobList = multicrab.prettyToJobList(opts.resubmit)

    # Obtain all jobs to be (re)submitted
    allJobs = []
    seBlackLists = {}
    for task in taskDirs:
        if not os.path.exists(task):
            print "%s: Task directory missing" % task
            continue

        cfgparser = ConfigParser.ConfigParser()
        cfgparser.read(os.path.join(task, "share", "crab.cfg"))
        if cfgparser.has_section("GRID"):
            availableOptions = cfgparser.options("GRID")
            blacklist = None
            for ao in availableOptions:
                if ao.lower() == "se_black_list":
                    blacklist = cfgparser.get("GRID", ao)
                    break
            seBlackLists[task] = blacklist

        jobs = multicrab.crabStatusToJobs(task, printCrab=False)
        if not resubmitMode:  # normal submission
            if not "Created" in jobs:
                print "%s: no 'Created' jobs to submit" % task
                continue
            allJobs.extend(
                filter(lambda j: isInRange(opts, j), jobs["Created"]))
        elif not resubmitIdListMode:  # resubmit all failed jobs
            status = "all"
            if opts.resubmit == "aborted":
                status = "aborted"
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.failed(status):
                        allJobs.append(job)
        else:  # resubmit explicit list of jobs
            for joblist in jobs.itervalues():
                for job in joblist:
                    if job.id in resubmitJobList:
                        allJobs.append(job)
                        resubmitJobList.remove(job.id)

    # Set the number of maximum jobs to submit
    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < int(maxJobs):
        maxJobs = opts.maxJobs

    submitCommand = "-submit"
    if len(opts.resubmit) > 0:
        submitCommand = "-resubmit"

    sites = []
    siteSubmitIndex = 0
    if len(opts.toSites) > 0:
        sites = opts.toSites.split(",")

    # Submission loop
    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        # Construct list of jobs per task to submit
        njobsToSubmit = min(opts.jobs, maxJobs - njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit
        jobsToSubmit = {}
        for n in xrange(0, njobsToSubmit):
            job = allJobs.pop(0)
            aux.addToDictList(jobsToSubmit, job.task, job.id)

        # If explicit list of sites to submit was given, get the site to submit this time
        crabOptions = []
        if len(sites) > 0:
            site = sites[siteSubmitIndex]
            siteSubmitIndex = (siteSubmitIndex + 1) % len(sites)
            crabOptions.append("-GRID.se_black_list= -GRID.se_white_list=" +
                               site)

        # Actual submission
        for task, jobs in jobsToSubmit.iteritems():
            pretty = multicrab.prettyJobnums(jobs)
            if len(jobs) == 1:
                pretty += ","  # CRAB thinks one number is number of jobs, the comma translates it to job ID
            command = ["crab", "-c", task, submitCommand, pretty]
            if opts.crabArgs != "":
                command.extend(opts.crabArgs.split(" "))
            if len(crabOptions) > 0:
                command.extend(crabOptions)
            if opts.addSeBlackList != "":
                lst = seBlackLists[task]
                if lst is None:
                    lst = opts.addSeBlackList
                else:
                    lst += "," + opts.addSeBlackList
                command.extend(["-GRID.se_black_list=" + lst])

            print "Submitting %d jobs from task %s" % (len(jobs), task)
            print "Command", " ".join(command)
            if not opts.test:
                timesLeft = 1
                if opts.tryAgainTimes > 0:
                    timesLeft = opts.tryAgainTimes
                while timesLeft > 0:
                    ret = subprocess.call(command)
                    if ret == 0:
                        break
                    else:
                        timesLeft -= 1
                        message = "Command '%s' failed with exit code %d" % (
                            " ".join(command), ret)
                        if opts.allowFails:
                            print message
                        if opts.tryAgainTimes > 0:
                            print message
                            if timesLeft > 0:
                                print "Trying again after %d seconds (%d trials left)" % (
                                    opts.tryAgainSeconds, timesLeft)
                                time.sleep(opts.tryAgainSeconds)
                            else:
                                print "No trials left, continuing with next job block"
                        else:
                            raise Exception()

        # Sleep between submissions
        if njobsSubmitted < maxJobs:
            print "Submitted, sleeping %f seconds" % opts.sleep
            time.sleep(opts.sleep)
        else:
            print "Submitted"

    return 0