Example #1
    def __init__(self, opts, directory, massPoints, datacardPatterns,
                 rootfilePatterns, clsType):
        self.opts = opts
        self.datacardDirectory = directory
        self.massPoints = massPoints
        self.datacardPatterns = datacardPatterns
        self.rootfilePatterns = rootfilePatterns
        self.clsType = clsType.clone()
        self.jobsCreated = False
        self.datacards = {}
        self.rootfiles = {}
        self.scripts = []
        self.configuration = {}

        if not os.path.isdir(directory):
            raise Exception("Datacard directory '%s' does not exist" %
                            directory)

        # this is a dictionary dumped to configuration.json
        self.configuration = {
            "masspoints": massPoints,
            "datacards": datacardPatterns,
            "rootfiles": rootfilePatterns,
            "codeVersion": git.getCommitId(),
            "clsType": self.clsType.name(),
        }
        clsConfig = self.clsType.getConfiguration(self.configuration)
        if clsConfig is not None:
            self.configuration["clsConfig"] = clsConfig

        for mass in self.massPoints:
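            # Resolve concrete file names for this mass point: a "%s" in a
            # pattern is substituted with the mass, otherwise the pattern is
            # used as-is.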
            for dc in datacardPatterns:
                fname = None
                if "%s" in dc:
                    fname = os.path.join(self.datacardDirectory, dc % mass)
                else:
                    fname = os.path.join(self.datacardDirectory, dc)
                if not os.path.isfile(fname):
                    raise Exception("Datacard file '%s' does not exist!" %
                                    fname)

                aux.addToDictList(self.datacards, mass, fname)

            for rf in rootfilePatterns:
                if rf is not None:
                    rfname = None
                    if "%s" in rf:
                        rfname = os.path.join(self.datacardDirectory,
                                              rf % mass)
                    else:
                        rfname = os.path.join(self.datacardDirectory, rf)
                    if not os.path.isfile(rfname):
                        raise Exception(
                            "ROOT file (for shapes) '%s' does not exist!" %
                            rfname)

                    aux.addToDictList(self.rootfiles, mass, rfname)
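Both loops above collect one list of file names per mass point via aux.addToDictList. The helper itself is not shown on this page; a minimal sketch of what it presumably does, assuming aux is a plain dict-of-lists utility module:

def addToDictList(dictionary, key, value):
    # Hypothetical sketch of aux.addToDictList as used above; the real
    # implementation lives in the framework's aux module.
    dictionary.setdefault(key, []).append(value)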
Example #2
def addConfigInfo(of, dataset, addLuminosity=True, dataVersion=None,
                  additionalText=None):
    # Use None instead of a mutable {} default to avoid the shared
    # mutable-default-argument pitfall.
    if additionalText is None:
        additionalText = {}

    d = of.mkdir("configInfo")
    d.cd()

    # configinfo histogram
    configinfo = ROOT.TH1F("configinfo", "configinfo", 3, 0, 3)
    axis = configinfo.GetXaxis()

    def setValue(bin, name, value):
        axis.SetBinLabel(bin, name)
        configinfo.SetBinContent(bin, value)

    setValue(1, "control", 1)
    setValue(2, "energy", float(dataset.getEnergy()))
    if dataset.isData():
        if addLuminosity:
            setValue(3, "luminosity", dataset.getLuminosity())
    elif dataset.isMC():
        setValue(3, "crossSection", 1.0)

    configinfo.Write()
    configinfo.Delete()

    # dataVersion
    ds = dataset
    while hasattr(ds, "datasets"):
        ds = ds.datasets[0]

    if dataVersion is None:
        dataVersion = ds.dataVersion

    dv = ROOT.TNamed("dataVersion", dataVersion)
    dv.Write()
    dv.Delete()

    # codeVersion
    codeVersion = ROOT.TNamed("codeVersion", git.getCommitId())
    codeVersion.Write()
    codeVersion.Delete()

    for name, content in additionalText.iteritems():
        txt = ROOT.TNamed(name, content)
        txt.Write()
        txt.Delete()

    of.cd()
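A hedged usage sketch of addConfigInfo: the stub dataset below is a stand-in implementing only the methods the function actually calls, and the file name and text payload are illustrative. The call assumes the same module context as the function itself (ROOT and the framework's git helper imported).

class StubDataset:
    # Minimal stand-in for the dataset interface addConfigInfo touches.
    dataVersion = "80Xdata"
    def isData(self): return True
    def isMC(self): return False
    def getEnergy(self): return 13
    def getLuminosity(self): return 35.9

of = ROOT.TFile.Open("configinfo_example.root", "RECREATE")
addConfigInfo(of, StubDataset(), additionalText={"note": "illustration"})
of.Close()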
Example #3
    def run(self, proof=False, proofWorkers=None):
        outputDir = self._outputPrefix + "_" + time.strftime("%y%m%d_%H%M%S")
        if self._outputPostfix != "":
            outputDir += "_" + self._outputPostfix

        # Create output directory
        os.mkdir(outputDir)
        multicrabCfg = os.path.join(outputDir, "multicrab.cfg")
        f = open(multicrabCfg, "w")
        for dset in self._datasets:
            f.write("[%s]\n\n" % dset.getName())
        f.close()

        # Copy/merge lumi files
        lumifiles = set([d.getLumiFile() for d in self._datasets])
        lumidata = {}
        for fname in lumifiles:
            if not os.path.exists(fname):
                continue
            f = open(fname)
            data = json.load(f)
            f.close()
            for k in data.keys():
                if k in lumidata:
                    raise Exception(
                        "Luminosity JSON file %s has a dataset (%s) for which the luminosity has already been loaded; please check the luminosity JSON files:\n%s"
                        % (fname, k, "\n".join(lumifiles)))
            lumidata.update(data)
        if len(lumidata) > 0:
            f = open(os.path.join(outputDir, "lumi.json"), "w")
            json.dump(lumidata, f, sort_keys=True, indent=2)
            f.close()

            # Add run range in a json file, if runMin and runMax in pset
            rrdata = {}
            for aname, analyzerIE in self._analyzers.iteritems():
                ana = analyzerIE.getAnalyzer()
                if hasattr(ana, "__call__"):
                    for dset in self._datasets:
                        if dset.getDataVersion().isData():
                            ana = ana(dset.getDataVersion())
                            if ana.__getattr__("runMax") > 0:
                                rrdata[aname] = "%s-%s" % (ana.__getattr__(
                                    "runMin"), ana.__getattr__("runMax"))
                                break
            if len(rrdata) > 0:
                f = open(os.path.join(outputDir, "runrange.json"), "w")
                json.dump(rrdata, f, sort_keys=True, indent=2)
                f.close()

        # Setup proof if asked
        _proof = None
        if proof:
            opt = ""
            if proofWorkers is not None:
                opt = "workers=%d" % proofWorkers
            _proof = ROOT.TProof.Open(opt)
            _proof.Exec("gSystem->Load(\"libHPlusAnalysis.so\");")

        # Init timing counters
        realTimeTotal = 0
        cpuTimeTotal = 0
        readMbytesTotal = 0
        callsTotal = 0

        # Process over datasets
        ndset = 0
        for dset in self._datasets:
            # Get data PU distributions from data
            #   This is done every time for a dataset since memory management is simpler to handle
            #   if all the histograms in memory are deleted after reading a dataset is finished
            hPUs = self._getDataPUhistos()
            # Initialize
            ndset += 1
            inputList = ROOT.TList()
            nanalyzers = 0
            anames = []
            usePUweights = False
            useTopPtCorrection = False
            nAllEventsPUWeighted = 0.0
            for aname, analyzerIE in self._analyzers.iteritems():
                if analyzerIE.runForDataset_(dset.getName()):
                    nanalyzers += 1
                    analyzer = analyzerIE.getAnalyzer()
                    if hasattr(analyzer, "__call__"):
                        analyzer = analyzer(dset.getDataVersion())
                        if analyzer is None:
                            raise Exception(
                                "Analyzer %s was specified as a function, but returned None"
                                % aname)
                        if not isinstance(analyzer, Analyzer):
                            raise Exception(
                                "Analyzer %s was specified as a function, but returned object of %s instead of Analyzer"
                                % (aname, analyzer.__class__.__name__))
                    inputList.Add(
                        ROOT.TNamed(
                            "analyzer_" + aname,
                            analyzer.className_() + ":" + analyzer.config_()))
                    # ttbar status for top pt corrections
                    ttbarStatus = "0"
                    useTopPtCorrection = (analyzer.exists("useTopPtWeights")
                                          and analyzer.__getattr__("useTopPtWeights"))
                    useTopPtCorrection = useTopPtCorrection and dset.getName().startswith("TT")
                    if useTopPtCorrection:
                        ttbarStatus = "1"
                    inputList.Add(ROOT.TNamed("isttbar", ttbarStatus))
                    # Pileup reweighting
                    (puAllEvents, puStatus) = self._parsePUweighting(
                        dset, analyzer, aname, hPUs, inputList)
                    nAllEventsPUWeighted += puAllEvents
                    usePUweights = puStatus
                    # Sum skim counters (from ttree)
                    hSkimCounterSum = self._getSkimCounterSum(
                        dset.getFileNames())
                    inputList.Add(hSkimCounterSum)
                    # Add name
                    anames.append(aname)
            if nanalyzers == 0:
                print "Skipping %s, no analyzers" % dset.getName()
                continue

            print "*** Processing dataset (%d/%d): %s" % (
                ndset, len(self._datasets), dset.getName())
            if dset.getDataVersion().isData():
                lumivalue = "--- not available in lumi.json (or lumi.json not available) ---"
                if dset.getName() in lumidata.keys():
                    lumivalue = lumidata[dset.getName()]
                print "    Luminosity: %s fb-1" % lumivalue
            print "    Using pileup weights:", usePUweights
            if useTopPtCorrection:
                print "    Using top pt weights: True"

            resDir = os.path.join(outputDir, dset.getName(), "res")
            resFileName = os.path.join(resDir,
                                       "histograms-%s.root" % dset.getName())

            os.makedirs(resDir)

            tchain = ROOT.TChain("Events")

            for f in dset.getFileNames():
                tchain.Add(f)
            tchain.SetCacheLearnEntries(1000)
            tchain.SetCacheSize(10000000)  # Set cache size to 10 MB (somehow it is not automatically set, contrary to ROOT docs)

            tselector = ROOT.SelectorImpl()

            # FIXME: TChain.GetEntries() is needed only to give a time
            # estimate for the analysis. If this turns out to be slow,
            # we could store the number of events along the file names
            # (whatever is the method for that)
            inputList.Add(ROOT.TNamed("entries", str(tchain.GetEntries())))
            if dset.getDataVersion().isMC():
                inputList.Add(ROOT.TNamed("isMC", "1"))
            else:
                inputList.Add(ROOT.TNamed("isMC", "0"))
            inputList.Add(ROOT.TNamed("options", self._options.serialize_()))
            inputList.Add(ROOT.TNamed("printStatus", "1"))

            if _proof is not None:
                tchain.SetProof(True)
                inputList.Add(
                    ROOT.TNamed("PROOF_OUTPUTFILE_LOCATION", resFileName))
            else:
                inputList.Add(ROOT.TNamed("OUTPUTFILE_LOCATION", resFileName))

            tselector.SetInputList(inputList)

            readBytesStart = ROOT.TFile.GetFileBytesRead()
            readCallsStart = ROOT.TFile.GetFileReadCalls()
            timeStart = time.time()
            clockStart = time.clock()

            if self._maxEvents > 0:
                tchain.SetCacheEntryRange(0, self._maxEvents)
                tchain.Process(tselector, "", self._maxEvents)
            else:
                tchain.Process(tselector)
            if _debugMemoryConsumption:
                print "    MEMDBG: TChain cache statistics:"
                tchain.PrintCacheStats()

            # Obtain Nall events for top pt corrections
            NAllEventsTopPt = 0
            if useTopPtCorrection:
                for inname in dset.getFileNames():
                    fIN = ROOT.TFile.Open(inname)
                    h = fIN.Get("configInfo/topPtWeightAllEvents")
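                    # NOTE: "!= None" (rather than "is not None") is deliberate:
                    # for a missing key PyROOT's Get() returns a null object
                    # proxy, which compares equal to None but is not None.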
                    if h != None:
                        binNumber = 2  # nominal
                        if hasattr(analyzer, "topPtSystematicVariation"):
                            variation = getattr(analyzer,
                                                "topPtSystematicVariation")
                            if variation == "minus":
                                binNumber = 0
                            # FIXME: The bin is to be added to the ttrees
                            #elif variation == "plus":
                            #    binNumber = 3
                            #    if not h.GetXaxis().GetBinLabel().endsWith("Plus"):
                            #        raise Exception("This should not happen")
                        if binNumber > 0:
                            NAllEventsTopPt += h.GetBinContent(binNumber)
                    else:
                        raise Exception(
                            "Could not obtain N(AllEvents) for top pt reweighting")
                    ROOT.gROOT.GetListOfFiles().Remove(fIN)
                    fIN.Close()

            # Write configInfo
            fIN = ROOT.TFile.Open(dset.getFileNames()[0])
            cinfo = fIN.Get("configInfo/configinfo")
            tf = ROOT.TFile.Open(resFileName, "UPDATE")
            configInfo = tf.Get("configInfo")
            if configInfo == None:
                configInfo = tf.mkdir("configInfo")
            configInfo.cd()
            dv = ROOT.TNamed("dataVersion", str(dset.getDataVersion()))
            dv.Write()
            dv.Delete()
            cv = ROOT.TNamed("codeVersionAnalysis", git.getCommitId())
            cv.Write()
            cv.Delete()
            if cinfo != None:
                # Add more information to configInfo
                n = cinfo.GetNbinsX()
                cinfo.SetBins(n + 3, 0, n + 3)
                cinfo.GetXaxis().SetBinLabel(n + 1, "isData")
                cinfo.GetXaxis().SetBinLabel(n + 2, "isPileupReweighted")
                cinfo.GetXaxis().SetBinLabel(n + 3, "isTopPtReweighted")
                # Add "isData" column
                if not dset.getDataVersion().isMC():
                    cinfo.SetBinContent(n + 1, cinfo.GetBinContent(1))
                # Add "isPileupReweighted" column
                if usePUweights:
                    cinfo.SetBinContent(n + 2,
                                        nAllEventsPUWeighted / nanalyzers)
                # Add "isTopPtReweighted" column
                if useTopPtCorrection:
                    cinfo.SetBinContent(n + 3, NAllEventsTopPt)
                # Write
                cinfo.Write()
                ROOT.gROOT.GetListOfFiles().Remove(fIN)
                fIN.Close()

            # Memory management
            configInfo.Delete()
            ROOT.gROOT.GetListOfFiles().Remove(tf)
            tf.Close()
            for item in inputList:
                if isinstance(item, ROOT.TObject):
                    item.Delete()
            inputList = None
            if hSkimCounterSum != None:
                hSkimCounterSum.Delete()
            if _debugMemoryConsumption:
                print "      MEMDBG: gDirectory", ROOT.gDirectory.GetList().GetSize()
                print "      MEMDBG: list ", ROOT.gROOT.GetList().GetSize()
                print "      MEMDBG: globals ", ROOT.gROOT.GetListOfGlobals().GetSize()
                #for item in ROOT.gROOT.GetListOfGlobals():
                #    print item.GetName()
                print "      MEMDBG: files", ROOT.gROOT.GetListOfFiles().GetSize()
                #for item in ROOT.gROOT.GetListOfFiles():
                #    print "          %d items" % item.GetList().GetSize()
                print "      MEMDBG: specials ", ROOT.gROOT.GetListOfSpecials().GetSize()
                for item in ROOT.gROOT.GetListOfSpecials():
                    print "          " + item.GetName()

                #gDirectory.GetList().Delete();
                #gROOT.GetList().Delete();
                #gROOT.GetListOfGlobals().Delete();
                #TIter next(gROOT.GetList());
                #while (TObject* o = dynamic_cast<TObject*>(next())) {
                #o.Delete();
                #}

            # Performance and information
            timeStop = time.time()
            clockStop = time.clock()
            readCallsStop = ROOT.TFile.GetFileReadCalls()
            readBytesStop = ROOT.TFile.GetFileBytesRead()

            calls = ""
            if _proof is not None:
                tchain.SetProof(False)
                queryResult = _proof.GetQueryResult()
                cpuTime = queryResult.GetUsedCPU()
                readMbytes = queryResult.GetBytes() / 1024 / 1024
            else:
                cpuTime = clockStop - clockStart
                readMbytes = float(readBytesStop -
                                   readBytesStart) / 1024 / 1024
                calls = " (%d calls)" % (readCallsStop - readCallsStart)
            realTime = timeStop - timeStart
            print "    Real time %.2f, CPU time %.2f (%.1f %%), read %.2f MB%s, read speed %.2f MB/s" % (
                realTime, cpuTime, cpuTime / realTime * 100, readMbytes, calls,
                readMbytes / realTime)
            print
            realTimeTotal += realTime
            cpuTimeTotal += cpuTime
            readMbytesTotal += readMbytes

        print
        if len(self._datasets) > 1:
            print "    Total: Real time %.2f, CPU time %.2f (%.1f %%), read %.2f MB, read speed %.2f MB/s" % (
                realTimeTotal, cpuTimeTotal, cpuTimeTotal / realTimeTotal *
                100, readMbytesTotal, readMbytesTotal / realTimeTotal)
        print "    Results are in", outputDir

        return outputDir
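The bookkeeping around tchain.Process() above follows a simple pattern: sample ROOT's cumulative I/O counters and the wall/CPU clocks before the work, sample them again afterwards, and report the deltas. A condensed, runnable sketch of that pattern (Python 2, as in the example; the measure() helper is our name, not the framework's):

import time
import ROOT

def measure(work):
    # Sample ROOT's global I/O counters and the clocks around "work",
    # then report the deltas as run() does above.
    bytes0 = ROOT.TFile.GetFileBytesRead()
    calls0 = ROOT.TFile.GetFileReadCalls()
    t0, c0 = time.time(), time.clock()
    work()
    realTime = time.time() - t0
    cpuTime = time.clock() - c0
    readMbytes = float(ROOT.TFile.GetFileBytesRead() - bytes0) / 1024 / 1024
    calls = ROOT.TFile.GetFileReadCalls() - calls0
    print "Real time %.2f, CPU time %.2f, read %.2f MB (%d calls)" % (
        realTime, cpuTime, readMbytes, calls)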
Example #4
    def run(self, proof=False, proofWorkers=None):
        outputDir = self._outputPrefix+"_"+time.strftime("%y%m%d_%H%M%S")
        if self._outputPostfix != "":
            outputDir += "_"+self._outputPostfix

        # Create output directory
        os.mkdir(outputDir)
        self.Print("Created output directory %s" % (sh_Note + outputDir + sh_Normal), True)

        multicrabCfg = os.path.join(outputDir, "multicrab.cfg")
        f = open(multicrabCfg, "w")
        # For-loop: All datasets to be run
        for dset in self._datasets:
            f.write("[%s]\n\n" % dset.getName())
        f.close()

        # Copy/merge lumi files
        lumifiles = set([d.getLumiFile() for d in self._datasets])
        lumidata = {}
        for fname in lumifiles:
            if not os.path.exists(fname):
                continue
            f = open(fname)
            data = json.load(f)
            f.close()
            for k in data.keys():
                if k in lumidata:
                    msg  = "Luminosity JSON file %s has a dataset (%s) for which the luminosity has already been loaded. " % (fname, k) 
                    msg += "Please check the luminosity JSON files:\n%s" % ("\n".join(lumifiles))
                    raise Exception(sh_Error + msg + sh_Normal)
            lumidata.update(data)
        if len(lumidata) > 0:
            # Add run range in a json file, if runMin and runMax in pset
            rrdata = {}
            for aname, analyzerIE in self._analyzers.iteritems():
                ana = analyzerIE.getAnalyzer()
                if hasattr(ana, "__call__"):
                    for dset in self._datasets:
                        if dset.getDataVersion().isData():
                            ana = ana(dset.getDataVersion())
                            if ana.__getattr__("runMax") > 0:
                                rrdata[aname] = "%s-%s"%(ana.__getattr__("runMin"),ana.__getattr__("runMax"))
                                #lumidata[aname] = ana.__getattr__("lumi")
                                break
            if len(rrdata) > 0:
                f = open(os.path.join(outputDir, "runrange.json"), "w")
                json.dump(rrdata, f, sort_keys=True, indent=2)
                f.close()

            # Create the luminosity JSON file
            f = open(os.path.join(outputDir, "lumi.json"), "w")
            json.dump(lumidata, f, sort_keys=True, indent=2)
            self.Verbose("Created luminosity json file %s" % (sh_Note + f.name + sh_Normal), True)
            f.close()

        # Setup proof if asked
        _proof = None
        if proof:
            opt = ""
            if proofWorkers is not None:
                opt = "workers=%d"%proofWorkers
            _proof = ROOT.TProof.Open(opt)
            _proof.Exec("gSystem->Load(\"libHPlusAnalysis.so\");")

        # Init timing counters
        realTimeTotal = 0
        cpuTimeTotal = 0
        readMbytesTotal = 0
        callsTotal = 0

        # Print the datasets that will be run on!
        self.Print("Will process %d datasets in total:" % (len(self._datasets) ), True)
        for i, d in enumerate(self._datasets, 1):
            self.Print("%d) %s" % (i, sh_Note + d.getName() + sh_Normal), i==0)

        # Process over datasets
        ndset = 0
        for i, dset in enumerate(self._datasets, 1):
            hPUs = self._getDataPUhistos()
            # Initialize
            ndset += 1
            inputList = ROOT.TList()
            nanalyzers = 0
            anames = []
            usePUweights = False
            useTopPtCorrection = False
            nAllEventsPUWeighted = 0.0
            for aname, analyzerIE in self._analyzers.iteritems():
                if analyzerIE.runForDataset_(dset.getName()):
                    nanalyzers += 1
                    analyzer = analyzerIE.getAnalyzer()
                    if hasattr(analyzer, "__call__"):
                        analyzer = analyzer(dset.getDataVersion())
                        if analyzer is None:
                            raise Exception("Analyzer %s was specified as a function, but returned None" % aname)
                        if not isinstance(analyzer, Analyzer):
                            raise Exception("Analyzer %s was specified as a function, but returned object of %s instead of Analyzer" % (aname, analyzer.__class__.__name__))
                    inputList.Add(ROOT.TNamed("analyzer_"+aname, analyzer.className_()+":"+analyzer.config_()))
                    # ttbar status for top pt corrections
                    ttbarStatus = "0"
                    useTopPtCorrection = analyzer.exists("useTopPtWeights") and analyzer.__getattr__("useTopPtWeights")
                    #useTopPtCorrection = useTopPtCorrection and dset.getName().startswith("TT")
                    useTopPtCorrection = useTopPtCorrection and self.isTTbarDataset(dset)
                    if useTopPtCorrection:
                        ttbarStatus = "1"
                    inputList.Add(ROOT.TNamed("isttbar", ttbarStatus))
                    # intermediate H+ status for reweighting the NoNeutral samples
                    intermediateStatus = "0"
                    if dset.getName().find("IntermediateMassNoNeutral") > 0:
                        intermediateStatus = "1"
                    inputList.Add(ROOT.TNamed("isIntermediateNoNeutral", intermediateStatus))

                    # Pileup reweighting
                    self.Verbose("Getting pileup reweighting weights", True)
                    (puAllEvents, puStatus) = self._parsePUweighting(dset, analyzer, aname, hPUs, inputList)
                    nAllEventsPUWeighted += puAllEvents
                    usePUweights = puStatus
                    # Sum skim counters (from ttree)
                    hSkimCounterSum = self._getSkimCounterSum(dset.getFileNames())
                    inputList.Add(hSkimCounterSum)
                    # Add name
                    anames.append(aname)
            if nanalyzers == 0:
                self.Print("Skipping %s, no analyzers" % dset.getName(), True)
                continue                            

            self.Print("Processing dataset (%d/%d)" % (ndset, len(self._datasets) ))
            align = "{:<23} {:<1} {:<60}"
            info = {}
            info["Dataset"] = dset.getName()
            if dset.getDataVersion().isData():
                lumivalue = "--- not available in lumi.json (or lumi.json not available) ---"
                if dset.getName() in lumidata.keys():
                    lumivalue = lumidata[dset.getName()]
                info["Luminosity"] = str(lumivalue) + " fb-1"
            info["UsePUweights"] = usePUweights
            info["UseTopPtCorrection"] = useTopPtCorrection
            for key in info:
                self.Print(align.format(key, ":", info[key]), False)

            # Create directory for the dataset's ROOT files
            resDir = os.path.join(outputDir, dset.getName(), "res")
            resFileName = os.path.join(resDir, "histograms-%s.root"%dset.getName())
            os.makedirs(resDir)

            tchain = ROOT.TChain("Events")
            # For-loop: All file names for dataset
            for f in dset.getFileNames():
                tchain.Add(f)
            tchain.SetCacheLearnEntries(1000)
            tchain.SetCacheSize(10000000)  # Set cache size to 10 MB (somehow it is not automatically set, contrary to ROOT docs)

            tselector = ROOT.SelectorImpl()

            # FIXME: TChain.GetEntries() is needed only to give a time
            # estimate for the analysis. If this turns out to be slow,
            # we could store the number of events along the file names
            # (whatever is the method for that)
            inputList.Add(ROOT.TNamed("entries", str(tchain.GetEntries())))
            if dset.getDataVersion().isMC():
                inputList.Add(ROOT.TNamed("isMC", "1"))
            else:
                inputList.Add(ROOT.TNamed("isMC", "0"))
            inputList.Add(ROOT.TNamed("options", self._options.serialize_()))
            inputList.Add(ROOT.TNamed("printStatus", "1"))

            if _proof is not None:
                tchain.SetProof(True)
                inputList.Add(ROOT.TNamed("PROOF_OUTPUTFILE_LOCATION", resFileName))
            else:
                inputList.Add(ROOT.TNamed("OUTPUTFILE_LOCATION", resFileName))

            tselector.SetInputList(inputList)

            readBytesStart = ROOT.TFile.GetFileBytesRead()
            readCallsStart = ROOT.TFile.GetFileReadCalls()
            timeStart = time.time()
            clockStart = time.clock()

            # Determine how many events to run on for given dataset
            if len(self._maxEvents.keys()) > 0:
                key = ""
                for k in self._maxEvents.keys():
                    if k.lower() == "all":
                        key = k
                        break
                    maxEv_re = re.compile(k)
                    match = maxEv_re.search(dset.getName())
                    if match:
                        key = k
                        break
                if key == "":
                    tchain.Process(tselector)
                else:
                    maxEvts  = self._maxEvents[key]
                    if maxEvts == -1:
                        tchain.Process(tselector)
                    else:
                        tchain.SetCacheEntryRange(0, self._maxEvents[key])
                        tchain.Process(tselector, "", self._maxEvents[key])
            else:
                tchain.Process(tselector)
            if _debugMemoryConsumption:
                print "    MEMDBG: TChain cache statistics:"
                tchain.PrintCacheStats()
            
            # Obtain Nall events for top pt corrections
            NAllEventsTopPt = 0
            if useTopPtCorrection:
                for inname in dset.getFileNames():
                    fIN = ROOT.TFile.Open(inname)
                    h = fIN.Get("configInfo/topPtWeightAllEvents")
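                    # NOTE: "!= None" (rather than "is not None") is deliberate:
                    # for a missing key PyROOT's Get() returns a null object
                    # proxy, which compares equal to None but is not None.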
                    if h != None:
                        binNumber = 2 # nominal
                        if hasattr(analyzer, "topPtSystematicVariation"):
                            variation = getattr(analyzer, "topPtSystematicVariation")
                            if variation == "minus":
                                binNumber = 0
                            # FIXME: The bin is to be added to the ttrees
                            #elif variation == "plus":
                                #binNumber = 3
                                #if not h.GetXaxis().GetBinLabel().endsWith("Plus"):
                                    #raise Exception("This should not happen")
                        if binNumber > 0:
                            NAllEventsTopPt += h.GetBinContent(binNumber)
                    else:
                        raise Exception("Warning: Could not obtain N(AllEvents) for top pt reweighting")
                    ROOT.gROOT.GetListOfFiles().Remove(fIN)
                    fIN.Close()

            # Write configInfo
            fIN = ROOT.TFile.Open(dset.getFileNames()[0])
            cinfo = fIN.Get("configInfo/configinfo")
            tf = ROOT.TFile.Open(resFileName, "UPDATE")
            configInfo = tf.Get("configInfo")
            if configInfo == None:
                configInfo = tf.mkdir("configInfo")
            configInfo.cd()
            dv = ROOT.TNamed("dataVersion", str(dset.getDataVersion()))
            dv.Write()
            dv.Delete()
            cv = ROOT.TNamed("codeVersionAnalysis", git.getCommitId())
            cv.Write()
            cv.Delete()
            if cinfo != None:
                # Add more information to configInfo
                n = cinfo.GetNbinsX()
                cinfo.SetBins(n+3, 0, n+3)
                cinfo.GetXaxis().SetBinLabel(n+1, "isData")
                cinfo.GetXaxis().SetBinLabel(n+2, "isPileupReweighted")
                cinfo.GetXaxis().SetBinLabel(n+3, "isTopPtReweighted")
                # Add "isData" column
                if not dset.getDataVersion().isMC():
                    cinfo.SetBinContent(n+1, cinfo.GetBinContent(1))
                # Add "isPileupReweighted" column
                if usePUweights:
                    cinfo.SetBinContent(n+2, nAllEventsPUWeighted / nanalyzers)
                # Add "isTopPtReweighted" column
                if useTopPtCorrection:
                    cinfo.SetBinContent(n+3, NAllEventsTopPt)
                # Write
                cinfo.Write()
                ROOT.gROOT.GetListOfFiles().Remove(fIN)
                fIN.Close()

            # Memory management
            configInfo.Delete()
            ROOT.gROOT.GetListOfFiles().Remove(tf)
            tf.Close()
            for item in inputList:
                if isinstance(item, ROOT.TObject):
                    item.Delete()
            inputList = None
            if hSkimCounterSum != None:
                hSkimCounterSum.Delete()
            if _debugMemoryConsumption:
                print "      MEMDBG: gDirectory", ROOT.gDirectory.GetList().GetSize()
                print "      MEMDBG: list ", ROOT.gROOT.GetList().GetSize()
                print "      MEMDBG: globals ", ROOT.gROOT.GetListOfGlobals().GetSize()
                #for item in ROOT.gROOT.GetListOfGlobals():
                #    print item.GetName()
                print "      MEMDBG: files", ROOT.gROOT.GetListOfFiles().GetSize()
                #for item in ROOT.gROOT.GetListOfFiles():
                #    print "          %d items"%item.GetList().GetSize()
                print "      MEMDBG: specials ", ROOT.gROOT.GetListOfSpecials().GetSize()
                for item in ROOT.gROOT.GetListOfSpecials():
                    print "          "+item.GetName()
                
                #gDirectory.GetList().Delete();
                #gROOT.GetList().Delete();
                #gROOT.GetListOfGlobals().Delete();
                #TIter next(gROOT.GetList());
                #while (TObject* o = dynamic_cast<TObject*>(next())) {
                  #o.Delete();
                #}
            
            # Performance and information
            timeStop = time.time()
            clockStop = time.clock()
            readCallsStop = ROOT.TFile.GetFileReadCalls()
            readBytesStop = ROOT.TFile.GetFileBytesRead()

            calls = ""
            if _proof is not None:
                tchain.SetProof(False)
                queryResult = _proof.GetQueryResult()
                cpuTime = queryResult.GetUsedCPU()
                readMbytes = queryResult.GetBytes()/1024/1024
            else:
                cpuTime = clockStop-clockStart
                readMbytes = float(readBytesStop-readBytesStart)/1024/1024
                calls = " (%d calls)" % (readCallsStop-readCallsStart)
            realTime = timeStop-timeStart

            # Print usage stats in user-friendly formatting
            self.PrintStats(readCallsStop, readCallsStart, cpuTime, realTime, readMbytes)

            # Time accumulation
            realTimeTotal   += realTime
            cpuTimeTotal    += cpuTime
            readMbytesTotal += readMbytes

        # Total time stats
        self.PrintStatsTotal(readMbytes, cpuTimeTotal, realTimeTotal, readMbytesTotal)

        # Inform user of location of results
        self.Print("Results are in %s" % (sh_Success + outputDir + sh_Normal), True)
        return outputDir
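This variant resolves the per-dataset event limit by matching the keys of self._maxEvents against the dataset name: an "all" key is taken as soon as it is seen, otherwise the first key whose regular expression matches the name wins, and -1 (or no match) means "process all events". A standalone sketch of the equivalent lookup, with invented dataset names for illustration:

import re

def resolveMaxEvents(maxEvents, datasetName):
    # Same lookup order as in run() above.
    for k in maxEvents.keys():
        if k.lower() == "all":
            return maxEvents[k]
        if re.search(k, datasetName):
            return maxEvents[k]
    return -1

print resolveMaxEvents({"TT": 10000}, "TT_TuneCUETP8M1")  # -> 10000
print resolveMaxEvents({"all": 5000}, "WJetsToLNu")       # -> 5000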
Example #5
    def __init__(self, opts, directory, massPoints, datacardPatterns, rootfilePatterns, clsType):
        self.opts = opts
        self.datacardDirectory = directory
        self.massPoints = massPoints
        self.datacardPatterns = datacardPatterns
        self.rootfilePatterns = rootfilePatterns
        self.clsType = clsType.clone()
        self.jobsCreated = False
        self.datacards = {}
        self.rootfiles = {}
        self.scripts   = []
        self.configuration = {}

        if not os.path.isdir(directory):
            raise Exception("Datacard directory '%s' does not exist" % directory)

        # this is a dictionary dumped to configuration.json
        self.configuration = {
            "masspoints": massPoints,
            "datacards": datacardPatterns,
            "rootfiles": rootfilePatterns,
            "codeVersion": git.getCommitId(),
            "clsType": self.clsType.name(),
        }
        clsConfig = self.clsType.getConfiguration(self.configuration)
        if clsConfig is not None:
            self.configuration["clsConfig"] = clsConfig

        for mass in self.massPoints:
            for dc in datacardPatterns:
                fname = None
                if "%s" in dc:
                    fname = os.path.join(self.datacardDirectory, dc % mass)
                else:
                    fname = os.path.join(self.datacardDirectory, dc)
                if not os.path.isfile(fname):
                    raise Exception("Datacard file '%s' does not exist!" % fname)

                aux.addToDictList(self.datacards, mass, fname)

            for rf in rootfilePatterns:
                if rf is not None:
                    rfname = None
                    if "%s" in rf:
                        rfname = os.path.join(self.datacardDirectory, rf % mass)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    else:
                        rfname = os.path.join(self.datacardDirectory, rf)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    # if root files are not found, try with 1pr and 3pr extensions in the name
                    if not os.path.isfile(rfname):
                        rf_1pr = rf.replace(".root", "_1pr.root")
                        rf_3pr = rf.replace(".root", "_3pr.root")
                        rfname_1pr = os.path.join(self.datacardDirectory, rf_1pr % mass)
                        rfname_3pr = os.path.join(self.datacardDirectory, rf_3pr % mass)
                        aux.addToDictList(self.rootfiles, mass, rfname_1pr)
                        aux.addToDictList(self.rootfiles, mass, rfname_3pr)
                        # if still not found, warn (raising an exception is deliberately disabled)
                        if not os.path.isfile(rfname_1pr) or not os.path.isfile(rfname_3pr):
                            #raise Exception("ROOT file (for shapes) '%s' does not exist!" % rfname)
                            print("\033[91mWarning: ROOT file (for shapes) '%s' does not exist!\033[00m" % rfname)
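The fallback above only rewrites file names before checking for their existence; for a pattern with a "%s" mass placeholder the derived names look like this (the pattern string is hypothetical):

rf = "hplushadronic_shapes_m%s.root"  # hypothetical pattern
print(rf.replace(".root", "_1pr.root") % 200)  # hplushadronic_shapes_m200_1pr.root
print(rf.replace(".root", "_3pr.root") % 200)  # hplushadronic_shapes_m200_3pr.root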
Example #6
    def __init__(self, opts, directory, massPoints, datacardPatterns,
                 rootfilePatterns, clsType):
        '''
        Constructor

        \param opts               Options object (parsed command line options)

        \param directory          Datacard directory

        \param massPoints         List of mass points for which to calculate the limit

        \param datacardPatterns   List of datacard patterns to include in the limit calculation

        \param rootfilePatterns   List of shape ROOT file patterns to include in the limit calculation

        \param clsType            Object defining the CLs flavour (either LEPType or LHCType).
        '''
        self.opts = opts
        self.datacardDirectory = directory
        self.massPoints = massPoints
        self.datacardPatterns = datacardPatterns
        self.rootfilePatterns = rootfilePatterns
        self.clsType = clsType.clone()
        self.jobsCreated = False
        self.datacards = {}
        self.rootfiles = {}
        self.scripts = []
        self.configuration = {}

        if not os.path.isdir(directory):
            raise Exception("Datacard directory '%s' does not exist" %
                            directory)

        # this is a dictionary dumped to configuration.json
        self.configuration = {
            "masspoints": massPoints,
            "datacards": datacardPatterns,
            "rootfiles": rootfilePatterns,
            "codeVersion": git.getCommitId(),
            "clsType": self.clsType.name(),
        }
        clsConfig = self.clsType.getConfiguration(self.configuration)
        if clsConfig is not None:
            self.configuration["clsConfig"] = clsConfig

        for mass in self.massPoints:
            for dc in datacardPatterns:
                fname = None
                if "%s" in dc:
                    fname = os.path.join(self.datacardDirectory, dc % mass)
                else:
                    fname = os.path.join(self.datacardDirectory, dc)
                if not os.path.isfile(fname):
                    raise Exception("Datacard file '%s' does not exist!" %
                                    fname)

                aux.addToDictList(self.datacards, mass, fname)

            for rf in rootfilePatterns:
                if rf is not None:
                    rfname = None
                    if "%s" in rf:
                        rfname = os.path.join(self.datacardDirectory,
                                              rf % mass)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    else:
                        rfname = os.path.join(self.datacardDirectory, rf)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    # If the ROOT file is not found, try the 0/1/2 b-jet category variants of the name
                    # (a refactoring sketch for this repeated fallback follows this example)
                    if not os.path.isfile(rfname):
                        rf_0bjet = rf.replace(".root", "_0bjet.root")
                        rf_1bjet = rf.replace(".root", "_1bjet.root")
                        rf_2bjets = rf.replace(".root", "_2bjets.root")

                        # Substitute the mass point only if the pattern contains '%s'
                        rfname_0bjet = os.path.join(
                            self.datacardDirectory,
                            rf_0bjet % mass if "%s" in rf else rf_0bjet)
                        rfname_1bjet = os.path.join(
                            self.datacardDirectory,
                            rf_1bjet % mass if "%s" in rf else rf_1bjet)
                        rfname_2bjets = os.path.join(
                            self.datacardDirectory,
                            rf_2bjets % mass if "%s" in rf else rf_2bjets)
                        aux.addToDictList(self.rootfiles, mass, rfname_0bjet)
                        aux.addToDictList(self.rootfiles, mass, rfname_1bjet)
                        aux.addToDictList(self.rootfiles, mass, rfname_2bjets)

                        # If still not found, warn; a stricter variant could raise an exception here instead
                        if (not os.path.isfile(rfname_0bjet)
                                or not os.path.isfile(rfname_1bjet)
                                or not os.path.isfile(rfname_2bjets)):
                            print("\033[91mWarning: ROOT file (for shapes) '%s' does not exist!\033[00m"
                                  % rfname)
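The constructor variants in this file differ only in the list of filename suffixes tried when the plain shape file is missing. A hedged refactoring sketch of that shared fallback; the helper name resolveShapeFiles is hypothetical and not part of the original class:

import os

def resolveShapeFiles(directory, pattern, mass, suffixes):
    # Hypothetical helper: resolve a shape ROOT file pattern, falling back to
    # per-category variants (e.g. "_0bjet", "_1bjet") when the plain file is
    # missing. Returns the list of candidate paths to register.
    def path(p):
        # Substitute the mass point only if the pattern contains '%s'
        return os.path.join(directory, p % mass if "%s" in p else p)

    base = path(pattern)
    if os.path.isfile(base):
        return [base]
    candidates = [path(pattern.replace(".root", s + ".root")) for s in suffixes]
    if any(not os.path.isfile(c) for c in candidates):
        print("\033[91mWarning: ROOT file (for shapes) '%s' does not exist!\033[00m" % base)
    return candidates

Each variant would then reduce to registering the returned paths, e.g. for c in resolveShapeFiles(self.datacardDirectory, rf, mass, ["_1pr", "_3pr"]): aux.addToDictList(self.rootfiles, mass, c).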
Example #10
    def __init__(self, opts, directory, massPoints, datacardPatterns, rootfilePatterns, clsType):
        '''
        Constructor

        \param opts               Command line options

        \param directory          Datacard directory

        \param massPoints         List of mass points for which to calculate the limit

        \param datacardPatterns   List of datacard patterns to include in the limit calculation

        \param rootfilePatterns   List of shape ROOT file patterns to include in the limit calculation

        \param clsType            Object defining the CLs flavour (either LEPType or LHCType).

        (A usage sketch follows this example.)
        '''
        self.opts = opts
        self.datacardDirectory = directory
        self.massPoints = massPoints
        self.datacardPatterns = datacardPatterns
        self.rootfilePatterns = rootfilePatterns
        self.clsType = clsType.clone()
        self.jobsCreated = False
        self.datacards = {}
        self.rootfiles = {}
        self.scripts = []
        self.configuration = {}

        if not os.path.isdir(directory):
            raise Exception("Datacard directory '%s' does not exist" % directory)

        # this is a dictionary dumped to configuration.json
        self.configuration = {
            "masspoints": massPoints,
            "datacards": datacardPatterns,
            "rootfiles": rootfilePatterns,
            "codeVersion": git.getCommitId(),
            "clsType": self.clsType.name(),
        }
        clsConfig = self.clsType.getConfiguration(self.configuration)
        if clsConfig is not None:
            self.configuration["clsConfig"] = clsConfig

        for mass in self.massPoints:
            for dc in datacardPatterns:
                fname = None
                if "%s" in dc:
                    fname = os.path.join(self.datacardDirectory, dc % mass)
                else:
                    fname = os.path.join(self.datacardDirectory, dc)
                if not os.path.isfile(fname):
                    raise Exception("Datacard file '%s' does not exist!" % fname)

                aux.addToDictList(self.datacards, mass, fname)

            for rf in rootfilePatterns:
                if rf is not None:
                    rfname = None
                    if "%s" in rf:
                        rfname = os.path.join(self.datacardDirectory, rf % mass)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    else:
                        rfname = os.path.join(self.datacardDirectory, rf)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    # If the ROOT file is not found, try the category a/b/c variants of the name
                    if not os.path.isfile(rfname):
                        rf_a = rf.replace(".root", "_a.root")
                        rf_b = rf.replace(".root", "_b.root")
                        rf_c = rf.replace(".root", "_c.root")
                        # Substitute the mass point only if the pattern contains '%s'
                        rfname_a = os.path.join(self.datacardDirectory, rf_a % mass if "%s" in rf else rf_a)
                        rfname_b = os.path.join(self.datacardDirectory, rf_b % mass if "%s" in rf else rf_b)
                        rfname_c = os.path.join(self.datacardDirectory, rf_c % mass if "%s" in rf else rf_c)
                        aux.addToDictList(self.rootfiles, mass, rfname_a)
                        aux.addToDictList(self.rootfiles, mass, rfname_b)
                        aux.addToDictList(self.rootfiles, mass, rfname_c)
                        # If still not found, warn; a stricter variant could raise an exception here instead
                        if not os.path.isfile(rfname_a) or not os.path.isfile(rfname_b) or not os.path.isfile(rfname_c):
                            print("\033[91mWarning: ROOT file (for shapes) '%s' does not exist!\033[00m" % rfname)
Example #11
    def __init__(self, opts, directory, massPoints, datacardPatterns,
                 rootfilePatterns, clsType):
        self.opts = opts
        self.datacardDirectory = directory
        self.massPoints = massPoints
        self.datacardPatterns = datacardPatterns
        self.rootfilePatterns = rootfilePatterns
        self.clsType = clsType.clone()
        self.jobsCreated = False
        self.datacards = {}
        self.rootfiles = {}
        self.scripts = []
        self.configuration = {}

        if not os.path.isdir(directory):
            raise Exception("Datacard directory '%s' does not exist" %
                            directory)

        # this is a dictionary dumped to configuration.json
        self.configuration = {
            "masspoints": massPoints,
            "datacards": datacardPatterns,
            "rootfiles": rootfilePatterns,
            "codeVersion": git.getCommitId(),
            "clsType": self.clsType.name(),
        }
        clsConfig = self.clsType.getConfiguration(self.configuration)
        if clsConfig is not None:
            self.configuration["clsConfig"] = clsConfig

        for mass in self.massPoints:
            for dc in datacardPatterns:
                fname = None
                if "%s" in dc:
                    fname = os.path.join(self.datacardDirectory, dc % mass)
                else:
                    fname = os.path.join(self.datacardDirectory, dc)
                if not os.path.isfile(fname):
                    raise Exception("Datacard file '%s' does not exist!" %
                                    fname)

                aux.addToDictList(self.datacards, mass, fname)

            for rf in rootfilePatterns:
                if rf is not None:
                    rfname = None
                    if "%s" in rf:
                        rfname = os.path.join(self.datacardDirectory,
                                              rf % mass)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    else:
                        rfname = os.path.join(self.datacardDirectory, rf)
                        aux.addToDictList(self.rootfiles, mass, rfname)
                    # If the ROOT file is not found, try the 1pr and 3pr variants of the name
                    if not os.path.isfile(rfname):
                        rf_1pr = rf.replace(".root", "_1pr.root")
                        rf_3pr = rf.replace(".root", "_3pr.root")
                        # Substitute the mass point only if the pattern contains '%s'
                        rfname_1pr = os.path.join(self.datacardDirectory,
                                                  rf_1pr % mass if "%s" in rf else rf_1pr)
                        rfname_3pr = os.path.join(self.datacardDirectory,
                                                  rf_3pr % mass if "%s" in rf else rf_3pr)
                        aux.addToDictList(self.rootfiles, mass, rfname_1pr)
                        aux.addToDictList(self.rootfiles, mass, rfname_3pr)
                        # If still not found, warn; a stricter variant could raise an exception here instead
                        if not os.path.isfile(rfname_1pr) or not os.path.isfile(rfname_3pr):
                            print("\033[91mWarning: ROOT file (for shapes) '%s' does not exist!\033[00m"
                                  % rfname)
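Every variant notes that self.configuration is "a dictionary dumped to configuration.json", but the dump itself is not part of these snippets. A minimal sketch of what that step presumably looks like elsewhere in the class; the method name and target directory are assumptions:

import json
import os

def writeConfiguration(self):
    # Assumed dump of the configuration dict next to the datacards; the real
    # class may write it from a different method or location.
    with open(os.path.join(self.datacardDirectory, "configuration.json"), "w") as f:
        json.dump(self.configuration, f, sort_keys=True, indent=2)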