def getNormFactorFileList(dirName, fileBaseName):
    '''
    Return the names of the normalisation-factor python files found directly
    under dirName.

    A file qualifies if its name starts with fileBaseName stripped of its
    "%s.py" placeholder (i.e. the script "base" name without the
    moduleInfoString) and ends with ".py". Sub-directories are never
    considered, and the search is not recursive.

    Parameters:
        dirName      : directory to scan
        fileBaseName : file-name template, e.g. "<BaseName>%s.py"

    Returns:
        List of matching file names (without the directory part), in the
        order given by os.listdir().

    Raises:
        Exception if no matching file is found.
    '''
    # The prefix is loop-invariant, so compute it only once
    prefix = fileBaseName.replace("%s.py", "")

    scriptList = []
    # For-loop: All items (files/dir) in directory
    for item in os.listdir(dirName):
        # Skip directories, even if their name happens to match
        if os.path.isdir(os.path.join(dirName, item)):
            continue

        # Find files matching the script "Base" name (without moduleInfoStrings)
        if item.startswith(prefix) and item.endswith(".py"):
            scriptList.append(item)

    if not scriptList:
        msg = "ERROR! Found no normalization info files under dir %s. Did you generate them?" % dirName
        raise Exception(ShellStyles.ErrorStyle() + msg +
                        ShellStyles.NormalStyle())

    # Join each file name with its directory individually. Passing the whole
    # list to os.path.join() (as done previously) raises TypeError on Python 3.
    fullPaths = [os.path.join(dirName, s) for s in scriptList]
    msg = "Found %s norm-factor file(s):\n\t%s" % (len(scriptList),
                                                    "\n\t".join(fullPaths))
    Verbose(ShellStyles.NoteLabel() + msg, True)
    return scriptList
Exemple #2
0
                      default=NORM_EWK_SRC,
                      help="Source of EWK normalisation [default: %s" %
                      (NORM_EWK_SRC))

    (opts, parseArgs) = parser.parse_args()

    # Require at least two arguments (script-name, path to multicrab)
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    if opts.mcrab == None:
        Print(
            "Not enough arguments passed to script execution. Printing docstring & EXIT."
        )
        parser.print_help()
        #print __doc__
        sys.exit(1)

    if opts.useInclusiveNorm:
        msg = "Will use only inclusive weight instead of binning (no splitted histograms)"
        Print(ShellStyles.NoteLabel() + msg, True)

    # Call the main function
    main()

    if not opts.batchMode:
        raw_input(
            "=== makeInvertedPseudoMulticrab.py: Press any key to quit ROOT ..."
        )
Exemple #3
0
def importNormFactors(era, searchMode, optimizationMode, multicrabDirName):
    '''
    Imports the auto-generated QCDInvertedNormalizationFactors.py file, which is
    created by the plotting/fitting templates script (plotQCD_Fit.py), and
    returns the normalisation factors it defines.

    This contains the results of fitting to the Baseline Data the templates m_{jjb}
    shapes from the QCD (Inverted Data) and EWK (Baseline MC).

    Results include the fit details for each shape and the QCD NormFactor for moving
    from the ControlRegion (CR) to the Signal Region (SR).

    The fitting script produces the aforementioned python file and a folder with the
    histogram ROOT files and the individual fits. The folder name will be
    normalisationPlots/<OptsMode> and will be placed inside the <pseudomulticrab_dir>.
    The auto-generated file will be placed in the cwd (i.e. work/)

    Parameters:
        era, searchMode, optimizationMode : used to build the module-info string
            that selects the file, and passed to the file's own safety check
        multicrabDirName : directory holding the normalisation-factor file(s)

    Returns:
        Dictionary with the single key "nominal", mapped to the imported
        module's "QCDNormalization" attribute.

    Raises:
        Whatever getNormFactorFileList() raises if no candidate file exists,
        ImportError if the source file cannot be imported, and AttributeError
        if it lacks "QCDInvertedNormalizationSafetyCheck" or "QCDNormalization".
    '''
    # Find candidates for normalisation scripts. Only the side effect matters
    # here: the call raises if no file is found under multicrabDirName.
    getNormFactorFileList(dirName=multicrabDirName,
                          fileBaseName=opts.normFactorsSrc)

    # Create a string with the module information used
    moduleInfoString = getModuleInfoString(era, searchMode, optimizationMode)

    # Construct source file name
    src = getGetNormFactorsSrcFilename(multicrabDirName,
                                       opts.normFactorsSrc % moduleInfoString)

    # Check if normalization coefficients are suitable for the chosen era
    Verbose("Reading normalisation factors from:\n\t%s" % src, True)

    # Split src into its directory and module name. Only a trailing ".py" is
    # removed: a blanket replace(".py", "") would also mangle directory names
    # that happen to contain ".py".
    srcDir, srcFile = os.path.split(src)
    moduleName = srcFile[:-len(".py")] if srcFile.endswith(".py") else srcFile

    # Insert the directory where the normFactor files reside into the path so that they are found
    if srcDir:
        # $PWD may be unset (e.g. in a stripped batch environment), so fall back to getcwd()
        cwd = os.getenv("PWD") or os.getcwd()
        sys.path.insert(0, os.path.join(cwd, srcDir))

    # Import the (normFactor) src file
    normFactorsImport = __import__(moduleName)

    # Get the function definition
    myNormFactorsSafetyCheck = getattr(normFactorsImport,
                                       "QCDInvertedNormalizationSafetyCheck")

    Verbose(
        "Check that the era=%s, searchMode=%s, optimizationMode=%s info matches!"
        % (era, searchMode, optimizationMode))
    myNormFactorsSafetyCheck(era, searchMode, optimizationMode)

    # Obtain normalization factors
    myNormFactorsImport = getattr(normFactorsImport, "QCDNormalization")
    msg = "Disabled NormFactors Syst Var Fake Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactorsImportSystVarFakeWeightingDown = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarDown") #FIXME
    # myNormFactorsImportSystVarFakeWeightingUp   = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarUp")   #FIXME

    myNormFactors = {"nominal": myNormFactorsImport}
    msg = "Obtained \"nominal\" QCD normalisation factors dictionary. The values are:\n"
    msg += "".join("\t" + k + " = " + str(myNormFactorsImport[k])
                   for k in myNormFactorsImport)
    Print(ShellStyles.NoteLabel() + msg, True)

    msg = "Disabled NormFactors Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactors["FakeWeightingDown"] = myNormFactorsImportSystVarFakeWeightingDown # FIXME
    # myNormFactors["FakeWeightingUp"]   = myNormFactorsImportSystVarFakeWeightingUp   # FIXME
    return myNormFactors
    def _doCalculate(self, shape, moduleInfoString, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms):
        '''
        Calculates the result: the data-driven QCD shape of every phase-space
        split bin is multiplied by its normalisation factor and summed into
        self._resultShape.

        Side effects (attributes set on self):
          _resultShape, _resultShapeEWK, _resultShapePurity : clones of the
              split-bin-0 QCD histogram, reset and renamed with
              moduleInfoString. In the 1D path visible here only
              _resultShape is filled.
          _histogramsList    : one clone of _resultShape per split bin, created
              and filled only if optionDoNQCDByBinHistograms is true.
          _resultCountObject : ExtendedCount to which every bin's count
              (with labels "statData" and "statEWK") is added.

        Parameters:
          shape            : provides the per-split-bin histograms
                             (data-driven QCD, data, EWK), the split-bin
                             titles and the histogram name
          moduleInfoString : suffix used in histogram names and titles
          normFactors      : maps a split-bin label (or "Inclusive") to its
                             normalisation factor; a missing label is warned
                             about and weighted with 0.0
          optionPrintPurityByBins     : if true, self.PrintPurityByBins() is
                                        called at the end
          optionDoNQCDByBinHistograms : if true, per-split-bin histograms are
                                        produced

        Returns nothing. If _resultShape is a ROOT.TH2 the work is delegated
        to self._doCalculate2D() and the method returns right after.
        '''
        Verbose("Calculate final shape in signal region (shape * w_QCD) & initialize result containers", True)
        nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()

        # The three result histograms below are all created the same way:
        # clone split bin 0 to inherit its binning, then Reset() and rename.
        Verbose("Create Shape", True)
        self._resultShape = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
        self._resultShape.Reset()
        self._resultShape.SetTitle("NQCDFinal_Total_%s"%moduleInfoString)
        self._resultShape.SetName("NQCDFinal_Total_%s"%moduleInfoString)

        Verbose("Create EWK shape", True)
        self._resultShapeEWK = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
        self._resultShapeEWK.Reset()
        self._resultShapeEWK.SetTitle("NQCDFinal_EWK_%s"%moduleInfoString)
        self._resultShapeEWK.SetName("NQCDFinal_EWK_%s"%moduleInfoString)

        Verbose("Create Purity shape", True)
        self._resultShapePurity = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
        self._resultShapePurity.Reset()
        self._resultShapePurity.SetTitle("NQCDFinal_Purity_%s"%moduleInfoString)
        self._resultShapePurity.SetName("NQCDFinal_Purity_%s"%moduleInfoString)

        # Result containers: per-split-bin histograms and the total count
        self._histogramsList = []
        myUncertaintyLabels  = ["statData", "statEWK"]
        self._resultCountObject = extendedCount.ExtendedCount(0.0, [0.0, 0.0], myUncertaintyLabels)

        # Optionally book one (still empty) histogram per phase-space split bin
        if optionDoNQCDByBinHistograms:
            for i in range(0, nSplitBins):
                hBin = aux.Clone(self._resultShape)
                hBin.SetTitle("NQCDFinal_%s_%s"%(shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ",""), moduleInfoString))
                hBin.SetName("NQCDFinal_%s_%s"%(shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ",""), moduleInfoString))
                self._histogramsList.append(hBin)

        # 2D histograms are handled entirely by a dedicated method
        if isinstance(self._resultShape, ROOT.TH2):
            self._doCalculate2D(nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels)
            return

        # Intialize counters for purity calculation in final shape binning
        # (one entry per shape bin; list index = bin number - 1)
        myShapeDataSum       = []
        myShapeDataSumUncert = []
        myShapeEwkSum        = []
        myShapeEwkSumUncert  = []

        # For-loop: All Bins
        for j in range(1,self._resultShape.GetNbinsX()+1):
            myShapeDataSum.append(0.0)
            myShapeDataSumUncert.append(0.0)
            myShapeEwkSum.append(0.0)
            myShapeEwkSumUncert.append(0.0)

        Verbose("Calculate results separately for each phase-space bin and then combine", True)
        # For-loop: All measurement bins (e.g. tau pT bins for HToTauNu)
        for i in range(0, nSplitBins):
            # N.B: The \"Inclusive\" value is in the zeroth bin

            Verbose("Get data-driven QCD, data, and MC EWK shape histogram for the phase-space bin", True)
            h     = shape.getDataDrivenQCDHistoForSplittedBin(i)
            hData = shape.getDataHistoForSplittedBin(i)
            hEwk  = shape.getEwkHistoForSplittedBin(i)

            Verbose("Get normalization factor", True)
            wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
            # With the inclusive option every split bin uses the same "Inclusive" factor
            if self._optionUseInclusiveNorm:
                wQCDLabel = "Inclusive"
            wQCD = 0.0

            # A missing factor is not fatal: the bin keeps wQCD = 0.0 and so contributes nothing
            if not wQCDLabel in normFactors.keys():
                msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel,shape.getHistoName())
                Print(ShellStyles.WarningLabel() + msg, True)
            else:
                wQCD = normFactors[wQCDLabel]
            msg = "Weighting bin \"%i\" (label=\"%s\")  with normFactor \"%s\"" % (i, wQCDLabel, wQCD)
            Verbose(ShellStyles.NoteLabel() + msg, True)

            # Construct info table (debugging)
            # NOTE(review): the table is rebuilt for every split bin, but it is only
            # printed after this loop, so just the last split bin's table is shown.
            table  = []
            align  = "{:>6} {:^10} {:^15} {:>10} {:>10} {:>10} {:^3} {:^8} {:^3} {:^8}"
            header = align.format("Bin", "Width", "Range", "Content", "NormFactor", "QCD", "+/-", "Data", "+/-", "EWK")
            hLine  = "="*90
            table.append("{:^90}".format(shape.getHistoName()))
            table.append(hLine)
            table.append(header)
            table.append(hLine)

            binSum    = 0.0
            nBins     = h.GetNbinsX()
            # NOTE(review): bin indices 0 and nBins+1 are presumably ROOT's underflow and
            # overflow bins; confirm that these are the intended width/range for the summary row.
            binWidth  = hData.GetBinWidth(0)
            xMin      = hData.GetXaxis().GetBinCenter(0)
            xMax      = hData.GetXaxis().GetBinCenter(nBins+1)

            # For-Loop (nested): All bins in the shape histogram
            for j in range(1, nBins+1):

                # Initialise values (these stay zero for bins that are skipped below)
                myResult         = 0.0
                myStatDataUncert = 0.0
                myStatEwkUncert  = 0.0

                # Ignore zero bins
                if abs(h.GetBinContent(j)) > 0.00001:
                    Verbose("Calculating the result")
                    binContent = h.GetBinContent(j)
                    binRange   = "%.1f -> %.1f" % (h.GetXaxis().GetBinLowEdge(j), h.GetXaxis().GetBinUpEdge(j) )
                    binWidth   = GetTH1BinWidthString(h, j)
                    binSum    += binContent
                    myResult   = binContent * wQCD # apply normalisation factor (transfer from CR to SR)

                    Verbose("Calculate abs. stat. uncert. for data and for MC EWK (Do not calculate here MC EWK syst.)", True)
                    myStatDataUncert = hData.GetBinError(j) * wQCD
                    myStatEwkUncert  = hEwk.GetBinError(j)  * wQCD
                    table.append(align.format(j, binWidth, binRange, "%0.1f" % binContent, wQCD, "%.1f" % myResult, "+/-", "%.1f" % myStatDataUncert, "+/-", "%.1f" % myStatEwkUncert))

                # Get count object for this bin and add it to the overall result
                myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
                self._resultCountObject.add(myCountObject)

                if optionDoNQCDByBinHistograms:
                    Verbose("Setting bin content \"%i\"" % (j), True)
                    self._histogramsList[i].SetBinContent(j, myCountObject.value())
                    self._histogramsList[i].SetBinError(j, myCountObject.statUncertainty())

                # Accumulate into the total shape. Despite its name, binError holds the
                # running sum of SQUARED uncertainties until the sqrt is taken after the loops.
                binContent = self._resultShape.GetBinContent(j) + myCountObject.value()
                binError   = self._resultShape.GetBinError(j) + myCountObject.statUncertainty()**2
                # NOTE(review): j starts at 1, so the second argument (j==0) is always False here
                Verbose("Setting bin %i to content %0.1f +/- %0.1f" % (j, binContent, binError), j==0)
                self._resultShape.SetBinContent(j, binContent)
                self._resultShape.SetBinError(j, binError) # Sum squared (take sqrt outside loop on final squared sum)

                Verbose("Sum items for purity calculation", True)
                # Weighted contents are summed linearly, weighted uncertainties in quadrature
                myShapeDataSum[j-1]       += hData.GetBinContent(j)*wQCD
                myShapeDataSumUncert[j-1] += (hData.GetBinError(j)*wQCD)**2
                myShapeEwkSum[j-1]        += hEwk.GetBinContent(j)*wQCD
                myShapeEwkSumUncert[j-1]  += (hEwk.GetBinError(j)*wQCD)**2

            # Delete the shape histograms
            h.Delete()
            hData.Delete()
            hEwk.Delete()

        # For-loop: All shape bins
        for j in range(1,self._resultShape.GetNbinsX()+1):
            # Take square root of uncertainties (they were accumulated as squared sums above)
            self._resultShape.SetBinError(j, math.sqrt(self._resultShape.GetBinError(j)))

        # Print detailed results in a formatted table
        # NOTE(review): nBins, xMin, xMax, binSum, binWidth, wQCD and table still hold the
        # values of the LAST split bin processed, whereas qcdResults is the total over all
        # split bins. These names are unbound if the split-bin loop never ran.
        qcdResults = self._resultCountObject.getResultAndStatErrorsDict()
        bins       = "%0.f-%.0f" % (1, nBins)
        binRange   = "%.1f -> %.1f" % (xMin, xMax)
        binSum     = "%.1f" % binSum
        nQCD       = "%.1f" % qcdResults["value"]
        dataStat   = "%.1f" % qcdResults["statData"]
        ewkStat    = "%.1f" % qcdResults["statEWK"]
        table.append(align.format(bins, binWidth, binRange, binSum, wQCD, nQCD, "+/-", dataStat, "+/-", ewkStat))
        table.append(hLine)
        for i, line in enumerate(table):
            # The second-to-last line is the summary row appended just above; highlight it
            if i == len(table)-2:
                Verbose(ShellStyles.TestPassedStyle()+line+ShellStyles.NormalStyle(), i==0)
            else:
                Verbose(line, i==0)

        if optionPrintPurityByBins:
            Verbose("Printing Shape Purity bin-by-bin.", True)
            self.PrintPurityByBins(nBins, shape, myShapeDataSum, myShapeDataSumUncert, myShapeEwkSum, myShapeEwkSumUncert)
        return