def _sanityChecks(self, dsetMgr, dirName, plotName):
    '''
    Verify that the histogram exists for every EWK dataset and is 1-dimensional.

    Returns True when the plot can be processed, False otherwise (a warning is
    printed in the failing cases).
    '''
    histoPath = "%s/%s" % (dirName, plotName)
    ewkDset = dsetMgr.getDataset("EWK")

    # The histogram must be present in every single EWK dataset
    existsForAll = all(d.hasRootHisto(histoPath) for d in ewkDset.datasets)
    if not existsForAll:
        msg = "Skipping '%s', because it does not exist for all EWK datasets (you probably forgot to set histo level to Vital when producing the multicrab)!" % (plotName)
        Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), True)
        return False

    # Inspect the first available ROOT object; only TH1-like histograms are supported
    (rootObj, rootObjName) = ewkDset.getFirstRootHisto(histoPath)
    isOk = True
    if isinstance(rootObj, ROOT.TH2):
        msg = "Skipping '%s', because it is not a TH1 object" % (plotName)
        Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), True)
        isOk = False
    # Release the ROOT object we temporarily obtained
    rootObj.Delete()
    return isOk
def _sanityChecks(self, dsetMgr, dirName, plotName):
    '''
    Check that the histogram exists for all EWK datasets and is not 2-dimensional.

    Returns True if the plot passes all checks, otherwise False (with a printed warning).
    '''
    fullName = "%s/%s"%(dirName,plotName)
    # Collect the EWK datasets that are missing the histogram; any miss fails the check
    missing = [d for d in dsetMgr.getDataset("EWK").datasets if not d.hasRootHisto(fullName)]
    if missing:
        print(ShellStyles.WarningLabel()+"Skipping '%s', because it does not exist for all EWK datasets (you probably forgot to set histo level to Vital when producing the multicrab)!"%(plotName)+ShellStyles.NormalStyle())
        return False

    status = True
    (rootObj, rootObjName) = dsetMgr.getDataset("EWK").getFirstRootHisto(fullName)
    # Only 1-dimensional histograms are supported downstream
    if isinstance(rootObj, ROOT.TH2):
        print(ShellStyles.WarningLabel()+"Skipping '%s', because it is not a TH1 object!"%(plotName)+ShellStyles.NormalStyle())
        status = False
    rootObj.Delete()
    return status
def __init__(self,
             dataPath,
             ewkPath,
             dsetMgr,
             luminosity,
             moduleInfoString,
             normFactors,
             #dataDrivenFakeTaus=False,
             #shapeOnly=False,
             #displayPurityBreakdown=False,
             #optionUseInclusiveNorm=False,
             optionCalculateQCDNormalizationSyst=True,
             normDataSrc=None,
             normEWKSrc=None,
             optionUseInclusiveNorm=False):
    '''
    Build the shape plots for all histograms found under dataPath.

    For every histogram in the Data dataset's dataPath directory, run sanity
    checks for both the data and EWK paths, obtain the shape histogram, and
    (optionally) the QCD-normalization systematics histograms.

    \param dataPath        Directory (inside the ROOT file) holding the Data histograms
    \param ewkPath         Directory holding the EWK histograms
    \param dsetMgr         Dataset manager providing "Data" and "EWK" datasets
    \param luminosity      Integrated luminosity passed to the shape builders
    \param moduleInfoString Identifier string for this era/searchMode/optMode combo
    \param normFactors     Dict of normalization factors; a single "Inclusive" key
                           forces inclusive normalization
    \param optionCalculateQCDNormalizationSyst  If True, also derive met-shape systematics
    \param optionUseInclusiveNorm  Use the inclusive normalization factor only
    '''
    # Accumulators filled as plots are processed (presumably read by the caller afterwards)
    self._shapePlots = []
    self._shapePlotLabels = []
    self._QCDNormalizationSystPlots = []
    self._QCDNormalizationSystPlotLabels = []
    self._moduleInfoString = moduleInfoString
    self._useInclusiveNorm = optionUseInclusiveNorm
    # A lone "Inclusive" key in normFactors implies inclusive normalization regardless of the option
    if len(normFactors.keys()) == 1 and normFactors.keys()[0] == "Inclusive":
        self._useInclusiveNorm = True
    print ShellStyles.HighlightStyle() + "...Obtaining final shape" + ShellStyles.NormalStyle()

    # Determine list of plots to consider (directory listing of the Data dataset)
    myObjects = dsetMgr.getDataset("Data").getDirectoryContent(dataPath)

    # Loop over plots to consider
    i = 0
    for plotName in myObjects:
        i += 1
        print ShellStyles.HighlightStyle() + "...Obtaining ctrl plot %d/%d: %s%s" % (i, len(myObjects), plotName, ShellStyles.NormalStyle())

        # Check that histograms exist for BOTH the data and EWK paths
        mySkipStatus = self._sanityChecks(dsetMgr, dataPath, plotName) and self._sanityChecks(dsetMgr, ewkPath, plotName)
        if not mySkipStatus:
            continue

        # Obtain shape plots (the returned object is not owned)
        # print "DEBUG: ewkPath: ", ewkPath
        myShapeHisto = self._obtainShapeHistograms(i, dataPath, ewkPath, dsetMgr, plotName, luminosity, normFactors)

        # Obtain plots for systematics coming from met shape difference for control plots
        if optionCalculateQCDNormalizationSyst:
            # Systematics derivation only supports 1-D histograms
            if isinstance(myShapeHisto, ROOT.TH2):
                print ShellStyles.WarningLabel() + "Skipping met shape uncertainty because histogram has more than 1 dimensions!"
            else:
                self._obtainQCDNormalizationSystHistograms(myShapeHisto, dsetMgr, plotName, luminosity, normDataSrc, normEWKSrc)
def _obtainFinalShapeHistogram(self, histoName):
    '''
    Build and cache the final data-driven QCD shape histogram for histoName.

    \param histoName  Name of the histogram to build the final shape from
    \raises Exception when histoName is None (nothing cached and no name given)

    Side effect: stores a clone of the result in self._hFinalShape.
    '''
    # Identity comparison with None (PEP 8) instead of the original '== None'
    if histoName is None:
        raise Exception(ShellStyles.ErrorLabel()+"You forgot to give final shape histo name or to cache the final shape histogram!")
    # Single-argument print() form behaves identically under Python 2 and 3
    print(ShellStyles.WarningLabel()+"Final shape histo was not cached to QCDInvertedSystematics. Obtaining final shape from '%s'."%histoName)
    # Obtain final result
    myFinalShape = DataDrivenQCDShape(self._dsetMgr, "Data", "EWK", histoName, self._luminosity, rebinList=self._myRebinList)
    myFinalShapeResult = QCDInvertedShape(myFinalShape, self._moduleInfoString, self._normFactors, optionPrintPurityByBins=False)
    # Cache a clone so later mutations of the result shape do not affect the cached copy
    self._hFinalShape = myFinalShapeResult.getResultShape().Clone()
def __init__(self, dsetMgr, dsetLabelData, dsetLabelEwk, histoName, dataPath, ewkPath, luminosity, optionUseInclusiveNorm, verbose=False):
    '''
    Read the data and EWK histogram lists for histoName from the given paths.

    Depending on optionUseInclusiveNorm, either only the "Inclusive" histograms
    are fetched, or the (unvalidated) splitted-bin histograms are read instead.
    '''
    self._verbose = verbose
    self._uniqueN = 0
    self._splittedHistoReader = splittedHistoReader.SplittedHistoReader(dsetMgr, dsetLabelData)
    self._histoName = histoName
    self._optionUseInclusiveNorm = optionUseInclusiveNorm
    #ALEX-NEW
    fullNameData = os.path.join(dataPath, histoName)
    fullNameEwk = os.path.join(ewkPath, histoName)
    # ALEX-NEW
    if self._optionUseInclusiveNorm:
        msg = "Disabled call for getting splitted histograms. Getting \"Inclusive\" histogram only instead."
        self.Verbose(ShellStyles.WarningLabel() + msg, self._verbose)
        # was called by default
        fetch = self._getInclusiveHistogramsFromSingleSource
        self._dataList = list(fetch(dsetMgr, dsetLabelData, fullNameData, luminosity))
        self._ewkList = list(fetch(dsetMgr, dsetLabelEwk, fullNameEwk, luminosity))
    else:
        msg = "This splitted histograms method is not validated! Use \"Inclusive\" histogram only instead."
        self.Print(ShellStyles.WarningLabel() + msg, False)
        fetch = self._splittedHistoReader.getSplittedBinHistograms
        self._dataList = list(fetch(dsetMgr, dsetLabelData, fullNameData, luminosity)) #FIXME: Does this work for Inclusive?
        self._ewkList = list(fetch(dsetMgr, dsetLabelEwk, fullNameEwk, luminosity)) #FIXME: Does this work for Inclusive?
    return
def __init__(self, dataPath, ewkPath, dsetMgr, luminosity, moduleInfoString, normFactors,
             optionDoFakeBNormalisationSyst=True, normDataSrc=None, normEWKSrc=None,
             optionUseInclusiveNorm=False, keyList=None, verbose=False):
    '''
    Build shape plots (and optionally normalisation systematics) for all
    histograms found under dataPath/ewkPath that match keyList.

    \param dataPath   Directory (inside the ROOT file) holding the Data histograms
    \param ewkPath    Directory holding the EWK histograms
    \param dsetMgr    Dataset manager providing "Data" and "EWK" datasets
    \param luminosity Integrated luminosity passed to the shape builders
    \param moduleInfoString Identifier for this era/searchMode/optMode combo
    \param normFactors Dict of normalization factors; a lone "Inclusive" key
                       forces inclusive normalization
    \param optionDoFakeBNormalisationSyst Derive invariant-mass shape systematics
    \param keyList    Keys used to select histogram paths (default: empty list)
    \raises Exception if the Data and EWK histogram lists differ in size
    '''
    # NOTE: keyList originally used a mutable default argument ([]); use the
    # None-sentinel idiom instead — behaviour for callers is unchanged.
    if keyList is None:
        keyList = []
    self._verbose = verbose
    self._shapePlots = []
    self._shapePlotLabels = []
    self._QCDNormalizationSystPlots = []
    self._QCDNormalizationSystPlotLabels = []
    self._moduleInfoString = moduleInfoString
    self._useInclusiveNorm = optionUseInclusiveNorm
    # A single "Inclusive" normalization factor implies inclusive normalization
    if len(normFactors.keys()) == 1 and normFactors.keys()[0] == "Inclusive":
        self._useInclusiveNorm = True

    self._histoPathsData = self._GetHistoPaths(dsetMgr, "Data", dataPath, keyList)
    # Avoid a second directory scan when both paths are identical
    if ewkPath == dataPath:
        self._histoPathsEWK = self._histoPathsData
    else:
        self._histoPathsEWK = self._GetHistoPaths(dsetMgr, "EWK", ewkPath, keyList)

    # Sanity check
    if len(self._histoPathsEWK) != len(self._histoPathsData):
        msg = "List of histograms for EWK does not match in size that of Data"
        raise Exception(ShellStyles.ErrorLabel() + msg + ShellStyles.NormalStyle())

    # For-Loop: All plots to consider
    for i, plotName in enumerate(self._histoPathsData, 1):
        # Inform user of progress
        msg = "{:<9} {:>3} {:<1} {:<3} {:<80}".format("Histogram", "%i" % i, "/", "%s:" % (len(self._histoPathsData)), os.path.join(dataPath, plotName))
        self.PrintFlushed(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), False)
        if "JetEtaPhi_AfterAllSelections" in plotName:
            continue

        # Ensure that histograms exist && pass other sanity checks
        dataOk = self._sanityChecks(dsetMgr, dataPath, plotName)
        ewkOk = self._sanityChecks(dsetMgr, ewkPath, plotName)
        # Replaces the unidiomatic 'dataOk*ewkOk == False' boolean arithmetic
        if not (dataOk and ewkOk):
            self.Print(ShellStyles.ErrorStyle() + msg + ShellStyles.NormalStyle(), i==1)
            continue

        self.Verbose("Obtaining shape plots (the returned object is not owned)", True)
        myShapeHisto = self._obtainShapeHistograms(i, dataPath, ewkPath, dsetMgr, plotName, luminosity, normFactors)

        # Obtain plots for systematics coming from invariant mass shape difference
        if optionDoFakeBNormalisationSyst:
            if isinstance(myShapeHisto, ROOT.TH2):
                msg = "Skipping invariant mass shape uncertainty because histogram has more than 1 dimensions!"
                self.Print(ShellStyles.WarningLabel() + msg, True)
            else:
                self._obtainQCDNormalizationSystHistograms(myShapeHisto, dsetMgr, plotName, luminosity, normDataSrc, normEWKSrc) #iro: fixme (missing plots)

    msg = "Obtaining final shape from data path %s" % (ShellStyles.NoteStyle() + dataPath + ShellStyles.NormalStyle())
    self.Verbose(msg, True)
    return
def _findCommonAvailableModules(self, itemLabel, primaryLabel, primaryList, otherLabel, otherList):
    '''
    Return the items of primaryList that also appear in otherList.

    A warning is printed for every item of primaryList missing from otherList.
    (The original code re-tested 'not item in otherList' inside the else branch,
    which is always true there — the redundant check has been removed.)

    \param itemLabel     Label describing what kind of item is compared (for the warning)
    \param primaryLabel  Label of the source providing primaryList
    \param primaryList   Items to keep if common
    \param otherLabel    Label of the source providing otherList
    \param otherList     Items available in the other source
    \return list of items present in both lists (order of primaryList preserved)
    '''
    availableList = []
    # Loop over first list to find common items
    for item in primaryList:
        if item in otherList:
            availableList.append(item)
        else:
            print(ShellStyles.WarningLabel() + " %s selection: item '%s' is available in '%s', but missing from '%s'!" % (itemLabel, item, primaryLabel, otherLabel))
    # Return list of items available in both multicrab directories
    return availableList
def getIntegratedPurityForShapeHisto(self):
    '''
    Return a histogram holding the QCD purity per bin of the final shape.

    Bin content is (data-EWK)/data clamped at zero; bin error assumes a
    binomial uncertainty.
    '''
    dataHisto = self.getIntegratedDataHisto()
    ewkHisto = self.getIntegratedEwkHisto()
    # newName = ("_".join(hData.GetName().split("_", 2)[:2]) + "_IntegratedPurity_" + str(self._uniqueN))
    purityHisto = aux.Clone(dataHisto, dataHisto.GetName() + "_Purity")
    firstToken = self._dataList[0].GetName().split("_")[0]
    # Title drops the trailing character of the first name token
    purityHisto.SetTitle("PurityByFinalShapeBin_%s" % firstToken[:-1])
    self._uniqueN += 1

    # For-loop: All bins
    for iBin in range(1, purityHisto.GetNbinsX() + 1):
        nData = dataHisto.GetBinContent(iBin)
        nEwk = ewkHisto.GetBinContent(iBin)
        purity, uncert = 0.0, 0.0
        if nData > 0.0:
            purity = (nData - nEwk) / nData
            # Negative purity is clamped to zero with zero uncertainty
            if purity < 0.0:
                purity = 0.0
            else:
                # Assume binomial error
                uncertSq = purity * (1.0 - purity) / nData
                if uncertSq >= 0.0:
                    uncert = sqrt(uncertSq)
                else:
                    msg = "Purity is greater than 1 (%.4f) in bin %i of histogram %s" % (purity, iBin, purityHisto.GetName())
                    self.Verbose(ShellStyles.WarningLabel() + msg, True)
        # Store the purity value and its uncertainty for this bin
        purityHisto.SetBinContent(iBin, purity)
        purityHisto.SetBinError(iBin, uncert)
    return purityHisto
def getIntegratedPurityForShapeHisto(self):
    '''
    Return the QCD purity in bins of the final shape.

    Purity in bin i is (data - EWK)/data, clamped at zero; the bin error
    assumes a binomial uncertainty. Bins with no data get purity 0.
    '''
    hData = self.getIntegratedDataHisto()
    hEwk = self.getIntegratedEwkHisto()
    #cloneName = "%s_purity_%d" % (hData, self._uniqueN) # original code
    cloneName = ("_".join(hData.GetName().split("_", 2)[:2]) + "_IntegratedPurity_" + str(self._uniqueN))
    h = aux.Clone(hData, cloneName)
    nameList = self._dataList[0].GetName().split("_")
    h.SetTitle("PurityByFinalShapeBin_%s" % nameList[0][:len(nameList[0]) - 1])
    # Keep clone names unique across repeated calls
    self._uniqueN += 1

    # For-loop: All bins
    for i in range(1, h.GetNbinsX() + 1):
        myPurity = 0.0
        myUncert = 0.0
        # Hoist the repeated GetBinContent(i) calls into locals
        # (consistent with the sibling implementation of this method)
        nData = hData.GetBinContent(i)
        nEWK = hEwk.GetBinContent(i)
        if nData > 0.0:
            myPurity = (nData - nEWK) / nData
            if myPurity < 0.0:
                myPurity = 0.0
                myUncert = 0.0
            else:
                # Assume binomial error
                myUncertSq = myPurity * (1.0 - myPurity) / nData
                if myUncertSq >= 0.0:
                    myUncert = sqrt(myUncertSq)
                else:
                    msg = "Purity is greater than 1 (%.4f) in bin %i of histogram %s" % (myPurity, i, h.GetName())
                    self.Verbose(ShellStyles.WarningLabel() + msg, True)
                    myUncert = 0.0
        h.SetBinContent(i, myPurity)
        h.SetBinError(i, myUncert)
    return h
def main():
    '''
    Driver: build a pseudo-multicrab with data-driven QCD shapes.

    For every selected shape type and each era/searchMode/optimizationMode
    combination, builds the nominal module and one module per systematic
    variation, then finalizes the pseudo-multicrab directory.
    Reads the module-level 'opts' for all command-line settings.
    '''
    # Object for selecting data eras, search modes, and optimization modes
    myModuleSelector = analysisModuleSelector.AnalysisModuleSelector()

    # Obtain multicrab directory (default: current directory)
    myMulticrabDir = "."
    if opts.mcrab != None:
        myMulticrabDir = opts.mcrab
    if not os.path.exists("%s/multicrab.cfg" % myMulticrabDir):
        msg = "No multicrab directory found at path '%s'! Please check path or specify it with --mcrab!" % (myMulticrabDir)
        raise Exception(ShellStyles.ErrorLabel() + msg + ShellStyles.NormalStyle())
    if len(opts.shape) == 0:
        raise Exception(ShellStyles.ErrorLabel() + "Provide a shape identifierwith --shape (for example MT)!" + ShellStyles.NormalStyle())

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=myMulticrabDir)

    # Obtain systematics names (each source yields a Plus and a Minus variation)
    mySystematicsNamesRaw = dsetMgrCreator.getSystematicVariationSources()
    mySystematicsNames = []
    for item in mySystematicsNamesRaw:
        mySystematicsNames.append("%sPlus" % item)
        mySystematicsNames.append("%sMinus" % item)
    if opts.test:
        mySystematicsNames = [] #[mySystematicsNames[0]] #FIXME

    # Set the primary source
    myModuleSelector.setPrimarySource(label=opts.analysisName, dsetMgrCreator=dsetMgrCreator)

    # Select modules
    myModuleSelector.doSelect(opts=None) #FIXME: (opts=opts)

    # Loop over era/searchMode/optimizationMode combos
    myDisplayStatus = True
    # Total = combinations x (nominal + systematics) x shapes
    myTotalModules = myModuleSelector.getSelectedCombinationCount() * (len(mySystematicsNames) + 1) * len(opts.shape)
    Verbose("Found %s modules in total" % (myTotalModules), True)

    count, nEras, nSearchModes, nOptModes, nSysVars = myModuleSelector.getSelectedCombinationCountIndividually()
    if nSysVars > 0:
        msg = "Will run over %d modules (%d eras x %d searchModes x %d optimizationModes x %d systematic variations)" % (count, nEras, nSearchModes, nOptModes, nSysVars)
    else:
        msg = "Will run over %d modules (%d eras x %d searchModes x %d optimizationModes)" % (count, nEras, nSearchModes, nOptModes)
    Print(msg, True)

    # Create pseudo-multicrab creator
    myOutputCreator = pseudoMultiCrabCreator.PseudoMultiCrabCreator(opts.analysisName, myMulticrabDir)

    # Make time stamp for start time
    myGlobalStartTime = time.time()

    iModule = 0
    # For-loop: All Shapes
    for shapeType in opts.shape:
        # Initialize
        myOutputCreator.initialize(shapeType, prefix="")
        msg = "Creating dataset for shape \"%s\"%s" % (shapeType, ShellStyles.NormalStyle())
        Verbose(ShellStyles.HighlightStyle() + msg, True)

        # Get lists of settings
        erasList = myModuleSelector.getSelectedEras()
        modesList = myModuleSelector.getSelectedSearchModes()
        optList = myModuleSelector.getSelectedOptimizationModes()
        optList.append("") #append the default opt mode!

        # For-Loop over era, searchMode, and optimizationMode options
        for era in erasList:
            for searchMode in modesList:
                for optimizationMode in optList:
                    Verbose("era = %s, searchMode = %s, optMode = %s" % (era, searchMode, optimizationMode), True)
                    # If an optimization mode is defined in options skip the rest
                    if opts.optMode != None:
                        if optimizationMode != opts.optMode:
                            continue

                    # Obtain normalization factors
                    myNormFactors = importNormFactors(era, searchMode, optimizationMode, opts.mcrab)

                    # Nominal module
                    myModuleInfoString = getModuleInfoString(era, searchMode, optimizationMode)
                    iModule += 1

                    # Inform user of what is being processes
                    msg = "Module %d/%d:%s %s/%s" % (iModule, myTotalModules, ShellStyles.NormalStyle(), myModuleInfoString, shapeType)
                    Print(ShellStyles.CaptionStyle() + msg, True)

                    # Keep time
                    myStartTime = time.time()

                    Verbose("Create dataset manager with given settings", True)
                    nominalModule = ModuleBuilder(opts, myOutputCreator)
                    nominalModule.createDsetMgr(myMulticrabDir, era, searchMode, optimizationMode)
                    # Dump debug info only once, for the first module
                    if (iModule == 1):
                        if opts.verbose:
                            nominalModule.debug()

                    doQCDNormalizationSyst = False #FIXME
                    if not doQCDNormalizationSyst:
                        msg = "Disabling systematics"
                        Print(ShellStyles.WarningLabel() + msg, True)
                    nominalModule.buildModule(opts.dataSrc, opts.ewkSrc, myNormFactors["nominal"], doQCDNormalizationSyst, opts.normDataSrc, opts.normEwkSrc)

                    if len(mySystematicsNames) > 0:
                        Print("Adding QCD normalization systematics (iff also other systematics present) ", True)
                        nominalModule.buildQCDNormalizationSystModule(opts.dataSrc, opts.ewkSrc)

                    # FIXME: add quark gluon weighting systematics!
                    if 0:
                        Print("Adding Quark/Gluon weighting systematics", True)
                        nominalModule.buildQCDQuarkGluonWeightingSystModule(opts.dataSrc, opts.ewkSrc, myNormFactors["FakeWeightingUp"], myNormFactors["FakeWeightingDown"], False, opts.normDataSrc, opts.normEwkSrc)

                    Verbose("Deleting nominal module", True)
                    nominalModule.delete()

                    Verbose("Printing time estimate", True)
                    printTimeEstimate(myGlobalStartTime, myStartTime, iModule, myTotalModules)

                    Verbose("Now do the rest of systematics variations", True)
                    for syst in mySystematicsNames:
                        iModule += 1
                        msg = "Analyzing systematics variations %d/%d: %s/%s/%s" % (iModule, myTotalModules, myModuleInfoString, syst, shapeType)
                        Print(ShellStyles.CaptionStyle() + msg + ShellStyles.NormalStyle(), True)
                        myStartTime = time.time()
                        systModule = ModuleBuilder(opts, myOutputCreator)
                        # Create dataset manager with given settings
                        systModule.createDsetMgr(myMulticrabDir, era, searchMode, optimizationMode, systematicVariation=syst)
                        # Build asystematics module
                        systModule.buildModule(opts.dataSrc, opts.ewkSrc, myNormFactors["nominal"], False, opts.normDataSrc, opts.normEwkSrc)
                        printTimeEstimate(myGlobalStartTime, myStartTime, iModule, myTotalModules)
                        systModule.delete()
        Verbose("Pseudo-multicrab ready for %s" % shapeType, True)

    # Create rest of pseudo multicrab directory
    myOutputCreator.silentFinalize()

    # Print some timing statistics
    Print("Average processing time per module was %.1f s" % getAvgProcessTimeForOneModule(myGlobalStartTime, myTotalModules), True)
    Print("Total elapsed time was %.1f s" % getTotalElapsedTime(myGlobalStartTime), False)

    # NOTE(review): relies on shapeType leaking out of the for-loop above — verify intended
    msg = "Created pseudo-multicrab %s for shape type \"%s\"" % (myOutputCreator.getDirName(), shapeType)
    Print(ShellStyles.SuccessLabel() + msg, True)
    return
def importNormFactors(era, searchMode, optimizationMode, multicrabDirName):
    '''
    Imports the auto-generated QCDInvertedNormalizationFactors.py file, which is
    created by the plotting/fitting templates script (plotQCD_Fit.py)

    This contains the results of fitting to the Baseline Data the templates
    m_{jjb} shapes from the QCD (Inverted Data) and EWK (Baseline MC).

    Results include the fit details for each shape and the QCD NormFactor for
    moving from the ControlRegion (CR) to the Signal Region (SR).

    The aforementioned python file and a folder with the histogram ROOT files
    and the individual fits. The folder name will be
    normalisationPlots/<OptsMode> and will be placed inside the
    <pseudomulticrab_dir>. The autogenerated file will be placed in the cwd
    (i.e. work/)

    \param era               Data era the factors must match
    \param searchMode        Search mode the factors must match
    \param optimizationMode  Optimization mode the factors must match
    \param multicrabDirName  Pseudo-multicrab directory containing the factor files
    \return dict with key "nominal" mapped to the imported normalization factors
    '''
    # Find candidates for normalisation scripts
    scriptList = getNormFactorFileList(dirName=multicrabDirName, fileBaseName=opts.normFactorsSrc)

    # Create a string with the module information used
    moduleInfoString = getModuleInfoString(era, searchMode, optimizationMode)

    # Construct source file name
    src = getGetNormFactorsSrcFilename(multicrabDirName, opts.normFactorsSrc % moduleInfoString)

    # Check if normalization coefficients are suitable for the chosen era
    Verbose("Reading normalisation factors from:\n\t%s" % src, True)

    # Split the path to get just the file name of src
    pathList = src.replace(".py", "").split("/")

    # Insert the directory where the normFactor files reside into the path so that they are found
    if len(pathList) > 1:
        # NOTE(review): os.getenv("PWD") can be None outside a shell and may differ
        # from os.getcwd() through symlinks — verify os.getcwd() is not preferable
        cwd = os.getenv("PWD")
        # Get directories to src in a list [i.e. remove the last entry (file-name) from the pathList]
        dirList = map(str, pathList[:(len(pathList) - 1)])
        srcDir = "/".join(dirList)
        sys.path.insert(0, os.path.join(cwd, srcDir))

    # Import the (normFactor) src file
    normFactorsImport = __import__(os.path.basename("/".join(pathList)))

    # Get the function definition
    myNormFactorsSafetyCheck = getattr(normFactorsImport, "QCDInvertedNormalizationSafetyCheck")

    Verbose("Check that the era=%s, searchMode=%s, optimizationMode=%s info matches!" % (era, searchMode, optimizationMode))
    # Raises (presumably) if the imported factors do not match the requested module
    myNormFactorsSafetyCheck(era, searchMode, optimizationMode)

    # Obtain normalization factors
    myNormFactorsImport = getattr(normFactorsImport, "QCDNormalization")
    msg = "Disabled NormFactors Syst Var Fake Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactorsImportSystVarFakeWeightingDown = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarDown") #FIXME
    # myNormFactorsImportSystVarFakeWeightingUp = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarUp") #FIXME

    myNormFactors = {}
    myNormFactors["nominal"] = myNormFactorsImport

    msg = "Obtained \"nominal\" QCD normalisation factors dictionary. The values are:\n"
    for k in myNormFactors["nominal"]:
        msg += "\t" + k + " = " + str(myNormFactors["nominal"][k])
    Print(ShellStyles.NoteLabel() + msg, True)

    msg = "Disabled NormFactors Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactors["FakeWeightingDown"] = myNormFactorsImportSystVarFakeWeightingDown # FIXME
    # myNormFactors["FakeWeightingUp"] = myNormFactorsImportSystVarFakeWeightingUp # FIXME
    return myNormFactors
def main(opts):
    '''
    Driver: plot QGLR histograms in the control/validation/signal regions and
    calculate the transfer factors (TF).

    Loops over the selected optimization modes, prepares the dataset manager
    (merging, dataset removal, luminosity), collects the QGLR histogram paths,
    and hands them to PlotHistosAndCalculateTF().
    '''
    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)
    style.setGridX(True)
    style.setGridY(True)

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=opts.mcrab)

    # Get list of eras, modes, and optimisation modes
    erasList = dsetMgrCreator.getDataEras()
    modesList = dsetMgrCreator.getSearchModes()
    optList = dsetMgrCreator.getOptimizationModes()
    sysVarList = dsetMgrCreator.getSystematicVariations()
    sysVarSrcList = dsetMgrCreator.getSystematicVariationSources()

    # If user does not define optimisation mode do all of them
    if opts.optMode == None:
        if len(optList) < 1:
            optList.append("")
        optModes = optList
    else:
        optModes = [opts.optMode]

    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json
        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Get the PSets:
        if 0:
            datasetsMgr.printSelections()
            #PrintPSet("BJetSelection", datasetsMgr, depth=150)

        # ZJets and DYJets overlap!
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Get luminosity if a value is not specified
        if opts.intLumi < 0:
            opts.intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets
        removeList = ["QCD-b"] #, "Charged"]
        # With data-driven estimation the MC QCD sample is dropped
        if not opts.useMC:
            removeList.append("QCD")
        for i, d in enumerate(removeList, 0):
            msg = "Removing dataset %s" % d
            Verbose(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            datasetsMgr.remove(filter(lambda name: d in name, datasetsMgr.getAllDatasetNames()))

        # Print summary of datasets to be used
        if 0:
            datasetsMgr.PrintInfo()

        # Merge EWK samples
        datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Do the fit on the histo after ALL selections (incl. topology cuts)
        folderList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(opts.folder)
        #folderList1 = [h for h in folderList if "TetrajetPt" in h]
        #folderList1 = [h for h in folderList if "TetrajetMass" in h]
        #folderList1 = [h for h in folderList if "MET" in h]
        #folderList1 = [h for h in folderList if "TetrajetBJetPt" in h]
        folderList1 = [h for h in folderList if "QGLR" in h]
        # Keep only the control/validation/signal region folders
        folderList2 = [h for h in folderList1 if "CRtwo" in h or "VR" in h or "SR" in h or "CRone" in h]

        # For-loop: All folders (collect the full histogram paths inside each folder)
        histoPaths = []
        for f in folderList2:
            folderPath = os.path.join(opts.folder, f)
            histoList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folderPath)
            pathList = [os.path.join(folderPath, h) for h in histoList]
            histoPaths.extend(pathList)

        binLabels = GetBinLabels("CRone", histoPaths)
        PlotHistosAndCalculateTF(datasetsMgr, histoPaths, binLabels, opts)
    return
def main(opts):
    '''
    Driver: create a pseudo-dataset (inside a pseudo-multicrab directory) with
    data-driven shapes, for every selected shape type and every selected
    era/searchMode/optimizationMode combination, including systematic
    variation modules.
    '''
    # Object for selecting data eras, search modes, and optimization modes
    myModuleSelector = analysisModuleSelector.AnalysisModuleSelector()

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=opts.mcrab)

    # Obtain systematics names (each source yields a Plus and a Minus variation)
    mySystematicsNamesRaw = dsetMgrCreator.getSystematicVariationSources()
    mySystematicsNames = []
    for i, item in enumerate(mySystematicsNamesRaw, 0):
        Print("Using systematic %s" % (ShellStyles.NoteStyle() + item + ShellStyles.NormalStyle()), i == 0)
        mySystematicsNames.append("%sPlus" % item)
        mySystematicsNames.append("%sMinus" % item)
    # Test mode: skip all systematic variations
    if opts.test:
        mySystematicsNames = []

    # Set the primary source
    Verbose("Setting the primary source (label=%s)" % (ShellStyles.NoteStyle() + opts.analysisName + ShellStyles.NormalStyle()), True)
    myModuleSelector.setPrimarySource(label=opts.analysisName, dsetMgrCreator=dsetMgrCreator) #fixme: what is label for?

    # Select modules
    myModuleSelector.doSelect(opts=None) #fixme: (opts=opts)

    # Loop over era/searchMode/optimizationMode combos
    # Total = combinations x (nominal + systematics) x shapes
    myTotalModules = myModuleSelector.getSelectedCombinationCount() * (len(mySystematicsNames) + 1) * len(opts.shape)
    Verbose("Found %s modules in total" % (myTotalModules), True)

    count, nEras, nSearchModes, nOptModes, nSysVars = myModuleSelector.getSelectedCombinationCountIndividually()
    if nSysVars > 0:
        msg = "Running over %d modules (%d eras x %d searchModes x %d optimizationModes x %d systematic variations)" % (count, nEras, nSearchModes, nOptModes, nSysVars)
    else:
        msg = "Running over %d modules (%d eras x %d searchModes x %d optimizationModes)" % (count, nEras, nSearchModes, nOptModes)
    Verbose(msg, True)

    # Create pseudo-multicrab creator
    msg = "Will create pseudo-dataset %s inside the pseudo-multicrab directory" % (ShellStyles.NoteStyle() + opts.analysisName + ShellStyles.NormalStyle())
    Verbose(msg, True)
    myOutputCreator = pseudoMultiCrabCreator.PseudoMultiCrabCreator(opts.analysisName, opts.mcrab, verbose=opts.verbose)

    # Make time stamp for start time
    myGlobalStartTime = time.time()

    iModule = 0
    # For-loop: All Shapes
    for iShape, shapeType in enumerate(opts.shape, 1):
        msg = "Shape %d/%d:%s %s" % (iShape, len(opts.shape), ShellStyles.NormalStyle(), shapeType)
        Print(ShellStyles.CaptionStyle() + msg, True)

        # Initialize
        myOutputCreator.initialize(subTitle=shapeType, prefix="") #fixeme: remove shapeType from sub-directory name?

        # Get lists of settings
        erasList = myModuleSelector.getSelectedEras()
        modesList = myModuleSelector.getSelectedSearchModes()
        optList = myModuleSelector.getSelectedOptimizationModes()
        if 0:
            optList.append("") #append the default opt mode iff more optimization modes exist

        # For-Loop over era, searchMode, and optimizationMode options
        for era in erasList:
            for searchMode in modesList:
                for optimizationMode in optList:
                    Verbose("era = %s, searchMode = %s, optMode = %s" % (era, searchMode, optimizationMode), True)
                    # If an optimization mode is defined in options skip the rest
                    if opts.optMode != None:
                        if optimizationMode != opts.optMode:
                            continue

                    # Obtain normalization factors
                    myNormFactors = importNormFactors(era, searchMode, optimizationMode, opts.mcrab)

                    # Nominal module
                    myModuleInfoString = getModuleInfoString(era, searchMode, optimizationMode)
                    iModule += 1

                    # Inform user of what is being processes
                    msg = "Module %d/%d:%s %s/%s" % (iModule, myTotalModules, ShellStyles.NormalStyle(), myModuleInfoString, shapeType)
                    Print(ShellStyles.CaptionStyle() + msg, True)

                    # Keep time
                    myStartTime = time.time()

                    Verbose("Create dataset manager with given settings", True)
                    nominalModule = ModuleBuilder(opts, myOutputCreator, opts.verbose)
                    nominalModule.createDsetMgr(opts.mcrab, era, searchMode, optimizationMode)
                    # Dump debug info only once, for the first module
                    if (iModule == 1):
                        if opts.verbose:
                            nominalModule.debug()

                    doQCDNormalizationSyst = False #FIXME
                    if not doQCDNormalizationSyst:
                        msg = "Disabling systematics"
                        Verbose(ShellStyles.WarningLabel() + msg, True) #fixme

                    # Build the module
                    nominalModule.buildModule(opts.dataSrc, opts.ewkSrc, myNormFactors[opts.normFactorKey], doQCDNormalizationSyst, opts.normDataSrc, opts.normEwkSrc)

                    if len(mySystematicsNames) > 0:
                        Print("Adding QCD normalization systematics (iff also other systematics present) ", True)
                        nominalModule.buildQCDNormalizationSystModule(opts.dataSrc, opts.ewkSrc)

                    # FIXME: add quark gluon weighting systematics!
                    if 0:
                        Print("Adding Quark/Gluon weighting systematics", True)
                        nominalModule.buildQCDQuarkGluonWeightingSystModule(opts.dataSrc, opts.ewkSrc, myNormFactors["FakeWeightingUp"], myNormFactors["FakeWeightingDown"], False, opts.normDataSrc, opts.normEwkSrc)

                    Verbose("Deleting nominal module", True)
                    nominalModule.delete()

                    Verbose("Printing time estimate", True)
                    printTimeEstimate(myGlobalStartTime, myStartTime, iModule, myTotalModules)

                    Verbose("Now do the rest of systematics variations", True)
                    for syst in mySystematicsNames:
                        iModule += 1
                        msg = "Analyzing systematics variations %d/%d: %s/%s/%s" % (iModule, myTotalModules, myModuleInfoString, syst, shapeType)
                        Print(ShellStyles.CaptionStyle() + msg + ShellStyles.NormalStyle(), True)
                        myStartTime = time.time()
                        systModule = ModuleBuilder(opts, myOutputCreator)
                        # Create dataset manager with given settings
                        systModule.createDsetMgr(opts.mcrab, era, searchMode, optimizationMode, systematicVariation=syst)
                        # Build asystematics module
                        systModule.buildModule(opts.dataSrc, opts.ewkSrc, myNormFactors[opts.normFactorKey], False, opts.normDataSrc, opts.normEwkSrc)
                        printTimeEstimate(myGlobalStartTime, myStartTime, iModule, myTotalModules)
                        systModule.delete()
        Verbose("Pseudo-multicrab ready for %s" % shapeType, True)

    # Print some timing statistics
    Print("Average processing time per module was %.1f seconds" % getAvgProcessTimeForOneModule(myGlobalStartTime, myTotalModules), True)
    Print("Total elapsed time was %.1f seconds" % getTotalElapsedTime(myGlobalStartTime), False)

    # Create rest of pseudo multicrab directory
    myOutputCreator.finalize(silent=False)
    return
def main(opts):
    '''
    Driver: plot selected kinematic histograms (trijet/tetrajet variables) in
    the CRone/CRtwo control regions.

    Loops over the selected optimization modes, prepares the dataset manager
    (merging, dataset removal, luminosity), collects the per-region histogram
    paths, and calls PlotHistograms() for each variable.
    '''
    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setGridX(False)
    style.setGridY(False)
    style.setOptStat(False)

    # Obtain dsetMgrCreator and register it to module selector
    dsetMgrCreator = dataset.readFromMulticrabCfg(directory=opts.mcrab)

    # Get list of eras, modes, and optimisation modes
    erasList = dsetMgrCreator.getDataEras()
    modesList = dsetMgrCreator.getSearchModes()
    optList = dsetMgrCreator.getOptimizationModes()
    sysVarList = dsetMgrCreator.getSystematicVariations()
    sysVarSrcList = dsetMgrCreator.getSystematicVariationSources()

    # If user does not define optimisation mode do all of them
    if opts.optMode == None:
        if len(optList) < 1:
            optList.append("")
        optModes = optList
    else:
        optModes = [opts.optMode]

    # For-loop: All opt Mode
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json
        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Get the PSets:
        if 0:
            datasetsMgr.printSelections()
            #PrintPSet("BJetSelection", datasetsMgr, depth=150)
            #PrintPSet("fakeBMeasurement", datasetsMgr, depth=150)
            sys.exit()

        # ZJets and DYJets overlap!
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Get luminosity if a value is not specified
        if opts.intLumi < 0:
            opts.intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets
        removeList = ["QCD-b", "Charged"]
        # With data-driven estimation the MC QCD sample is dropped
        if not opts.useMC:
            removeList.append("QCD")
        for i, d in enumerate(removeList, 0):
            msg = "Removing dataset %s" % d
            Verbose(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            datasetsMgr.remove(filter(lambda name: d in name, datasetsMgr.getAllDatasetNames()))

        # Print summary of datasets to be used
        if 0:
            datasetsMgr.PrintInfo()

        # Merge EWK samples
        datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())

        # Print dataset information
        datasetsMgr.PrintInfo()

        # List of TDirectoryFile (_CRone, _CRtwo, _VR, _SR)
        tdirs = ["LdgTrijetPt_", "LdgTrijetMass_", "TetrajetBJetPt_", "TetrajetBJetEta_", "LdgTetrajetPt_", "LdgTetrajetMass_"]
        region = ["CRone", "CRtwo"]
        # Build the wanted folder names as <variable>_<region>
        hList = []
        for d in tdirs:
            for r in region:
                hList.append(d + r)

        # Get the folders with the binned histograms
        folderList_ = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(opts.folder)
        folderList = [h for h in folderList_ if h in hList]

        # For-loop: All folders (collect full histogram paths)
        histoPaths = []
        for f in folderList:
            folderPath = os.path.join(opts.folder, f)
            histoList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folderPath)
            pathList = [os.path.join(folderPath, h) for h in histoList]
            histoPaths.extend(pathList)

        # Get all the bin labels
        binLabels = GetBinLabels("CRone", histoPaths)

        # Plot each variable with all histogram paths matching it
        for i, t in enumerate(tdirs, 1):
            myList = []
            for p in histoPaths:
                if t in p:
                    myList.append(p)
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (len(tdirs)), t.replace("_", ""))
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), i == 1)
            PlotHistograms(datasetsMgr, myList, binLabels, opts)

    # Save the plots
    Print("All plots saved under directory %s" % (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) + ShellStyles.NormalStyle()), True)
    return
def main(opts):
    '''
    Build numerator/denominator histogram path lists from two multicrab
    directories (without SF and with CR2 SF applied) and plot them via
    PlotHistos().

    BUGFIX: the original removed "lowMET" entries from folderList and
    den_pathList while iterating over those same lists, which skips the
    element following each removal; replaced with comprehension filters.
    '''
    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setGridX(False)
    style.setGridY(False)

    optModes = [""]

    # For-loop: All opt Modes
    for opt in optModes:
        opts.optMode = opt

        # Numerator & Denominator dataset managers
        noSF_datasetsMgr = GetDatasetsFromDir(opts, opts.noSFcrab)
        withCR2SF_datasetsMgr = GetDatasetsFromDir(opts, opts.withCR2SFcrab)

        # Update all events to PU weighting
        noSF_datasetsMgr.updateNAllEventsToPUWeighted()
        withCR2SF_datasetsMgr.updateNAllEventsToPUWeighted()

        # Load luminosities
        noSF_datasetsMgr.loadLuminosities()
        withCR2SF_datasetsMgr.loadLuminosities()

        if 0:
            noSF_datasetsMgr.PrintCrossSections()
            noSF_datasetsMgr.PrintLuminosities()

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(noSF_datasetsMgr)
        plots.mergeRenameReorderForDataMC(withCR2SF_datasetsMgr)

        # Get luminosity if a value is not specified
        if opts.intLumi < 0:
            opts.intLumi = noSF_datasetsMgr.getDataset("Data").getLuminosity()

        # Remove datasets (list intentionally empty by default)
        removeList = []
        #removeList = ["TTWJetsToLNu_", "TTWJetsToQQ"]
        for i, d in enumerate(removeList, 0):
            msg = "Removing dataset %s" % d
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            noSF_datasetsMgr.remove(filter(lambda name: d in name, noSF_datasetsMgr.getAllDatasetNames()))

        # Print summary of datasets to be used
        if 0:
            noSF_datasetsMgr.PrintInfo()
            withCR2SF_datasetsMgr.PrintInfo()

        # Merge EWK samples
        EwkDatasets = ["Diboson", "DYJetsToLL", "WJetsHT"]
        noSF_datasetsMgr.merge("EWK", EwkDatasets)
        withCR2SF_datasetsMgr.merge("EWK", EwkDatasets)

        # Get histogram names
        folderListIncl = withCR2SF_datasetsMgr.getDataset(withCR2SF_datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(opts.folder)
        folderList = [h for h in folderListIncl if "AfterAllSelections_LeadingTrijet_Pt" in h]

        # Drop "lowMET" histograms. (BUGFIX: was list.remove() inside a
        # for-loop over the same list, which skips consecutive matches.)
        folderList = [h for h in folderList if "lowMET" not in h]

        folderPath = os.path.join(opts.folder, "")
        folderPathGen = os.path.join(opts.folder + "Genuine")
        folderPathFake = os.path.join(opts.folder + "Fake")

        # Numerator histograms (Inclusive + Genuine + Fake folders)
        histoList = folderList
        num_pathList = [os.path.join(folderPath, h) for h in histoList]
        num_pathList.extend([os.path.join(folderPathGen, h) for h in histoList])
        num_pathList.extend([os.path.join(folderPathFake, h) for h in histoList])

        # Denominator histograms (to be used in the estimation of QCD Data-Driven)
        histoList = [h for h in folderListIncl if "AfterStandardSelections_LeadingTrijet_Pt" in h]
        den_pathList = [os.path.join(folderPath, h) for h in histoList]
        den_pathList.extend([os.path.join(folderPathGen, h) for h in histoList])
        den_pathList.extend([os.path.join(folderPathFake, h) for h in histoList])

        # Drop "lowMET" histograms (same BUGFIX as above)
        den_pathList = [h for h in den_pathList if "lowMET" not in h]

        # Do the histograms
        PlotHistos(noSF_datasetsMgr, withCR2SF_datasetsMgr, num_pathList, den_pathList, opts)
    return
def main(opts):
    '''
    Plot data/MC histograms for a single signal mass point: keeps only the
    "M_<signalMass>" signal dataset (cross-section forced to 1.0), removes
    all other datasets, and plots the histograms in keepList.
    '''
    optModes = [""]
    if opts.optMode != None:
        optModes = [opts.optMode]

    # For-loop: All opt Modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities(fname="lumi.json")

        # Get luminosity (from the Data dataset if present, else from file)
        if opts.intLumi < 0:
            if "Data" in datasetsMgr.getAllDatasetNames():
                opts.intLumi = datasetsMgr.getDataset("Data").getLuminosity()
            else:
                opts.intLumi = datasetsMgr.loadLumi()

        # Set/Overwrite cross-sections: keep only the requested signal mass
        datasetsToRemove = []
        for d in datasetsMgr.getAllDatasets():
            if "M_%s" % (opts.signalMass) in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)
            else:
                datasetsToRemove.append(d.getName())

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Custom filtering of datasets (exact-name match)
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Verbose(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            datasetsMgr.remove(filter(lambda name: d == name, datasetsMgr.getAllDatasetNames()))
        if opts.verbose:
            datasetsMgr.PrintInfo()

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Do Data-MC histograms with DataDriven QCD
        folder = opts.folder
        histoList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths = [os.path.join(folder, h) for h in histoList]
        keepList = ["LdgTetrajetMass_AfterAllSelections"]
        #keepList = ["LdgTetrajetMass_AfterStandardSelections"]
        myHistos = []
        for h in histoPaths:
            # Keep only histograms whose basename is white-listed
            if h.split("/")[-1] not in keepList:
                continue
            else:
                myHistos.append(h)

        for i, h in enumerate(myHistos, 1):
            PlotHistograms(datasetsMgr, h)

    # NOTE(review): in the flattened original it is ambiguous whether this
    # Print was inside the optModes loop; placed at function level since
    # optModes has a single entry either way — confirm against upstream.
    Print(
        "All plots saved under directory %s" %
        (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) +
         ShellStyles.NormalStyle()), True)
    return
def _doCalculate2D(self, nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels): ''' Calculates the result for 2D histograms ''' # Intialize counters for purity calculation in final shape binning myShapeDataSum = [] myShapeDataSumUncert = [] myShapeEwkSum = [] myShapeEwkSumUncert = [] myList = [] for k in range(1,self._resultShape.GetNbinsY()+1): myList.append(0.0) for j in range(1,self._resultShape.GetNbinsX()+1): myShapeDataSum.append(myList[:]) myShapeDataSumUncert.append(myList[:]) myShapeEwkSum.append(myList[:]) myShapeEwkSumUncert.append(myList[:]) # Calculate results separately for each phase-space bin, and then combine them to get inclusive result for i in range(0, nSplitBins): # Get data-driven QCD, data, and MC EWK shape histogram for the phase space bin h = shape.getDataDrivenQCDHistoForSplittedBin(i) hData = shape.getDataHistoForSplittedBin(i) hEwk = shape.getEwkHistoForSplittedBin(i) # Get normalization factor wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i) if self._optionUseInclusiveNorm: wQCDLabel = "Inclusive" wQCD = 0.0 if not wQCDLabel in normFactors.keys(): msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel, shape.getHistoName()) print ShellStyles.WarningLabel() + msg else: wQCD = normFactors[wQCDLabel] # Loop over bins in the shape histogram for j in range(1,h.GetNbinsX()+1): for k in range(1,h.GetNbinsY()+1): myResult = 0.0 myStatDataUncert = 0.0 myStatEwkUncert = 0.0 if abs(h.GetBinContent(j,k)) > 0.00001: # Ignore zero bins # Calculate result myResult = h.GetBinContent(j,k) * wQCD # Calculate abs. stat. uncert. for data and for MC EWK myStatDataUncert = hData.GetBinError(j,k) * wQCD myStatEwkUncert = hEwk.GetBinError(j,k) * wQCD #errorPropagation.errorPropagationForProduct(hLeg1.GetBinContent(j), hLeg1Data.GetBinError(j), myEffObject.value(), myEffObject.uncertainty("statData")) # Do not calculate here MC EWK syst. 
myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels) self._resultCountObject.add(myCountObject) if optionDoNQCDByBinHistograms: self._histogramsList[i].SetBinContent(j, k, myCountObject.value()) self._histogramsList[i].SetBinError(j, k, myCountObject.statUncertainty()) self._resultShape.SetBinContent(j, k, self._resultShape.GetBinContent(j, k) + myCountObject.value()) self._resultShape.SetBinError(j, k, self._resultShape.GetBinError(j, k) + myCountObject.statUncertainty()**2) # Sum squared # Sum items for purity calculation myShapeDataSum[j-1][k-1] += hData.GetBinContent(j,k)*wQCD myShapeDataSumUncert[j-1][k-1] += (hData.GetBinError(j,k)*wQCD)**2 myShapeEwkSum[j-1][k-1] += hEwk.GetBinContent(j,k)*wQCD myShapeEwkSumUncert[j-1][k-1] += (hEwk.GetBinError(j,k)*wQCD)**2 h.Delete() hData.Delete() hEwk.Delete() # Take square root of uncertainties for j in range(1,self._resultShape.GetNbinsX()+1): for k in range(1,self._resultShape.GetNbinsY()+1): self._resultShape.SetBinError(j, k, math.sqrt(self._resultShape.GetBinError(j, k))) # Print result print "NQCD Integral(%s) = %s "%(shape.getHistoName(), self._resultCountObject.getResultStringFull("%.1f")) # Print purity as function of final shape bins if optionPrintPurityByBins: print "Purity of shape %s"%shape.getHistoName() print "shapeBin purity purityUncert" for j in range (1,self._resultShape.GetNbinsX()+1): for k in range(1,self._resultShape.GetNbinsY()+1): myPurity = 0.0 myPurityUncert = 0.0 if abs(myShapeDataSum[j-1][k-1]) > 0.000001: myPurity = 1.0 - myShapeEwkSum[j-1][k-1] / myShapeDataSum[j-1][k-1] myPurityUncert = errorPropagation.errorPropagationForDivision(myShapeEwkSum[j-1][k-1], math.sqrt(myShapeEwkSumUncert[j-1][k-1]), myShapeDataSum[j-1][k-1], math.sqrt(myShapeDataSumUncert[j-1][k-1])) # Store MC EWK content self._resultShapeEWK.SetBinContent(j, k, myShapeEwkSum[j-1][k-1]) self._resultShapeEWK.SetBinError(j, k, math.sqrt(myShapeEwkSumUncert[j-1][k-1])) 
self._resultShapePurity.SetBinContent(j, k, myPurity) self._resultShapePurity.SetBinError(j, k, myPurityUncert) # Print purity info of final shape if optionPrintPurityByBins: myString = "" if j < self._resultShape.GetNbinsX(): myString = "%d..%d, "%(self._resultShape.GetXaxis().GetBinLowEdge(j),self._resultShape.GetXaxis().GetBinUpEdge(j)) else: myString = ">%d, "%(self._resultShape.GetXaxis().GetBinLowEdge(j)) if k < self._resultShape.GetNbinsY(): myString = "%d..%d"%(self._resultShape.GetYaxis().GetBinLowEdge(k),self._resultShape.GetYaxis().GetBinUpEdge(k)) else: myString = ">%d"%(self._resultShape.GetYaxis().GetBinLowEdge(k)) myString += " %.3f %.3f"%(myPurity, myPurityUncert) print myString return
def main(opts):
    '''
    Produce Data-vs-MC control plots: sets up the dataset manager, removes
    overlapping/unwanted datasets, keeps only the selected signal point,
    reorders datasets (Data first, signal on top), and plots every
    histogram in opts.folder that is not in the ignore list.
    '''
    #optModes = ["", "OptChiSqrCutValue50", "OptChiSqrCutValue100"]
    optModes = [""]
    if opts.optMode != None:
        optModes = [opts.optMode]

    # For-loop: All opt Modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json
        if 0:
            datasetsMgr.printSelections()
            sys.exit()

        # Define datasets to remove by default
        QCD_list = ["QCD_HT700to1000", "QCD_HT50to100", "QCD_HT500to700", "QCD_HT300to500", "QCD_HT200to300", "QCD_HT2000toInf", "QCD_HT1500to2000", "QCD_HT100to200", "QCD_HT1000to1500"]
        QCDExt_list = [x+"_ext1" for x in QCD_list]
        datasetsToRemove = ["QCD-b"]
        # datasetsToRemove.extend(QCD_list)
        # datasetsToRemove.extend(QCDExt_list)

        # ZJets and DYJets overlap
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))
            #datasetsMgr.remove(filter(lambda name: "DYJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))

        # Set/Overwrite cross-sections; drop all signal points except opts.signal
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)  # ATLAS 13 TeV H->tb exclusion limits
                if d.getName() != opts.signal:
                    datasetsToRemove.append(d.getName())

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Custom filtering of datasets (exact-name match)
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Verbose(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            datasetsMgr.remove(filter(lambda name: d == name, datasetsMgr.getAllDatasetNames()))
        if opts.verbose:
            datasetsMgr.PrintInfo()

        # Re-order datasets (different for inverted than default=baseline)
        newOrder = ["Data"]
        for i, d in enumerate(datasetsMgr.getAllDatasets(), 0):
            if d.isData():
                continue
            else:
                newOrder.append(d.getName())

        # Re-arrange dataset order?
        if 0:
            s = newOrder.pop(newOrder.index("noTop"))
            newOrder.insert(len(newOrder), s)  # after "Data"

        # Move signal to top
        if opts.signal in newOrder:
            s = newOrder.pop(newOrder.index(opts.signal))
            newOrder.insert(1, s)
        datasetsMgr.selectAndReorder(newOrder)

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Do Data-MC histograms with DataDriven QCD
        folder = opts.folder
        histoList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths = [os.path.join(folder, h) for h in histoList]
        ignoreList = ["Aplanarity", "Planarity", "Sphericity", "FoxWolframMoment", "Circularity", "ThirdJetResolution", "Centrality", "_Vs_"]
        myHistos = []
        for h in histoPaths:
            skip = False
            # Skip unwanted histos (substring match against ignore list)
            for i in ignoreList:
                if i in h:
                    skip = True
            if skip:
                continue
            else:
                myHistos.append(h)

        for i, h in enumerate(myHistos, 1):
            # Plot the histograms!
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (len(myHistos)), h)
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), i == 1)
            DataMCHistograms(datasetsMgr, h)

    # NOTE(review): placement of this Print relative to the optModes loop is
    # ambiguous in the flattened original; optModes has one entry so the
    # behavior is identical — confirm against upstream if loop grows.
    Print("All plots saved under directory %s" % (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) + ShellStyles.NormalStyle()), True)
    return
def main(opts):
    '''
    Plot cut-flow efficiency for every histogram in opts.folder after the
    standard dataset-manager setup (PU weighting, luminosities, dataset
    removal, EWK merging).
    '''
    #======================
    optModes = [""]
    if opts.optMode != None:
        optModes = [opts.optMode]

    # For-loop: All opt Modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json

        # Set/Overwrite cross-sections
        datasetsToRemove = ["QCD-b", "TTTT"]  #, "QCD_HT50to100", "QCD_HT100to200"]#, "QCD_HT200to300"]#, "QCD_HT300to500"]
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)  # ATLAS 13 TeV H->tb exclusion limits

        # Re-order datasets: skip signal points not in the (module-level)
        # signalMass list, then prepend the signalMass entries.
        # NOTE(review): signalMass is not defined in this function — presumably
        # a module-level list; confirm its element type matches both the
        # `d not in signalMass` test and the `insert(0, m)` name usage.
        datasetOrder = []
        for d in datasetsMgr.getAllDatasets():
            if "M_" in d.getName():
                if d not in signalMass:
                    continue
            datasetOrder.append(d.getName())
        for m in signalMass:
            datasetOrder.insert(0, m)
        datasetsMgr.selectAndReorder(datasetOrder)

        datasetsMgr.PrintCrossSections()
        datasetsMgr.PrintLuminosities()

        # Custom filtering of datasets (substring match)
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            datasetsMgr.remove(filter(lambda name: d in name, datasetsMgr.getAllDatasetNames()))
        if opts.verbose:
            datasetsMgr.PrintInfo()

        # ZJets and DYJets overlap
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))

        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)
        plots.mergeRenameReorderForDataMC(datasetsMgr)

        # Get luminosity
        intLumi = datasetsMgr.getDataset("Data").getLuminosity()

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", GetListOfEwkDatasets(datasetsMgr))
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Get histogram list
        folder = opts.folder
        histoList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths1 = [os.path.join(folder, h) for h in histoList]
        # NOTE(review): histoPaths2/nHistos are computed but unused — the loop
        # below iterates the unfiltered histoList (bare names, not paths).
        # Looks like leftover code; confirm intent before cleaning up.
        histoPaths2 = [h for h in histoPaths1 if "jet" not in h.lower()]
        nHistos = len(histoPaths2)

        # Calculate signal significance for all histograms
        for h in histoList:
            PlotCutFlowEfficiency(h, datasetsMgr, intLumi)
    return
def PlotAndFitTemplates(datasetsMgr, histoName, folderName, opts, doFakeB=False): Verbose("PlotAndFitTemplates()") # Definitions inclusiveFolder = folderName genuineBFolder = folderName + "EWKGenuineB" fakeBFolder = folderName + "EWKFakeB" if doFakeB: ewkFolder = genuineBFolder bkgName = "FakeB" else: ewkFolder = inclusiveFolder bkgName = "QCD" # Create the plotters p1 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (inclusiveFolder, histoName)) p2 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (ewkFolder, histoName)) p3 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (inclusiveFolder, histoName)) p4 = plots.DataMCPlot(datasetsMgr, "%s/%s" % (ewkFolder, histoName)) if 0: p1.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2)) p2.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2)) p3.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2)) p4.histoMgr.forEachHisto(lambda h: h.getRootHisto().RebinX(2)) # Get the histograms Data_baseline = p1.histoMgr.getHisto("Data").getRootHisto().Clone( "Baseline Data") #also legend entry name FakeB_baseline = p1.histoMgr.getHisto("Data").getRootHisto().Clone( "Baseline " + bkgName) EWK_baseline = p2.histoMgr.getHisto("EWK").getRootHisto().Clone( "Baseline EWK") Data_inverted = p3.histoMgr.getHisto("Data").getRootHisto().Clone( "Inverted Data") FakeB_inverted = p3.histoMgr.getHisto("Data").getRootHisto().Clone( "Inverted " + bkgName) EWK_inverted = p4.histoMgr.getHisto("EWK").getRootHisto().Clone( "Inverted EWK") # Create FakeB histos: FakeB = (Data - EWK) msg = "Disabled EWK subtraction (Use Case: Control Triggers)" Print(ShellStyles.WarningLabel() + msg, True) #FakeB_baseline.Add(EWK_baseline, -1) #FakeB_inverted.Add(EWK_inverted, -1) # Create the final plot object compareHistos = [EWK_baseline] p = plots.ComparisonManyPlot(FakeB_inverted, compareHistos, saveFormats=[]) p.setLuminosity(GetLumi(datasetsMgr)) # Apply styles p.histoMgr.forHisto("Inverted " + bkgName, styles.getFakeBStyle()) p.histoMgr.forHisto("Baseline EWK", 
styles.getAltEWKStyle()) # Set draw style p.histoMgr.setHistoDrawStyle("Inverted " + bkgName, "P") p.histoMgr.setHistoDrawStyle("Baseline EWK", "AP") # Set legend style p.histoMgr.setHistoLegendStyle("Inverted " + bkgName, "P") p.histoMgr.setHistoLegendStyle("Baseline EWK", "LP") # p.histoMgr.setHistoLegendStyleAll("LP") # Set legend labels if doFakeB: p.histoMgr.setHistoLegendLabelMany({ "Baseline EWKGenuineB": "EWK (GenuineB)", "Inverted FakeB": "Fake-b", }) else: p.histoMgr.setHistoLegendLabelMany({ "Baseline EWK": "EWK", "Inverted " + bkgName: "QCD", }) #========================================================================================= # Set Minimizer Options #========================================================================================= ''' https://root.cern.ch/root/htmldoc/guides/users-guide/FittingHistograms.html#the-th1fit-method https://root.cern.ch/root/html/src/ROOT__Math__MinimizerOptions.h.html#a14deB ''' if 0: ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Migrad") ROOT.Math.MinimizerOptions.SetDefaultStrategy( 2) # Speed = 0, Balance = 1, Robustness = 2 ROOT.Math.MinimizerOptions.SetDefaultMaxFunctionCalls( 5000) # set maximum of function calls ROOT.Math.MinimizerOptions.SetDefaultMaxIterations( 5000 ) # set maximum iterations (one iteration can have many function calls) if 0: ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Simplex") ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Minimize") ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "MigradImproved") ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Scan") ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit", "Seek") ROOT.Math.MinimizerOptions.SetDefaultErrorDef( 1 ) # error definition (=1. 
for getting 1 sigma error for chi2 fits) ROOT.Math.MinimizerOptions.SetDefaultMaxFunctionCalls( 1000000) # set maximum of function calls ROOT.Math.MinimizerOptions.SetDefaultMaxIterations( 1000000 ) # set maximum iterations (one iteration can have many function calls) ROOT.Math.MinimizerOptions.SetDefaultPrecision( -1 ) # precision in the objective function calculation (value <= 0 means left to default) ROOT.Math.MinimizerOptions.SetDefaultPrintLevel( 1 ) # None = -1, Reduced = 0, Normal = 1, ExtraForProblem = 2, Maximum = 3 ROOT.Math.MinimizerOptions.SetDefaultTolerance( 1e-03 ) # Minuit/Minuit2 converge when the EDM is less a given tolerance. (default 1e-03) if 1: hLine = "=" * 45 title = "{:^45}".format("Minimzer Options") print "\t", hLine print "\t", title print "\t", hLine minOpt = ROOT.Math.MinimizerOptions() minOpt.Print() print "\t", hLine, "\n" #========================================================================================= # Start fit process #========================================================================================= binLabels = ["Inclusive"] FITMIN = 80 FITMAX = 1000 #moduleInfoString = opts.dataEra + "_" + opts.searchMode + "_" + opts.optMode moduleInfoString = opts.optMode #========================================================================================= # Create templates (EWK fakes, EWK genuine, QCD; data template is created by manager) #========================================================================================= manager = QCDNormalization.QCDNormalizationManagerDefault( binLabels, opts.mcrab, moduleInfoString) template_EWKFakeB_Baseline = manager.createTemplate("EWKFakeB_Baseline") template_EWKFakeB_Inverted = manager.createTemplate("EWKFakeB_Inverted") template_EWKInclusive_Baseline = manager.createTemplate( "EWKInclusive_Baseline") template_EWKInclusive_Inverted = manager.createTemplate( "EWKInclusive_Inverted") template_FakeB_Baseline = manager.createTemplate("QCD_Baseline") template_FakeB_Inverted = 
manager.createTemplate("QCD_Inverted") #======================================================================2================== # EWK #========================================================================================= par0 = [+7.1817e-01, 0.0, 1.0] # cb_norm par1 = [+1.7684e+02, 150.0, 200.0] # cb_mean par2 = [+2.7287e+01, 20.0, 40.0] # cb_sigma (fixed for chiSq=2) par3 = [-3.9174e-01, -0.5, 0.0] # cb_alpha (fixed for chiSq=2) par4 = [+2.5104e+01, 0.0, 50.0] # cb_n par5 = [+7.4724e-05, 0.0, 1.0] # expo_norm par6 = [-4.6848e-02, -1.0, 0.0] # expo_a par7 = [+2.1672e+02, 200.0, 250.0] # gaus_mean (fixed for chiSq=2) par8 = [+6.3201e+01, 20.0, 80.0] # gaus_sigma template_EWKInclusive_Baseline.setFitter( QCDNormalization.FitFunction("EWKFunction", boundary=0, norm=1, rejectPoints=0), FITMIN, FITMAX) template_EWKInclusive_Baseline.setDefaultFitParam( defaultInitialValue=None, defaultLowerLimit=[ par0[1], par1[1], par2[0], par3[0], par4[1], par5[1], par6[1], par7[0], par8[1] ], defaultUpperLimit=[ par0[2], par1[2], par2[0], par3[0], par4[2], par5[2], par6[2], par7[0], par8[2] ]) #========================================================================================= # FakeB/QCD #========================================================================================= par0 = [8.9743e-01, 0.0, 1.0] # lognorm_norm par1 = [2.3242e+02, 300.0, 1000.0] # lognorm_mean par2 = [1.4300e+00, 0.5, 10.0] # lognorm_shape par3 = [2.2589e+02, 100.0, 500.0] # gaus_mean par4 = [4.5060e+01, 0.0, 100.0] # gaus_sigma template_FakeB_Inverted.setFitter( QCDNormalization.FitFunction("QCDFunctionAlt", boundary=0, norm=1, rejectPoints=0), FITMIN, FITMAX) template_FakeB_Inverted.setDefaultFitParam( defaultInitialValue=None, defaultLowerLimit=[par0[1], par1[1], par2[1], par3[1], par4[1]], defaultUpperLimit=[par0[2], par1[2], par2[2], par3[2], par4[2]]) #========================================================================================= # Set histograms to the templates 
#========================================================================================= if doFakeB: template_EWKFakeB_Baseline.setHistogram(EWKGenuineB_baseline, "Inclusive") template_EWKFakeB_Inverted.setHistogram(EWKGenuineB_inverted, "Inclusive") template_EWKInclusive_Baseline.setHistogram(EWKGenuineB_baseline, "Inclusive") template_EWKInclusive_Inverted.setHistogram(EWKGenuineB_inverted, "Inclusive") else: template_EWKFakeB_Baseline.setHistogram(EWK_baseline, "Inclusive") template_EWKFakeB_Inverted.setHistogram(EWK_inverted, "Inclusive") template_EWKInclusive_Baseline.setHistogram(EWK_baseline, "Inclusive") template_EWKInclusive_Inverted.setHistogram(EWK_inverted, "Inclusive") template_FakeB_Baseline.setHistogram(FakeB_baseline, "Inclusive") template_FakeB_Inverted.setHistogram(FakeB_inverted, "Inclusive") #========================================================================================= # Make plots of templates #========================================================================================= manager.plotTemplates() #========================================================================================= # Fit individual templates to histogram "data_baseline", with custom fit options #========================================================================================= fitOptions = "R B L W 0 Q M" manager.calculateNormalizationCoefficients(Data_baseline, fitOptions, FITMIN, FITMAX) # Only for when the measurement is done in bins fileName = os.path.join( opts.mcrab, "QCDInvertedNormalizationFactors%s.py" % (getModuleInfoString(opts))) manager.writeNormFactorFile(fileName, opts) if 1: saveName = fileName.replace("/", "_") # Draw the histograms plots.drawPlot( p, saveName, **GetHistoKwargs(histoName)) #the "**" unpacks the kwargs_ # Save plot in all formats SavePlot(p, saveName, os.path.join(opts.saveDir, "Fit")) return
def main(opts):
    '''
    Plot histograms from two parallel dataset managers (inclusive and
    "matched", the latter with QCD removed): filters datasets, moves signal
    mass points to the top of the draw order, and calls PlotHistograms()
    for every histogram in opts.folder.
    '''
    #optModes = ["", "OptChiSqrCutValue50", "OptChiSqrCutValue100"]
    optModes = [""]
    if opts.optMode != None:
        optModes = [opts.optMode]

    # For-loop: All opt Modes
    for opt in optModes:
        opts.optMode = opt

        # Setup & configure the dataset manager
        datasetsMgr = GetDatasetsFromDir(opts)
        datasetsMgr.updateNAllEventsToPUWeighted()
        datasetsMgr.loadLuminosities()  # from lumi.json

        # Second manager for the "matched" samples
        datasetsMgr_matched = GetDatasetsFromDir(opts)
        datasetsMgr_matched.updateNAllEventsToPUWeighted()
        datasetsMgr_matched.loadLuminosities()  # from lumi.json

        plots.mergeRenameReorderForDataMC(datasetsMgr)
        datasetsMgr.remove(filter(lambda name: "QCD_b" in name, datasetsMgr.getAllDatasetNames()))  #soti
        datasetsMgr_matched.remove(filter(lambda name: "QCD" in name, datasetsMgr_matched.getAllDatasetNames()))  #soti

        # Set/Overwrite cross-sections
        datasetsToRemove = ["QCD-b"]  #, "QCD_HT50to100", "QCD_HT100to200"]#, "QCD_HT200to300"]#, "QCD_HT300to500"]
        for d in datasetsMgr.getAllDatasets():
            if "ChargedHiggs" in d.getName():
                datasetsMgr.getDataset(d.getName()).setCrossSection(1.0)  # ATLAS 13 TeV H->tb exclusion limits
            #if d.getName() != opts.signal:
            # Hard-coded removal of specific mass points
            if "M_650" in d.getName():  #soti fixmi
                datasetsToRemove.append(d.getName())
            if "M_800" in d.getName():
                datasetsToRemove.append(d.getName())
            if "M_200" in d.getName():
                datasetsToRemove.append(d.getName())

        if opts.verbose:
            datasetsMgr.PrintCrossSections()
            datasetsMgr.PrintLuminosities()

        # Custom filtering of datasets (substring match, both managers)
        for i, d in enumerate(datasetsToRemove, 0):
            msg = "Removing dataset %s" % d
            Print(ShellStyles.WarningLabel() + msg + ShellStyles.NormalStyle(), i == 0)
            datasetsMgr.remove(filter(lambda name: d in name, datasetsMgr.getAllDatasetNames()))
            datasetsMgr_matched.remove(filter(lambda name: d in name, datasetsMgr_matched.getAllDatasetNames()))  #soti
        if opts.verbose:
            datasetsMgr.PrintInfo()

        # ZJets and DYJets overlap
        if "ZJetsToQQ_HT600toInf" in datasetsMgr.getAllDatasetNames() and "DYJetsToQQ_HT180" in datasetsMgr.getAllDatasetNames():
            Print("Cannot use both ZJetsToQQ and DYJetsToQQ due to duplicate events? Investigate. Removing ZJetsToQQ datasets for now ..", True)
            datasetsMgr.remove(filter(lambda name: "ZJetsToQQ" in name, datasetsMgr.getAllDatasetNames()))

        #datasetsMgr.merge("QCD", GetListOfQCDatasets())
        #plots._plotStyles["QCD"] = styles.getQCDLineStyle()
        # Merge histograms (see NtupleAnalysis/python/tools/plots.py)

        # Get luminosity (hard-coded; the Data-based lookup is disabled)
        #intLumi = datasetsMgr.getDataset("Data").getLuminosity() Soti
        intLumi = 35920

        # Re-order datasets (different for inverted than default=baseline)
        newOrder = []
        # For-loop: All MC datasets
        for d in datasetsMgr.getMCDatasets():
            newOrder.append(d.getName())

        # Move signal to top
        # if opts.signal in newOrder:
        #     s = newOrder.pop( newOrder.index(opts.signal) )
        #     newOrder.insert(0, s)
        print len(newOrder), "newOrder"
        signalMass = ["M_300", "M_500", "M_1000"]
        for d in datasetsMgr.getMCDatasets():
            for m in signalMass:
                if m in d.getName():
                    s = newOrder.pop(newOrder.index(d.getName()))
                    newOrder.insert(0, s)
        #datasetsMgr.selectAndReorder(newOrder)
        print len(newOrder), "newOrder"

        # Add Data to list of samples!
        if not opts.onlyMC:
            newOrder.insert(0, "Data")

        # Apply new dataset order!
        datasetsMgr.selectAndReorder(newOrder)

        # Merge EWK samples
        if opts.mergeEWK:
            datasetsMgr.merge("EWK", aux.GetListOfEwkDatasets())
            plots._plotStyles["EWK"] = styles.getAltEWKStyle()

        # Print dataset information
        datasetsMgr.PrintInfo()

        # Apply TDR style
        style = tdrstyle.TDRStyle()
        style.setOptStat(True)
        style.setGridX(opts.gridX)
        style.setGridY(opts.gridY)

        # Do Data-MC histograms with DataDriven QCD
        folder = opts.folder
        histoList = datasetsMgr.getDataset(datasetsMgr.getAllDatasetNames()[0]).getDirectoryContent(folder)
        histoPaths1 = [os.path.join(folder, h) for h in histoList]
        histoPaths2 = [h for h in histoPaths1]  # if "jet" not in h.lower()]
        nHistos = len(histoPaths2)

        # For-loop: All histograms
        for i, h in enumerate(histoPaths2, 1):
            msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (nHistos), h)
            Print(ShellStyles.SuccessStyle() + msg + ShellStyles.NormalStyle(), i == 1)
            PlotHistograms(datasetsMgr, datasetsMgr_matched, h, intLumi)
        ROOT.gStyle.SetNdivisions(10, "X")

    # NOTE(review): placement relative to the optModes loop is ambiguous in
    # the flattened original; optModes has one entry so behavior is identical.
    Print("All plots saved under directory %s" % (ShellStyles.NoteStyle() + aux.convertToURL(opts.saveDir, opts.url) + ShellStyles.NormalStyle()), True)
    return
def _doCalculate(self, shape, moduleInfoString, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms):
    '''
    Calculates the data-driven QCD result in the signal region.

    For every phase-space split bin of "shape" the data-driven QCD histogram
    is scaled by the corresponding normalisation factor (shape * w_QCD,
    transfer from control region to signal region) and accumulated into
    self._resultShape. Absolute statistical uncertainties for data and for
    MC EWK are propagated separately (labels "statData"/"statEWK") and summed
    in quadrature per bin. Side products stored on self:
      _resultShape, _resultShapeEWK, _resultShapePurity, _histogramsList,
      _resultCountObject.
    For 2-D shapes the work is delegated to self._doCalculate2D().
    '''
    Verbose("Calculate final shape in signal region (shape * w_QCD) & initialize result containers", True)
    nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()

    # Clone the binning of the first split bin as template for the total result
    Verbose("Create Shape", True)
    self._resultShape = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
    self._resultShape.Reset()
    self._resultShape.SetTitle("NQCDFinal_Total_%s"%moduleInfoString)
    self._resultShape.SetName("NQCDFinal_Total_%s"%moduleInfoString)

    Verbose("Create EWK shape", True)
    self._resultShapeEWK = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
    self._resultShapeEWK.Reset()
    self._resultShapeEWK.SetTitle("NQCDFinal_EWK_%s"%moduleInfoString)
    self._resultShapeEWK.SetName("NQCDFinal_EWK_%s"%moduleInfoString)

    Verbose("Create Purity shape", True)
    self._resultShapePurity = aux.Clone(shape.getDataDrivenQCDHistoForSplittedBin(0))
    self._resultShapePurity.Reset()
    self._resultShapePurity.SetTitle("NQCDFinal_Purity_%s"%moduleInfoString)
    self._resultShapePurity.SetName("NQCDFinal_Purity_%s"%moduleInfoString)

    self._histogramsList = []
    myUncertaintyLabels = ["statData", "statEWK"]
    self._resultCountObject = extendedCount.ExtendedCount(0.0, [0.0, 0.0], myUncertaintyLabels)

    # Optionally keep one per-split-bin result histogram
    if optionDoNQCDByBinHistograms:
        for i in range(0, nSplitBins):
            hBin = aux.Clone(self._resultShape)
            hBin.SetTitle("NQCDFinal_%s_%s"%(shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ",""), moduleInfoString))
            hBin.SetName("NQCDFinal_%s_%s"%(shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(" ",""), moduleInfoString))
            self._histogramsList.append(hBin)

    # 2-D shapes are handled by a dedicated method
    if isinstance(self._resultShape, ROOT.TH2):
        self._doCalculate2D(nSplitBins, shape, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms, myUncertaintyLabels)
        return

    # Intialize counters for purity calculation in final shape binning
    myShapeDataSum = []
    myShapeDataSumUncert = []
    myShapeEwkSum = []
    myShapeEwkSumUncert = []
    # For-loop: All Bins
    for j in range(1,self._resultShape.GetNbinsX()+1):
        myShapeDataSum.append(0.0)
        myShapeDataSumUncert.append(0.0)
        myShapeEwkSum.append(0.0)
        myShapeEwkSumUncert.append(0.0)

    Verbose("Calculate results separately for each phase-space bin and then combine", True)
    # For-loop: All measurement bins (e.g. tau pT bins for HToTauNu)
    for i in range(0, nSplitBins):
        # N.B: The \"Inclusive\" value is in the zeroth bin
        Verbose("Get data-driven QCD, data, and MC EWK shape histogram for the phase-space bin", True)
        h     = shape.getDataDrivenQCDHistoForSplittedBin(i)
        hData = shape.getDataHistoForSplittedBin(i)
        hEwk  = shape.getEwkHistoForSplittedBin(i)

        Verbose("Get normalization factor", True)
        wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
        if self._optionUseInclusiveNorm:
            wQCDLabel = "Inclusive"
        wQCD = 0.0
        # Missing factor => bin is effectively ignored (weight stays 0.0)
        if not wQCDLabel in normFactors.keys():
            msg = "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (wQCDLabel,shape.getHistoName())
            Print(ShellStyles.WarningLabel() + msg, True)
        else:
            wQCD = normFactors[wQCDLabel]
        msg = "Weighting bin \"%i\" (label=\"%s\") with normFactor \"%s\"" % (i, wQCDLabel, wQCD)
        Verbose(ShellStyles.NoteLabel() + msg, True)

        # Construct info table (debugging)
        table = []
        align = "{:>6} {:^10} {:^15} {:>10} {:>10} {:>10} {:^3} {:^8} {:^3} {:^8}"
        header = align.format("Bin", "Width", "Range", "Content", "NormFactor", "QCD", "+/-", "Data", "+/-", "EWK")
        hLine = "="*90
        table.append("{:^90}".format(shape.getHistoName()))
        table.append(hLine)
        table.append(header)
        table.append(hLine)
        binSum = 0.0
        nBins = h.GetNbinsX()
        # NOTE(review): bin 0 is the ROOT underflow bin; using its width/center
        # presumably relies on fixed-width binning -- TODO confirm
        binWidth = hData.GetBinWidth(0)
        xMin = hData.GetXaxis().GetBinCenter(0)
        xMax = hData.GetXaxis().GetBinCenter(nBins+1)

        # For-Loop (nested): All bins in the shape histogram
        for j in range(1, nBins+1):
            # Initialise values
            myResult = 0.0
            myStatDataUncert = 0.0
            myStatEwkUncert = 0.0
            # Ignore zero bins
            if abs(h.GetBinContent(j)) > 0.00001:
                Verbose("Calculating the result")
                binContent = h.GetBinContent(j)
                binRange = "%.1f -> %.1f" % (h.GetXaxis().GetBinLowEdge(j), h.GetXaxis().GetBinUpEdge(j) )
                binWidth = GetTH1BinWidthString(h, j)
                binSum += binContent
                myResult = binContent * wQCD #apply normalisation factor (transfer from CR to SR))
                Verbose("Calculate abs. stat. uncert. for data and for MC EWK (Do not calculate here MC EWK syst.)", True)
                myStatDataUncert = hData.GetBinError(j) * wQCD
                myStatEwkUncert  = hEwk.GetBinError(j) * wQCD
                table.append(align.format(j, binWidth, binRange, "%0.1f" % binContent, wQCD, "%.1f" % myResult, "+/-", "%.1f" % myStatDataUncert, "+/-", "%.1f" % myStatEwkUncert))
            # Get count object (zero bins contribute a zero count)
            myCountObject = extendedCount.ExtendedCount(myResult, [myStatDataUncert, myStatEwkUncert], myUncertaintyLabels)
            self._resultCountObject.add(myCountObject)
            if optionDoNQCDByBinHistograms:
                Verbose("Setting bin content \"%i\"" % (j), True)
                self._histogramsList[i].SetBinContent(j, myCountObject.value())
                self._histogramsList[i].SetBinError(j, myCountObject.statUncertainty())
            binContent = self._resultShape.GetBinContent(j) + myCountObject.value()
            binError   = self._resultShape.GetBinError(j) + myCountObject.statUncertainty()**2
            # NOTE(review): j starts at 1, so the j==0 "print header" flag below is always False
            Verbose("Setting bin %i to content %0.1f +/- %0.1f" % (j, binContent, binError), j==0)
            self._resultShape.SetBinContent(j, binContent)
            self._resultShape.SetBinError(j, binError) # Sum squared (take sqrt outside loop on final squared sum)
            Verbose("Sum items for purity calculation", True)
            myShapeDataSum[j-1]       += hData.GetBinContent(j)*wQCD
            myShapeDataSumUncert[j-1] += (hData.GetBinError(j)*wQCD)**2
            myShapeEwkSum[j-1]        += hEwk.GetBinContent(j)*wQCD
            myShapeEwkSumUncert[j-1]  += (hEwk.GetBinError(j)*wQCD)**2

        # Delete the shape histograms (ROOT objects are not garbage-collected)
        h.Delete()
        hData.Delete()
        hEwk.Delete()

    # For-loop: All shape bins
    for j in range(1,self._resultShape.GetNbinsX()+1):
        # Take square root of uncertainties
        self._resultShape.SetBinError(j, math.sqrt(self._resultShape.GetBinError(j)))

    # Print detailed results in a formatted table
    # (table/align/binSum/wQCD etc. refer to the last phase-space bin iterated)
    qcdResults = self._resultCountObject.getResultAndStatErrorsDict()
    bins     = "%0.f-%.0f" % (1, nBins)
    binRange = "%.1f -> %.1f" % (xMin, xMax)
    binSum   = "%.1f" % binSum
    nQCD     = "%.1f" % qcdResults["value"]
    dataStat = "%.1f" % qcdResults["statData"]
    ewkStat  = "%.1f" % qcdResults["statEWK"]
    table.append(align.format(bins, binWidth, binRange, binSum, wQCD, nQCD, "+/-", dataStat, "+/-", ewkStat))
    table.append(hLine)
    for i, line in enumerate(table):
        if i == len(table)-2:
            Verbose(ShellStyles.TestPassedStyle()+line+ShellStyles.NormalStyle(), i==0)
        else:
            Verbose(line, i==0)

    if optionPrintPurityByBins:
        Verbose("Printing Shape Purity bin-by-bin.", True)
        self.PrintPurityByBins(nBins, shape, myShapeDataSum, myShapeDataSumUncert, myShapeEwkSum, myShapeEwkSumUncert)
    return
def _initialize(self, h):
    '''
    Decomposes the factorisation binning encoded in histogram "h".

    The histogram TITLE is expected to contain alternating
    "<label><separator><nbins><separator>" pairs; these fill
    self._binLabels and self._binCount. The Y-axis BIN LABELS are expected
    to be "/"-separated "<caption>=<range>" tokens; these fill
    self._factorisationCaptions, self._factorisationFullBinLabels and
    self._factorisationRanges. No-op if the reader was already initialised
    (self._binLabels non-empty).

    Raises Exception if the title cannot be decomposed.
    '''
    # Already initialised? Then nothing to do.
    if len(self._binLabels) > 0:
        return
    myTitle = h.GetTitle()
    myList = myTitle.split(self._separator)
    # Integer division: allows for the title of the histogram to be placed after the last separator
    myFactorisationBins = int( len(myList) / 2 )
    myOutput = ""
    for i in range(0, myFactorisationBins):
        # Even entries are labels, odd entries the corresponding bin counts
        self._binLabels.append(myList[i * 2])
        if myList[i * 2 + 1].isdigit():
            self._binCount.append(int(myList[i * 2 + 1]))
        else:
            # try a bug fix by taking first character only
            # (handles the case where the histogram title text got glued to the last count)
            if myList[i * 2 + 1][0].isdigit():
                print ShellStyles.WarningLabel(
                ) + "UnfoldedHistogramReader::_initialize(): tried naive bug fix for last factorisation bin dimension (guessed dimension: %s, histo: %s)" % (
                    myList[i * 2 + 1][0], myList[i * 2 + 1][1:])
                self._binCount.append(int(myList[i * 2 + 1][0]))
            else:
                raise Exception(
                    ShellStyles.ErrorLabel() +
                    "UnfoldedHistogramReader: failed to decompose histogram title (it should contain the bin label and nbins information for n bins separated with '%s'\nHistogram title was: %s"
                    % (self._separator, myTitle))
        myOutput += "%s nbins=%d " % (self._binLabels[i], self._binCount[i])
    if self._debugStatus:
        print "UnfoldedHistogramReader: Histogram binning determined as : %s" % myOutput
    if len(self._binLabels) == 0:
        raise Exception(
            ShellStyles.ErrorLabel() +
            "UnfoldedHistogramReader: failed to decompose histogram title (it should contain the bin label and nbins information for n bins separated with '%s'\nHistogram title was: %s"
            % (self._separator, myTitle))
    self._unfoldedBinCount = h.GetNbinsY()
    # Loop over y axis to find axis values
    # NOTE(review): myBinCaptions/myBinRanges are assigned but never used below
    myBinCaptions = []
    myBinRanges = []
    for i in range(1, h.GetNbinsY() + 1):
        mySplitBin = h.GetYaxis().GetBinLabel(i).split("/")
        # Obtain bin captions (only once, from the first labelled bin)
        if len(self._factorisationCaptions) == 0:
            for s in mySplitBin:
                myCaption = ""
                # Caption is whatever precedes the (first) comparison operator
                if "=" in s:
                    myCaption = s.split("=")[0]
                elif ">" in s:
                    myCaption = s.split(">")[0]
                elif "<" in s:
                    myCaption = s.split("<")[0]
                self._factorisationFullBinLabels.append([])
                self._factorisationCaptions.append(myCaption)
                self._factorisationRanges.append([])
        # Obtain range information
        for k in range(0, len(mySplitBin)):
            if not mySplitBin[k] in self._factorisationFullBinLabels[k]:
                self._factorisationFullBinLabels[k].append(mySplitBin[k])
            # Remove label and equal signs
            s = mySplitBin[k].replace(self._factorisationCaptions[k], "").replace("=", "")
            if not s in self._factorisationRanges[k]:
                self._factorisationRanges[k].append(s)
def __init__(self, dataPath, ewkPath, dsetMgr, luminosity, moduleInfoString, normFactors, optionCalculateQCDNormalizationSyst=True, normDataSrc = None, normEWKSrc = None, optionUseInclusiveNorm=False, verbose=False): self._shapePlots = [] self._shapePlotLabels = [] self._QCDNormalizationSystPlots = [] self._QCDNormalizationSystPlotLabels = [] self._moduleInfoString = moduleInfoString self._useInclusiveNorm = optionUseInclusiveNorm if len(normFactors.keys()) == 1 and normFactors.keys()[0] == "Inclusive": self._useInclusiveNorm = True self._verbose = verbose msg = "Obtaining final shape from data path \"%s\"" % (dataPath) Verbose(ShellStyles.HighlightStyle() + msg + ShellStyles.NormalStyle(), True) # Determine list of plots to consider myObjects = dsetMgr.getDataset("Data").getDirectoryContent(dataPath) # Ignore unwanted histograms and those designed for HToTauNu keywordList = ["JetEtaPhi"] ignoreList = [] for k in keywordList: ignoreList.extend(filter(lambda name: k in name, myObjects)) msg = "Ignoring a total of %s histograms:" % (len(ignoreList)) Print(ShellStyles.WarningLabel() + msg, True) for hName in ignoreList: print "\t", os.path.join(dataPath, hName) # Update myObjects list with filtered results myObjects = list(x for x in myObjects if x not in ignoreList) # For-Loop: All plots to consider for i, plotName in enumerate(myObjects, 1): # For testing #if "LdgTrijetMass_AfterAllSelections" not in plotName: # continue msg = "{:<9} {:>3} {:<1} {:<3} {:<50}".format("Histogram", "%i" % i, "/", "%s:" % (len(myObjects)), os.path.join(dataPath, plotName) ) Print(ShellStyles.HighlightAltStyle() + msg + ShellStyles.NormalStyle(), i==1) # Ensure that histograms exist dataOk = self._sanityChecks(dsetMgr, dataPath, plotName) ewkOk = self._sanityChecks(dsetMgr, ewkPath, plotName) Verbose("Obtaining shape plots (the returned object is not owned)", True) myShapeHisto = self._obtainShapeHistograms(i, dataPath, ewkPath, dsetMgr, plotName, luminosity, normFactors) # Obtain plots 
for systematics coming from met shape difference for control plots #FIXME-Systematics if optionCalculateQCDNormalizationSyst: if isinstance(myShapeHisto, ROOT.TH2): msg = "Skipping met shape uncertainty because histogram has more than 1 dimensions!" Print(ShellStyles.WarningLabel() + msg, True) else: self._obtainQCDNormalizationSystHistograms(myShapeHisto, dsetMgr, plotName, luminosity, normDataSrc, normEWKSrc) return
def _doCalculate(self, shape, moduleInfoString, normFactors, optionPrintPurityByBins, optionDoNQCDByBinHistograms):
    '''
    Calculates the final data-driven QCD result in the signal region
    (shape * w_QCD, summed over the phase-space split bins).

    Per split bin the data-driven QCD histogram is weighted by its
    normalisation factor and accumulated into self._resultShape; statistical
    uncertainties for data and MC EWK are propagated separately
    ("statData"/"statEWK") and summed in quadrature. Also fills
    self._resultShapeEWK and self._resultShapePurity, and optionally one
    histogram per split bin in self._histogramsList. 2-D shapes are delegated
    to self._doCalculate2D().
    '''
    # Calculate final shape in signal region (shape * w_QCD)
    nSplitBins = shape.getNumberOfPhaseSpaceSplitBins()
    # Initialize result containers (clone binning of the first split bin)
    self._resultShape = aux.Clone(
        shape.getDataDrivenQCDHistoForSplittedBin(0))
    self._resultShape.Reset()
    self._resultShape.SetTitle("NQCDFinal_Total_%s" % moduleInfoString)
    self._resultShape.SetName("NQCDFinal_Total_%s" % moduleInfoString)
    self._resultShapeEWK = aux.Clone(
        shape.getDataDrivenQCDHistoForSplittedBin(0))
    self._resultShapeEWK.Reset()
    self._resultShapeEWK.SetTitle("NQCDFinal_EWK_%s" % moduleInfoString)
    self._resultShapeEWK.SetName("NQCDFinal_EWK_%s" % moduleInfoString)
    self._resultShapePurity = aux.Clone(
        shape.getDataDrivenQCDHistoForSplittedBin(0))
    self._resultShapePurity.Reset()
    self._resultShapePurity.SetTitle("NQCDFinal_Purity_%s" % moduleInfoString)
    self._resultShapePurity.SetName("NQCDFinal_Purity_%s" % moduleInfoString)
    self._histogramsList = []
    myUncertaintyLabels = ["statData", "statEWK"]
    self._resultCountObject = extendedCount.ExtendedCount(
        0.0, [0.0, 0.0], myUncertaintyLabels)
    # Optionally keep one result histogram per phase-space split bin
    if optionDoNQCDByBinHistograms:
        for i in range(0, nSplitBins):
            hBin = aux.Clone(self._resultShape)
            hBin.SetTitle(
                "NQCDFinal_%s_%s" %
                (shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(
                    " ", ""), moduleInfoString))
            hBin.SetName(
                "NQCDFinal_%s_%s" %
                (shape.getPhaseSpaceBinFileFriendlyTitle(i).replace(
                    " ", ""), moduleInfoString))
            self._histogramsList.append(hBin)
    # 2-D shapes are handled by a dedicated method
    if isinstance(self._resultShape, ROOT.TH2):
        self._doCalculate2D(nSplitBins, shape, normFactors,
                            optionPrintPurityByBins,
                            optionDoNQCDByBinHistograms, myUncertaintyLabels)
        return
    # Intialize counters for purity calculation in final shape binning
    myShapeDataSum = []
    myShapeDataSumUncert = []
    myShapeEwkSum = []
    myShapeEwkSumUncert = []
    for j in range(1, self._resultShape.GetNbinsX() + 1):
        myShapeDataSum.append(0.0)
        myShapeDataSumUncert.append(0.0)
        myShapeEwkSum.append(0.0)
        myShapeEwkSumUncert.append(0.0)
    # Calculate results separately for each phase space bin and then combine
    for i in range(0, nSplitBins):
        # Get data-driven QCD, data, and MC EWK shape histogram for the phase space bin
        h = shape.getDataDrivenQCDHistoForSplittedBin(i)
        hData = shape.getDataHistoForSplittedBin(i)
        hEwk = shape.getEwkHistoForSplittedBin(i)
        # Get normalization factor
        wQCDLabel = shape.getPhaseSpaceBinFileFriendlyTitle(i)
        if self._optionUseInclusiveNorm:
            wQCDLabel = "Inclusive"
        wQCD = 0.0
        # Missing factor => weight stays 0.0, i.e. the bin contributes nothing
        if not wQCDLabel in normFactors.keys():
            print ShellStyles.WarningLabel(
            ) + "No normalization factors available for bin '%s' when accessing histogram %s! Ignoring this bin..." % (
                wQCDLabel, shape.getHistoName())
        else:
            wQCD = normFactors[wQCDLabel]
        # Loop over bins in the shape histogram
        for j in range(1, h.GetNbinsX() + 1):
            myResult = 0.0
            myStatDataUncert = 0.0
            myStatEwkUncert = 0.0
            if abs(h.GetBinContent(j)) > 0.00001:  # Ignore zero bins
                # Calculate result
                myResult = h.GetBinContent(j) * wQCD
                # Calculate abs. stat. uncert. for data and for MC EWK
                myStatDataUncert = hData.GetBinError(j) * wQCD
                myStatEwkUncert = hEwk.GetBinError(j) * wQCD
                #errorPropagation.errorPropagationForProduct(hLeg1.GetBinContent(j), hLeg1Data.GetBinError(j), myEffObject.value(), myEffObject.uncertainty("statData"))
                # Do not calculate here MC EWK syst.
            myCountObject = extendedCount.ExtendedCount(
                myResult, [myStatDataUncert, myStatEwkUncert],
                myUncertaintyLabels)
            self._resultCountObject.add(myCountObject)
            if optionDoNQCDByBinHistograms:
                self._histogramsList[i].SetBinContent(
                    j, myCountObject.value())
                self._histogramsList[i].SetBinError(
                    j, myCountObject.statUncertainty())
            self._resultShape.SetBinContent(
                j,
                self._resultShape.GetBinContent(j) + myCountObject.value())
            self._resultShape.SetBinError(
                j,
                self._resultShape.GetBinError(j) +
                myCountObject.statUncertainty()**2)  # Sum squared
            # Sum items for purity calculation
            myShapeDataSum[j - 1] += hData.GetBinContent(j) * wQCD
            myShapeDataSumUncert[j - 1] += (hData.GetBinError(j) * wQCD)**2
            myShapeEwkSum[j - 1] += hEwk.GetBinContent(j) * wQCD
            myShapeEwkSumUncert[j - 1] += (hEwk.GetBinError(j) * wQCD)**2
        # ROOT histograms are not garbage-collected; delete explicitly
        h.Delete()
        hData.Delete()
        hEwk.Delete()
    # Take square root of uncertainties (they were summed in quadrature)
    for j in range(1, self._resultShape.GetNbinsX() + 1):
        self._resultShape.SetBinError(
            j, math.sqrt(self._resultShape.GetBinError(j)))
    # Print result
    print "NQCD Integral(%s) = %s " % (
        shape.getHistoName(),
        self._resultCountObject.getResultStringFull("%.1f"))
    # Print purity as function of final shape bins
    if optionPrintPurityByBins:
        print "Purity of shape %s" % shape.getHistoName()
        print "shapeBin purity purityUncert"
    for j in range(1, self._resultShape.GetNbinsX() + 1):
        myPurity = 0.0
        myPurityUncert = 0.0
        # Purity = 1 - EWK/Data in each final-shape bin (guard against /0)
        if abs(myShapeDataSum[j - 1]) > 0.000001:
            myPurity = 1.0 - myShapeEwkSum[j - 1] / myShapeDataSum[j - 1]
            myPurityUncert = errorPropagation.errorPropagationForDivision(
                myShapeEwkSum[j - 1], math.sqrt(myShapeEwkSumUncert[j - 1]),
                myShapeDataSum[j - 1], math.sqrt(myShapeDataSumUncert[j - 1]))
        # Store MC EWK content
        self._resultShapeEWK.SetBinContent(j, myShapeEwkSum[j - 1])
        self._resultShapeEWK.SetBinError(
            j, math.sqrt(myShapeEwkSumUncert[j - 1]))
        self._resultShapePurity.SetBinContent(j, myPurity)
        self._resultShapePurity.SetBinError(j, myPurityUncert)
        # Print purity info of final shape
        if optionPrintPurityByBins:
            myString = ""
            if j < self._resultShape.GetNbinsX():
                myString = "%d..%d" % (
                    self._resultShape.GetXaxis().GetBinLowEdge(j),
                    self._resultShape.GetXaxis().GetBinUpEdge(j))
            else:
                # Last bin is printed as an open-ended range
                myString = ">%d" % (
                    self._resultShape.GetXaxis().GetBinLowEdge(j))
            myString += " %.3f %.3f" % (myPurity, myPurityUncert)
            print myString
def importNormFactors(era, searchMode, optimizationMode, multicrabDirName):
    '''
    Imports the auto-generated FakeBTransferFactors.py file, which is created
    by the plotting/fitting templates script (plotQCD_Fit.py)

    This contains the results of fitting to the Baseline Data the templates
    m_{jjb} shapes from the QCD (Inverted Data) and EWK (Baseline MC).

    Results include the fit details for each shape and the QCD NormFactor for
    moving from the ControlRegion (CR) to the Signal Region (SR).

    The aforementioned python file and a folder with the histogram ROOT files
    and the individual fits. The folder name will be
    normalisationPlots/<OptsMode> and will be placed inside the
    <pseudomulticrab_dir>. The autogenerated file will be placed in the cwd
    (i.e. work/)

    \param era              data-taking era the factors must match
    \param searchMode       analysis search mode the factors must match
    \param optimizationMode optimization mode the factors must match
    \param multicrabDirName pseudo-multicrab directory holding the factor file
    \return dict mapping opts.normFactorKey to the normalisation factors
    \raises Exception if opts.analysisName is neither FakeB nor GenuineB
    '''
    # Find candidates for normalisation scripts
    # NOTE(review): scriptList is never used below -- presumably kept for its
    # side effects (if any) or left over from a refactor; verify
    scriptList = getNormFactorFileList(dirName=multicrabDirName, fileBaseName=opts.normFactorsSrc)

    # Create a string with the module information used
    moduleInfoString = getModuleInfoString(era, searchMode, optimizationMode)

    # Construct source file name
    src = getGetNormFactorsSrcFilename(multicrabDirName, opts.normFactorsSrc % moduleInfoString)

    # Check if normalization coefficients are suitable for the choses era
    Verbose("Reading normalisation factors from:\n\t%s" % src, True)

    # Split the path to get just the file name of src
    pathList = src.replace(".py", "").split("/")

    # Insert the directory where the normFactor files reside into the path so that they are found
    if len(pathList) > 1:
        cwd = os.getenv("PWD")
        # Get directories to src in a list [i.e. remove the last entry (file-name) from the pathList]
        dirList = map(str, pathList[:(len(pathList) - 1)])
        srcDir = "/".join(dirList)
        sys.path.insert(0, os.path.join(cwd, srcDir))

    # Import the (normFactor) src file
    Print(
        "Importing the transfer factors from src file %s" %
        (ShellStyles.NoteStyle() + src + ShellStyles.NormalStyle()), True)
    srcBase = os.path.basename("/".join(pathList))
    normFactorsImport = __import__(srcBase)

    # Get the function definition
    myNormFactorsSafetyCheck = getattr(normFactorsImport, "QCDInvertedNormalizationSafetyCheck")
    Verbose(
        "Check that the era=%s, searchMode=%s, optimizationMode=%s info matches!"
        % (era, searchMode, optimizationMode))
    # Raises (inside the imported module) if the factors were produced for a different module
    myNormFactorsSafetyCheck(era, searchMode, optimizationMode)

    # Obtain normalization factors
    myNormFactorsImport = getattr(normFactorsImport, "QCDNormalization")

    # Systematic Variations
    msg = "Disabled NormFactors SystVar Fake Weighting Up/Down"
    Print(ShellStyles.WarningLabel() + msg, True)
    # myNormFactorsImportSystVarFakeWeightingDown = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarDown") #FIXME
    # myNormFactorsImportSystVarFakeWeightingUp = getattr(normFactorsImport, "QCDPlusEWKFakeTausNormalizationSystFakeWeightingVarUp") #FIXME

    # Import the normalisation factors and inform user
    # GenuineB uses trivial unit factors; FakeB uses the fitted ones
    myNormFactors = {}
    if "FakeB" in opts.analysisName:
        myNormFactors[opts.normFactorKey] = myNormFactorsImport
    elif "GenuineB" in opts.analysisName:
        myNormFactors[opts.normFactorKey] = {'Inclusive': 1.0}
    else:
        raise Exception("This should not be reached!")

    # Inform user of normalisation factors
    msg = "Obtained %s normalisation factor dictionary. The values are:" % (
        ShellStyles.NoteStyle() + opts.normFactorKey + ShellStyles.NormalStyle())
    Print(msg, True)
    for i, k in enumerate(myNormFactors[opts.normFactorKey], 1):
        keyName = k
        keyValue = myNormFactors[opts.normFactorKey][k]
        #msg += "%s = %s" % (keyName, keyValue)
        msg = "%s = %s" % (keyName, keyValue)
        # NOTE(review): enumerate starts at 1, so i == 0 is always False (lines
        # print as continuation of the header above) -- presumably intended,
        # but the dead comparison is confusing; verify
        Print(msg, i == 0)

    # Inform user of weighting up/down
    msg = "Disabled NormFactors Weighting Up/Down"
    Verbose(ShellStyles.WarningLabel() + msg, True)  #fixme
    # myNormFactors["FakeWeightingDown"] = myNormFactorsImportSystVarFakeWeightingDown # FIXME
    # myNormFactors["FakeWeightingUp"] = myNormFactorsImportSystVarFakeWeightingUp # FIXME
    return myNormFactors