def main(argv):
    """Compute QCD normalization factors with the legacy InvertedTauID fitter.

    Reads a multicrab task directory from the command line, merges the MC
    samples into an "EWK" dataset, builds baseline/inverted MET templates
    (inclusive, EWK-genuine-tau and EWK-fake-tau variants) in bins of tau pT,
    fits them, and writes the normalization factors to a python file plus a
    LaTeX summary table.

    NOTE(review): relies on names defined elsewhere in this module:
    sys, usage, dataset, consistencyCheck, plots, xsect, tdrstyle,
    InvertedTauID, formatHistoTitle, treatHistogram and the globals
    dataEra, searchMode, analysis — confirm against the full file.
    """
    COMBINEDHISTODIR = "ForQCDNormalization"
    FAKEHISTODIR = "ForQCDNormalizationEWKFakeTaus"
    GENUINEHISTODIR = "ForQCDNormalizationEWKGenuineTaus"
    comparisonList = ["AfterStdSelections"]
    dirs = []
    if len(sys.argv) < 2:
        usage()
    dirs.append(sys.argv[1])

    # Create all dsetMgr from a multicrab task
    dsetMgr = dataset.getDatasetsFromMulticrabDirs(dirs,
                                                   dataEra=dataEra,
                                                   searchMode=searchMode,
                                                   analysisName=analysis)
    #print dsetMgr

    # Check multicrab consistency
    consistencyCheck.checkConsistencyStandalone(dirs[0],
                                                dsetMgr,
                                                name="QCD inverted")

    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    dsetMgr.updateNAllEventsToPUWeighted()

    # Read integrated luminosities of data dsetMgr from lumi.json
    dsetMgr.loadLuminosities()

    # Include only 120 mass bin of HW and HH dsetMgr
    dsetMgr.remove(
        filter(lambda name: "TTToHplus" in name and not "M120" in name,
               dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(
        filter(lambda name: "HplusTB" in name,
               dsetMgr.getAllDatasetNames()))

    # Default merging nad ordering of data and MC dsetMgr
    # All data dsetMgr to "Data"
    # All QCD dsetMgr to "QCD"
    # All single top dsetMgr to "SingleTop"
    # WW, WZ, ZZ to "Diboson"
    plots.mergeRenameReorderForDataMC(dsetMgr)

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(dsetMgr, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH dsetMgr to one (for each mass bin)
    plots.mergeWHandHH(dsetMgr)

    # Merge the MC backgrounds into a single "EWK" pseudo-dataset
    dsetMgr.merge(
        "EWK",
        [
            "TTJets",
            "WJetsHT",
            "DYJetsToLL",
            "SingleTop",
            #"Diboson"
        ])

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    for HISTONAME in comparisonList:
        BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME
        INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME

        #===== Infer binning information and labels
        histonames = dsetMgr.getDataset("Data").getDirectoryContent(
            COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME)
        bins = []
        binLabels = []
        if histonames == None:
            # Assume that only inclusive bin exists
            name = COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME
            if not dsetMgr.getDataset("Data").hasRootHisto(name):
                raise Exception(
                    "Error: Cannot find histogram or directory of name '%s'!"
                    % name)
            BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME
            INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME
            bins = [""]
            binLabels = ["Inclusive"]
        else:
            # One sub-histogram per tau-pT bin; strip the common prefix to
            # obtain the bin suffix, and derive the label from the histogram title
            for hname in histonames:
                bins.append(
                    hname.replace("NormalizationMETBaselineTau" + HISTONAME,
                                  ""))
                title = dsetMgr.getDataset("Data").getDatasetRootHisto(
                    COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + "/" +
                    hname).getHistogram().GetTitle()
                title = title.replace("METBaseline" + HISTONAME, "")
                binLabels.append(formatHistoTitle(title))
        print "\nHistogram bins available", bins
        print "Using bins ", bins
        print "\nBin labels"
        for i in range(len(binLabels)):
            # Pad the bin suffix to a fixed width for aligned printout
            line = bins[i]
            while len(line) < 10:
                line += " "
            line += ": " + binLabels[i]
            print line
        print

        #===== Initialize normalization calculator
        invertedQCD = InvertedTauID()
        invertedQCD.setLumi(dsetMgr.getDataset("Data").getLuminosity())
        invertedQCD.setInfo([dataEra, searchMode, HISTONAME])

        #===== Loop over tau pT bins
        for i, binStr in enumerate(bins):
            print "\n********************************"
            print "*** Fitting bin %s" % binLabels[i]
            print "********************************\n"
            invertedQCD.resetBinResults()
            invertedQCD.setLabel(binLabels[i])

            #===== Obtain histograms for normalization
            metBase = plots.DataMCPlot(
                dsetMgr,
                COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            metInver = plots.DataMCPlot(
                dsetMgr,
                COMBINEDHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            metBase_GenuineTaus = plots.DataMCPlot(
                dsetMgr,
                GENUINEHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            metInver_GenuineTaus = plots.DataMCPlot(
                dsetMgr,
                GENUINEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            metBase_FakeTaus = plots.DataMCPlot(
                dsetMgr,
                FAKEHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            metInver_FakeTaus = plots.DataMCPlot(
                dsetMgr,
                FAKEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)

            #===== Rebin histograms before subtracting
            RebinFactor = 2  # Aim for 10 GeV binning
            metBase.histoMgr.forEachHisto(
                lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver.histoMgr.forEachHisto(
                lambda h: h.getRootHisto().Rebin(RebinFactor))
            metBase_GenuineTaus.histoMgr.forEachHisto(
                lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver_GenuineTaus.histoMgr.forEachHisto(
                lambda h: h.getRootHisto().Rebin(RebinFactor))
            metBase_FakeTaus.histoMgr.forEachHisto(
                lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver_FakeTaus.histoMgr.forEachHisto(
                lambda h: h.getRootHisto().Rebin(RebinFactor))

            #===== Obtain templates for data and EWK
            metInverted_data = metInver.histoMgr.getHisto(
                "Data").getRootHisto().Clone(COMBINEDHISTODIR + "/" +
                                             INVERTEDTAUHISTONAME + binStr)
            treatHistogram(metInverted_data, "Data, inverted")
            metInverted_EWK_GenuineTaus = metInver_GenuineTaus.histoMgr.getHisto(
                "EWK").getRootHisto().Clone(GENUINEHISTODIR + "/" +
                                            INVERTEDTAUHISTONAME + binStr)
            treatHistogram(metInverted_EWK_GenuineTaus,
                           "EWK genuine taus, inverted")
            metInverted_EWK_FakeTaus = metInver_FakeTaus.histoMgr.getHisto(
                "EWK").getRootHisto().Clone(FAKEHISTODIR + "/" +
                                            INVERTEDTAUHISTONAME + binStr)
            treatHistogram(metInverted_EWK_FakeTaus,
                           "EWK fake taus, inverted")
            metBase_data = metBase.histoMgr.getHisto(
                "Data").getRootHisto().Clone(COMBINEDHISTODIR + "/" +
                                             BASELINETAUHISTONAME + binStr)
            treatHistogram(metBase_data, "Data, baseline")
            metBase_EWK_GenuineTaus = metBase_GenuineTaus.histoMgr.getHisto(
                "EWK").getRootHisto().Clone(GENUINEHISTODIR + "/" +
                                            BASELINETAUHISTONAME + binStr)
            treatHistogram(metBase_EWK_GenuineTaus,
                           "EWK genuine taus, baseline")
            metBase_EWK_FakeTaus = metBase_FakeTaus.histoMgr.getHisto(
                "EWK").getRootHisto().Clone(FAKEHISTODIR + "/" +
                                            BASELINETAUHISTONAME + binStr)
            treatHistogram(metBase_EWK_FakeTaus, "EWK fake taus, baseline")

            #===== Obtain templates for QCD (subtract MC EWK events from data)
            # QCD from baseline is usable only as a cross check
            #metBase_QCD = metBase_data.Clone("QCD")
            #metBase_QCD.Add(metBase_EWK_GenuineTaus,-1)
            #metBase_QCD.Add(metBase_EWK_FakeTaus,-1)
            #addLabels(metBase_QCD, "QCD, baseline")
            metInverted_QCD = metInverted_data.Clone("QCD")
            metInverted_QCD.Add(metInverted_EWK_GenuineTaus, -1)
            metInverted_QCD.Add(metInverted_EWK_FakeTaus, -1)
            treatHistogram(metInverted_QCD, "QCD, inverted")

            #===== Make plots of templates
            print "\n*** Integrals of plotted templates"
            #invertedQCD.plotHisto(metInverted_data,"template_Data_Inverted")
            #invertedQCD.plotHisto(metInverted_EWK_GenuineTaus,"template_EWKGenuineTaus_Inverted")
            #invertedQCD.plotHisto(metInverted_EWK_FakeTaus,"template_EWKFakeTaus_Inverted")
            invertedQCD.plotHisto(metInverted_QCD, "template_QCD_Inverted")
            invertedQCD.plotHisto(metBase_data, "template_Data_Baseline")
            invertedQCD.plotHisto(metBase_EWK_GenuineTaus,
                                  "template_EWKGenuineTaus_Baseline")
            invertedQCD.plotHisto(metBase_EWK_FakeTaus,
                                  "template_EWKFakeTaus_Baseline")
            #invertedQCD.plotHisto(metBase_QCD,"template_QCD_Baseline")

            #===== Fit individual templates and
            # Fit first templates for QCD, EWK_genuine_taus, and EWK_fake_taus
            # Then fit the shape of those parametrizations to baseline data to obtain normalization coefficients
            fitOptions = "RB"
            # Strategy: take EWK templates from baseline and QCD template from inverted; then fit to baseline data
            invertedQCD.fitEWK_GenuineTaus(metInverted_EWK_GenuineTaus,
                                           fitOptions)
            invertedQCD.fitEWK_GenuineTaus(metBase_EWK_GenuineTaus,
                                           fitOptions)
            invertedQCD.fitEWK_FakeTaus(metInverted_EWK_FakeTaus, fitOptions)
            invertedQCD.fitEWK_FakeTaus(metBase_EWK_FakeTaus, fitOptions)
            invertedQCD.fitQCD(metInverted_QCD, fitOptions)
            invertedQCD.fitData(metBase_data)

            #===== Calculate normalization
            invertedQCD.getNormalization()

        # NOTE(review): original indentation is ambiguous in the source;
        # Summary/Write* are assumed to run once per HISTONAME (after the
        # pT-bin loop), matching the later revisions of this script — confirm.
        invertedQCD.Summary()
        invertedQCD.WriteNormalizationToFile(
            "QCDInvertedNormalizationFactorsFilteredEWKFakeTaus.py")
        invertedQCD.WriteLatexOutput("fits.tex")
def main(argv, dsetMgr, moduleInfoString): COMBINEDHISTODIR = "ForQCDNormalization" FAKEHISTODIR = "ForQCDNormalizationEWKFakeTaus" GENUINEHISTODIR = "ForQCDNormalizationEWKGenuineTaus" comparisonList = ["AfterStdSelections"] dirs = [] dirs.append(sys.argv[1]) # Check multicrab consistency consistencyCheck.checkConsistencyStandalone(dirs[0], dsetMgr, name="QCD inverted") # As we use weighted counters for MC normalisation, we have to # update the all event count to a separately defined value because # the analysis job uses skimmed pattuple as an input dsetMgr.updateNAllEventsToPUWeighted() # Read integrated luminosities of data dsetMgr from lumi.json dsetMgr.loadLuminosities() # Include only 120 mass bin of HW and HH dsetMgr dsetMgr.remove( filter(lambda name: "TTToHplus" in name and not "M120" in name, dsetMgr.getAllDatasetNames())) dsetMgr.remove( filter(lambda name: "HplusTB" in name, dsetMgr.getAllDatasetNames())) # Default merging nad ordering of data and MC dsetMgr # All data dsetMgr to "Data" # All QCD dsetMgr to "QCD" # All single top dsetMgr to "SingleTop" # WW, WZ, ZZ to "Diboson" plots.mergeRenameReorderForDataMC(dsetMgr) # Set BR(t->H) to 0.05, keep BR(H->tau) in 1 xsect.setHplusCrossSectionsToBR(dsetMgr, br_tH=0.05, br_Htaunu=1) # Merge WH and HH dsetMgr to one (for each mass bin) plots.mergeWHandHH(dsetMgr) # Merge MC EWK samples as one EWK sample myMergeList = [] if "TT" in dsetMgr.getMCDatasetNames(): myMergeList.append("TT") # Powheg, no neg. weights -> large stats. else: myMergeList.append("TTJets") # Madgraph with negative weights print "Warning: using TTJets as input, but this is suboptimal. Please switch to the TT sample (much more stats.)." myMergeList.append("WJetsHT") myMergeList.append("DYJetsToLLHT") myMergeList.append("SingleTop") if "Diboson" in dsetMgr.getMCDatasetNames(): myMergeList.append("Diboson") print "Warning: ignoring diboson sample (since it does not exist) ..." 
for item in myMergeList: if not item in dsetMgr.getMCDatasetNames(): raise Exception( "Error: tried to use dataset '%s' as part of the merged EWK dataset, but the dataset '%s' does not exist!" % (item, item)) dsetMgr.merge("EWK", myMergeList) # Apply TDR style style = tdrstyle.TDRStyle() style.setOptStat(True) for HISTONAME in comparisonList: BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME FITMIN = None FITMAX = None #===== Infer binning information and labels histonames = dsetMgr.getDataset("Data").getDirectoryContent( COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME) bins = [] binLabels = [] if histonames == None: # Assume that only inclusive bin exists name = COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME if not dsetMgr.getDataset("Data").hasRootHisto(name): raise Exception( "Error: Cannot find histogram or directory of name '%s'!" 
% name) BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME bins = [""] binLabels = ["Inclusive"] else: for hname in histonames: binIndex = hname.replace( "NormalizationMETBaselineTau" + HISTONAME, "") hDummy = dsetMgr.getDataset("Data").getDatasetRootHisto( COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binIndex).getHistogram() title = hDummy.GetTitle() title = title.replace("METBaseline" + HISTONAME, "") if hDummy.Integral() > 0.0: bins.append(binIndex) if binIndex == "Inclusive": binLabels.append(binIndex) else: binLabels.append( QCDNormalization.getModifiedBinLabelString(title)) if FITMIN == None: FITMIN = hDummy.GetXaxis().GetXmin() FITMAX = hDummy.GetXaxis().GetXmax() hDummy.Delete() else: print "Skipping bin '%s' (%s) because it has no entries" % ( binIndex, QCDNormalization.getModifiedBinLabelString(title)) print "\nHistogram bins available", bins # Select bins by filter if len(selectOnlyBins) > 0: oldBinLabels = binLabels[:] oldBins = bins[:] binLabels = [] bins = [] for k in selectOnlyBins: for i in range(len(oldBinLabels)): if k == oldBinLabels[i] or k == oldBins[i]: binLabels.append(oldBinLabels[i]) bins.append(oldBins[i]) print "Using bins ", bins print "\nBin labels" for i in range(len(binLabels)): line = bins[i] while len(line) < 10: line += " " line += ": " + binLabels[i] print line print #===== Initialize normalization calculator #manager = QCDNormalization.QCDNormalizationManagerExperimental1(binLabels) manager = QCDNormalization.QCDNormalizationManagerDefault( binLabels, dirs[0], moduleInfoString) #===== Create templates (EWK fakes, EWK genuine, QCD; data template is created by manager) template_EWKFakeTaus_Baseline = manager.createTemplate( "EWKFakeTaus_Baseline") template_EWKFakeTaus_Inverted = manager.createTemplate( "EWKFakeTaus_Inverted") template_EWKGenuineTaus_Baseline = manager.createTemplate( "EWKGenuineTaus_Baseline") template_EWKGenuineTaus_Inverted = 
manager.createTemplate( "EWKGenuineTaus_Inverted") template_EWKInclusive_Baseline = manager.createTemplate( "EWKInclusive_Baseline") template_EWKInclusive_Inverted = manager.createTemplate( "EWKInclusive_Inverted") template_QCD_Baseline = manager.createTemplate("QCD_Baseline") template_QCD_Inverted = manager.createTemplate("QCD_Inverted") #===== Define fit functions and fit parameters # The available functions are defined in the FitFunction class in the QCDMeasurement/python/QCDNormalization.py file # commented out fitter for EWK fake taus, since only the fit on inclusive EWK is used to obtain w_QCD #boundary = 100 #template_EWKFakeTaus_Baseline.setFitter(QCDNormalization.FitFunction("EWKFunctionInv", boundary=boundary, norm=1, rejectPoints=1), #FITMIN, FITMAX) #template_EWKFakeTaus_Baseline.setDefaultFitParam(defaultInitialValue=[10.0, 100, 45, 0.02], #defaultLowerLimit= [ 0.1, 70, 10, 0.001], #defaultUpperLimit= [ 30, 300, 100, 0.1]) # commented out fitter for EWK genuine taus, since only the fit on inclusive EWK is used to obtain w_QCD #boundary = 150 #template_EWKGenuineTaus_Baseline.setFitter(QCDNormalization.FitFunction("EWKFunction", boundary=boundary, norm=1, rejectPoints=1), #FITMIN, FITMAX) #template_EWKGenuineTaus_Baseline.setDefaultFitParam(defaultLowerLimit=[0.5, 90, 30, 0.0001], #defaultUpperLimit=[ 20, 150, 50, 1.0]) # Inclusive EWK boundary = 150 template_EWKInclusive_Baseline.setFitter( QCDNormalization.FitFunction("EWKFunction", boundary=boundary, norm=1, rejectPoints=1), FITMIN, FITMAX) template_EWKInclusive_Baseline.setDefaultFitParam( defaultLowerLimit=[0.5, 90, 30, 0.0001], defaultUpperLimit=[20, 150, 50, 1.0]) # Note that the same function is used for QCD only and QCD+EWK fakes template_QCD_Inverted.setFitter( QCDNormalization.FitFunction("QCDFunction", norm=1), FITMIN, FITMAX) template_QCD_Inverted.setDefaultFitParam( defaultLowerLimit=[0.0001, 0.001, 0.1, 0.0, 10, 0.0001, 0.001], defaultUpperLimit=[200, 10, 10, 150, 100, 1, 0.05]) #===== 
Loop over tau pT bins for i, binStr in enumerate(bins): print "\n********************************" print "*** Fitting bin %s" % binLabels[i] print "********************************\n" #===== Reset bin results manager.resetBinResults() #===== Obtain histograms for normalization # Data histoName = COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binStr hmetBase_data = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "Data").getRootHisto().Clone(histoName) histoName = COMBINEDHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr hmetInverted_data = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "Data").getRootHisto().Clone(histoName) # EWK genuine taus histoName = GENUINEHISTODIR + "/" + BASELINETAUHISTONAME + binStr hmetBase_EWK_GenuineTaus = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "EWK").getRootHisto().Clone(histoName) histoName = GENUINEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr hmetInverted_EWK_GenuineTaus = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "EWK").getRootHisto().Clone(histoName) # EWK fake taus histoName = FAKEHISTODIR + "/" + BASELINETAUHISTONAME + binStr hmetBase_EWK_FakeTaus = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "EWK").getRootHisto().Clone(histoName) histoName = FAKEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr hmetInverted_EWK_FakeTaus = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "EWK").getRootHisto().Clone(histoName) # Finalize histograms by rebinning for histogram in [ hmetBase_data, hmetInverted_data, hmetBase_EWK_GenuineTaus, hmetInverted_EWK_GenuineTaus, hmetBase_EWK_FakeTaus, hmetInverted_EWK_FakeTaus ]: histogram.Rebin(_rebinFactor) #===== Obtain inclusive EWK histograms hmetBase_EWKinclusive = hmetBase_EWK_GenuineTaus.Clone( "EWKinclusiveBase") hmetBase_EWKinclusive.Add(hmetBase_EWK_FakeTaus, 1.0) hmetInverted_EWKinclusive = hmetInverted_EWK_GenuineTaus.Clone( "EWKinclusiveInv") hmetInverted_EWKinclusive.Add(hmetInverted_EWK_FakeTaus, 1.0) #===== Obtain 
histograms for QCD (subtract MC EWK events from data) # QCD from baseline is usable only as a cross check hmetBase_QCD = hmetBase_data.Clone("QCDbase") hmetBase_QCD.Add(hmetBase_EWKinclusive, -1) hmetInverted_QCD = hmetInverted_data.Clone("QCDinv") hmetInverted_QCD.Add(hmetInverted_EWKinclusive, -1) #===== Set histograms to the templates template_EWKFakeTaus_Inverted.setHistogram( hmetInverted_EWK_FakeTaus, binLabels[i]) template_EWKGenuineTaus_Inverted.setHistogram( hmetInverted_EWK_GenuineTaus, binLabels[i]) template_EWKInclusive_Inverted.setHistogram( hmetInverted_EWKinclusive, binLabels[i]) template_QCD_Inverted.setHistogram(hmetInverted_QCD, binLabels[i]) template_EWKFakeTaus_Baseline.setHistogram(hmetBase_EWK_FakeTaus, binLabels[i]) template_EWKGenuineTaus_Baseline.setHistogram( hmetBase_EWK_GenuineTaus, binLabels[i]) template_EWKInclusive_Baseline.setHistogram( hmetBase_EWKinclusive, binLabels[i]) template_QCD_Baseline.setHistogram(hmetBase_QCD, binLabels[i]) #===== Make plots of templates manager.plotTemplates() #===== Fit individual templates to data fitOptions = "R B" # RBLW manager.calculateNormalizationCoefficients(hmetBase_data, fitOptions, FITMIN, FITMAX) #===== Calculate combined normalisation coefficient (f_fakes = w*f_QCD + (1-w)*f_EWKfakes) # Obtain histograms histoName = "ForDataDrivenCtrlPlots/shapeTransverseMass/shapeTransverseMass" + binStr dataMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto( "Data").getRootHisto().Clone(histoName) treatNegativeBins(dataMt, "Data_inverted mT") histoName = "ForDataDrivenCtrlPlotsEWKFakeTaus/shapeTransverseMass/shapeTransverseMass" + binStr ewkFakeTausMt = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( "EWK").getRootHisto().Clone(histoName) treatNegativeBins(ewkFakeTausMt, "ewkFakeTaus_inverted mT") histoName = "ForDataDrivenCtrlPlotsEWKGenuineTaus/shapeTransverseMass/shapeTransverseMass" + binStr ewkGenuineTausMt = plots.DataMCPlot( dsetMgr, histoName).histoMgr.getHisto( 
"EWK").getRootHisto().Clone(histoName) treatNegativeBins(ewkGenuineTausMt, "ewkGenuineTaus_inverted mT") qcdMt = dataMt.Clone("QCD") qcdMt.Add(ewkFakeTausMt, -1) qcdMt.Add(ewkGenuineTausMt, -1) treatNegativeBins(qcdMt, "QCD_inverted mT") # Do calculation manager.calculateCombinedNormalizationCoefficient( qcdMt, ewkFakeTausMt) #===== Save normalization outFileName = "QCDNormalizationFactors_%s_%s.py" % (HISTONAME, moduleInfoString) print argv[1], outFileName outFileFullName = os.path.join(argv[1], outFileName) manager.writeScaleFactorFile(outFileFullName, moduleInfoString)
def main(argv, dsetMgr, moduleInfoString):
    """Compute QCD normalization factors (latest revision of this script).

    Same workflow as the earlier revisions (merge MC into an "EWK"
    pseudo-dataset, build baseline/inverted MET templates per tau-pT bin,
    fit, and write scale factors into the multicrab directory), but with:
    verbose dataset printouts, extra DY sample filtering, a WJets-vs-WJetsHT
    choice, and a Rayleigh+Gaussian+exponential QCD fit function.

    Parameters:
      argv             -- command-line arguments; argv[1] is the multicrab dir
      dsetMgr          -- dataset manager created by the caller
      moduleInfoString -- era/searchMode/optimizationMode tag for output names

    NOTE(review): relies on names defined elsewhere in this module:
    sys, os, plots, xsect, tdrstyle, QCDNormalization, treatNegativeBins and
    the globals verbose, useWJetsHT, selectOnlyBins, _rebinFactor — confirm
    against the full file.
    """
    COMBINEDHISTODIR = "ForQCDNormalization"
    FAKEHISTODIR = "ForQCDNormalizationEWKFakeTaus"
    GENUINEHISTODIR = "ForQCDNormalizationEWKGenuineTaus"
    comparisonList = ["AfterStdSelections"]
    dirs = []
    dirs.append(sys.argv[1])

    # Check multicrab consistency
    # consistencyCheck.checkConsistencyStandalone(dirs[0],dsetMgr,name="QCD inverted") #FIXME needs to be updated

    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    dsetMgr.updateNAllEventsToPUWeighted()

    # Read integrated luminosities of data dsetMgr from lumi.json
    dsetMgr.loadLuminosities()

    if verbose:
        print "Datasets list (initial):"
        print dsetMgr.getMCDatasetNames()

    # Include only 120 mass bin of HW and HH dsetMgr
    dsetMgr.remove(
        filter(lambda name: "TTToHplus" in name and not "M120" in name,
               dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(
        filter(lambda name: "HplusTB" in name,
               dsetMgr.getAllDatasetNames()))
    # Keep only the inclusive DY sample (drop the jet-binned ones)
    dsetMgr.remove(
        filter(lambda name: "DY2JetsToLL" in name,
               dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(
        filter(lambda name: "DY3JetsToLL" in name,
               dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(
        filter(lambda name: "DY4JetsToLL" in name,
               dsetMgr.getAllDatasetNames()))
    # Ignore DY dataset with HERWIG hadronization (it's only for testing)
    dsetMgr.remove(filter(lambda name: "DYJetsToLL_M_50_HERWIGPP" in name,
                          dsetMgr.getAllDatasetNames()),
                   close=False)
    if verbose:
        print "Datasets after filter removals:"
        print dsetMgr.getMCDatasetNames()

    # Default merging nad ordering of data and MC dsetMgr
    # All data dsetMgr to "Data"
    # All QCD dsetMgr to "QCD"
    # All single top dsetMgr to "SingleTop"
    # WW, WZ, ZZ to "Diboson"
    plots.mergeRenameReorderForDataMC(dsetMgr)
    if verbose:
        print "Datasets after mergeRenameReorderForDataMC:"
        print dsetMgr.getMCDatasetNames()

    # Only WJets/WJetsToLNu or WJetsToLNu_HT_* should be used (not both)
    if useWJetsHT:
        dsetMgr.remove(filter(lambda name: "WJets" == name,
                              dsetMgr.getAllDatasetNames()),
                       close=False)
    else:
        dsetMgr.remove(filter(lambda name: "WJetsHT" in name,
                              dsetMgr.getAllDatasetNames()),
                       close=False)
    print "Datasets used for EWK (after choosing between WJets or WJetsHT sample):"
    print dsetMgr.getMCDatasetNames()

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(dsetMgr, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH dsetMgr to one (for each mass bin)
    plots.mergeWHandHH(dsetMgr)

    # Merge MC EWK samples as one EWK sample
    myMergeList = []
    # Always use TT (or TTJets) as a part of the EWK background
    if "TT" in dsetMgr.getMCDatasetNames():
        myMergeList.append("TT")  # Powheg, no neg. weights -> large stats.
    else:
        myMergeList.append("TTJets")  # Madgraph with negative weights
        print "Warning: using TTJets as input, but this is suboptimal. Please switch to the TT sample (much more stats.)."
    # Always use WJets as a part of the EWK background
    if useWJetsHT:
        myMergeList.append("WJetsHT")
    else:
        myMergeList.append("WJets")
    # For SY, single top and diboson, use only if available:
    if "DYJetsToQQHT" in dsetMgr.getMCDatasetNames():
        myMergeList.append("DYJetsToQQHT")
    if "DYJetsToLL" in dsetMgr.getMCDatasetNames():
        myMergeList.append("DYJetsToLL")
    else:
        print "Warning: ignoring DYJetsToLL sample (since merged sample does not exist) ..."
    if "SingleTop" in dsetMgr.getMCDatasetNames():
        myMergeList.append("SingleTop")
    else:
        print "Warning: ignoring single top sample (since merged sample does not exist) ..."
    if "Diboson" in dsetMgr.getMCDatasetNames():
        myMergeList.append("Diboson")
    else:
        print "Warning: ignoring diboson sample (since merged sample does not exist) ..."
    # Sanity check: every sample we intend to merge must actually exist
    for item in myMergeList:
        if not item in dsetMgr.getMCDatasetNames():
            raise Exception(
                "Error: tried to use dataset '%s' as part of the merged EWK dataset, but the dataset '%s' does not exist!"
                % (item, item))
    dsetMgr.merge("EWK", myMergeList)
    if verbose:
        print "\nFinal merged dataset list:\n"
        print dsetMgr.getMCDatasetNames()

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    for HISTONAME in comparisonList:
        BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME
        INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME
        # Fit range is taken from the first non-empty histogram's x-axis
        FITMIN = None
        FITMAX = None

        #===== Infer binning information and labels
        histonames = dsetMgr.getDataset("Data").getDirectoryContent(
            COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME)
        bins = []
        binLabels = []
        if histonames == None:
            # Assume that only inclusive bin exists
            name = COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME
            if not dsetMgr.getDataset("Data").hasRootHisto(name):
                raise Exception(
                    "Error: Cannot find histogram or directory of name '%s'!"
                    % name)
            BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME
            INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME
            bins = [""]
            binLabels = ["Inclusive"]
        else:
            # One sub-histogram per tau-pT bin; keep only bins with entries
            for hname in histonames:
                binIndex = hname.replace(
                    "NormalizationMETBaselineTau" + HISTONAME, "")
                # print "DEBUG: We are looking for hisrogram "+COMBINEDHISTODIR+"/"+BASELINETAUHISTONAME+binIndex
                hDummy = dsetMgr.getDataset("Data").getDatasetRootHisto(
                    COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME +
                    binIndex).getHistogram()
                title = hDummy.GetTitle()
                title = title.replace("METBaseline" + HISTONAME, "")
                if hDummy.Integral() > 0.0:
                    bins.append(binIndex)
                    if binIndex == "Inclusive":
                        binLabels.append(binIndex)
                    else:
                        binLabels.append(
                            QCDNormalization.getModifiedBinLabelString(title))
                    if FITMIN == None:
                        FITMIN = hDummy.GetXaxis().GetXmin()
                        FITMAX = hDummy.GetXaxis().GetXmax()
                    hDummy.Delete()
                else:
                    print "Skipping bin '%s' (%s) because it has no entries" % (
                        binIndex,
                        QCDNormalization.getModifiedBinLabelString(title))
        print "\nHistogram bins available", bins

        # Select bins by filter
        if len(selectOnlyBins) > 0:
            oldBinLabels = binLabels[:]
            oldBins = bins[:]
            binLabels = []
            bins = []
            for k in selectOnlyBins:
                for i in range(len(oldBinLabels)):
                    if k == oldBinLabels[i] or k == oldBins[i]:
                        binLabels.append(oldBinLabels[i])
                        bins.append(oldBins[i])
        print "Using bins ", bins
        print "\nBin labels"
        for i in range(len(binLabels)):
            # Pad the bin suffix to a fixed width for aligned printout
            line = bins[i]
            while len(line) < 10:
                line += " "
            line += ": " + binLabels[i]
            print line
        print

        #===== Initialize normalization calculator
        #manager = QCDNormalization.QCDNormalizationManagerExperimental1(binLabels)
        manager = QCDNormalization.QCDNormalizationManagerDefault(
            binLabels, dirs[0], moduleInfoString)

        #===== Create templates (EWK fakes, EWK genuine, QCD; data template is created by manager)
        template_EWKFakeTaus_Baseline = manager.createTemplate(
            "EWKFakeTaus_Baseline")
        template_EWKFakeTaus_Inverted = manager.createTemplate(
            "EWKFakeTaus_Inverted")
        template_EWKGenuineTaus_Baseline = manager.createTemplate(
            "EWKGenuineTaus_Baseline")
        template_EWKGenuineTaus_Inverted = manager.createTemplate(
            "EWKGenuineTaus_Inverted")
        template_EWKInclusive_Baseline = manager.createTemplate(
            "EWKInclusive_Baseline")
        template_EWKInclusive_Inverted = manager.createTemplate(
            "EWKInclusive_Inverted")
        template_QCD_Baseline = manager.createTemplate("QCD_Baseline")
        template_QCD_Inverted = manager.createTemplate("QCD_Inverted")

        #===== Define fit functions and fit parameters
        # The available functions are defined in the FitFunction class in the QCDMeasurement/python/QCDNormalization.py file
        # Inclusive EWK
        # The function is essentially pure Gaussian up to boundary value, and exponential after that, i.e.
        # A*Gaus(x, mean,sigma) when x > boundary_x
        # A*Gaus(boundary_x, mean,sigma)*exp(-beta*x)
        # par[0] = overall normalization A
        # par[1] = mean
        # par[3] = sigma
        # par[4] = beta in the exponential tail
        boundary = 160
        template_EWKInclusive_Baseline.setFitter(
            QCDNormalization.FitFunction("EWKFunction",
                                         boundary=boundary,
                                         norm=1,
                                         rejectPoints=1), FITMIN, FITMAX)
        template_EWKInclusive_Baseline.setDefaultFitParam(
            defaultLowerLimit=[0.5, 90, 30, 0.0001],
            defaultUpperLimit=[30, 250, 60, 1.0])
        # Fake tau and QCD
        # Note that the same function is used for QCD only and QCD+EWK fakes (=Fake Tau)
        # Old function, used until May 2017
        # template_QCD_Inverted.setFitter(QCDNormalization.FitFunction("QCDFunction", norm=1), FITMIN, FITMAX)
        # template_QCD_Inverted.setDefaultFitParam(defaultLowerLimit=[ 30, 0.1, 0.1, 0, 10, 0.0, 0.0001],
        #                                          defaultUpperLimit=[ 130, 20, 20, 200, 200, 1.0, 1.0])
        # Latest version of the Rayleigh peak with shift + Gaussian + Exponential combination, used from March 2018, i.e.
        # A*((x-b)/sigma^2)*exp((x-b)^2/(2*sigma^2))+B*Gaus(x,mean,sigma2)+C*exp(-beta*x)
        # par[0] sigma for Rayielgh term
        # par[1] overall normalization A
        # par[2] peak shift b for Rayleigh term
        # par[3] normalization B for the Gaussian term
        # par[4] normalization C for the exponential tail
        # par[5] mean for Gaussian term
        # par[6] sigma2 for Gaussian term
        # par[7] beta for exponential tail
        template_QCD_Inverted.setFitter(
            QCDNormalization.FitFunction("QCDFunctionWithPeakShiftClear",
                                         norm=1), FITMIN, FITMAX)
        template_QCD_Inverted.setDefaultFitParam(
            defaultLowerLimit=[30, 0.1, -10, 0, -20, 10, 0.0001, 0.0001],
            defaultUpperLimit=[130, 20, 10, 20, 200, 100, 1.0, 0.05])

        #===== Loop over tau pT bins
        for i, binStr in enumerate(bins):
            print "\n********************************"
            print "*** Fitting bin %s" % binLabels[i]
            print "********************************\n"

            #===== Reset bin results
            manager.resetBinResults()

            #===== Obtain histograms for normalization
            # Data
            histoName = COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binStr
            hmetBase_data = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "Data").getRootHisto().Clone(histoName)
            histoName = COMBINEDHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr
            hmetInverted_data = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "Data").getRootHisto().Clone(histoName)
            # EWK genuine taus
            histoName = GENUINEHISTODIR + "/" + BASELINETAUHISTONAME + binStr
            hmetBase_EWK_GenuineTaus = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "EWK").getRootHisto().Clone(histoName)
            histoName = GENUINEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr
            hmetInverted_EWK_GenuineTaus = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "EWK").getRootHisto().Clone(histoName)
            # EWK fake taus
            histoName = FAKEHISTODIR + "/" + BASELINETAUHISTONAME + binStr
            hmetBase_EWK_FakeTaus = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "EWK").getRootHisto().Clone(histoName)
            histoName = FAKEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr
            hmetInverted_EWK_FakeTaus = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "EWK").getRootHisto().Clone(histoName)

            #===== Obtain inclusive EWK histograms
            hmetBase_EWKinclusive = hmetBase_EWK_GenuineTaus.Clone(
                "EWKinclusiveBase")
            hmetBase_EWKinclusive.Add(hmetBase_EWK_FakeTaus, 1.0)
            hmetInverted_EWKinclusive = hmetInverted_EWK_GenuineTaus.Clone(
                "EWKinclusiveInv")
            hmetInverted_EWKinclusive.Add(hmetInverted_EWK_FakeTaus, 1.0)

            # Finalize histograms by rebinning
            for histogram in [
                    hmetBase_data, hmetInverted_data,
                    hmetBase_EWK_GenuineTaus, hmetInverted_EWK_GenuineTaus,
                    hmetBase_EWKinclusive, hmetBase_EWK_FakeTaus,
                    hmetInverted_EWK_FakeTaus, hmetInverted_EWKinclusive
            ]:
                histogram.Rebin(_rebinFactor)

            #===== Obtain histograms for QCD (subtract MC EWK events from data)
            # QCD from baseline is usable only as a cross check
            hmetBase_QCD = hmetBase_data.Clone("QCDbase")
            hmetBase_QCD.Add(hmetBase_EWKinclusive, -1)
            hmetInverted_QCD = hmetInverted_data.Clone("QCDinv")
            hmetInverted_QCD.Add(hmetInverted_EWKinclusive, -1)

            #===== Set histograms to the templates
            template_EWKFakeTaus_Inverted.setHistogram(
                hmetInverted_EWK_FakeTaus, binLabels[i])
            template_EWKGenuineTaus_Inverted.setHistogram(
                hmetInverted_EWK_GenuineTaus, binLabels[i])
            template_EWKInclusive_Inverted.setHistogram(
                hmetInverted_EWKinclusive, binLabels[i])
            template_QCD_Inverted.setHistogram(hmetInverted_QCD, binLabels[i])
            template_EWKFakeTaus_Baseline.setHistogram(hmetBase_EWK_FakeTaus,
                                                       binLabels[i])
            template_EWKGenuineTaus_Baseline.setHistogram(
                hmetBase_EWK_GenuineTaus, binLabels[i])
            template_EWKInclusive_Baseline.setHistogram(
                hmetBase_EWKinclusive, binLabels[i])
            template_QCD_Baseline.setHistogram(hmetBase_QCD, binLabels[i])

            #===== Make plots of templates
            manager.plotTemplates()

            #===== Fit individual templates to data
            fitOptions = "R B L W M"  # RBLWM
            manager.calculateNormalizationCoefficients(hmetBase_data,
                                                       fitOptions, FITMIN,
                                                       FITMAX)

            #===== Calculate combined normalisation coefficient (f_fakes = w*f_QCD + (1-w)*f_EWKfakes)
            # Obtain histograms
            histoName = "ForDataDrivenCtrlPlots/shapeTransverseMass/shapeTransverseMass" + binStr
            dataMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto(
                "Data").getRootHisto().Clone(histoName)
            treatNegativeBins(dataMt, "Data_inverted mT")
            histoName = "ForDataDrivenCtrlPlotsEWKFakeTaus/shapeTransverseMass/shapeTransverseMass" + binStr
            ewkFakeTausMt = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "EWK").getRootHisto().Clone(histoName)
            treatNegativeBins(ewkFakeTausMt, "ewkFakeTaus_inverted mT")
            histoName = "ForDataDrivenCtrlPlotsEWKGenuineTaus/shapeTransverseMass/shapeTransverseMass" + binStr
            ewkGenuineTausMt = plots.DataMCPlot(
                dsetMgr, histoName).histoMgr.getHisto(
                    "EWK").getRootHisto().Clone(histoName)
            treatNegativeBins(ewkGenuineTausMt, "ewkGenuineTaus_inverted mT")
            # QCD mT shape = data - EWK fakes - EWK genuine
            qcdMt = dataMt.Clone("QCD")
            qcdMt.Add(ewkFakeTausMt, -1)
            qcdMt.Add(ewkGenuineTausMt, -1)
            treatNegativeBins(qcdMt, "QCD_inverted mT")
            # Do calculation
            manager.calculateCombinedNormalizationCoefficient(
                qcdMt, ewkFakeTausMt)

        #===== Save normalization
        outFileName = "QCDNormalizationFactors_%s_%s.py" % (HISTONAME,
                                                            moduleInfoString)
        outFileFullName = os.path.join(argv[1], outFileName)
        manager.writeScaleFactorFile(outFileFullName, moduleInfoString)
def main(argv):
    """QCD inverted-tau-ID normalization with separated EWK fakes.

    Fits MET shapes in the baseline and inverted tau-ID samples twice:
    once subtracting inclusive EWK MC, and once with EWK split into
    genuine-tau and fake-tau components.  Writes the resulting
    normalization factors and LaTeX fit summaries to files, then merges
    the two factor files.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (sys, dataset, plots, xsect, tdrstyle, consistencyCheck,
    InvertedTauID, addlabels, usage, mergeNormalizations, dataEra,
    searchMode) — confirm they are in scope before running standalone.
    """
    # Histogram name stem selecting the tau-ID stage to normalize.
    # HISTONAME = "TauIdJets"
    # HISTONAME = "TauIdJetsCollinear"
    # HISTONAME = "TauIdBtag"
    # HISTONAME = "TauIdBvetoCollinear"
    # HISTONAME = "TauIdBveto"
    HISTONAME = "TauIdAfterCollinearCuts"
    # Directory-name suffixes for the EWK fake-tau / genuine-tau variants.
    FAKEHISTO = "OnlyEWKFakeTaus"
    GENUINEHISTO = "PlusFilteredEWKFakeTaus"

    dirs = []
    if len(sys.argv) < 2:
        usage()
    dirs.append(sys.argv[1])

    # Create all datasets from a multicrab task
    datasets = dataset.getDatasetsFromMulticrabDirs(dirs,dataEra=dataEra, searchMode=searchMode, analysisName="signalAnalysisInvertedTau",optimizationMode="") #no collinear
    #datasets = dataset.getDatasetsFromMulticrabDirs(dirs,dataEra=dataEra, searchMode=searchMode, analysisName="signalAnalysisInvertedTau",optimizationMode="OptQCDTailKillerLoosePlus") #collinear

    # Check multicrab consistency
    consistencyCheck.checkConsistencyStandalone(dirs[0],datasets,name="QCD inverted")

    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    datasets.updateNAllEventsToPUWeighted()

    # Read integrated luminosities of data datasets from lumi.json
    datasets.loadLuminosities()

    # Include only 120 mass bin of HW and HH datasets; drop samples not
    # used by this normalization (single-top H+ channels, exclusive TTJets).
    datasets.remove(filter(lambda name: "TTToHplus" in name and not "M120" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "HplusTB" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_t-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_tW-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_SemiLept" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_FullLept" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_Hadronic" in name, datasets.getAllDatasetNames()))

    # Default merging nad ordering of data and MC datasets
    # All data datasets to "Data"
    # All QCD datasets to "QCD"
    # All single top datasets to "SingleTop"
    # WW, WZ, ZZ to "Diboson"
    plots.mergeRenameReorderForDataMC(datasets)

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(datasets, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH datasets to one (for each mass bin)
    # TTToHplusBWB_MXXX and TTToHplusBHminusB_MXXX to "TTToHplus_MXXX"
    plots.mergeWHandHH(datasets)

    datasets.merge("EWK", [
        "TTJets",
        "WJets",
        "DYJetsToLL",
        "SingleTop",
        "Diboson"
    ])

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    # Two calculators: constructor flag False = inclusive EWK subtraction,
    # True = EWK split into genuine and fake taus.
    invertedQCD = InvertedTauID(False)
    invertedQCD.setLumi(datasets.getDataset("Data").getLuminosity())
    invertedQCD.setInfo([dataEra,searchMode,HISTONAME])

    invertedQCD_separatedFakes = InvertedTauID(True)
    invertedQCD_separatedFakes.setLumi(datasets.getDataset("Data").getLuminosity())
    invertedQCD_separatedFakes.setInfo([dataEra,searchMode,HISTONAME])

    # Infer the bin suffixes and human-readable labels from the directory
    # content of the baseline MET histograms.
    histonames = datasets.getDataset("Data").getDirectoryContent("baseline/METBaseline"+HISTONAME)
    bins = []
    binLabels = []
    for histoname in histonames:
        bins.append(histoname.replace("METBaseline"+HISTONAME,""))
        title = datasets.getDataset("Data").getDatasetRootHisto("baseline/METBaseline"+HISTONAME+"/"+histoname).getHistogram().GetTitle()
        # Sanitize the ROOT latex-ish title into an identifier-safe label.
        title = title.replace("METBaseline"+HISTONAME,"")
        title = title.replace("#tau p_{T}","taup_T")
        title = title.replace("#tau eta","taueta")
        title = title.replace("<","lt")
        title = title.replace(">","gt")
        title = title.replace("=","eq")
        title = title.replace("..","to")
        title = title.replace(".","p")
        title = title.replace("/","_")
        binLabels.append(title)
    #binLabels = bins # for this data set

    print
    print "Histogram bins available",bins
    # bins = ["Inclusive"]
    # bins = ["taup_Tleq50","taup_Teq50to60"]
    print "Using bins ",bins
    print
    print "Bin labels"
    for i in range(len(binLabels)):
        line = bins[i]
        while len(line) < 10:
            line += " "
        line += ": "+binLabels[i]
        print line
    print

    # Fit each bin separately.
    for i,bin in enumerate(bins):
        invertedQCD.setLabel(binLabels[i])
        invertedQCD_separatedFakes.setLabel(binLabels[i])

        # MET plots for baseline/inverted tau-ID, plus the genuine-tau and
        # fake-tau filtered variants.
        metBase = plots.DataMCPlot(datasets, "baseline/METBaseline"+HISTONAME+"/METBaseline"+HISTONAME+bin)
        metInver = plots.DataMCPlot(datasets, "Inverted/METInverted"+HISTONAME+"/METInverted"+HISTONAME+bin)
        #metBase_EWK = plots.DataMCPlot(datasets, "baseline/METBaseline"+HISTONAME+"/METBaseline"+HISTONAME+bin)
        #metInver_EWK = plots.DataMCPlot(datasets, "Inverted/METInverted"+HISTONAME+"/METInverted"+HISTONAME+bin)
        metBase_GenuineTaus = plots.DataMCPlot(datasets, "baseline/METBaseline"+HISTONAME+GENUINEHISTO+"/METBaseline"+HISTONAME+GENUINEHISTO+bin)
        metInver_GenuineTaus = plots.DataMCPlot(datasets, "Inverted/METInverted"+HISTONAME+GENUINEHISTO+"/METInverted"+HISTONAME+GENUINEHISTO+bin)
        metBase_FakeTaus = plots.DataMCPlot(datasets, "baseline/METBaseline"+HISTONAME+FAKEHISTO+"/METBaseline"+HISTONAME+FAKEHISTO+bin)
        metInver_FakeTaus = plots.DataMCPlot(datasets, "Inverted/METInverted"+HISTONAME+FAKEHISTO+"/METInverted"+HISTONAME+FAKEHISTO+bin)

        # Rebin before subtracting
        metBase.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        metInver.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        #metBase_EWK.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        #metInver_EWK.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        metBase_GenuineTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        metInver_GenuineTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        metBase_FakeTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5
        metInver_FakeTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(10)) #5

        # Extract the underlying ROOT histograms used in the fits.
        # (Clone names spell "METBaseLine" with capital L — kept as-is since
        # they only name the clone objects.)
        metInverted_data = metInver.histoMgr.getHisto("Data").getRootHisto().Clone("Inverted/METInverted"+HISTONAME+"/METInverted"+HISTONAME+bin)
        metBase_data = metBase.histoMgr.getHisto("Data").getRootHisto().Clone("baseline/METBaseLine"+HISTONAME+"/METBaseline"+HISTONAME+bin)
        metInverted_EWK = metInver.histoMgr.getHisto("EWK").getRootHisto().Clone("Inverted/METInverted"+HISTONAME+"/METInverted"+HISTONAME+bin)
        metBase_EWK = metBase.histoMgr.getHisto("EWK").getRootHisto().Clone("baseline/METBaseLine"+HISTONAME+"/METBaseline"+HISTONAME+bin)
        metInverted_EWK_GenuineTaus = metInver_GenuineTaus.histoMgr.getHisto("EWK").getRootHisto().Clone("Inverted/METInverted"+HISTONAME+GENUINEHISTO+"/METInverted"+HISTONAME+GENUINEHISTO+bin)
        metBase_EWK_GenuineTaus = metBase_GenuineTaus.histoMgr.getHisto("EWK").getRootHisto().Clone("baseline/METBaseLine"+HISTONAME+GENUINEHISTO+"/METBaseline"+HISTONAME+GENUINEHISTO+bin)
        metInverted_EWK_FakeTaus = metInver_FakeTaus.histoMgr.getHisto("EWK").getRootHisto().Clone("Inverted/METInverted"+HISTONAME+FAKEHISTO+"/METInverted"+HISTONAME+FAKEHISTO+bin)
        metBase_EWK_FakeTaus = metBase_FakeTaus.histoMgr.getHisto("EWK").getRootHisto().Clone("baseline/METBaseLine"+HISTONAME+FAKEHISTO+"/METBaseline"+HISTONAME+FAKEHISTO+bin)

        # QCD shape = data - inclusive EWK.
        metBase_QCD = metBase_data.Clone("QCD")
        metBase_QCD.Add(metBase_EWK,-1)
        metInverted_QCD = metInverted_data.Clone("QCD")
        metInverted_QCD.Add(metInverted_EWK,-1)

        # QCD shape with genuine-tau and fake-tau EWK subtracted separately.
        metBase_QCD_separatedFakes = metBase_data.Clone("QCD")
        metBase_QCD_separatedFakes.Add(metBase_EWK_GenuineTaus,-1)
        metBase_QCD_separatedFakes.Add(metBase_EWK_FakeTaus,-1)
        metInverted_QCD_separatedFakes = metInverted_data.Clone("QCD")
        metInverted_QCD_separatedFakes.Add(metInverted_EWK_GenuineTaus,-1)
        metInverted_QCD_separatedFakes.Add(metInverted_EWK_FakeTaus,-1)

        # Attach axis labels (helper defined elsewhere in this file).
        metInverted_data = addlabels(metInverted_data)
        metInverted_EWK = addlabels(metInverted_EWK)
        metInverted_EWK_GenuineTaus = addlabels(metInverted_EWK_GenuineTaus)
        metInverted_EWK_FakeTaus = addlabels(metInverted_EWK_FakeTaus)
        metBase_data = addlabels(metBase_data)
        metBase_EWK = addlabels(metBase_EWK)
        metBase_EWK_GenuineTaus = addlabels(metBase_EWK_GenuineTaus)
        metBase_EWK_FakeTaus = addlabels(metBase_EWK_FakeTaus)
        metInverted_QCD = addlabels(metInverted_QCD)
        metInverted_QCD_separatedFakes = addlabels(metInverted_QCD_separatedFakes)

        #non-separated
        invertedQCD.plotHisto(metInverted_data,"inverted")
        invertedQCD.plotHisto(metInverted_EWK,"invertedEWK")
        invertedQCD.plotHisto(metBase_data,"baseline")
        invertedQCD.plotHisto(metBase_EWK,"baselineEWK")
        fitOptions = "LRB"
        invertedQCD.fitEWK(metInverted_EWK,fitOptions)
        invertedQCD.fitEWK(metBase_EWK,fitOptions)
        invertedQCD.fitQCD(metInverted_QCD,fitOptions)
        invertedQCD.fitData(metBase_data)
        invertedQCD.getNormalization()

        #separated
        invertedQCD_separatedFakes.plotHisto(metInverted_data,"inverted")
        invertedQCD_separatedFakes.plotHisto(metInverted_EWK_GenuineTaus,"invertedEWKGenuineTaus")
        invertedQCD_separatedFakes.plotHisto(metInverted_EWK_FakeTaus,"invertedEWKFakeTaus")
        invertedQCD_separatedFakes.plotHisto(metBase_data,"baseline")
        invertedQCD_separatedFakes.plotHisto(metBase_EWK_GenuineTaus,"baselineEWKGenuineTaus")
        invertedQCD_separatedFakes.plotHisto(metBase_EWK_FakeTaus,"baselineEWKFakeTaus")
        invertedQCD_separatedFakes.fitEWK_GenuineTaus(metInverted_EWK_GenuineTaus,fitOptions)
        invertedQCD_separatedFakes.fitEWK_GenuineTaus(metBase_EWK_GenuineTaus,fitOptions)
        invertedQCD_separatedFakes.fitEWK_FakeTaus(metInverted_EWK_FakeTaus,fitOptions)
        invertedQCD_separatedFakes.fitEWK_FakeTaus(metBase_EWK_FakeTaus,fitOptions)
        invertedQCD_separatedFakes.fitQCD(metInverted_QCD_separatedFakes,fitOptions)
        invertedQCD_separatedFakes.fitData(metBase_data)
        invertedQCD_separatedFakes.getNormalization()

    # Summarize and persist results.
    # NOTE(review): placement after the bin loop reconstructed from collapsed
    # source — confirm against the original script.
    invertedQCD.Summary()
    invertedQCD.WriteNormalizationToFile("QCDInvertedNormalizationFactors.py")
    invertedQCD.WriteLatexOutput("fits.tex")
    invertedQCD_separatedFakes.Summary()
    invertedQCD_separatedFakes.WriteNormalizationToFile("QCDInvertedNormalizationFactorsSeparatedFakeTaus.py")
    invertedQCD_separatedFakes.WriteLatexOutput("fits_separatedfaketaus.tex")
    mergeNormalizations("QCDInvertedNormalizationFactors.py","QCDInvertedNormalizationFactorsSeparatedFakeTaus.py")
def main(argv):
    """QCD inverted-tau-ID normalization (single InvertedTauID calculator).

    For each selection stage in comparisonList, fits MET shapes in the
    baseline and inverted tau-ID samples (data and merged EWK MC) per bin
    and writes normalization factors plus a LaTeX fit summary.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (sys, dataset, plots, xsect, tdrstyle, consistencyCheck,
    InvertedTauID, addlabels, usage, dataEra, searchMode, analysis).
    """
    dirs = []
    if len(sys.argv) < 2:
        usage()
    dirs.append(sys.argv[1])

    comparisonList = ["AfterStdSelections"]

    # Create all datasets from a multicrab task
    datasets = dataset.getDatasetsFromMulticrabDirs(dirs, dataEra=dataEra, searchMode=searchMode, analysisName=analysis)
    #print datasets.getDatasetNames()
    #print datasets

    # Check multicrab consistency
    consistencyCheck.checkConsistencyStandalone(dirs[0], datasets, name="QCD inverted")

    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    datasets.updateNAllEventsToPUWeighted()

    # Read integrated luminosities of data datasets from lumi.json
    datasets.loadLuminosities()

    # Include only 120 mass bin of HW and HH datasets
    datasets.remove(filter(lambda name: "TTToHplus" in name and not "M120" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "HplusTB" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_t-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_tW-channel" in name, datasets.getAllDatasetNames()))
    #datasets.remove(filter(lambda name: "TTJets_SemiLept" in name, datasets.getAllDatasetNames()))
    #datasets.remove(filter(lambda name: "TTJets_FullLept" in name, datasets.getAllDatasetNames()))
    #datasets.remove(filter(lambda name: "TTJets_Hadronic" in name, datasets.getAllDatasetNames()))

    # Default merging nad ordering of data and MC datasets
    # All data datasets to "Data"
    # All QCD datasets to "QCD"
    # All single top datasets to "SingleTop"
    # WW, WZ, ZZ to "Diboson"
    plots.mergeRenameReorderForDataMC(datasets)

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(datasets, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH datasets to one (for each mass bin)
    # TTToHplusBWB_MXXX and TTToHplusBHminusB_MXXX to "TTToHplus_MXXX"
    plots.mergeWHandHH(datasets)

    datasets.merge("EWK", [
        "TTJets",
        "WJets",
        "DYJetsToLL",
        "SingleTop",
        # "Diboson"
    ])

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    for HISTONAME in comparisonList:
        invertedQCD = InvertedTauID()
        invertedQCD.setLumi(datasets.getDataset("Data").getLuminosity())
        invertedQCD.setInfo([dataEra, searchMode, HISTONAME])

        # Infer bin suffixes and labels from the baseline MET directory.
        histonames = datasets.getDataset("Data").getDirectoryContent("ForQCDNormalization/NormalizationMETBaselineTau" + HISTONAME)
        bins = []
        binLabels = []
        for histoname in histonames:
            bins.append(histoname.replace("NormalizationMETBaselineTau" + HISTONAME, ""))
            title = datasets.getDataset("Data").getDatasetRootHisto("ForQCDNormalization/NormalizationMETBaselineTau" + HISTONAME + "/" + histoname).getHistogram().GetTitle()
            # Sanitize the ROOT latex-ish title into an identifier-safe label.
            title = title.replace("METBaseline" + HISTONAME, "")
            title = title.replace("#tau p_{T}", "taup_T")
            title = title.replace("#tau eta", "taueta")
            title = title.replace("<", "lt")
            title = title.replace(">", "gt")
            title = title.replace("=", "eq")
            title = title.replace("..", "to")
            title = title.replace(".", "p")
            title = title.replace("/", "_")
            binLabels.append(title)
        # Labels are overridden with the raw bin suffixes for this data set.
        binLabels = bins  # for this data set
        print
        print "Histogram bins available", bins
        # bins = ["Inclusive"]
        # bins = ["taup_Tleq50","taup_Teq50to60"]
        print "Using bins ", bins
        print
        print "Bin labels"
        for i in range(len(binLabels)):
            line = bins[i]
            while len(line) < 10:
                line += " "
            line += ": " + binLabels[i]
            print line
        print

        # Fit each bin separately.
        for i, bin in enumerate(bins):
            invertedQCD.setLabel(binLabels[i])
            metBase = plots.DataMCPlot(datasets, "ForQCDNormalization/NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME + bin)
            metInver = plots.DataMCPlot(datasets, "ForQCDNormalization/NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME + bin)
            # Rebin before subtracting
            RebinFactor = 10
            metBase.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))

            # Extract ROOT histograms for data and merged EWK MC.
            metInverted_data = metInver.histoMgr.getHisto("Data").getRootHisto().Clone("ForQCDNormalization/NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME + bin)
            metInverted_EWK = metInver.histoMgr.getHisto("EWK").getRootHisto().Clone("ForQCDNormalization/NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME + bin)
            metBase_data = metBase.histoMgr.getHisto("Data").getRootHisto().Clone("ForQCDNormalization/NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME + bin)
            metBase_EWK = metBase.histoMgr.getHisto("EWK").getRootHisto().Clone("ForQCDNormalization/NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME + bin)

            # QCD shape = data - EWK.
            metBase_QCD = metBase_data.Clone("QCD")
            metBase_QCD.Add(metBase_EWK, -1)
            metInverted_QCD = metInverted_data.Clone("QCD")
            metInverted_QCD.Add(metInverted_EWK, -1)

            # Attach axis labels (helper defined elsewhere in this file).
            metInverted_data = addlabels(metInverted_data)
            metInverted_EWK = addlabels(metInverted_EWK)
            metBase_data = addlabels(metBase_data)
            metBase_EWK = addlabels(metBase_EWK)
            metInverted_QCD = addlabels(metInverted_QCD)

            invertedQCD.plotHisto(metInverted_data, "inverted")
            invertedQCD.plotHisto(metInverted_EWK, "invertedEWK")
            invertedQCD.plotHisto(metBase_data, "baseline")
            invertedQCD.plotHisto(metBase_EWK, "baselineEWK")

            fitOptions = "RB"
            invertedQCD.fitEWK(metInverted_EWK, fitOptions)
            invertedQCD.fitEWK(metBase_EWK, fitOptions)
            invertedQCD.fitQCD(metInverted_QCD, fitOptions)
            invertedQCD.fitData(metBase_data)
            invertedQCD.getNormalization()

        invertedQCD.Summary()
        # Derive output file stem from the selection-stage name.
        normalizationFileName = HISTONAME #.replace("TauIdAfterCollinearCuts","")
        if HISTONAME == "TauIdAfterCollinearCutsPlusFilteredEWKFakeTaus":
            normalizationFileName = normalizationFileName.replace("Plus", "")
        invertedQCD.WriteNormalizationToFile("QCDInvertedNormalizationFactors" + normalizationFileName + ".py")
        invertedQCD.WriteLatexOutput("fits" + normalizationFileName + ".tex")
def main(argv):
    """Plotting/counter driver: load multicrab datasets, prune samples,
    and produce comparison plots and counters.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (sys, dataset, plots, xsect, tdrstyle, usage, dataMCExample,
    MetComparisonBaselineVsInverted, doCounters, dataEra, searchMode,
    analysis, drawToScreen).
    """
    dirs = []
    if len(sys.argv) < 2:
        usage()
    dirs.append(sys.argv[1])

    comparisonList = ["AfterStdSelections"]

    # Create all datasets from a multicrab task
    datasets = dataset.getDatasetsFromMulticrabDirs(dirs,dataEra=dataEra, searchMode=searchMode, analysisName=analysis)
    #print datasets.getDatasetNames()
    print " dirs ",dirs[0]

    # Check multicrab consistency
    # consistencyCheck.checkConsistencyStandalone(dirs[0],datasets,name="CorrelationAnalysis")

    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    datasets.updateNAllEventsToPUWeighted()

    # Read integrated luminosities of data datasets from lumi.json
    datasets.loadLuminosities()

    # Include only 120 mass bin of HW and HH datasets
    #datasets.remove(filter(lambda name: "TTToHplus" in name and not "M120" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTToHplusBWB" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "HplusTB" in name and not "M_500" in name, datasets.getAllDatasetNames()))
    # datasets.remove(filter(lambda name: "HplusTB" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_t-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_tW-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_SemiLept" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets" in name, datasets.getAllDatasetNames()))
    #datasets.remove(filter(lambda name: "DYJetsToLL_M_50_HT" in name, datasets.getAllDatasetNames()))
    # datasets.remove(filter(lambda name: "QCD" in name, datasets.getAllDatasetNames()))
    #datasets.remove(filter(lambda name: "WJetsToLNu" in name, datasets.getAllDatasetNames()))
    # Keep only the HT-binned DY and W+jets samples; drop the inclusive ones.
    datasets.remove(filter(lambda name: ("DYJetsToLL_M_10to50" in name or "DYJetsToLL_M_50" in name) and not "DYJetsToLL_M_50_HT" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "WJetsToLNu" in name and not "WJetsToLNu_HT" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "DY2JetsToLL" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "DY3JetsToLL" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "DY4JetsToLL" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "ST" in name, datasets.getAllDatasetNames()))

    # Default merging nad ordering of data and MC datasets
    # All data datasets to "Data"
    # All QCD datasets to "QCD"
    # All single top datasets to "SingleTop"
    # WW, WZ, ZZ to "Diboson"
    plots.mergeRenameReorderForDataMC(datasets)

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(datasets, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH datasets to one (for each mass bin)
    # TTToHplusBWB_MXXX and TTToHplusBHminusB_MXXX to "TTToHplus_MXXX"
    plots.mergeWHandHH(datasets)

    # datasets.getDataset("TTbar_HBWB_HToTauNu_M_160_13TeV_pythia6").setCrossSection(0.336902*2*0.955592) # pb
    # At the moment the collision energy must be set by hand
    # for dset in datasets.getMCDatasets():
    #     dset.setEnergy("13")
    # At the moment the cross sections must be set by hand
    #xsect.setBackgroundCrossSections(datasets)

    # datasets.merge("EWK", [
    #     "TT",
    #     "WJetsHT",
    #     "DYJetsToLLHT",
    #     "SingleTop",
    #     "Diboson"
    # ])

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    dataMCExample(datasets)
    # MtComparison(datasets)
    MetComparisonBaselineVsInverted(datasets)
    # MetComparison(datasets)
    # TauPtComparison(datasets)

    # Print counters
    doCounters(datasets)

    # Script execution can be paused like this, it will continue after
    # user has given some input (which must include enter)
    if drawToScreen:
        raw_input("Hit enter to continue")
def main(argv, dsetMgr, moduleInfoString):
    """QCD normalization via the QCDNormalization manager/template machinery.

    Prunes and merges datasets in dsetMgr, builds per-bin MET templates
    (data, EWK genuine/fake/inclusive, QCD, FakeTau, optional QCD MC),
    fits them, computes the combined normalization coefficient, and writes
    a scale-factor file into argv[1].

    NOTE(review): depends on module-level names defined elsewhere in this
    file (sys, os, plots, xsect, tdrstyle, QCDNormalization,
    treatNegativeBins, verbose, useWJetsHT, useTT_Mtt, selectOnlyBins,
    _rebinFactor).
    """
    # Source directories for the MET histograms used in the normalization.
    COMBINEDHISTODIR = "ForQCDNormalization"
    FAKEHISTODIR = "ForQCDNormalizationEWKFakeTaus"
    GENUINEHISTODIR = "ForQCDNormalizationEWKGenuineTaus"
    #QCDMCHISTODIR = "ForQCDNormalizationEWKFakeTaus"
    QCDMCHISTODIR = "ForQCDNormalization"
    comparisonList = ["AfterStdSelections"]

    dirs = []
    dirs.append(sys.argv[1])

    # Check multicrab consistency
    # consistencyCheck.checkConsistencyStandalone(dirs[0],dsetMgr,name="QCD inverted") #FIXME needs to be updated

    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    dsetMgr.updateNAllEventsToPUWeighted()

    # Read integrated luminosities of data dsetMgr from lumi.json
    dsetMgr.loadLuminosities()

    if verbose:
        print "Datasets list (initial):"
        print dsetMgr.getMCDatasetNames()

    # Include only 120 mass bin of HW and HH dsetMgr
    dsetMgr.remove(filter(lambda name: "TTToHplus" in name and not "M120" in name, dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(filter(lambda name: "HplusTB" in name, dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(filter(lambda name: "DY2JetsToLL" in name, dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(filter(lambda name: "DY3JetsToLL" in name, dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(filter(lambda name: "DY4JetsToLL" in name, dsetMgr.getAllDatasetNames()))
    # Ignore DY dataset with HERWIG hadronization (it's only for testing)
    dsetMgr.remove(filter(lambda name: "DYJetsToLL_M_50_HERWIGPP" in name, dsetMgr.getAllDatasetNames()), close=False)

    if verbose:
        print "Datasets after filter removals:"
        print dsetMgr.getMCDatasetNames()

    # Default merging nad ordering of data and MC dsetMgr
    # All data dsetMgr to "Data"
    # All QCD dsetMgr to "QCD"
    # All single top dsetMgr to "SingleTop"
    # WW, WZ, ZZ to "Diboson"
    plots.mergeRenameReorderForDataMC(dsetMgr)

    if verbose:
        print "Datasets after mergeRenameReorderForDataMC:"
        print dsetMgr.getMCDatasetNames()

    # Only WJets/WJetsToLNu or WJetsToLNu_HT_* should be used (not both)
    if useWJetsHT:
        dsetMgr.remove(filter(lambda name: "WJets"==name, dsetMgr.getAllDatasetNames()), close=False)
    else:
        dsetMgr.remove(filter(lambda name: "WJetsHT" in name, dsetMgr.getAllDatasetNames()), close=False)

    # Only TT or TT_Mtt_* should be used (not both)
    if useTT_Mtt:
        dsetMgr.remove(filter(lambda name: "TT"==name, dsetMgr.getAllDatasetNames()), close=False)
    else:
        dsetMgr.remove(filter(lambda name: "TT_Mtt" in name, dsetMgr.getAllDatasetNames()), close=False)

    print "Datasets used for EWK (after choosing between WJets or WJetsHT sample):"
    print dsetMgr.getMCDatasetNames()

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(dsetMgr, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH dsetMgr to one (for each mass bin)
    plots.mergeWHandHH(dsetMgr)

    # Merge MC EWK samples as one EWK sample
    myMergeList = []
    # Always use TT (or TTJets) as a part of the EWK background
    if useTT_Mtt:
        myMergeList.append("TT_Mtt")
    elif "TT" in dsetMgr.getMCDatasetNames():
        myMergeList.append("TT") # Powheg, no neg. weights -> large stats.
    else:
        myMergeList.append("TTJets") # Madgraph with negative weights
        print "Warning: using TTJets as input, but this is suboptimal. Please switch to the TT sample (much more stats.)."
    # Always use WJets as a part of the EWK background
    if useWJetsHT:
        myMergeList.append("WJetsHT")
    else:
        myMergeList.append("WJets")
    # For SY, single top and diboson, use only if available:
    if "DYJetsToQQHT" in dsetMgr.getMCDatasetNames():
        myMergeList.append("DYJetsToQQHT")
    if "DYJetsToLL" in dsetMgr.getMCDatasetNames():
        myMergeList.append("DYJetsToLL")
    else:
        print "Warning: ignoring DYJetsToLL sample (since merged sample does not exist) ..."
    if "SingleTop" in dsetMgr.getMCDatasetNames():
        myMergeList.append("SingleTop")
    else:
        print "Warning: ignoring single top sample (since merged sample does not exist) ..."
    if "Diboson" in dsetMgr.getMCDatasetNames():
        myMergeList.append("Diboson")
    else:
        print "Warning: ignoring diboson sample (since merged sample does not exist) ..."
    # Sanity check: every chosen component must actually exist.
    for item in myMergeList:
        if not item in dsetMgr.getMCDatasetNames():
            raise Exception("Error: tried to use dataset '%s' as part of the merged EWK dataset, but the dataset '%s' does not exist!"%(item,item))
    dsetMgr.merge("EWK", myMergeList)

    # Optionally keep a merged QCD MC sample for cross checks.
    # NOTE(review): the merge nested under the "QCD" check is reconstructed
    # from collapsed source — confirm against the original script.
    myQCDMergeList = []
    if "QCD" in dsetMgr.getMCDatasetNames():
        myQCDMergeList.append("QCD")
        dsetMgr.merge("QCDMC", myQCDMergeList)

    if verbose:
        print "\nFinal merged dataset list:\n"
        print dsetMgr.getMCDatasetNames()

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    for HISTONAME in comparisonList:
        BASELINETAUHISTONAME = "NormalizationMETBaselineTau"+HISTONAME+"/NormalizationMETBaselineTau"+HISTONAME
        INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau"+HISTONAME+"/NormalizationMETInvertedTau"+HISTONAME
        # Fit range; filled from the first non-empty histogram's axis below.
        FITMIN = None
        FITMAX = None

        #===== Infer binning information and labels
        histonames = dsetMgr.getDataset("Data").getDirectoryContent(COMBINEDHISTODIR+"/NormalizationMETBaselineTau"+HISTONAME)
        bins = []
        binLabels = []
        if histonames == None:
            # Assume that only inclusive bin exists
            name = COMBINEDHISTODIR+"/NormalizationMETBaselineTau"+HISTONAME
            if not dsetMgr.getDataset("Data").hasRootHisto(name):
                raise Exception("Error: Cannot find histogram or directory of name '%s'!"%name)
            BASELINETAUHISTONAME = "NormalizationMETBaselineTau"+HISTONAME
            INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau"+HISTONAME
            bins = [""]
            binLabels = ["Inclusive"]
        else:
            for hname in histonames:
                binIndex = hname.replace("NormalizationMETBaselineTau"+HISTONAME,"")
                # print "DEBUG: We are looking for hisrogram "+COMBINEDHISTODIR+"/"+BASELINETAUHISTONAME+binIndex
                hDummy = dsetMgr.getDataset("Data").getDatasetRootHisto(COMBINEDHISTODIR+"/"+BASELINETAUHISTONAME+binIndex).getHistogram()
                title = hDummy.GetTitle()
                title = title.replace("METBaseline"+HISTONAME,"")
                if hDummy.Integral() > 0.0:
                    bins.append(binIndex)
                    if binIndex == "Inclusive":
                        binLabels.append(binIndex)
                    else:
                        binLabels.append(QCDNormalization.getModifiedBinLabelString(title))
                    # Take the fit range from the first usable histogram.
                    if FITMIN == None:
                        FITMIN = hDummy.GetXaxis().GetXmin()
                        FITMAX = hDummy.GetXaxis().GetXmax()
                    hDummy.Delete()
                else:
                    print "Skipping bin '%s' (%s) because it has no entries"%(binIndex, QCDNormalization.getModifiedBinLabelString(title))
        print "\nHistogram bins available",bins

        # Select bins by filter
        if len(selectOnlyBins) > 0:
            oldBinLabels = binLabels[:]
            oldBins = bins[:]
            binLabels = []
            bins = []
            for k in selectOnlyBins:
                for i in range(len(oldBinLabels)):
                    if k == oldBinLabels[i] or k == oldBins[i]:
                        binLabels.append(oldBinLabels[i])
                        bins.append(oldBins[i])
        print "Using bins ",bins
        print "\nBin labels"
        for i in range(len(binLabels)):
            line = bins[i]
            while len(line) < 10:
                line += " "
            line += ": "+binLabels[i]
            print line
        print

        #===== Initialize normalization calculator
        #manager = QCDNormalization.QCDNormalizationManagerExperimental1(binLabels)
        manager = QCDNormalization.QCDNormalizationManagerDefault(binLabels, dirs[0], moduleInfoString)

        #===== Create templates (EWK fakes, EWK genuine, QCD; data template is created by manager)
        template_EWKFakeTaus_Baseline = manager.createTemplate("EWKFakeTaus_Baseline")
        template_EWKFakeTaus_Inverted = manager.createTemplate("EWKFakeTaus_Inverted")
        template_EWKGenuineTaus_Baseline = manager.createTemplate("EWKGenuineTaus_Baseline")
        template_EWKGenuineTaus_Inverted = manager.createTemplate("EWKGenuineTaus_Inverted")
        template_EWKInclusive_Baseline = manager.createTemplate("EWKInclusive_Baseline")
        template_EWKInclusive_Inverted = manager.createTemplate("EWKInclusive_Inverted")
        template_QCD_Baseline = manager.createTemplate("QCD_Baseline")
        template_QCD_Inverted = manager.createTemplate("QCD_Inverted")
        template_FakeTau_Baseline = manager.createTemplate("FakeTau_Baseline")
        template_FakeTau_Inverted = manager.createTemplate("FakeTau_Inverted")
        if "QCDMC" in dsetMgr.getMCDatasetNames():
            template_QCDMC_Baseline = manager.createTemplate("QCDMC_Baseline")
            template_QCDMC_Inverted = manager.createTemplate("QCDMC_Inverted")

        #===== Define fit functions and fit parameters
        # The available functions are defined in the FitFunction class in the QCDMeasurement/python/QCDNormalization.py file
        # Inclusive EWK
        # The function is essentially pure Gaussian up to boundary value, and exponential after that, i.e.
        #   A*Gaus(x, mean,sigma) when x > boundary_x
        #   A*Gaus(boundary_x, mean,sigma)*exp(-beta*x)
        # par[0] = overall normalization A
        # par[1] = mean
        # par[3] = sigma
        # par[4] = beta in the exponential tail
        boundary = 170 # 170 good for RtauMore, 180 better for RtauLess
        # QCD
        template_EWKInclusive_Baseline.setFitter(QCDNormalization.FitFunction("EWKFunction", boundary=boundary, norm=1, rejectPoints=1),FITMIN, FITMAX)
        template_EWKInclusive_Baseline.setDefaultFitParam(defaultLowerLimit=[0.5, 90, 30, 0.0001],
                                                          defaultUpperLimit=[ 30, 250, 60, 1.0])
        # Note that the same function is used for QCD only and QCD+EWK fakes (=Fake Tau)
        # Old function, used until May 2017
        # template_QCD_Inverted.setFitter(QCDNormalization.FitFunction("QCDFunction", norm=1), FITMIN, FITMAX)
        # template_QCD_Inverted.setDefaultFitParam(defaultLowerLimit=[ 30, 0.1, 0.1, 0, 10, 0.0, 0.0001],
        #                                          defaultUpperLimit=[ 130, 20, 20, 200, 200, 1.0, 1.0])
        # Latest version of the Rayleigh peak with shift + Gaussian + Exponential combination, used from March 2018, i.e.
        #   A*((x-b)/sigma^2)*exp((x-b)^2/(2*sigma^2))+B*Gaus(x,mean,sigma2)+C*exp(-beta*x)
        # par[0] sigma for Rayielgh term
        # par[1] overall normalization A
        # par[2] peak shift b for Rayleigh term
        # par[3] normalization B for the Gaussian term
        # par[4] normalization C for the exponential tail
        # par[5] mean for Gaussian term
        # par[6] sigma2 for Gaussian term
        # par[7] beta for exponential tail
        template_QCD_Inverted.setFitter(QCDNormalization.FitFunction("QCDFunctionWithPeakShiftClear", norm=1), FITMIN, FITMAX)
        # Works for 1pr:
        template_QCD_Inverted.setDefaultFitParam(defaultLowerLimit=[ 30, 0.1, -10, 0, -20, 10, 0.0001, 0.0001],
                                                 defaultUpperLimit=[ 130, 20, 10, 20, 200, 100, 1.0, 0.05])
        # Attempt for 3pr:
        # template_QCD_Inverted.setDefaultFitParam(defaultLowerLimit=[ 30, 1, -20, 0, 20, 10, 0.0001, 0.0001],
        #                                          defaultUpperLimit=[130, 10, 20, 15, 200, 50, 1.0, 0.05])

        #===== Loop over tau pT bins
        for i,binStr in enumerate(bins):
            print "\n********************************"
            print "*** Fitting bin %s"%binLabels[i]
            print "********************************\n"

            #===== Reset bin results
            manager.resetBinResults()

            #===== Obtain histograms for normalization
            # Data
            histoName = COMBINEDHISTODIR+"/"+BASELINETAUHISTONAME+binStr
            hmetBase_data = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("Data").getRootHisto().Clone(histoName)
            histoName = COMBINEDHISTODIR+"/"+INVERTEDTAUHISTONAME+binStr
            hmetInverted_data = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("Data").getRootHisto().Clone(histoName)
            # EWK genuine taus
            histoName = GENUINEHISTODIR+"/"+BASELINETAUHISTONAME+binStr
            hmetBase_EWK_GenuineTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            histoName = GENUINEHISTODIR+"/"+INVERTEDTAUHISTONAME+binStr
            hmetInverted_EWK_GenuineTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            # EWK fake taus
            histoName = FAKEHISTODIR+"/"+BASELINETAUHISTONAME+binStr
            hmetBase_EWK_FakeTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            histoName = FAKEHISTODIR+"/"+INVERTEDTAUHISTONAME+binStr
            hmetInverted_EWK_FakeTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)

            #===== Obtain inclusive EWK histograms
            hmetBase_EWKinclusive = hmetBase_EWK_GenuineTaus.Clone("EWKinclusiveBase")
            hmetBase_EWKinclusive.Add(hmetBase_EWK_FakeTaus, 1.0)
            hmetInverted_EWKinclusive = hmetInverted_EWK_GenuineTaus.Clone("EWKinclusiveInv")
            hmetInverted_EWKinclusive.Add(hmetInverted_EWK_FakeTaus, 1.0)

            #===== Obtain QCDMC histograms (not used, only for cross check)
            histoName = QCDMCHISTODIR+"/"+BASELINETAUHISTONAME+binStr
            hmetBase_QCDMC = None
            if plots.DataMCPlot(dsetMgr, histoName).histoMgr.hasHisto("QCDMC"):
                hmetBase_QCDMC = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("QCDMC").getRootHisto().Clone(histoName)
            histoName = QCDMCHISTODIR+"/"+INVERTEDTAUHISTONAME+binStr
            hmetInverted_QCDMC = None
            if plots.DataMCPlot(dsetMgr, histoName).histoMgr.hasHisto("QCDMC"):
                hmetInverted_QCDMC = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("QCDMC").getRootHisto().Clone(histoName)

            # Finalize histograms by rebinning
            for histogram in [hmetBase_data, hmetInverted_data, hmetBase_EWK_GenuineTaus, hmetInverted_EWK_GenuineTaus, hmetBase_EWKinclusive, hmetBase_EWK_FakeTaus, hmetInverted_EWK_FakeTaus, hmetInverted_EWKinclusive]:
                histogram.Rebin(_rebinFactor)

            #===== Obtain histograms for QCD (subtract MC EWK events from data)
            # QCD from baseline is usable only as a cross check
            hmetBase_QCD = hmetBase_data.Clone("QCDbase")
            hmetBase_QCD.Add(hmetBase_EWKinclusive,-1)
            hmetInverted_QCD = hmetInverted_data.Clone("QCDinv")
            hmetInverted_QCD.Add(hmetInverted_EWKinclusive,-1)
            # FakeTau = data - EWK genuine taus (QCD + EWK fakes remain).
            hmetBase_FakeTau = hmetBase_data.Clone("QCDbase")
            hmetBase_FakeTau.Add(hmetBase_EWK_GenuineTaus,-1)
            hmetInverted_FakeTau = hmetInverted_data.Clone("QCDinv")
            hmetInverted_FakeTau.Add(hmetInverted_EWK_GenuineTaus,-1)

            #===== Set histograms to the templates
            template_EWKFakeTaus_Inverted.setHistogram(hmetInverted_EWK_FakeTaus, binLabels[i])
            template_EWKGenuineTaus_Inverted.setHistogram(hmetInverted_EWK_GenuineTaus, binLabels[i])
            template_EWKInclusive_Inverted.setHistogram(hmetInverted_EWKinclusive, binLabels[i])
            template_QCD_Inverted.setHistogram(hmetInverted_QCD, binLabels[i])
            template_FakeTau_Inverted.setHistogram(hmetInverted_FakeTau, binLabels[i])
            if hmetInverted_QCDMC:
                template_QCDMC_Inverted.setHistogram(hmetInverted_QCDMC, binLabels[i])
            template_EWKFakeTaus_Baseline.setHistogram(hmetBase_EWK_FakeTaus, binLabels[i])
            template_EWKGenuineTaus_Baseline.setHistogram(hmetBase_EWK_GenuineTaus, binLabels[i])
            template_EWKInclusive_Baseline.setHistogram(hmetBase_EWKinclusive, binLabels[i])
            template_QCD_Baseline.setHistogram(hmetBase_QCD, binLabels[i])
            template_FakeTau_Baseline.setHistogram(hmetBase_FakeTau, binLabels[i])
            if hmetBase_QCDMC:
                template_QCDMC_Baseline.setHistogram(hmetBase_QCDMC, binLabels[i])

            #===== Make plots of templates
            manager.plotTemplates()

            #===== Fit individual templates to data
            fitOptions = "R B L W M" # RBLWM
            manager.calculateNormalizationCoefficients(hmetBase_data, fitOptions, FITMIN, FITMAX)

            #===== Calculate combined normalisation coefficient (f_fakes = w*f_QCD + (1-w)*f_EWKfakes)
            # Obtain histograms
            histoName = "ForDataDrivenCtrlPlots/shapeTransverseMass/shapeTransverseMass"+binStr
            dataMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("Data").getRootHisto().Clone(histoName)
            treatNegativeBins(dataMt, "Data_inverted mT")
            histoName = "ForDataDrivenCtrlPlotsEWKFakeTaus/shapeTransverseMass/shapeTransverseMass"+binStr
            ewkFakeTausMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            treatNegativeBins(ewkFakeTausMt, "ewkFakeTaus_inverted mT")
            histoName = "ForDataDrivenCtrlPlotsEWKGenuineTaus/shapeTransverseMass/shapeTransverseMass"+binStr
            ewkGenuineTausMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            treatNegativeBins(ewkGenuineTausMt, "ewkGenuineTaus_inverted mT")
            # QCD mT shape = data - EWK fakes - EWK genuine.
            qcdMt = dataMt.Clone("QCD")
            qcdMt.Add(ewkFakeTausMt, -1)
            qcdMt.Add(ewkGenuineTausMt, -1)
            treatNegativeBins(qcdMt, "QCD_inverted mT")
            # Do calculation
            manager.calculateCombinedNormalizationCoefficient(qcdMt, ewkFakeTausMt)

        #===== Save normalization
        outFileName = "QCDNormalizationFactors_%s_%s.py"%(HISTONAME, moduleInfoString)
        outFileFullName = os.path.join(argv[1],outFileName)
        manager.writeScaleFactorFile(outFileFullName, moduleInfoString)
def main():
    """Entry point: read datasets from the multicrab directory given as the
    first command-line argument, prune and merge them, then produce the
    standard plots and counters.

    Relies on module-level configuration (dataEra, searchMode, analysis,
    optMode, QCDfromData) and on the helpers usage(), doPlots(), doCounters().
    """
    if len(sys.argv) < 2:
        usage()
    dirs = []
    dirs.append(sys.argv[1])

    # Read the datasets
    # datasets = dataset.getDatasetsFromMulticrabDirs(dirs,counters=counters, dataEra=dataEra, analysisBaseName="signalAnalysisInvertedTau")
    datasets = dataset.getDatasetsFromMulticrabDirs(dirs, dataEra=dataEra, searchMode=searchMode, analysisName=analysis, optimizationMode=optMode)
    # datasets = dataset.getDatasetsFromMulticrabDirs(dirs,counters=counters)
    # datasets = dataset.getDatasetsFromMulticrabCfg(counters=counters, dataEra=dataEra)
    # datasets.updateNAllEventsToPUWeighted()
    datasets.loadLuminosities()
    datasets.updateNAllEventsToPUWeighted()

    # Take QCD from data
    datasetsQCD = None
    if QCDfromData:
        # datasetsQCD = dataset.getDatasetsFromMulticrabCfg(cfgfile="/home/rkinnune/signalAnalysis/CMSSW_4_2_8_patch2/src/HiggsAnalysis/NtupleAnalysis/test/multicrab_111123_132128/multicrab.cfg", counters=counters)
        # NOTE(review): hard-coded AFS path — verify it is still reachable.
        datasetsQCD = dataset.getDatasetsFromMulticrabCfg(cfgfile="/afs/cern.ch/work/e/epekkari/DataDrivenFakeTaus/CMSSW_5_3_9_patch3/src/HiggsAnalysis/NtupleAnalysis/test/multicrab_140526_122821/multicrab.cfg")
        datasetsQCD.loadLuminosities()
        print "QCDfromData", QCDfromData
        # Keep only the (merged) data and relabel it as the QCD estimate.
        datasetsQCD.mergeData()
        datasetsQCD.remove(datasetsQCD.getMCDatasetNames())
        datasetsQCD.rename("Data", "QCD")

    #for d in datasets.getAllDatasets():
    #    print d.getName()
    #print "-------"
    #plots.mergeRenameReorderForDataMC(datasets)
    # print "Int.Lumi",datasets.getDataset("Data").getLuminosity()

    # Remove signals other than M120, plus the exclusive ttbar decay samples
    datasets.remove(filter(lambda name: "TTToHplus" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "HplusTB" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_t-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_tW-channel" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_SemiLept" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_FullLept" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "TTJets_Hadronic" in name, datasets.getAllDatasetNames()))
    plots.mergeRenameReorderForDataMC(datasets)

    # Merge the MC electroweak backgrounds into one "EWK" dataset; keep the
    # source datasets around as well (keepSources=True).
    datasets.merge("EWK", ["WJets", "DYJetsToLL", "SingleTop", "Diboson", "TTJets"], keepSources=True)
    datasets.remove(filter(lambda name: "W2Jets" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "W3Jets" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "W4Jets" in name, datasets.getAllDatasetNames()))
    datasets.remove(filter(lambda name: "Hplus_taunu_s-channel" in name, datasets.getAllDatasetNames()))
    # Remove QCD
    #datasets.remove(filter(lambda name: "QCD" in name, datasets.getAllDatasetNames()))

    # Copy kept for the (currently disabled) LandS mt-histogram output below.
    datasets_lands = datasets.deepCopy()

    # Set the signal cross sections to the ttbar for datasets for lands
    # xsect.setHplusCrossSectionsToTop(datasets_lands)
    # Set the signal cross sections to a given BR(t->H), BR(h->taunu)
    xsect.setHplusCrossSectionsToBR(datasets, br_tH=0.01, br_Htaunu=1)
    # Set the signal cross sections to a value from MSSM
    # xsect.setHplusCrossSectionsToMSSM(datasets, tanbeta=20, mu=200)
    plots.mergeWHandHH(datasets) # merging of WH and HH signals must be done after setting the cross section

    # Apply TDR style
    style = tdrstyle.TDRStyle()

    # Create plots
    doPlots(datasets)

    # Write mt histograms to ROOT file
    # writeTransverseMass(datasets_lands)

    # Print counters
    doCounters(datasets)
def main(): if len(sys.argv) < 2: usage() dirs = [] dirs.append(sys.argv[1]) # Read the datasets # datasets = dataset.getDatasetsFromMulticrabDirs(dirs,counters=counters, dataEra=dataEra, analysisBaseName="signalAnalysisInvertedTau") datasets = dataset.getDatasetsFromMulticrabDirs(dirs, dataEra=dataEra, searchMode=searchMode, analysisName=analysis, optimizationMode=optMode) # datasets = dataset.getDatasetsFromMulticrabDirs(dirs,counters=counters) # datasets = dataset.getDatasetsFromMulticrabCfg(counters=counters, dataEra=dataEra) # datasets.updateNAllEventsToPUWeighted() datasets.loadLuminosities() datasets.updateNAllEventsToPUWeighted() # Take QCD from data datasetsQCD = None if QCDfromData: #datasetsQCD = dataset.getDatasetsFromMulticrabCfg(cfgfile="/home/rkinnune/signalAnalysis/CMSSW_4_2_8_patch2/src/HiggsAnalysis/NtupleAnalysis/test/multicrab_111123_132128/multicrab.cfg", counters=counters) datasetsQCD = dataset.getDatasetsFromMulticrabCfg( cfgfile= "/afs/cern.ch/work/e/epekkari/DataDrivenFakeTaus/CMSSW_5_3_9_patch3/src/HiggsAnalysis/NtupleAnalysis/test/multicrab_140526_122821/multicrab.cfg" ) datasetsQCD.loadLuminosities() print "QCDfromData", QCDfromData datasetsQCD.mergeData() datasetsQCD.remove(datasetsQCD.getMCDatasetNames()) datasetsQCD.rename("Data", "QCD") #for d in datasets.getAllDatasets(): # print d.getName() #print "-------" #plots.mergeRenameReorderForDataMC(datasets) # print "Int.Lumi",datasets.getDataset("Data").getLuminosity() # Remove signals other than M120 datasets.remove( filter(lambda name: "TTToHplus" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "HplusTB" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "Hplus_taunu_t-channel" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "Hplus_taunu_tW-channel" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "TTJets_SemiLept" in name, datasets.getAllDatasetNames())) datasets.remove( 
filter(lambda name: "TTJets_FullLept" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "TTJets_Hadronic" in name, datasets.getAllDatasetNames())) plots.mergeRenameReorderForDataMC(datasets) datasets.merge("EWK", ["WJets", "DYJetsToLL", "SingleTop", "Diboson", "TTJets"], keepSources=True) datasets.remove( filter(lambda name: "W2Jets" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "W3Jets" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "W4Jets" in name, datasets.getAllDatasetNames())) datasets.remove( filter(lambda name: "Hplus_taunu_s-channel" in name, datasets.getAllDatasetNames())) # Remove QCD #datasets.remove(filter(lambda name: "QCD" in name, datasets.getAllDatasetNames())) datasets_lands = datasets.deepCopy() # Set the signal cross sections to the ttbar for datasets for lands # xsect.setHplusCrossSectionsToTop(datasets_lands) # Set the signal cross sections to a given BR(t->H), BR(h->taunu) xsect.setHplusCrossSectionsToBR(datasets, br_tH=0.01, br_Htaunu=1) # Set the signal cross sections to a value from MSSM # xsect.setHplusCrossSectionsToMSSM(datasets, tanbeta=20, mu=200) plots.mergeWHandHH( datasets ) # merging of WH and HH signals must be done after setting the cross section # Apply TDR style style = tdrstyle.TDRStyle() # Create plots doPlots(datasets) # Write mt histograms to ROOT file # writeTransverseMass(datasets_lands) # Print counters doCounters(datasets)
def main(argv, dsetMgr, moduleInfoString):
    """Derive QCD normalization factors from a pre-built dataset manager.

    For each comparison point, fits MET templates (QCD from the inverted tau
    isolation region, EWK from simulation) to baseline-selection data in each
    tau-pT bin, then writes the resulting scale factors to
    QCDNormalizationFactors_<histoname>_<moduleInfoString>.py under argv[1].

    argv             -- command-line arguments; argv[1] is the multicrab directory
    dsetMgr          -- dataset manager created and passed in by the caller
    moduleInfoString -- era/searchMode/optMode tag used in output file names
    """
    # Histogram source directories inside the analysis ROOT files.
    COMBINEDHISTODIR = "ForQCDNormalization"
    FAKEHISTODIR = "ForQCDNormalizationEWKFakeTaus"
    GENUINEHISTODIR = "ForQCDNormalizationEWKGenuineTaus"
    comparisonList = ["AfterStdSelections"]
    dirs = []
    dirs.append(sys.argv[1])

    # Check multicrab consistency
    consistencyCheck.checkConsistencyStandalone(dirs[0], dsetMgr, name="QCD inverted")
    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    dsetMgr.updateNAllEventsToPUWeighted()
    # Read integrated luminosities of data dsetMgr from lumi.json
    dsetMgr.loadLuminosities()

    # Include only 120 mass bin of HW and HH dsetMgr
    dsetMgr.remove(filter(lambda name: "TTToHplus" in name and not "M120" in name, dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(filter(lambda name: "HplusTB" in name, dsetMgr.getAllDatasetNames()))

    # Default merging and ordering of data and MC dsetMgr:
    # all data -> "Data", all QCD -> "QCD", all single top -> "SingleTop",
    # WW, WZ, ZZ -> "Diboson"
    plots.mergeRenameReorderForDataMC(dsetMgr)

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(dsetMgr, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH dsetMgr to one (for each mass bin)
    plots.mergeWHandHH(dsetMgr)

    # Merge MC EWK samples as one EWK sample
    myMergeList = []
    if "TT" in dsetMgr.getMCDatasetNames():
        myMergeList.append("TT") # Powheg, no neg. weights -> large stats.
    else:
        myMergeList.append("TTJets") # Madgraph with negative weights
        print "Warning: using TTJets as input, but this is suboptimal. Please switch to the TT sample (much more stats.)."
    myMergeList.append("WJetsHT")
    myMergeList.append("DYJetsToLLHT")
    myMergeList.append("SingleTop")
    if "Diboson" in dsetMgr.getMCDatasetNames():
        myMergeList.append("Diboson")
    # NOTE(review): this warning prints even when a diboson sample exists and
    # was just added above — it likely belongs in an else-branch of the check
    # above; confirm against the original (pre-mangling) source.
    print "Warning: ignoring diboson sample (since it does not exist) ..."
    for item in myMergeList:
        # Fail fast if any requested EWK component is missing.
        if not item in dsetMgr.getMCDatasetNames():
            raise Exception("Error: tried to use dataset '%s' as part of the merged EWK dataset, but the dataset '%s' does not exist!" % (item, item))
    dsetMgr.merge("EWK", myMergeList)

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    for HISTONAME in comparisonList:
        BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME
        INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME
        # Fit range; filled from the x-axis of the first non-empty histogram.
        FITMIN = None
        FITMAX = None

        #===== Infer binning information and labels
        histonames = dsetMgr.getDataset("Data").getDirectoryContent(COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME)
        bins = []
        binLabels = []
        if histonames == None:
            # Assume that only inclusive bin exists
            name = COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME
            if not dsetMgr.getDataset("Data").hasRootHisto(name):
                raise Exception("Error: Cannot find histogram or directory of name '%s'!" % name)
            BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME
            INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME
            bins = [""]
            binLabels = ["Inclusive"]
        else:
            for hname in histonames:
                binIndex = hname.replace("NormalizationMETBaselineTau" + HISTONAME, "")
                hDummy = dsetMgr.getDataset("Data").getDatasetRootHisto(COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binIndex).getHistogram()
                title = hDummy.GetTitle()
                title = title.replace("METBaseline" + HISTONAME, "")
                if hDummy.Integral() > 0.0:
                    bins.append(binIndex)
                    if binIndex == "Inclusive":
                        binLabels.append(binIndex)
                    else:
                        binLabels.append(QCDNormalization.getModifiedBinLabelString(title))
                    # Take the fit range from the first accepted histogram.
                    if FITMIN == None:
                        FITMIN = hDummy.GetXaxis().GetXmin()
                        FITMAX = hDummy.GetXaxis().GetXmax()
                    hDummy.Delete()
                else:
                    print "Skipping bin '%s' (%s) because it has no entries" % (binIndex, QCDNormalization.getModifiedBinLabelString(title))
        print "\nHistogram bins available", bins

        # Select bins by filter
        if len(selectOnlyBins) > 0:
            oldBinLabels = binLabels[:]
            oldBins = bins[:]
            binLabels = []
            bins = []
            for k in selectOnlyBins:
                for i in range(len(oldBinLabels)):
                    if k == oldBinLabels[i] or k == oldBins[i]:
                        binLabels.append(oldBinLabels[i])
                        bins.append(oldBins[i])
        print "Using bins ", bins
        print "\nBin labels"
        for i in range(len(binLabels)):
            # Pad the bin identifier to a fixed 10-character column.
            line = bins[i]
            while len(line) < 10:
                line += " "
            line += ": " + binLabels[i]
            print line
        print

        #===== Initialize normalization calculator
        #manager = QCDNormalization.QCDNormalizationManagerExperimental1(binLabels)
        manager = QCDNormalization.QCDNormalizationManagerDefault(binLabels, dirs[0], moduleInfoString)

        #===== Create templates (EWK fakes, EWK genuine, QCD; data template is created by manager)
        template_EWKFakeTaus_Baseline = manager.createTemplate("EWKFakeTaus_Baseline")
        template_EWKFakeTaus_Inverted = manager.createTemplate("EWKFakeTaus_Inverted")
        template_EWKGenuineTaus_Baseline = manager.createTemplate("EWKGenuineTaus_Baseline")
        template_EWKGenuineTaus_Inverted = manager.createTemplate("EWKGenuineTaus_Inverted")
        template_EWKInclusive_Baseline = manager.createTemplate("EWKInclusive_Baseline")
        template_EWKInclusive_Inverted = manager.createTemplate("EWKInclusive_Inverted")
        template_QCD_Baseline = manager.createTemplate("QCD_Baseline")
        template_QCD_Inverted = manager.createTemplate("QCD_Inverted")

        #===== Define fit functions and fit parameters
        # The available functions are defined in the FitFunction class in the
        # QCDMeasurement/python/QCDNormalization.py file.
        # Commented-out fitter for EWK fake taus, since only the fit on
        # inclusive EWK is used to obtain w_QCD:
        #boundary = 100
        #template_EWKFakeTaus_Baseline.setFitter(QCDNormalization.FitFunction("EWKFunctionInv", boundary=boundary, norm=1, rejectPoints=1),
        #                                        FITMIN, FITMAX)
        #template_EWKFakeTaus_Baseline.setDefaultFitParam(defaultInitialValue=[10.0, 100, 45, 0.02],
        #                                                 defaultLowerLimit=[0.1, 70, 10, 0.001],
        #                                                 defaultUpperLimit=[30, 300, 100, 0.1])
        # Commented-out fitter for EWK genuine taus, since only the fit on
        # inclusive EWK is used to obtain w_QCD:
        #boundary = 150
        #template_EWKGenuineTaus_Baseline.setFitter(QCDNormalization.FitFunction("EWKFunction", boundary=boundary, norm=1, rejectPoints=1),
        #                                           FITMIN, FITMAX)
        #template_EWKGenuineTaus_Baseline.setDefaultFitParam(defaultLowerLimit=[0.5, 90, 30, 0.0001],
        #                                                    defaultUpperLimit=[20, 150, 50, 1.0])
        # Inclusive EWK
        boundary = 150
        template_EWKInclusive_Baseline.setFitter(QCDNormalization.FitFunction("EWKFunction", boundary=boundary, norm=1, rejectPoints=1), FITMIN, FITMAX)
        template_EWKInclusive_Baseline.setDefaultFitParam(defaultLowerLimit=[0.5, 90, 30, 0.0001], defaultUpperLimit=[ 20, 150, 50, 1.0])
        # Note that the same function is used for QCD only and QCD+EWK fakes
        template_QCD_Inverted.setFitter(QCDNormalization.FitFunction("QCDFunction", norm=1), FITMIN, FITMAX)
        template_QCD_Inverted.setDefaultFitParam(defaultLowerLimit=[0.0001, 0.001, 0.1, 0.0, 10, 0.0001, 0.001], defaultUpperLimit=[ 200, 10, 10, 150, 100, 1, 0.05])

        #===== Loop over tau pT bins
        for i, binStr in enumerate(bins):
            print "\n********************************"
            print "*** Fitting bin %s" % binLabels[i]
            print "********************************\n"

            #===== Reset bin results
            manager.resetBinResults()

            #===== Obtain histograms for normalization
            # Data
            histoName = COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binStr
            hmetBase_data = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("Data").getRootHisto().Clone(histoName)
            histoName = COMBINEDHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr
            hmetInverted_data = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("Data").getRootHisto().Clone(histoName)
            # EWK genuine taus
            histoName = GENUINEHISTODIR + "/" + BASELINETAUHISTONAME + binStr
            hmetBase_EWK_GenuineTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            histoName = GENUINEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr
            hmetInverted_EWK_GenuineTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            # EWK fake taus
            histoName = FAKEHISTODIR + "/" + BASELINETAUHISTONAME + binStr
            hmetBase_EWK_FakeTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            histoName = FAKEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr
            hmetInverted_EWK_FakeTaus = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)

            # Finalize histograms by rebinning
            for histogram in [hmetBase_data, hmetInverted_data, hmetBase_EWK_GenuineTaus, hmetInverted_EWK_GenuineTaus, hmetBase_EWK_FakeTaus, hmetInverted_EWK_FakeTaus]:
                histogram.Rebin(_rebinFactor)

            #===== Obtain inclusive EWK histograms (genuine + fake taus)
            hmetBase_EWKinclusive = hmetBase_EWK_GenuineTaus.Clone("EWKinclusiveBase")
            hmetBase_EWKinclusive.Add(hmetBase_EWK_FakeTaus, 1.0)
            hmetInverted_EWKinclusive = hmetInverted_EWK_GenuineTaus.Clone("EWKinclusiveInv")
            hmetInverted_EWKinclusive.Add(hmetInverted_EWK_FakeTaus, 1.0)

            #===== Obtain histograms for QCD (subtract MC EWK events from data)
            # QCD from baseline is usable only as a cross check
            hmetBase_QCD = hmetBase_data.Clone("QCDbase")
            hmetBase_QCD.Add(hmetBase_EWKinclusive, -1)
            hmetInverted_QCD = hmetInverted_data.Clone("QCDinv")
            hmetInverted_QCD.Add(hmetInverted_EWKinclusive, -1)

            #===== Set histograms to the templates
            template_EWKFakeTaus_Inverted.setHistogram(hmetInverted_EWK_FakeTaus, binLabels[i])
            template_EWKGenuineTaus_Inverted.setHistogram(hmetInverted_EWK_GenuineTaus, binLabels[i])
            template_EWKInclusive_Inverted.setHistogram(hmetInverted_EWKinclusive, binLabels[i])
            template_QCD_Inverted.setHistogram(hmetInverted_QCD, binLabels[i])
            template_EWKFakeTaus_Baseline.setHistogram(hmetBase_EWK_FakeTaus, binLabels[i])
            template_EWKGenuineTaus_Baseline.setHistogram(hmetBase_EWK_GenuineTaus, binLabels[i])
            template_EWKInclusive_Baseline.setHistogram(hmetBase_EWKinclusive, binLabels[i])
            template_QCD_Baseline.setHistogram(hmetBase_QCD, binLabels[i])

            #===== Make plots of templates
            manager.plotTemplates()

            #===== Fit individual templates to data
            fitOptions = "R B" # RBLW
            manager.calculateNormalizationCoefficients(hmetBase_data, fitOptions, FITMIN, FITMAX)

            #===== Calculate combined normalisation coefficient (f_fakes = w*f_QCD + (1-w)*f_EWKfakes)
            # Obtain histograms
            histoName = "ForDataDrivenCtrlPlots/shapeTransverseMass/shapeTransverseMass" + binStr
            dataMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("Data").getRootHisto().Clone(histoName)
            treatNegativeBins(dataMt, "Data_inverted mT")
            histoName = "ForDataDrivenCtrlPlotsEWKFakeTaus/shapeTransverseMass/shapeTransverseMass" + binStr
            ewkFakeTausMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            treatNegativeBins(ewkFakeTausMt, "ewkFakeTaus_inverted mT")
            histoName = "ForDataDrivenCtrlPlotsEWKGenuineTaus/shapeTransverseMass/shapeTransverseMass" + binStr
            ewkGenuineTausMt = plots.DataMCPlot(dsetMgr, histoName).histoMgr.getHisto("EWK").getRootHisto().Clone(histoName)
            treatNegativeBins(ewkGenuineTausMt, "ewkGenuineTaus_inverted mT")
            # QCD mT shape = data minus all EWK (fake + genuine taus).
            qcdMt = dataMt.Clone("QCD")
            qcdMt.Add(ewkFakeTausMt, -1)
            qcdMt.Add(ewkGenuineTausMt, -1)
            treatNegativeBins(qcdMt, "QCD_inverted mT")
            # Do calculation
            manager.calculateCombinedNormalizationCoefficient(qcdMt, ewkFakeTausMt)

        #===== Save normalization
        outFileName = "QCDNormalizationFactors_%s_%s.py" % (HISTONAME, moduleInfoString)
        print argv[1], outFileName
        outFileFullName = os.path.join(argv[1], outFileName)
        manager.writeScaleFactorFile(outFileFullName, moduleInfoString)
def main(argv):
    """Fit MET templates with the InvertedTauID machinery and write the
    QCD-inverted normalization factors and a LaTeX fit summary.

    argv -- command-line arguments; the multicrab directory is read from
            sys.argv[1] (argv itself is not used directly in this variant).
    """
    # Histogram source directories inside the analysis ROOT files.
    COMBINEDHISTODIR = "ForQCDNormalization"
    FAKEHISTODIR = "ForQCDNormalizationEWKFakeTaus"
    GENUINEHISTODIR = "ForQCDNormalizationEWKGenuineTaus"
    comparisonList = ["AfterStdSelections"]
    dirs = []
    if len(sys.argv) < 2:
        usage()
    dirs.append(sys.argv[1])

    # Create all dsetMgr from a multicrab task
    dsetMgr = dataset.getDatasetsFromMulticrabDirs(dirs, dataEra=dataEra, searchMode=searchMode, analysisName=analysis)
    #print dsetMgr

    # Check multicrab consistency
    consistencyCheck.checkConsistencyStandalone(dirs[0], dsetMgr, name="QCD inverted")
    # As we use weighted counters for MC normalisation, we have to
    # update the all event count to a separately defined value because
    # the analysis job uses skimmed pattuple as an input
    dsetMgr.updateNAllEventsToPUWeighted()
    # Read integrated luminosities of data dsetMgr from lumi.json
    dsetMgr.loadLuminosities()

    # Include only 120 mass bin of HW and HH dsetMgr
    dsetMgr.remove(filter(lambda name: "TTToHplus" in name and not "M120" in name, dsetMgr.getAllDatasetNames()))
    dsetMgr.remove(filter(lambda name: "HplusTB" in name, dsetMgr.getAllDatasetNames()))

    # Default merging and ordering of data and MC dsetMgr:
    # all data -> "Data", all QCD -> "QCD", all single top -> "SingleTop",
    # WW, WZ, ZZ -> "Diboson"
    plots.mergeRenameReorderForDataMC(dsetMgr)

    # Set BR(t->H) to 0.05, keep BR(H->tau) in 1
    xsect.setHplusCrossSectionsToBR(dsetMgr, br_tH=0.05, br_Htaunu=1)

    # Merge WH and HH dsetMgr to one (for each mass bin)
    plots.mergeWHandHH(dsetMgr)

    # Merge the MC electroweak samples into a single "EWK" dataset.
    dsetMgr.merge("EWK", [
        "TTJets",
        "WJetsHT",
        "DYJetsToLL",
        "SingleTop",
        #"Diboson"
    ])

    # Apply TDR style
    style = tdrstyle.TDRStyle()
    style.setOptStat(True)

    for HISTONAME in comparisonList:
        BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME + "/NormalizationMETBaselineTau" + HISTONAME
        INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME + "/NormalizationMETInvertedTau" + HISTONAME

        #===== Infer binning information and labels
        histonames = dsetMgr.getDataset("Data").getDirectoryContent(COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME)
        bins = []
        binLabels = []
        if histonames == None:
            # Assume that only inclusive bin exists
            name = COMBINEDHISTODIR + "/NormalizationMETBaselineTau" + HISTONAME
            if not dsetMgr.getDataset("Data").hasRootHisto(name):
                raise Exception("Error: Cannot find histogram or directory of name '%s'!" % name)
            BASELINETAUHISTONAME = "NormalizationMETBaselineTau" + HISTONAME
            INVERTEDTAUHISTONAME = "NormalizationMETInvertedTau" + HISTONAME
            bins = [""]
            binLabels = ["Inclusive"]
        else:
            for hname in histonames:
                bins.append(hname.replace("NormalizationMETBaselineTau" + HISTONAME, ""))
                title = dsetMgr.getDataset("Data").getDatasetRootHisto(COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + "/" + hname).getHistogram().GetTitle()
                title = title.replace("METBaseline" + HISTONAME, "")
                binLabels.append(formatHistoTitle(title))
        print "\nHistogram bins available", bins
        print "Using bins ", bins
        print "\nBin labels"
        for i in range(len(binLabels)):
            # Pad the bin identifier to a fixed 10-character column.
            line = bins[i]
            while len(line) < 10:
                line += " "
            line += ": " + binLabels[i]
            print line
        print

        #===== Initialize normalization calculator
        invertedQCD = InvertedTauID()
        invertedQCD.setLumi(dsetMgr.getDataset("Data").getLuminosity())
        invertedQCD.setInfo([dataEra, searchMode, HISTONAME])

        #===== Loop over tau pT bins
        for i, binStr in enumerate(bins):
            print "\n********************************"
            print "*** Fitting bin %s" % binLabels[i]
            print "********************************\n"
            invertedQCD.resetBinResults()
            invertedQCD.setLabel(binLabels[i])

            #===== Obtain histograms for normalization
            metBase = plots.DataMCPlot(dsetMgr, COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            metInver = plots.DataMCPlot(dsetMgr, COMBINEDHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            metBase_GenuineTaus = plots.DataMCPlot(dsetMgr, GENUINEHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            metInver_GenuineTaus = plots.DataMCPlot(dsetMgr, GENUINEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            metBase_FakeTaus = plots.DataMCPlot(dsetMgr, FAKEHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            metInver_FakeTaus = plots.DataMCPlot(dsetMgr, FAKEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)

            #===== Rebin histograms before subtracting
            RebinFactor = 2 # Aim for 10 GeV binning
            metBase.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))
            metBase_GenuineTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver_GenuineTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))
            metBase_FakeTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))
            metInver_FakeTaus.histoMgr.forEachHisto(lambda h: h.getRootHisto().Rebin(RebinFactor))

            #===== Obtain templates for data and EWK
            metInverted_data = metInver.histoMgr.getHisto("Data").getRootHisto().Clone(COMBINEDHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            treatHistogram(metInverted_data, "Data, inverted")
            metInverted_EWK_GenuineTaus = metInver_GenuineTaus.histoMgr.getHisto("EWK").getRootHisto().Clone(GENUINEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            treatHistogram(metInverted_EWK_GenuineTaus, "EWK genuine taus, inverted")
            metInverted_EWK_FakeTaus = metInver_FakeTaus.histoMgr.getHisto("EWK").getRootHisto().Clone(FAKEHISTODIR + "/" + INVERTEDTAUHISTONAME + binStr)
            treatHistogram(metInverted_EWK_FakeTaus, "EWK fake taus, inverted")
            metBase_data = metBase.histoMgr.getHisto("Data").getRootHisto().Clone(COMBINEDHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            treatHistogram(metBase_data, "Data, baseline")
            metBase_EWK_GenuineTaus = metBase_GenuineTaus.histoMgr.getHisto("EWK").getRootHisto().Clone(GENUINEHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            treatHistogram(metBase_EWK_GenuineTaus, "EWK genuine taus, baseline")
            metBase_EWK_FakeTaus = metBase_FakeTaus.histoMgr.getHisto("EWK").getRootHisto().Clone(FAKEHISTODIR + "/" + BASELINETAUHISTONAME + binStr)
            treatHistogram(metBase_EWK_FakeTaus, "EWK fake taus, baseline")

            #===== Obtain templates for QCD (subtract MC EWK events from data)
            # QCD from baseline is usable only as a cross check
            #metBase_QCD = metBase_data.Clone("QCD")
            #metBase_QCD.Add(metBase_EWK_GenuineTaus,-1)
            #metBase_QCD.Add(metBase_EWK_FakeTaus,-1)
            #addLabels(metBase_QCD, "QCD, baseline")
            metInverted_QCD = metInverted_data.Clone("QCD")
            metInverted_QCD.Add(metInverted_EWK_GenuineTaus, -1)
            metInverted_QCD.Add(metInverted_EWK_FakeTaus, -1)
            treatHistogram(metInverted_QCD, "QCD, inverted")

            #===== Make plots of templates
            print "\n*** Integrals of plotted templates"
            #invertedQCD.plotHisto(metInverted_data,"template_Data_Inverted")
            #invertedQCD.plotHisto(metInverted_EWK_GenuineTaus,"template_EWKGenuineTaus_Inverted")
            #invertedQCD.plotHisto(metInverted_EWK_FakeTaus,"template_EWKFakeTaus_Inverted")
            invertedQCD.plotHisto(metInverted_QCD, "template_QCD_Inverted")
            invertedQCD.plotHisto(metBase_data, "template_Data_Baseline")
            invertedQCD.plotHisto(metBase_EWK_GenuineTaus, "template_EWKGenuineTaus_Baseline")
            invertedQCD.plotHisto(metBase_EWK_FakeTaus, "template_EWKFakeTaus_Baseline")
            #invertedQCD.plotHisto(metBase_QCD,"template_QCD_Baseline")

            #===== Fit individual templates and
            # Fit first templates for QCD, EWK_genuine_taus, and EWK_fake_taus
            # Then fit the shape of those parametrizations to baseline data to obtain normalization coefficients
            fitOptions = "RB"
            # Strategy: take EWK templates from baseline and QCD template from inverted; then fit to baseline data
            invertedQCD.fitEWK_GenuineTaus(metInverted_EWK_GenuineTaus, fitOptions)
            invertedQCD.fitEWK_GenuineTaus(metBase_EWK_GenuineTaus, fitOptions)
            invertedQCD.fitEWK_FakeTaus(metInverted_EWK_FakeTaus, fitOptions)
            invertedQCD.fitEWK_FakeTaus(metBase_EWK_FakeTaus, fitOptions)
            invertedQCD.fitQCD(metInverted_QCD, fitOptions)
            invertedQCD.fitData(metBase_data)

            #===== Calculate normalization
            invertedQCD.getNormalization()

        # Per-comparison summary and output files.
        invertedQCD.Summary()
        invertedQCD.WriteNormalizationToFile("QCDInvertedNormalizationFactorsFilteredEWKFakeTaus.py")
        invertedQCD.WriteLatexOutput("fits.tex")