def harvestEm(self, channel='wenu', charge='both'):
    cmb = ch.CombineHarvester()
    # Read all the cards.
    # CH stores metadata about each object (Observation, Process, Systematic);
    # this is extracted from the card names with some regex
    for card in glob.glob(self.bindir + ('/%s_mass*.txt' % channel)):
        cmb.QuickParseDatacard(card, """%s_mass(?<MASS>\d+)_$CHANNEL.card.txt""" % channel)
    # Need a unique bin name for each plus/minus, pt and eta combination.
    # We extracted this part of the datacard name into the channel variable above,
    # so we can just copy it and override the specific bin name that was in all the cards
    cmb.ForEachObj(lambda obj: obj.set_bin(obj.channel()))
    # We'll have three copies of the observation, one for each mass point.
    # Filter all but one copy.
    cmb.FilterObs(lambda obj: obj.mass() != '%d' % self.mwcentral)
    # Create a workspace to hold the morphing pdfs and the mass
    w = ROOT.RooWorkspace('morph', 'morph')
    mass = w.factory('mw[{mwrange}]'.format(mwrange=self.mwrange))
    # BuildRooMorphing will dump a load of debug plots here
    debug = ROOT.TFile(self.bindir + '/debug.root', 'RECREATE')
    # Run for each bin,process combination (only for signal!)
    for b in cmb.bin_set():
        for p in cmb.cp().bin([b]).signals().process_set():
            morphing.BuildRooMorphing(w, cmb, b, p, mass, verbose=True, file=debug)
    # Just to be safe
    mass.setConstant(True)
    # Now the workspace is copied into the CH instance and the pdfs attached to the processes
    # (this relies on us knowing that BuildRooMorphing will name the pdfs in a particular way)
    cmb.AddWorkspace(w, True)
    cmb.cp().process(['W']).ExtractPdfs(cmb, 'morph', '$BIN_$PROCESS_morph', '')
    # Adjust the rateParams a bit - we currently have three for each bin (one for each mass),
    # but we only want one. Easiest to drop the existing ones completely and create new ones
    cmb.syst_type(['rateParam'], False)
    cmb.cp().process(['W']).AddSyst(cmb, 'norm_$BIN', 'rateParam', ch.SystMap()(1.00))
    # Have to set the range by hand
    for sys in cmb.cp().syst_type(['rateParam']).syst_name_set():
        cmb.GetParameter(sys).set_range(0.5, 1.5)
    # Print the contents of the model
    cmb.PrintAll()
    # Write out the cards, one per bin
    outdir = self.bindir + '/wenu_cards_morphed_{charge}'.format(charge=charge)
    writer = ch.CardWriter('$TAG/$BIN.txt', '$TAG/shapes.root')
    writer.SetVerbosity(1)
    writer.WriteCards(outdir, cmb)
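# For orientation, a minimal driver sketch for the method above. The class name
# and constructor are hypothetical assumptions (not from the source); only the
# attributes bindir, mwcentral and mwrange are implied by the accesses above.
fitter = WMassFitter()            # hypothetical class providing harvestEm()
fitter.bindir = 'cards/wenu'      # directory holding the %s_mass*.txt cards (illustrative path)
fitter.mwcentral = 80385          # mass point whose Observation is kept (illustrative value)
fitter.mwrange = '80350,80420'    # range handed to the 'mw' morphing variable (illustrative)
fitter.harvestEm(channel='wenu', charge='plus')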
def __init__(self, input_filename):
    if not os.path.exists(input_filename):
        logger.fatal("File %s does not exist.", input_filename)
        raise Exception
    self._input_filename = input_filename
    self._shapes = self._get_shapes()
    logger.info("Found %d shapes in input file %s.", len(self._shapes), self._input_filename)
    self._cb = ch.CombineHarvester()
    self._shapes_extracted = False
def __init__(self, cb=None):
    super(Datacards, self).__init__()
    self.cb = cb
    if self.cb is None:
        self.cb = ch.CombineHarvester()
    if log.isEnabledFor(logging.DEBUG):
        self.cb.SetVerbosity(1)
    self.configs = datacardconfigs.DatacardConfigs()
    self.stable_options = r"--robustFit 1 --preFitValue 1.0 --cminDefaultMinimizerType Minuit2 --cminDefaultMinimizerAlgo Minuit2 --cminDefaultMinimizerStrategy 0 --cminFallbackAlgo Minuit2,0:1.0"
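# A hedged usage sketch of the constructor above: the helper either owns its
# CombineHarvester instance or wraps one handed in by the caller via cb.
import CombineHarvester.CombineTools.ch as ch

# Default: the helper builds its own CombineHarvester.
datacards = Datacards()

# Alternative sketch: share an externally prepared instance.
cb = ch.CombineHarvester()
datacards_shared = Datacards(cb=cb)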
def main():
    options = options_()
    for cutkey in options.cut:
        print 'cutkey : ', cutkey
        ### get M_A and M_H ###
        mH = float(options.mH_list[cutkey])
        mA = float(options.mA_list[cutkey])
        print mH, mA
        """Main function"""
        # start the timer
        tstart = datetime.now()
        print 'starting...'
        # get the options
        #options = get_options()
        intL = options.lumi  # in pb-1
        #tag = 'v1.2.0+7415-19-g7bbca78_ZAAnalysis_1a69757'
        #path = '/nfs/scratch/fynu/amertens/cmssw/CMSSW_7_4_15/src/cp3_llbb/CommonTools/histFactory/16_01_28_syst/build'
        tag = 'v1.1.0+7415-83-g2a9f912_ZAAnalysis_2ff9261'
        #tag = 'v1.1.0+7415-57-g4bff5ea_ZAAnalysis_b1377a8'
        path = options.path
        CHANNEL = options.CHANNEL
        ERA = options.ERA
        MASS = str(mH) + "_" + str(mA)
        ANALYSIS = options.ANALYSIS
        DEBUG = 0

        c = ch.CombineHarvester()
        cats = [(0, "mmbbSR" + cutkey),
                (1, "mll_mmbbBR" + cutkey),
                (2, "eebbSR" + cutkey),
                (3, "mll_eebbBR" + cutkey)]
        bins = {}
        bins['signalregion_mm'] = "mmbbSR" + cutkey
        bins['mll_bkgregion_mm'] = "mll_mmbbBR" + cutkey
        bins['signalregion_ee'] = "eebbSR" + cutkey
        bins['mll_bkgregion_ee'] = "mll_eebbBR" + cutkey

        processes = {}
        p = Process('data_obs')
        #DoubleMuon_Run2015D_v1.1.0+7415-57-g4bff5ea_ZAAnalysis_b1377a8_histos.root
        p.prepare_process(path, 'data_obs', 'DoubleMuon_DoubleEG_Run2015D', tag)
        processes['data_obs'] = p
        if DEBUG:
            print p

        # define signal
        # define backgrounds
        # zz
        p = Process('zz')
        p.prepare_process(path, 'zz', 'ZZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8_MiniAODv2', tag)
        processes['zz'] = p
        if DEBUG:
            print p
        # ttbar
        p = Process('ttbar')
        p.prepare_process(path, 'ttbar', 'TTTo2L2Nu_13TeV-powheg_MiniAODv2', tag)
        processes['ttbar'] = p
        if DEBUG:
            print p
        '''
        # drell-yan
        p = Process('dy1')
        p.prepare_process(path, 'dy1', 'DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX_MiniAODv2', tag)
        processes['dy1'] = p
        if DEBUG: print p
        '''
        p = Process('dy2')
        p.prepare_process(path, 'dy2', 'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX_MiniAODv2', tag)
        processes['dy2'] = p
        if DEBUG:
            print p

        c.AddObservations([MASS], [ANALYSIS], [ERA], [CHANNEL], cats)
        c.AddProcesses([MASS], [ANALYSIS], [ERA], [CHANNEL], ['ZA'], cats, True)
        c.AddProcesses([MASS], [ANALYSIS], [ERA], [CHANNEL], ['ttbar', 'dy2', 'zz'], cats, False)

        c.cp().process(['ttbar', 'dy2', 'ZA']).AddSyst(
            c, "lumi", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA], [0, 1, 2, 3], 1.046))
        c.cp().process(['ttbar', 'dy2', 'ZA']).AddSyst(
            c, "trig", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA], [0, 1, 2, 3], 1.04))
        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "btag", "shape", ch.SystMap()(1.0))
        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "jec", "shape", ch.SystMap()(1.0))
        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "jer", "shape", ch.SystMap()(1.0))
        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "pu", "shape", ch.SystMap()(1.0))
        c.cp().process(['ttbar']).AddSyst(c, "TTpdf", "shape", ch.SystMap()(1.0))
        c.cp().process(['dy2']).AddSyst(c, "DYpdf", "shape", ch.SystMap()(1.0))
        c.cp().process(['dy2']).AddSyst(
            c, "DYnorm", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA], [0, 1], 1.1))
        c.cp().process(['ttbar']).AddSyst(
            c, "TTnorm", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA], [0], 1.1))

        nChannels = len(bins)
        nBackgrounds = len([processes[x] for x in processes if processes[x].type > 0])
        nNuisances = 1

        systematics = {
            '': '',
            '_btagUp': '__btagup', '_btagDown': '__btagdown',
            '_jecUp': '__jecup', '_jecDown': '__jecdown',
            '_jerUp': '__jerup', '_jerDown': '__jerdown',
            '_puUp': '__puup', '_puDown': '__pudown',
            '_TTpdfUp': '__pdfup', '_TTpdfDown': '__pdfdown',
            '_DYpdfUp': '__pdfup', '_DYpdfDown': '__pdfdown'
        }

        outputRoot = "shapes.root"
        f = TFile(outputRoot, "recreate")
        f.Close()
        for b in bins:
            print b, bins[b]
            for p in processes:
                if p == 'data_obs':
                    file_in = TFile(processes[p].file, "READ")
                    print " Getting ", bins[b], " in file ", processes[p].file
                    h = file_in.Get(bins[b])
                    h.SetDirectory(0)
                    file_in.Close()
                    f = TFile(outputRoot, "update")
                    h.SetName("hist_" + bins[b] + "_" + p)
                    h.Write()
                    f.Write()
                    f.Close()
                else:
                    for s1, s2 in systematics.iteritems():
                        file_in = TFile(processes[p].file, "READ")
                        print " Getting ", bins[b] + s2, " in file ", processes[p].file
                        h = file_in.Get(bins[b] + s2)
                        h.SetDirectory(0)
                        file_in.Close()
                        f = TFile(outputRoot, "update")
                        h.SetName("hist_" + bins[b] + "_" + p + s1)
                        h.Sumw2()
                        #h.Scale(processes[p].xsection * intL / processes[p].sumW)
                        h.Scale(intL)
                        h.Write()
                        f.Write()
                        f.Close()

        # Fill signal histograms. FIXME: read efficiencies from eff.root
        eff_file = TFile("eff.root", "READ")
        effee_hist = eff_file.Get("effee")
        eff_ee = effee_hist.Interpolate(mA, mH)
        effmm_hist = eff_file.Get("effmm")
        eff_mm = effmm_hist.Interpolate(mA, mH)
        print "lumi : ", options.lumifb
        print "eff at ", mA, mH, ":", eff_ee, eff_mm
        print "ZA yields: ", options.lumifb * eff_mm, options.lumifb * eff_ee

        f = TFile(outputRoot, "update")
        h1 = TH1F("hist_" + bins['signalregion_mm'] + "_ZA",
                  "hist_" + bins['signalregion_mm'] + "_ZA", 1, 0, 1)
        h1.Fill(0.5, options.lumifb * eff_mm)
        h1.Write()
        h2 = TH1F("hist_" + bins['mll_bkgregion_mm'] + "_ZA",
                  "hist_" + bins['mll_bkgregion_mm'] + "_ZA", 60, 60, 120)
        h2.Write()
        h3 = TH1F("hist_" + bins['signalregion_ee'] + "_ZA",
                  "hist_" + bins['signalregion_ee'] + "_ZA", 1, 0, 1)
        h3.Fill(0.5, options.lumifb * eff_ee)
        h3.Write()
        h4 = TH1F("hist_" + bins['mll_bkgregion_ee'] + "_ZA",
                  "hist_" + bins['mll_bkgregion_ee'] + "_ZA", 60, 60, 120)
        h4.Write()
        f.Write()
        f.Close()

        c.cp().backgrounds().ExtractShapes(outputRoot, "hist_$BIN_$PROCESS",
                                           "hist_$BIN_$PROCESS_$SYSTEMATIC")
        c.cp().signals().ExtractShapes(outputRoot, "hist_$BIN_$PROCESS",
                                       "hist_$BIN_$PROCESS_$SYSTEMATIC")

        writer = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$CHANNEL_$ERA.dat',
                               '$TAG/common/$ANALYSIS_$CHANNEL_$MASS.input_$ERA.root')
        writer.WriteCards('CARDS/', c)
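# For reference: in the CardWriter patterns above, $TAG expands to the directory
# passed to WriteCards, while $MASS/$ANALYSIS/$CHANNEL/$ERA come from the metadata
# attached in AddObservations/AddProcesses. A hedged variant (not from the source)
# writing one card per bin instead, using the $BIN placeholder CardWriter supports:
writer_per_bin = ch.CardWriter('$TAG/$BIN.txt', '$TAG/common/shapes_$BIN.root')
writer_per_bin.SetVerbosity(1)
writer_per_bin.WriteCards('CARDS_perBin/', c)  # output directory name is illustrative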
ROOT.gSystem.Load('libHiggsAnalysisCombinedLimit')

### WORKSPACE PATH [CHANGE BOTH WORKSPACE AND MLFIT ROOT FILE AT THE SAME TIME]
### ======= CONSTRAINED =========
# fin = ROOT.TFile('output/CONSTRAINED-svfit/cmb/wsp.root')
# fin = ROOT.TFile('output/CONSTRAINED-mvis/cmb/wsp.root')
### ======= UNCONSTRAINED =========
# fin = ROOT.TFile('output/LIMITS-svfit/cmb/wsp.root')
fin = ROOT.TFile('workspace.root')
wsp = fin.Get('w')

cmb = ch.CombineHarvester()
cmb.SetFlag("workspaces-use-clone", True)
ch.ParseCombineWorkspace(cmb, wsp, 'ModelConfig', 'data_obs', False)

### MLFIT ROOT FILE PATH [CHANGE BOTH WORKSPACE AND MLFIT ROOT FILE AT THE SAME TIME]
### ======= CONSTRAINED =========
# mlf = ROOT.TFile('output/CONSTRAINED-svfit/cmb/mlfit.Test.root')
# mlf = ROOT.TFile('output/CONSTRAINED-mvis/cmb/mlfit.Test.root')
### ======= UNCONSTRAINED =========
# mlf = ROOT.TFile('output/LIMITS-svfit/cmb/mlfit.Test.root')
mlf = ROOT.TFile('mlfit.root')
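# The usual continuation of this pattern (a hedged sketch following the recipe in
# CombineHarvester's post-fit plotting scripts) is to pull the fit result out of
# the mlfit file and propagate it into the harvester; the bin name below is
# purely illustrative and not from this workspace.
fit = mlf.Get('fit_s')       # RooFitResult saved by the maximum-likelihood fit
cmb.UpdateParameters(fit)    # set nuisance parameters to their post-fit values

# Post-fit shape with uncertainty, sampling the covariance matrix 500 times.
postfit_hist = cmb.cp().bin(['htt_mt_1_13TeV']).GetShapeWithUncertainty(fit, 500)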
def prepareShapes(backgrounds, signals, discriminant, discriminantName):
    # backgrounds is a list of strings of the considered backgrounds corresponding to entries in processes_mapping
    # signals is a list of strings of the considered signals corresponding to entries in processes_mapping
    # discriminant is the corresponding entry in the dictionary discriminants
    import CombineHarvester.CombineTools.ch as ch

    root_path = options.root_path
    file, systematics = prepareFile(processes_mapping, discriminants, root_path, discriminantName)
    if options.dataYear != '2016':
        call(['python', 'symmetrize.py', options.output, file, options.dataYear], shell=False)

    for signal in signals:
        cb = ch.CombineHarvester()
        cb.AddObservations(['*'], [''], ['_%s' % options.dataYear], [''], discriminant)
        cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''], [signal], discriminant, True)
        #cb.AddProcesses(['*'], [''], ['_%s'%options.dataYear], [''], backgrounds, discriminant, False)
        if options.dataYear == '2016':
            cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''], backgrounds, discriminant, False)
        else:
            if not 'b2j3' in discriminantName:
                try:
                    backgrounds.remove('qcd')
                except:
                    pass
            else:
                if not 'qcd' in backgrounds:
                    backgrounds.append('qcd')
            if 'all' in discriminantName:
                if signal == 'Hut':
                    discriminant.remove((1, 'DNN_Hut_b2j3'))
                    cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                                    backgrounds + ['qcd'], [(1, 'DNN_Hut_b2j3')], False)
                else:
                    discriminant.remove((1, 'DNN_Hct_b2j3'))
                    cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                                    backgrounds + ['qcd'], [(1, 'DNN_Hct_b2j3')], False)
                cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                                backgrounds, discriminant, False)
                if signal == 'Hut':
                    discriminant.append((1, 'DNN_Hut_b2j3'))
                else:
                    discriminant.append((1, 'DNN_Hct_b2j3'))
            else:
                cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                                backgrounds, discriminant, False)

        # Systematics
        if not options.nosys:
            for systematic in systematics:
                systematic_only_for_SMtt = False
                systematic_only_for_Sig = False
                for systSMtt in options.sysForSMtt:
                    if CMSNamingConvention(systSMtt) == systematic:
                        systematic_only_for_SMtt = True
                for systSig in options.sysForSig:
                    if CMSNamingConvention(systSig) == systematic:
                        systematic_only_for_Sig = True
                if not systematic_only_for_SMtt and not systematic_only_for_Sig:
                    cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap()(1.00))
                elif systematic_only_for_SMtt and not systematic_only_for_Sig:
                    cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('process')(smTTlist, 1.00))
                    #if 'hdamp' in systematic:
                    #    for i in xrange(len(discriminant)):
                    #        if 'b2j3' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttlf'], 1.00))
                    #            cb.cp().AddSyst(cb, systematic, 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb','ttcc'], 1.05))
                    #        elif 'b2j4' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], smTTlist, 1.00))
                    #        elif 'b3j3' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttlf'], 1.00))
                    #            cb.cp().AddSyst(cb, systematic, 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb','ttcc'], 1.05))
                    #        elif 'b3j4' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], smTTlist, 1.00))
                    #        elif 'b4j4' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.00))
                    #            cb.cp().AddSyst(cb, systematic, 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc','ttlf'], 1.05))
                    #else: cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('process')(smTTlist, 1.00))
                elif not systematic_only_for_SMtt and systematic_only_for_Sig:
                    cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('process')([signal], 1.00))
                else:
                    cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('process')(smTTlist + [signal], 1.00))

            #Lumi corr. https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM#LumiComb
            #cb.cp().AddSyst(cb, 'CMS_lumi', 'lnN', ch.SystMap()(options.luminosityError))
            if options.dataYear == '2016':
                cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2016', 'lnN', ch.SystMap()(1.01))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_161718', 'lnN', ch.SystMap()(1.006))
                #reproducing 2016
                #cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2016', 'lnN', ch.SystMap()(1.027))
            elif options.dataYear == '2017':
                cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2017', 'lnN', ch.SystMap()(1.02))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_161718', 'lnN', ch.SystMap()(1.009))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_1718', 'lnN', ch.SystMap()(1.006))
            elif options.dataYear == '2018':
                cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2018', 'lnN', ch.SystMap()(1.015))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_161718', 'lnN', ch.SystMap()(1.02))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_1718', 'lnN', ch.SystMap()(1.002))

            cb.cp().AddSyst(cb, 'tt_xsec', 'lnN',
                            ch.SystMap('process')(['ttbb', 'ttcc', 'ttlf'], 1.055))
            cb.cp().AddSyst(cb, 'Other_xsec', 'lnN', ch.SystMap('process')(['other'], 1.1))
            #cb.cp().AddSyst(cb, 'hdamp', 'lnN', ch.SystMap('process')(smTTlist, 1.05))
            #cb.cp().AddSyst(cb, 'TuneCP5', 'lnN', ch.SystMap('process')(smTTlist, 1.03))
            for i in xrange(len(discriminant)):
                if 'b2j3' in discriminant[i][1]:
                    cb.cp().AddSyst(cb, '$PROCESS_norm', 'lnN', ch.SystMap('process')(['qcd'], 1.5))

            #reproducing 2016 ### comment out Other_xsec above!
            #if 'b2j3' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b2j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
            #if 'b2j4' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b2j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
            #if 'b3j3' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b3j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
            #if 'b3j4' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b3j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
            #if 'b4j4' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b4j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))

            if options.dataYear == '2016':
                #reproducing 2016
                #cb.cp().AddSyst(cb, 'hdamp_2016', 'lnN', ch.SystMap('process')(['ttbb', 'ttcc', 'ttlf'], 1.05))
                #cb.cp().AddSyst(cb, 'scale_2016', 'lnN', ch.SystMap('process')(['ttbb', 'ttcc', 'ttlf'], 1.15))
                #for i in xrange(len(discriminant)):
                #    if 'j3' in discriminant[i][1]:
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                #        cb.cp().AddSyst(cb, 'jec_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb', 'ttcc', 'ttlf', 'other', signal], 1.01))
                #    else:
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                #        cb.cp().AddSyst(cb, 'jec_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb', 'ttcc', 'ttlf', 'other', signal], 1.05)) #1.05 for j4
                for i in xrange(len(discriminant)):
                    if 'b2' in discriminant[i][1]:
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b2_2016', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                        #cb.cp().AddSyst(cb, '$PROCESS_norm_b2_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b2', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                    elif 'b3' in discriminant[i][1]:
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b3_2016', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                        #cb.cp().AddSyst(cb, '$PROCESS_norm_b3_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b3', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                    elif 'b4' in discriminant[i][1]:
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b4_2016', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                        #cb.cp().AddSyst(cb, '$PROCESS_norm_b4_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b4', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
            else:
                for i in xrange(len(discriminant)):
                    if 'b2' in discriminant[i][1]:
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b2', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.2))
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b2', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                    elif 'b3' in discriminant[i][1]:
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b3', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.22))
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b3', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                    elif 'b4' in discriminant[i][1]:
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b4', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.2))
                        cb.cp().AddSyst(cb, '$PROCESS_norm_b4', 'lnN',
                                        ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                    #if 'j3' in discriminant[i][1]:
                    #    #cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                    #    cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                    #    cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                    #else:
                    #    #cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                    #    cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                    #    cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))

        # Import shapes from ROOT file
        cb.cp().backgrounds().ExtractShapes(file, '$BIN/$PROCESS', '$BIN/$PROCESS__$SYSTEMATIC')
        cb.cp().signals().ExtractShapes(file, '$BIN/$PROCESS', '$BIN/$PROCESS__$SYSTEMATIC')

        #reproducing 2016 - comment out
        if options.dataYear == '2016':
            rebin = ch.AutoRebin().SetBinThreshold(100).SetBinUncertFraction(0.1)
            rebin.Rebin(cb.cp(), cb)
        #elif options.dataYear == '2017':
        #    #rebin_b2j3 = ch.AutoRebin().SetBinThreshold(5400)#.SetBinUncertFraction(0.1)
        #    #rebin_b2j3.Rebin(cb.cp().bin(["DNN_Hut_b2j3", "DNN_Hct_b2j3"]), cb)

        #AutoMCStat
        cb.SetAutoMCStats(cb, 0.1)

        #reproducing 2016
        #print "Treating bbb"
        #bbb = ch.BinByBinFactory()
        #bbb.SetAddThreshold(0.0001)
        #bbb.AddBinByBin(cb.cp().backgrounds(), cb)
        #bbb.AddBinByBin(cb.cp().signals(), cb)

        output_prefix = 'FCNC_%s_Discriminant_%s' % (signal, discriminantName)
        output_dir = os.path.join(options.output, '%s' % (signal))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        fake_mass = '125'

        # Write card
        datacard = os.path.join(output_dir, output_prefix + '.dat')
        cb.cp().mass([fake_mass, "*"]).WriteDatacard(
            os.path.join(output_dir, output_prefix + '.dat'),
            os.path.join(output_dir, output_prefix + '_shapes.root'))

        # Write small script to compute the limit
        workspace_file = os.path.basename(
            os.path.join(output_dir, output_prefix + '_combine_workspace.root'))
        script = """#! /bin/bash

text2workspace.py {datacard} -m {fake_mass} -o {workspace_root}

# Run limit
echo combine -M AsymptoticLimits -n {name} {workspace_root} -S {systematics} #--run blind #-v +2
#combine -M AsymptoticLimits -n {name} {workspace_root} -S {systematics} #--run expected #-v +2
combine -M AsymptoticLimits -n {name} {workspace_root} -S {systematics} #--run blind #-v +2
#combine -H AsymptoticLimits -M HybridNew -n {name} {workspace_root} -S {systematics} --LHCmode LHC-limits --expectedFromGrid 0.5 #for expected, use 0.84 and 0.16
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1))
        script_file = os.path.join(output_dir, output_prefix + '_run_limits.sh')
        with open(script_file, 'w') as f:
            f.write(script)
        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)

        # Write small script for datacard checks
        script = """#! /bin/bash

# Run checks
echo combine -M FitDiagnostics -t -1 --expectSignal 0 {datacard} -n fitDiagnostics_{name}_bkgOnly -m 125 --robustHesse 1 --robustFit=1 --rMin -20 --rMax 20 #--plots
echo python ../../../../HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics_{name}_bkgOnly.root -g fitDiagnostics_{name}_bkgOnly_plots.root
combine -M FitDiagnostics -t -1 --expectSignal 0 {datacard} -n _{name}_bkgOnly -m 125 --robustHesse 1 --robustFit=1 --rMin -20 --rMax 20 #--plots
python ../../../../HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics_{name}_bkgOnly.root -g fitDiagnostics_{name}_bkgOnly_plots.root --skipFitS > fitDiagnostics_{name}_bkgOnly.log
python ../../printPulls.py fitDiagnostics_{name}_bkgOnly_plots.root
combine -M FitDiagnostics -t -1 --expectSignal 1 {datacard} -n _{name}_bkgPlusSig -m 125 --robustHesse 1 --robustFit=1 --rMin -20 --rMax 20 #--plots
python ../../../../HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics_{name}_bkgPlusSig.root -g fitDiagnostics_{name}_bkgPlusSig_plots.root --skipFitB > fitDiagnostics_{name}_bkgPlusSig.log
python ../../printPulls.py fitDiagnostics_{name}_bkgPlusSig_plots.root

#print NLL for check
combineTool.py -M FastScan -w {name}_combine_workspace.root:w -o {name}_nll
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1))
        script_file = os.path.join(output_dir, output_prefix + '_run_closureChecks.sh')
        with open(script_file, 'w') as f:
            f.write(script)
        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)

        # Write small script for impacts
        script = """#! /bin/bash

# Run impacts
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --doInitialFit --robustFit=1 --robustHesse 1 --rMin -20 --rMax 20 -t -1
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --robustFit=1 --robustHesse 1 --doFits --rMin -20 --rMax 20 -t -1 --parallel 32
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 -o {name}_expected_impacts.json --rMin -20 --rMax 20 -t -1
plotImpacts.py -i {name}_expected_impacts.json -o {name}_expected_impacts --per-page 50
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --doInitialFit --robustFit=1 --robustHesse 1 --rMin -20 --rMax 20
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --robustFit=1 --doFits --robustHesse 1 --rMin -20 --rMax 20 --parallel 32
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 -o {name}_impacts.json --rMin -20 --rMax 20
plotImpacts.py -i {name}_impacts.json -o {name}_impacts --per-page 50
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1))
        script_file = os.path.join(output_dir, output_prefix + '_run_impacts.sh')
        with open(script_file, 'w') as f:
            f.write(script)
        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)

        # Write small script for postfit shapes
        script = """#! /bin/bash

# Run postfit
echo combine -M FitDiagnostics {datacard} -n _{name}_postfit --saveNormalizations --saveShapes --saveWithUncertainties --preFitValue 0 --rMin -20 --rMax 20 --robustHesse 1 --robustFit=1 -v 1
combine -M FitDiagnostics {datacard} -n _{name}_postfit --saveNormalizations --saveShapes --saveWithUncertainties --preFitValue 0 --rMin -20 --rMax 20 --robustHesse 1 --robustFit=1 -v 1 #--plots
PostFitShapesFromWorkspace -w {name}_combine_workspace.root -d {datacard} -o postfit_shapes_{name}.root -f fitDiagnostics_{name}_postfit.root:fit_b --postfit --sampling
python ../../convertPostfitShapesForPlotIt.py -i postfit_shapes_{name}.root
$CMSSW_BASE/src/UserCode/HEPToolsFCNC/plotIt/plotIt -o postfit_shapes_{name}_forPlotIt ../../postfit_plotIt_config_{coupling}_{year}.yml -y
$CMSSW_BASE/src/UserCode/HEPToolsFCNC/plotIt/plotIt -o postfit_shapes_{name}_forPlotIt ../../postfit_plotIt_config_{coupling}_{year}_qcd.yml -y
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1),
           coupling=("Hut" if "Hut" in output_prefix else "Hct"),
           year=options.dataYear)
        script_file = os.path.join(output_dir, output_prefix + '_run_postfit.sh')
        with open(script_file, 'w') as f:
            f.write(script)
        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)
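# A hedged driver sketch for the function above, assuming the module-level
# 'discriminants' dictionary and 'processes_mapping' entries referenced in its
# comments; the concrete background/signal lists here are illustrative only.
backgrounds = ['ttbb', 'ttcc', 'ttlf', 'other']  # assumed entries in processes_mapping
signals = ['Hut', 'Hct']
for discriminantName, discriminant in discriminants.items():
    prepareShapes(backgrounds, signals, discriminant, discriminantName)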
def writeCard(input, theLambda, select, region=-1):
    print "writing cards"
    variables = []
    if opt.isResonant:
        variables.append('HHKin_mass_raw')
    else:
        variables.append('MT2')
    #out_dir = opt.outDir
    theOutputDir = "{0}{1}{2}".format(theLambda, select, variables[0])
    dname = "_" + opt.channel + opt.outDir
    out_dir = "cards{1}/{0}/".format(theOutputDir, dname)
    print out_dir
    #in_dir = "/grid_mnt/vol__vol_U__u/llr/cms/ortona/diHiggs/CMSSW_7_4_7/src/KLUBAnalysis/combiner/cards_MuTauprova/HHSM2b0jMcutBDTMT2/"
    cmb1 = ch.CombineHarvester()
    cmb1.SetFlag('workspaces-use-clone', True)
    cmd = "mkdir -p {0}".format(out_dir)
    print cmd
    regionName = ["", "regB", "regC", "regD"]
    regionSuffix = ["SR", "SStight", "OSinviso", "SSinviso"]
    status, output = commands.getstatusoutput(cmd)
    #outFile = opt.outDir+"/chCard{0}{2}_{1}_{3}.txt".format(theLambda,opt.channel,regionName[region+1],select)
    thechannel = "1"
    if opt.channel == "MuTau":
        thechannel = "2"
    elif opt.channel == "TauTau":
        thechannel = "3"
    if "0b0j" in select:
        theCat = "0"
    if "2b0j" in select:
        theCat = "2"
    elif "1b1j" in select:
        theCat = "1"
    elif "boosted" in select:
        theCat = "3"
    outFile = "hh_{0}_C{1}_L{2}_13TeV.txt".format(thechannel, theCat, theLambda)
    file = open("temp.txt", "wb")

    #read config
    categories = []
    #for icat in range(len(input.selections)) :
    #    categories.append((icat, input.selections[icat]))
    categories.append((0, select))
    backgrounds = []
    MCbackgrounds = []
    processes = []
    processes.append(lambdaName)
    inRoot = TFile.Open(opt.filename)
    for bkg in input.background:
        # Add protection against empty processes => If I remove this I could build all bins at once instead of looping on the selections
        templateName = "{0}_{1}_SR_{2}".format(bkg, select, variables[0])
        print templateName
        template = inRoot.Get(templateName)
        if template.Integral() > 0.000001:
            backgrounds.append(bkg)
            processes.append(bkg)
            if bkg != "QCD":
                MCbackgrounds.append(bkg)
    #print backgrounds

    allQCD = False
    allQCDs = [0, 0, 0, 0]
    for regionsuff in range(len(regionSuffix)):
        for ichan in range(len(backgrounds)):
            if "QCD" in backgrounds[ichan]:
                fname = "data_obs"
                if regionSuffix[regionsuff] == "SR":
                    fname = "QCD"
                templateName = "{0}_{1}_{3}_{2}".format(fname, select, variables[0], regionSuffix[regionsuff])
                template = inRoot.Get(templateName)
                #allQCDs.append(template.Integral())
                allQCDs[regionsuff] = allQCDs[regionsuff] + template.Integral()
                iQCD = ichan
            elif regionSuffix[regionsuff] != "SR":
                templateName = "{0}_{1}_{3}_{2}".format(backgrounds[ichan], select, variables[0], regionSuffix[regionsuff])
                template = inRoot.Get(templateName)
                allQCDs[regionsuff] = allQCDs[regionsuff] - template.Integral()
    if allQCDs[0] > 0 and allQCDs[1] > 0 and allQCDs[2] > 0 and allQCDs[3] > 0:
        allQCD = True
    for i in range(4):
        print allQCDs[i]

    # add processes to CH
    # masses -> 125
    # analyses -> Res/non-Res (HHKin_fit, MT2)
    # eras -> 13TeV
    # channels -> mutau/tautau/etau
    # bin -> bjet categories
    #print signals, signals[0]
    cmb1.AddObservations([theLambda.replace(lambdaName, "")], variables, ['13TeV'], [opt.channel], categories)
    cmb1.AddProcesses([theLambda.replace(lambdaName, "")], variables, ['13TeV'], [opt.channel], backgrounds, categories, False)
    cmb1.AddProcesses([theLambda.replace(lambdaName, "")], variables, ['13TeV'], [opt.channel], [lambdaName], categories, True)  #signals[0]

    if region < 0:
        # Systematics (I need to add by hand the shape ones)
        # I could replace theLambda with "signal" here
        #syst = systReader("../config/systematics.cfg", [theLambda], backgrounds, file)
        syst = systReader("../config/systematics.cfg", [lambdaName], backgrounds, file)
        syst.writeOutput(False)
        syst.verbose(True)
        if opt.channel == "TauTau":
            syst.addSystFile("../config/systematics_tautau.cfg")
        elif opt.channel == "MuTau":
            syst.addSystFile("../config/systematics_mutau.cfg")
            #if opt.isResonant:
            #    syst.addSystFile("../config/systematics_resonant.cfg")
            #else:
            #    syst.addSystFile("../config/systematics_nonresonant.cfg")
        elif opt.channel == "ETau":
            syst.addSystFile("../config/systematics_etau.cfg")
            #if opt.isResonant:
            #    syst.addSystFile("../config/systematics_resonant.cfg")
            #else:
            #    syst.addSystFile("../config/systematics_nonresonant.cfg")
        if opt.theory:
            syst.addSystFile("../config/syst_th.cfg")
        syst.writeSystematics()

        for isy in range(len(syst.SystNames)):
            if "CMS_scale_t" in syst.SystNames[isy] or "CMS_scale_j" in syst.SystNames[isy]:
                continue
            for iproc in range(len(syst.SystProcesses[isy])):
                if "/" in syst.SystValues[isy][iproc]:
                    f = syst.SystValues[isy][iproc].split("/")
                    systVal = (float(f[0]), float(f[1]))
                else:
                    systVal = float(syst.SystValues[isy][iproc])
                #print isy, iproc, systVal
                print "adding Syst", systVal, syst.SystNames[isy], syst.SystTypes[isy], "to", syst.SystProcesses[isy][iproc]
                cmb1.cp().process([syst.SystProcesses[isy][iproc]]).AddSyst(
                    cmb1, syst.SystNames[isy], syst.SystTypes[isy],
                    ch.SystMap('channel', 'bin_id')([opt.channel], [0], systVal))

        if opt.shapeUnc > 0:
            jesproc = MCbackgrounds
            jesproc.append(lambdaName)
            if "1b1j" in select and opt.channel == "TauTau":
                jesproc.remove("DY0b")
            cmb1.cp().process(jesproc).AddSyst(cmb1, "CMS_scale_j_13TeV", "shape",
                                               ch.SystMap('channel', 'bin_id')([opt.channel], [0], 1.000))
            cmb1.cp().process(jesproc).AddSyst(cmb1, "CMS_scale_t_13TeV", "shape",
                                               ch.SystMap('channel', 'bin_id')([opt.channel], [0], 1.000))
        cmb1.cp().process(["TT"]).AddSyst(cmb1, "top", "shape",
                                          ch.SystMap('channel', 'bin_id')([opt.channel], [0], 1.000))

        # $BIN --> proc.bin()
        # $PROCESS --> proc.process()
        # $MASS --> proc.mass()
        # $SYSTEMATIC --> syst.name()
        # cmb1.cp().ExtractShapes(
        #     opt.filename,
        #     "$PROCESS_$BIN_{1}_{0}".format(variables[0], regionSuffix[region+1]),
        #     "$PROCESS_$BIN_{1}_{0}_$SYSTEMATIC".format(variables[0], regionSuffix[region+1]))
        cmb1.cp().backgrounds().ExtractShapes(
            opt.filename,
            "$PROCESS_$BIN_{1}_{0}".format(variables[0], regionSuffix[region+1]),
            "$PROCESS_$BIN_{1}_{0}_$SYSTEMATIC".format(variables[0], regionSuffix[region+1]))
        cmb1.cp().signals().ExtractShapes(
            opt.filename,
            "$PROCESS$MASS_$BIN_{1}_{0}".format(variables[0], regionSuffix[region+1]),
            "$PROCESS$MASS_$BIN_{1}_{0}_$SYSTEMATIC".format(variables[0], regionSuffix[region+1]))

        bbb = ch.BinByBinFactory()
        bbb.SetAddThreshold(0.1).SetMergeThreshold(0.5).SetFixNorm(True)
        bbbQCD = ch.BinByBinFactory()
        bbbQCD.SetAddThreshold(0.0).SetMergeThreshold(0.5).SetFixNorm(True)
        if opt.binbybin:
            bbb.MergeBinErrors(cmb1.cp().process(MCbackgrounds))
            bbbQCD.MergeBinErrors(cmb1.cp().process(["QCD"]))
            bbbQCD.AddBinByBin(cmb1.cp().process(["QCD"]), cmb1)
            bbb.AddBinByBin(cmb1.cp().process(MCbackgrounds), cmb1)
        #cmb1.cp().PrintProcs().PrintSysts()

        #outroot = TFile.Open(opt.outDir+"/chCard{0}{2}_{1}_{3}.input.root".format(theLambda,opt.channel,regionName[region+1],select),"RECREATE")
        #outtxt = "hh_{0}_C{1}_L{2}_13TeV.txt".format(theChannel,theCat,theHHLambda)
        outroot = TFile.Open(out_dir + "hh_{0}_C{1}_L{2}_13TeV.input.root".format(thechannel, theCat, theLambda), "RECREATE")
        cmb1.WriteDatacard(out_dir + outFile,
                           out_dir + "hh_{0}_C{1}_L{2}_13TeV.input.root".format(thechannel, theCat, theLambda))
        if allQCD:
            file = open(out_dir + outFile, "a")
            file.write("alpha rateParam {0} QCD (@0*@1/@2) QCD_regB,QCD_regC,QCD_regD".format(select))
    elif allQCD:
        #print thechannel, theCat, theLambda #,regionName2[region+1]
        #outFile = "hh_{0}_C{1}_L{2}_13TeV.txt".format(thechannel,theCat,theLambda)
        #print region, allQCD
        #print regionName2[region+1]
        #print outFile
        #print "hh_"+thechannel#+"_C"+theCat+"_L"+theLambda+"_13TeV_"+regionName[region+1]+".txt"
        #print "hh_"+thechannel+"_C"+theCat#+"_L"+theLambda+"_13TeV_"+regionName[region+1]+".txt"
        #print "hh_"+thechannel+"_C"+theCat+"_L"+theLambda#+"_13TeV_"+regionName[region+1]+".txt"
        #print "hh_"+thechannel+"_C"+theCat+"_L"+theLambda+"_13TeV_"#+regionName[region+1]+".txt"
        #print outFile
        outFile = "hh_{0}_C{1}_L{2}_13TeV_{3}.txt".format(thechannel, theCat, theLambda, regionName[region+1])
        file = open(out_dir + outFile, "wb")
        file.write("imax 1\n")
        file.write("jmax {0}\n".format(len(backgrounds) - 1))
        file.write("kmax *\n")
        file.write("------------\n")
        file.write("shapes * * FAKE\n")
        file.write("------------\n")
        templateName = "data_obs_{1}_{3}_{2}".format(bkg, select, variables[0], regionSuffix[region+1])
        template = inRoot.Get(templateName)
        file.write("bin {0} \n".format(select))
        obs = template.GetEntries()
        file.write("observation {0} \n".format(obs))
        file.write("------------\n")
        file.write("bin ")
        for chan in backgrounds:
            file.write("{0} ".format(select))
        file.write("\n")
        file.write("process ")
        for chan in backgrounds:
            file.write("{0} ".format(chan))
        #file.write("QCD ")
        file.write("\n")
        file.write("process ")
        for chan in range(len(backgrounds)):  # +1 for the QCD
            file.write("{0} ".format(chan + 1))
        file.write("\n")
        file.write("rate ")
        rates = []
        iQCD = -1
        totRate = 0
        for ichan in range(len(backgrounds)):
            if "QCD" in backgrounds[ichan]:
                rates.append(-1)
                iQCD = ichan
            else:
                templateName = "{0}_{1}_{3}_{2}".format(backgrounds[ichan], select, variables[0], regionSuffix[region+1])
                template = inRoot.Get(templateName)
                #print templateName
                brate = template.Integral()
                rates.append(brate)
                totRate = totRate + brate
        if iQCD >= 0:
            rates[iQCD] = TMath.Max(0.0000001, obs - totRate)
        for ichan in range(len(backgrounds)):
            file.write("{0:.4f} ".format(rates[ichan]))
        file.write("\n")
        file.write("------------\n")
        file.write("QCD_{0} rateParam {1} QCD 1 \n".format(regionName[region+1], select))
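# The 'alpha' rateParam appended above encodes the standard ABCD extrapolation:
# the SR QCD yield is B*C/D, with the three control-region yields (QCD_regB,
# QCD_regC, QCD_regD) floating in the fit. As a cross-check, a hedged one-liner
# (not from the source) reproducing that arithmetic from the allQCDs sums, whose
# indices follow the regionSuffix ordering SR, SStight, OSinviso, SSinviso:
qcd_sr_abcd = allQCDs[1] * allQCDs[2] / allQCDs[3]
print "ABCD cross-check: QCD in SR =", qcd_sr_abcd, "(direct estimate:", allQCDs[0], ")"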
def prepareShapesAndCards(options):
    cb = ch.CombineHarvester()

    if options.fit_mode == 'shape_CR1':
        cats = [
            (1, 'SR'),
            (2, 'CR1')
        ]
        print('-- QCD estimation: fit bin-by-bin by assuming shape in CR1 and SR is the same --')
    elif options.fit_mode == 'abcd':
        cats = [
            (1, 'SR'),
            (2, 'CR1'),
            (3, 'VR'),
            (4, 'CR2'),
        ]
        print('-- QCD estimation: bin-by-bin ABCD using the four regions --')

    # object to handle the factorisation of uncertainties among ttbar components
    factTheory = defs.FactorisedTheory(options.fact_theory)

    # factorise shape uncertainties for ttbar components
    theory_shape_systs = []
    for procs, syst in defs.theory_shape_systs:
        for newProcs, newSyst in factTheory.getGrouping(procs, syst):
            theory_shape_systs.append((newProcs, newSyst))

    if options.randomise:
        print("-- Will randomise MC predictions according to MC stat uncertainties!")

    # Process shapes
    processed_shapes = os.path.join(options.output, 'processed_shapes.root')
    QCD_VR_ratios = utils.extractShapes(
        options.input, processed_shapes, defs.tt_bkg + defs.other_bkg,
        defs.sig_processes, options.data, fact_theory=factTheory,
        equal_bins=options.equal_bins, sub_folder=options.sub_folder,
        randomise=options.randomise, rebinSB=options.rebinsb)
    Nbins = len(QCD_VR_ratios)

    cb.AddObservations(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], cats)
    cb.AddProcesses(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], defs.sig_processes, cats, True)
    cb.AddProcesses(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], defs.tt_bkg + defs.other_bkg, cats, False)

    ### QCD estimate: add all "delta" templates
    QCD_processes = ['QCD_bin_{}'.format(i + 1) for i in range(Nbins)]
    cb.AddProcesses(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], QCD_processes, cats, False)

    ### Systematics
    added_theory_systs = []
    added_exp_systs = []

    # Modeling systematics, not on QCD!
    cbWithoutQCD = cb.cp().process_rgx(['QCD.*'], False)

    # Theory rate uncertainties from the JSON file
    if options.rate_systs is not None:
        for json_file in options.rate_systs:
            added_theory_systs += addRateSystematics(cb, json_file, options.sub_folder, factTheory)

    # Experimental rate uncertainties from the JSON file
    if options.exp_rate is not None:
        for json_file in options.exp_rate:
            added_exp_systs += addRateSystematics(cb, json_file, options.sub_folder)

    # Luminosity
    cbWithoutQCD.AddSyst(cb, 'lumi_$ERA', 'lnN',
                         ch.SystMap('era')(['13TeV_2016'], defs.getLumiUncertainty('13TeV_2016')))
    added_exp_systs.append('lumi_13TeV_2016')

    # Experimental systematics, common for all processes and categories
    for s in defs.exp_systs:
        # If we have added it already as a rate systematic, skip it!
        if s not in added_exp_systs:
            added_exp_systs.append(s)
            cbWithoutQCD.AddSyst(cb, s, 'shape', ch.SystMap()(1.))

    # Theory shape systematics
    for syst in theory_shape_systs:
        if syst[1] not in added_theory_systs:
            added_theory_systs.append(syst[1])
            cbWithoutQCD.cp().process(syst[0]).AddSyst(cb, syst[1], 'shape', ch.SystMap()(1.))

    # Theory rate systematics (not taken from JSON)
    for name, syst in defs.theory_rate_systs.items():
        if not name in added_theory_systs:
            added_theory_systs.append(name)
            cbWithoutQCD.AddSyst(cb, name, syst[0], syst[1])

    ### QCD systematics: add a lnN for each bin using the ratio QCD_subtr/QCD_est in the VR
    if options.QCD_systs:
        print('-- Will apply bin-by-bin uncertainties on QCD estimate from ratio in VR --')
        if options.fit_mode == 'shape_CR1':
            for i in range(1, Nbins + 1):
                # lnN = 1 + abs(1 - QCD_VR_ratios[i-1])
                ratio = QCD_VR_ratios[i - 1]
                lnN = ratio if ratio > 1 else 1. / ratio
                cb.cp().bin(['SR']).process(['QCD_bin_{}'.format(i)]).AddSyst(
                    cb, 'QCD_shape_bin_{}'.format(i), 'lnN', ch.SystMap()(lnN))
        elif options.fit_mode == 'abcd':
            # using max
            # QCD_VR_ratios = [1.1047956681135658, 1.104982852935791, 1.0103355569221637, 1.0365746040205628, 1.027778957040471, 1.1635257239763037, 1.0604289770126343, 1.0326651334762573, 1.0882024148481384, 1.0879310369491577, 1.2372238755691953, 1.1039656400680542, 1.1208300590515137, 1.1252394914627075, 1.0652162084238805, 1.1746360299507677, 1.1441897907967598, 1.032749056816101, 1.1105864995541361, 1.264707088470459, 1.1289979219436646, 1.1032386479572462, 1.3740112781524658, 1.0779788494110107, 1.0679041983173836, 1.1521316766738892, 1.0189466861549783, 1.1371627554677426, 1.180934637513623, 1.0807719230651855, 1.1220710277557373, 1.2163840919860773, 1.1803903579711914, 1.1331188470149183, 1.2841500043869019, 1.124382576013972, 1.2853591442108154, 1.1161022064238948, 1.0491153764429137, 1.3020191192626953, 1.6365387568006153, 1.3135310411453247, 1.183979775003691, 1.3237843031833378, 1.105936050415039, 1.4582525497144114, 1.2740960121154785, 1.1744883060455322, 1.2689180716203021, 1.5666807889938354, 1.1884409189224243, 1.6787212785213594, 1.1295689911887752, 1.2143068313598633, 1.144478440284729]
            # using geometric average
            QCD_VR_ratios = [1.0556093647141687, 1.0658984862062695, 1.0057472468756388, 1.0208612636340562, 1.0185833946498413, 1.1211169739938442, 1.0353973123690785, 1.0258664065695766, 1.0586147959018684, 1.0522305760086619, 1.1354690006073973, 1.072695547895069, 1.0799492240063984, 1.0621373200388462, 1.0593700756267987, 1.1529412209016232, 1.122536304991689, 1.0187320772559685, 1.0972767308832914, 1.175709681780302, 1.0832093989858067, 1.0823283151259013, 1.1831016555993352, 1.054608634579664, 1.0599488955753065, 1.0752925245754967, 1.017269399510584, 1.122209514629158, 1.1702168450787551, 1.0695165830450506, 1.0857979999559528, 1.2041393773004465, 1.1151294041413826, 1.1230274391829085, 1.2502545040629076, 1.1070056845911258, 1.139110776895264, 1.082765772887927, 1.0487710649869804, 1.2332536614187524, 1.4655095128617284, 1.19038044691305, 1.1104215756611893, 1.1838495100927606, 1.0880046566588846, 1.4004062409319984, 1.248629899444753, 1.1411489734003788, 1.1805956668619682, 1.4378115712379096, 1.129952938278906, 1.3437817991926544, 1.0912141233598036, 1.1453139866153634, 1.1135893789689448]
            for i in range(1, Nbins + 1):
                lnN = 1.05
                # lnN = QCD_VR_ratios[i-1]
                cb.cp().bin(['SR']).process(['QCD_bin_{}'.format(i)]).AddSyst(
                    cb, 'QCD_shape_bin_{}'.format(i), 'lnN', ch.SystMap()(lnN))

    # cb.cp().process(['ttlf']).AddSyst(cb, 'ttlf_norm', 'lnN', ch.SystMap()(1.2))

    extraStrForQCD = ''
    # To define nuisance group with all QCD parameters
    paramListQCD = []
    if options.fit_mode == 'shape_CR1':
        ### QCD estimate: fit shape from CR1, normalisation floating
        extraStrForQCD += 'scale_ratio_QCD_CR1_SR extArg 1. [0.,2.]\n'
        paramListQCD.append('scale_ratio_QCD_CR1_SR')
        for i in range(1, Nbins + 1):
            extraStrForQCD += 'yield_QCD_SR_bin_{0} rateParam SR QCD_bin_{0} 1. [0.,2.]\n'.format(i)
            paramListQCD.append('yield_QCD_SR_bin_{}'.format(i))
        for i in range(1, Nbins + 1):
            extraStrForQCD += 'yield_QCD_CR1_bin_{0} rateParam CR1 QCD_bin_{0} (@0*@1) scale_ratio_QCD_CR1_SR,yield_QCD_SR_bin_{0}\n'.format(i)
        if options.QCD_systs:
            for i in range(1, Nbins + 1):
                paramListQCD.append('QCD_shape_bin_{}'.format(i))
    elif options.fit_mode == 'abcd':
        ### QCD estimate: add the rate params for each bin in the CR1, CR2 and VR
        ### The yield in the SR is then expressed as CR1*VR/CR2
        for i in range(1, Nbins + 1):
            extraStrForQCD += 'yield_QCD_CR1_bin_{0} rateParam CR1 QCD_bin_{0} 1. [0.,5.]\n'.format(i)
            extraStrForQCD += 'yield_QCD_CR2_bin_{0} rateParam CR2 QCD_bin_{0} 1. [0.,5.]\n'.format(i)
            extraStrForQCD += 'yield_QCD_VR_bin_{0} rateParam VR QCD_bin_{0} 1. [0.,5.]\n'.format(i)
            extraStrForQCD += 'yield_QCD_SR_bin_{0} rateParam SR QCD_bin_{0} (@0*@1/@2) yield_QCD_VR_bin_{0},yield_QCD_CR1_bin_{0},yield_QCD_CR2_bin_{0}\n'.format(i)
            paramListQCD.append('yield_QCD_CR1_bin_{}'.format(i))
            paramListQCD.append('yield_QCD_CR2_bin_{}'.format(i))
            paramListQCD.append('yield_QCD_VR_bin_{}'.format(i))
    cb.AddDatacardLineAtEnd(extraStrForQCD)

    # Define systematic groups
    syst_groups = {
        "theory": added_theory_systs,
        "exp": added_exp_systs,
        "QCD": paramListQCD,
        "extern": defs.externalised_nuisances,
    }

    def getNuisanceGroupString(groups):
        m_str = ""
        for g in groups:
            m_str += g + ' group = '
            for sys in groups[g]:
                m_str += sys + ' '
            m_str += '\n'
        return m_str

    cb.AddDatacardLineAtEnd(getNuisanceGroupString(syst_groups))

    cb.cp().ExtractShapes(processed_shapes, '$BIN/$PROCESS', '$BIN/$PROCESS_$SYSTEMATIC')

    if options.bbb:
        print('-- Will add bin-by-bin uncertainties for MC statistics --')
        # MC statistics - has to be done after the shapes have been extracted!
        # bbb_bkg = ch.BinByBinFactory().SetVerbosity(5)
        # bbb_bkg.SetAddThreshold(0.02).SetMergeThreshold(0.5).SetFixNorm(False)
        # bbb_bkg.MergeBinErrors(cbWithoutQCD.cp().backgrounds())
        # bbb_bkg.AddBinByBin(cbWithoutQCD.cp().backgrounds(), cb)
        # bbb_sig = ch.BinByBinFactory().SetVerbosity(5).SetFixNorm(False)
        # bbb_sig.MergeBinErrors(cbWithoutQCD.cp().signals())
        # bbb_sig.AddBinByBin(cbWithoutQCD.cp().signals(), cb)
        # bbb = ch.BinByBinFactory().SetVerbosity(5)
        # bbb.SetAddThreshold(0.).SetMergeThreshold(1.).SetFixNorm(False).SetPoissonErrors(True)
        # bbb.MergeBinErrors(cb.cp())
        # bbb.AddBinByBin(cb.cp(), cb)
        # bbb_sig = ch.BinByBinFactory().SetVerbosity(5)
        # bbb_sig.SetAddThreshold(0.).SetMergeThreshold(1.).SetFixNorm(False)
        # bbb_sig.MergeBinErrors(cb.cp().signals())
        # bbb_sig.AddBinByBin(cb.cp().signals(), cb)
        # bbb_bkg = ch.BinByBinFactory().SetVerbosity(5)
        # bbb_bkg.SetAddThreshold(0.).SetMergeThreshold(1.).SetFixNorm(False)
        # bbb_bkg.MergeBinErrors(cb.cp().backgrounds())
        # bbb_bkg.AddBinByBin(cb.cp().backgrounds(), cb)
        # Use combine internal BBB (default: BB lite, merging everything for sig & bkg separately?)
        cb.AddDatacardLineAtEnd("* autoMCStats 5000 0 1\n")
        # cb.AddDatacardLineAtEnd("* autoMCStats 10000000 0 1\n")

    output_dir = options.output
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datacard = os.path.join(output_dir, 'datacard.dat')
    output_shapes = os.path.join(output_dir, 'shapes.root')
    cb.WriteDatacard(datacard, output_shapes)

    initWorkSpace = """#!/bin/bash
if [[ ! -f workspace.root ]]; then
    text2workspace.py datacard.dat -o workspace.root
fi
RMIN=0.
RMAX=5.0
NPOINTS=50
export FIT_OPT=( --freezeNuisanceGroups=extern --cminDefaultMinimizerStrategy 0 --X-rtd MINIMIZER_MaxCalls=999999999 --X-rtd MINIMIZER_analytic --robustFit 1 --cminDefaultMinimizerPrecision 1E-12 )
"""
    if options.data:
        print("-- WILL USE REAL DATA IN SR")
        initWorkSpace += 'export TOY=""\n'
    else:
        print("-- Will use Asimov toy")
        initWorkSpace += 'export TOY="-t -1"\n'

    def createScript(content, filename):
        script_path = os.path.join(output_dir, filename)
        with open(script_path, 'w') as f:
            f.write(initWorkSpace)
            f.write(content)
        # make script executable
        st = os.stat(script_path)
        os.chmod(script_path, st.st_mode | stat.S_IEXEC)

    # Script: simple fit, fit diagnostics, postfit plots, frequentist toys, goodness of fit
    script = """
combine -M MultiDimFit -d workspace.root --rMin $RMIN --rMax $RMAX --expectSignal=1 ${TOY} --saveWorkspace --algo singles --setCrossingTolerance 1E-7 "${FIT_OPT[@]}" 2>&1 | tee fit.log
#combine -M FitDiagnostics -d workspace.root --rMin $RMIN --rMax $RMAX ${TOY} --expectSignal=1 --skipBOnlyFit --saveShapes --saveNormalizations --saveWithUncertainties --plots "${FIT_OPT[@]}" 2>&1 | tee fitDiag.log
#combine -M FitDiagnostics -d workspace.root --rMin $RMIN --rMax $RMAX ${TOY} --expectSignal=1 --skipBOnlyFit --robustHesse 1 "${FIT_OPT[@]}" 2>&1 | tee fitDiag.log
#../plotCovariance.py

# frequentist toys
#combine -M MultiDimFit -d workspace.root --rMin $RMIN --rMax $RMAX --expectSignal $1 -t 1000 -n _freq_$1 --toysFrequentist "${FIT_OPT[@]}" -s -1 > freq_$1.log
#combine -M MultiDimFit -d workspace.root --rMin $RMIN --rMax $RMAX --expectSignal $1 -t 1000 -n _freqNoSyst_$1 --toysNoSystematics "${FIT_OPT[@]}" -s -1 > freqNoSyst_$1.log

# Goodness of fit
#combine -M GoodnessOfFit workspace.root --algo=saturated "${FIT_OPT[@]}"
#parallel --gnu -j 5 -n0 combine -M GoodnessOfFit workspace.root --algo=saturated "${FIT_OPT[@]}" -t 100 -s -1 --toysFreq ::: {1..10}
#hadd higgsCombineTest.GoodnessOfFit.mH120.toys.root higgsCombineTest.GoodnessOfFit.mH120.*.root

# Postfit plots
#PostFitShapesFromWorkspace -d datacard.dat -w workspace.root -o postfit_shapes.root -m 120 -f fitDiagnostics.root:fit_s --postfit --sampling --print 2>&1 | tee postFitRates.log
"""
    createScript(script, 'do_fit.sh')

    # Script: plots of NLL vs. r for different uncertainties
    script = """
RMIN=0.5
RMAX=3.
combine -M MultiDimFit --algo grid --points $NPOINTS --rMin $RMIN --rMax $RMAX ${TOY} --expectSignal=1 -n _nominal workspace.root "${FIT_OPT[@]}"
# stat-only: get best-fit parameters and dataset from saved workspace (so that it also works for post-fit)
combine -M MultiDimFit --algo grid --points $NPOINTS --rMin $RMIN --rMax $RMAX -n _stat -S 0 --snapshotName "MultiDimFit" -d higgsCombineTest.MultiDimFit.mH120.root "${FIT_OPT[@]}"
plot1DScan.py higgsCombine_nominal.MultiDimFit.mH120.root --others 'higgsCombine_stat.MultiDimFit.mH120.root:Freeze all:2' --breakdown syst,stat
"""
    createScript(script, 'do_DeltaNLL_plot.sh')

    # Script: impacts signal injected
    script = """
folder=impacts
mkdir ${folder}
pushd ${folder}
combineTool.py -M Impacts -d ../workspace.root ${TOY} -m 120 --rMin $RMIN --rMax $RMAX --expectSignal=1 --doInitialFit "${FIT_OPT[@]}"
combineTool.py -M Impacts -d ../workspace.root ${TOY} -m 120 --rMin $RMIN --rMax $RMAX --expectSignal=1 --doFits --parallel 6 "${FIT_OPT[@]}" --setParameterRanges CMS_qg_Weight=-2,2 --cminPreScan
combineTool.py -M Impacts -d ../workspace.root -m 120 -o impacts.json
plotImpacts.py -i impacts.json -o impacts
plotImpacts.py -i impacts.json -o impacts_qcd --groups QCD
plotImpacts.py -i impacts.json -o impacts_no_qcd --groups '!QCD' '!extern'
popd
"""
    createScript(script, 'do_impacts.sh')

    # Script: plots of NLL vs. nuisance parameters
    script = """
function scan_param() {{
    combine -M MultiDimFit --algo grid --points 20 -n _$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3:$1={scan} -P $1 "${{FIT_OPT[@]}}" --floatOtherPOIs 1
    plot1DScan.py higgsCombine_$1.MultiDimFit.mH120.root --output scan_$1 --POI $1
    combine -M MultiDimFit --algo grid --points 20 -n _freeze_$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3:$1={scan} -P $1 "${{FIT_OPT[@]}}" -S 0 --floatOtherPOIs 1
    plot1DScan.py higgsCombine_freeze_$1.MultiDimFit.mH120.root --output scan_freeze_$1 --POI $1
    combine -M MultiDimFit --algo grid --points 20 -n _freezeQCD_$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3:$1={scan} -P $1 --freezeNuisanceGroups extern,QCD --floatOtherPOIs 1
    plot1DScan.py higgsCombine_freezeQCD_$1.MultiDimFit.mH120.root --output scan_freezeQCD_$1 --POI $1
}}
export -f scan_param # needed for parallel
mkdir scans
pushd scans
SHELL=/bin/bash parallel --gnu -j 6 scan_param ::: {params}
popd
"""
    createScript(script.format(scan="-2,2", params=" ".join(syst_groups['exp'])), 'do_exp_scans.sh')
    createScript(script.format(scan="-2,2", params=" ".join(syst_groups['theory'])), 'do_theory_scans.sh')

    script = """
function scan_param() {{
    combine -M MultiDimFit --algo grid --points 20 -n _$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3 -P $1 --autoRange 3 "${{FIT_OPT[@]}}" --floatOtherPOIs 1
    plot1DScan.py higgsCombine_$1.MultiDimFit.mH120.root --output scan_$1 --POI $1
    combine -M MultiDimFit --algo grid --points 20 -n _freeze_$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3 -P $1 --autoRange 3 "${{FIT_OPT[@]}}" -S 0 --floatOtherPOIs 1
    plot1DScan.py higgsCombine_freeze_$1.MultiDimFit.mH120.root --output scan_freeze_$1 --POI $1
}}
export -f scan_param # needed for parallel
mkdir scans
pushd scans
SHELL=/bin/bash parallel --gnu -j 6 scan_param ::: {params}
popd
"""
    createScript(script.format(params=" ".join(syst_groups['QCD'])), 'do_QCD_scans.sh')
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import ROOT
ROOT.gSystem.Load("libHiggsAnalysisCombinedLimit")
ROOT.gROOT.SetBatch()

import os
import CombineHarvester.CombineTools.ch as ch
import argparse

p = argparse.ArgumentParser(
    "Script to print out systematics of a given process in a given category for a given 'combined.txt.cmb' datacard. "
    "This does not include additional uncertainties that could be added by the signal model used (e.g. by MSSM models)."
)
p.add_argument("--category", required=True, help="Category to be inspected")
p.add_argument("--process", required=True, help="Process to be inspected")
p.add_argument("--datacard", required=True, help="Path to the datacard 'combined.txt.cmb'")
args = p.parse_args()

cmb_card = ch.CombineHarvester()
cmb_card.SetFlag("workspaces-use-clone", True)
cmb_card.ParseDatacard(args.datacard, "", "", "", 0, "200")
cmb_card.bin([args.category]).cp().PrintProcs()
cmb_card.bin([args.category]).cp().process([args.process]).PrintSysts()
def main(): if len(sys.argv) < 2: print 'Error! No data filename specified (in json format). Exiting...' exit(0) lumi_syst = {'2016': 1.025, '2017': 1.023, '2018': 1.025} trig_MC_syst = {'2016': 1.01, '2017': 1.01, '2018': 1.01} trig_data_syst = {'2016': 1.005, '2017': 1.005, '2018': 1.005} JES_syst = {'2016': 1.02, '2017': 1.06, '2018': 1.02} JER_syst = {'2016': 1.01, '2017': 1.09, '2018': 1.025} #EGM_ID_syst = {'2016': 1.02, '2017': 1.03, '2018': 1.03} veto_ID_syst = {'2016': 1.005, '2017': 1.005, '2018': 1.005} GM_ID_syst = { # split by match category '0': {'2016': 1.000, '2017': 1.000, '2018': 1.000}, # just dummy = 1, no GMs '1': {'2016': 1.007, '2017': 1.003, '2018': 1.005}, '2': {'2016': 1.012, '2017': 1.005, '2018': 1.010} } dSA_ID_prompt_syst = {'2016': 1.006, '2017': 1.007, '2018': 1.006} dSA_ID_displ_syst = {'2016': 1.001, '2017': 1.001, '2018': 1.001} dSA_reco_prompt_syst = {'2016': 1.004, '2017': 1.003, '2018': 1.003} # background ABCD closure syst closure_syst_1mm = { '0': {'2016': 1.25, '2017': 1.25, '2018': 1.25}, '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05}, '2': {'2016': 1.80, '2017': 1.80, '2018': 1.80} } closure_syst_10mm = { '0': {'2016': 1.25, '2017': 1.25, '2018': 1.25}, '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05}, '2': {'2016': 1.80, '2017': 1.80, '2018': 1.80} } closure_syst_100mm = { '0': {'2016': 1.20, '2017': 1.20, '2018': 1.20}, '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05}, '2': {'2016': 1.20, '2017': 1.20, '2018': 1.20} } closure_syst_1000mm = { '0': {'2016': 1.10, '2017': 1.10, '2018': 1.10}, '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05}, '2': {'2016': 1.20, '2017': 1.20, '2018': 1.20} } cat_translator = {u'29': '0', u'30': '1', u'31': '2'} with open(sys.argv[1]) as f: data = json.load(f) tot_yields = {} for name,yields in data.iteritems(): sample = name.split('_sig_')[-1] cut = [token for token in name.split('_') if 'cut' in token][0] if '29' not in cut and '30' not in cut and '31' not in cut: continue if '29' in cut or '30' in cut: if 'vxy_zoom_' not in name: continue else: if 'vxy_zoomzoom_' not in name: continue print "name ", name if yields['A_sig'] == 0.0 and yields['B_sig'] == 0.0 and yields['C_sig'] == 0.0 and yields['D_sig'] == 0.0: continue if sample not in tot_yields: tot_yields[sample] = {} cut_num = cut.split('cut')[-1] if cut_num not in tot_yields[sample]: tot_yields[sample][cut_num] = {} tot_yields[sample][cut_num]['A_sig'] = yields["A_sig"] tot_yields[sample][cut_num]['B_sig'] = yields["B_sig"] tot_yields[sample][cut_num]['C_sig'] = yields["C_sig"] tot_yields[sample][cut_num]['D_sig'] = yields["D_sig"] tot_yields[sample][cut_num]['A_bkg'] = max(yields["A_bkg"], 0.1) tot_yields[sample][cut_num]['B_bkg'] = max(yields["B_bkg"], 0.1) tot_yields[sample][cut_num]['C_bkg'] = max(yields["C_bkg"], 0.1) tot_yields[sample][cut_num]['D_bkg'] = max(yields["D_bkg"], 0.1) tot_yields[sample][cut_num]['c1'] = tot_yields[sample][cut_num]['B_bkg'] / tot_yields[sample][cut_num]['A_bkg'] tot_yields[sample][cut_num]['c2'] = tot_yields[sample][cut_num]['C_bkg'] / tot_yields[sample][cut_num]['A_bkg'] c1s = {} c2s = {} for sample in tot_yields: if '161718' not in sample: continue basename_no_year = sample.split('_161718')[0] c1s[basename_no_year] = {} c2s[basename_no_year] = {} for cut_num,props in tot_yields[sample].items(): c1s[basename_no_year][cut_num] = props['c1'] c2s[basename_no_year][cut_num] = props['c2'] #! [part1] # Define four categories labeled A, B, C and D, and # set the observed yields in a map. 
cats = OrderedDict() obs_rates = OrderedDict() sig_rates = OrderedDict() systs = OrderedDict() names = OrderedDict() for sample in tot_yields: if '161718' in sample: continue if '2016' in sample: year = '2016' elif '2017' in sample: year = '2017' elif '2018' in sample: year = '2018' basename_no_year = sample.split('_'+year)[0] if basename_no_year not in cats: cats[basename_no_year] = [] obs_rates[basename_no_year] = {} sig_rates[basename_no_year] = {} systs[basename_no_year] = {} names[basename_no_year] = basename_no_year for cut in tot_yields[sample]: cat_temp = cat_translator[cut] + 'match' + '_' + year index = 0 if len(cats[basename_no_year]) == 0 else cats[basename_no_year][-1][0] + 1 cats[basename_no_year].append((index, 'A_' + cat_temp)) cats[basename_no_year].append((index, 'B_' + cat_temp)) cats[basename_no_year].append((index, 'C_' + cat_temp)) cats[basename_no_year].append((index, 'D_' + cat_temp)) obs_rates[basename_no_year]['A_' + cat_temp] = tot_yields[sample][cut]['A_bkg'] obs_rates[basename_no_year]['B_' + cat_temp] = tot_yields[sample][cut]['B_bkg'] obs_rates[basename_no_year]['C_' + cat_temp] = tot_yields[sample][cut]['C_bkg'] obs_rates[basename_no_year]['D_' + cat_temp] = tot_yields[sample][cut]['D_bkg'] sig_rates[basename_no_year]['A_' + cat_temp] = tot_yields[sample][cut]['A_sig'] sig_rates[basename_no_year]['B_' + cat_temp] = tot_yields[sample][cut]['B_sig'] sig_rates[basename_no_year]['C_' + cat_temp] = tot_yields[sample][cut]['C_sig'] sig_rates[basename_no_year]['D_' + cat_temp] = tot_yields[sample][cut]['D_sig'] systs[basename_no_year]["bkgA_norm_" + cat_temp] = tot_yields[sample][cut]['A_bkg'] #systs[basename_no_year]["c1_" + cat_temp] = tot_yields[sample][cut]['c1'] #systs[basename_no_year]["c2_" + cat_temp] = tot_yields[sample][cut]['c2'] systs[basename_no_year]["c1_" + cat_temp] = c1s[basename_no_year][cut] systs[basename_no_year]["c2_" + cat_temp] = c2s[basename_no_year][cut] closure_syst = {} if '_1_' in sample: closure_syst = closure_syst_1mm elif '_10_' in sample: closure_syst = closure_syst_10mm elif '_100_' in sample: closure_syst = closure_syst_100mm elif '_1000_' in sample: closure_syst = closure_syst_1000mm systs[basename_no_year]["closure_" + cat_temp] = closure_syst[cat_translator[cut]][year] systs[basename_no_year]["lumi_" + year] = lumi_syst[year] systs[basename_no_year]["trig_MC_" + year] = trig_MC_syst[year] systs[basename_no_year]["trig_data_" + year] = trig_data_syst[year] systs[basename_no_year]["JES_" + year] = JES_syst[year] systs[basename_no_year]["JER_" + year] = JER_syst[year] #systs[basename_no_year]["EGM_ID_" + year] = EGM_ID_syst[year] systs[basename_no_year]["veto_ID_" + year] = veto_ID_syst[year] systs[basename_no_year]["GM_ID_" + cat_temp] = GM_ID_syst[cat_translator[cut]][year] systs[basename_no_year]["dSA_ID_prompt_" + year] = dSA_ID_prompt_syst[year] systs[basename_no_year]["dSA_ID_displ_" + year] = dSA_ID_displ_syst[year] systs[basename_no_year]["dSA_reco_prompt_" + year] = dSA_reco_prompt_syst[year] #! [part1] for name, cat, obs_rate, sig_rate, syst in zip(names.values(), cats.values(), obs_rates.values(), sig_rates.values(), systs.values()): cb = ch.CombineHarvester() cb.SetVerbosity(0) #! 
        #! [part2]
        print "cat ", cat
        cb.AddObservations(["*"], [""], ["13TeV"], [""], cat)
        cb.AddProcesses(["*"], [""], ["13TeV"], [""], ["sig"], cat, True)
        cb.AddProcesses(["*"], [""], ["13TeV"], [""], ["bkg"], cat, False)
        cb.cp().ForEachObs(lambda x: x.set_rate(obs_rate[x.bin()]))
        cb.cp().backgrounds().ForEachProc(lambda x: x.set_rate(1))
        cb.cp().signals().ForEachProc(lambda x: x.set_rate(sig_rate[x.bin()]))
        #! [part2]

        #! [part3]
        # Create a unique floating parameter in each bin
        for y in ['2016', '2017', '2018']:
            for m in ['0', '1', '2']:
                if 'A_' + m + 'match_' + y not in sig_rate:
                    continue
                abcd = ['A_' + m + 'match_' + y, 'B_' + m + 'match_' + y,
                        'C_' + m + 'match_' + y, 'D_' + m + 'match_' + y]
                cb.cp().backgrounds().bin(abcd).AddSyst(cb, 'bkgA_norm_' + m + 'match_' + y, "rateParam", ch.SystMap()(syst['bkgA_norm_' + m + 'match_' + y]))
                cb.cp().backgrounds().bin([abcd[1], abcd[3]]).AddSyst(cb, 'c1_' + m + 'match', "rateParam", ch.SystMap()(syst['c1_' + m + 'match_' + y]))
                cb.cp().backgrounds().bin([abcd[2], abcd[3]]).AddSyst(cb, 'c2_' + m + 'match', "rateParam", ch.SystMap()(syst['c2_' + m + 'match_' + y]))
                # background systs
                cb.cp().backgrounds().bin([abcd[2]]).AddSyst(cb, 'closure_' + m + 'match', 'lnN', ch.SystMap()(syst['closure_' + m + 'match_' + y]))
                # signal systs
                cb.cp().signals().bin(abcd).AddSyst(cb, 'lumi_' + y, 'lnN', ch.SystMap()(syst['lumi_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'trig_MC', 'lnN', ch.SystMap()(syst['trig_MC_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'trig_data', 'lnN', ch.SystMap()(syst['trig_data_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'JES', 'lnN', ch.SystMap()(syst['JES_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'JER_' + y, 'lnN', ch.SystMap()(syst['JER_' + y]))
                #cb.cp().signals().bin(abcd).AddSyst(cb, 'EGM_ID', 'lnN', ch.SystMap()(syst['EGM_ID_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'veto_ID', 'lnN', ch.SystMap()(syst['veto_ID_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'GM_ID_' + m + 'match', 'lnN', ch.SystMap()(syst['GM_ID_' + m + 'match_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'dSA_ID_prompt', 'lnN', ch.SystMap()(syst['dSA_ID_prompt_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'dSA_ID_displ', 'lnN', ch.SystMap()(syst['dSA_ID_displ_' + y]))
                cb.cp().signals().bin(abcd).AddSyst(cb, 'dSA_reco_prompt', 'lnN', ch.SystMap()(syst['dSA_reco_prompt_' + y]))
        #! [part3]

        #! [part4]
        #cb.PrintAll();
        print ">> Writing datacard for hist: ", name
        if not os.path.exists("sig_" + name):
            os.mkdir("sig_" + name)
        cb.WriteDatacard("sig_" + str(name) + "/datacard.txt")
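# Driver sketch (added for illustration, not part of the original script): the
# datacards written above land in "sig_<name>/datacard.txt", so they can be fed
# to combine one sample at a time. The choice of AsymptoticLimits here is an
# assumption about the intended statistical method.
import glob
import subprocess

def run_limits():
    for card in sorted(glob.glob('sig_*/datacard.txt')):
        subprocess.check_call(['combine', '-M', 'AsymptoticLimits', card])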
def __init__(self, cb=None):
    super(Datacards, self).__init__()

    self.cb = cb
    if self.cb is None:
        self.cb = ch.CombineHarvester()
    if log.isEnabledFor(logging.DEBUG):
        self.cb.SetVerbosity(1)

    self.configs = datacardconfigs.DatacardConfigs()

    # common systematics
    self.lumi_syst_args = [
        "lumi_$ERA", "lnN",
        ch.SystMap("era")
            (["7TeV", "8TeV"], 1.026)
            (["13TeV"], 1.046)
    ]
    self.electron_efficiency_syst_args = [
        "CMS_eff_e", "lnN",
        ch.SystMap("era")
            (["7TeV", "8TeV"], 1.02)
            (["13TeV"], 1.05)  # copied from 8TeV
    ]
    self.muon_efficiency_syst_args = [
        "CMS_eff_m", "lnN",
        ch.SystMap("era")
            (["7TeV", "8TeV"], 1.02)
            (["13TeV"], 1.05)  # copied from 8TeV
    ]
    self.tau_efficiency_corr_syst_args = [
        "CMS_eff_t_$ERA", "lnN",
        ch.SystMap("era", "channel")
            (["7TeV", "8TeV"], ["mt", "et"], 1.08)
            (["7TeV", "8TeV"], ["tt"], 1.19)
            (["13TeV"], ["mt", "et", "tt"], 1.05)  # copied from 8TeV
    ]
    self.tau_efficiency_syst_args = [
        "CMS_eff_t_$CHANNEL_$ERA", "lnN",
        ch.SystMap("era", "channel")
            (["7TeV", "8TeV"], ["mt", "et"], 1.08)
            (["7TeV", "8TeV"], ["tt"], 1.19)
            (["13TeV"], ["mt", "et", "tt"], 1.03)  # copied from 8TeV
    ]
    self.btag_efficiency_syst_args = [
        "CMS_eff_b_$ERA", "lnN",
        ch.SystMap("era", "channel")
            (["13TeV"], ["mt"], 0.96)  # copied from 8TeV
            (["13TeV"], ["et"], 0.96)  # copied from 8TeV
            (["13TeV"], ["em"], 0.93)  # copied from 8TeV
            (["13TeV"], ["tt"], 0.93)  # copied from 8TeV
    ]
    self.met_scale_syst_args = [
        "CMS_$ANALYSIS_scale_met_$ERA", "lnN",
        ch.SystMap("era", "process")
            (["13TeV"], ["ggH", "qqH", "WH", "ZH", "VH"], 0.98)  # copied from 8TeV
            (["13TeV"], ["ZTT", "ZLL", "ZL", "ZJ", "TTJ", "TT", "VV", "WJ", "W"], 1.03)  # copied from 8TeV
    ]
    self.ztt_cross_section_syst_args = [
        "CMS_$ANALYSIS_zttNorm_$ERA", "lnN",
        ch.SystMap("era", "process")
            (["7TeV", "8TeV"], ["ZTT", "ZLL", "ZL", "ZJ"], 1.03)
            (["13TeV"], ["ZTT", "ZLL", "ZL", "ZJ"], 1.04)
    ]
    self.ttj_cross_section_syst_args = [
        "CMS_$ANALYSIS_ttjNorm_$ERA", "lnN",
        ch.SystMap("era", "process")
            (["7TeV"], ["TTJ"], 1.08)
            (["8TeV"], ["TTJ"], 1.1)
            (["13TeV"], ["TTJ", "TT"], 1.06)  # copied from 8TeV
    ]
    self.ttj_extrapol_syst_args = [
        "CMS_$ANALYSIS_ttjExtrapol_$ERA", "lnN",
        ch.SystMap("era", "process")
            (["7TeV"], ["TTJ"], 1.08)
            (["8TeV"], ["TTJ"], 1.1)
            (["13TeV"], ["TTJ", "TT"], 1.10)  # copied from 8TeV
    ]
    #self.singlet_cross_section_syst_args = [
    #        "CMS_$ANALYSIS_singletNorm_$ERA",
    #        "lnN",
    #        ch.SystMap("era", "process")
    #        (["13TeV"], [], 1.04)
    #]
    self.vv_cross_section_syst_args = [
        "CMS_$ANALYSIS_vvNorm_$ERA", "lnN",
        ch.SystMap("era", "process")
            (["7TeV", "8TeV"], ["VV"], 1.15)
            (["13TeV"], ["VV"], 1.10)  # copied from 8TeV
    ]
    self.wj_cross_section_syst_args = [
        "CMS_$ANALYSIS_wjNorm_$CHANNEL_$ERA", "lnN",
        ch.SystMap("era", "process", "channel")
            (["7TeV", "8TeV"], ["WJ"], ["mt", "et"], 1.2)
            (["13TeV"], ["WJ", "W"], ["mt", "et"], 1.04)  # copied from 8TeV
    ]
    self.wj_extrapol_syst_args = [
        "CMS_$ANALYSIS_wjExtrapol_$CHANNEL_$ERA", "lnN",
        ch.SystMap("era", "process", "channel")
            (["7TeV", "8TeV"], ["WJ"], ["mt", "et"], 1.2)
            (["13TeV"], ["WJ", "W"], ["mt", "et"], 1.2)  # copied from 8TeV
    ]
    self.qcd_syst_args = [
        "CMS_$ANALYSIS_qcdSyst_$BIN_$ERA", "lnN",
        ch.SystMap("era", "process", "bin")
            (["13TeV"], ["QCD"], ["mt_inclusive", "et_inclusive"], 1.06)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_0jet_high"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_0jet_low"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_1jet_high"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_1jet_low"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_2jet_vbf"], 1.3)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_0jet_high"], 1.06)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_0jet_low"], 1.06)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_1jet_high"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_1jet_low"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_2jet_vbf"], 1.3)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["tt_inclusive"], 1.35)  # copied from 8TeV
    ]
    self.zllFakeTau_syst_args = [
        "CMS_$ANALYSIS_eFakeTau_$CHANNEL_$ERA", "lnN",
        ch.SystMap("era", "process", "channel")
            (["7TeV", "8TeV"], ["ZLL"], ["mt", "et"], 1.30)
            (["13TeV"], ["ZLL", "ZL", "ZJ"], ["mt", "tt"], 1.15)
            (["13TeV"], ["ZLL", "ZL", "ZJ"], ["et"], 1.30)
    ]
    self.zee_norm_syst_args = [
        "CMS_$ANALYSIS_zeeNorm_$ERA", "lnN",
        ch.SystMap("era", "process")
            (["13TeV"], ["ZLL", "ZL"], 1.03)
    ]
    self.jec_syst_args = [
        "CMS_scale_j_$ERA", "shape",
        ch.SystMap("era")
            (["13TeV"], 1.0)
    ]
    self.tau_es_syst_args = [
        "CMS_scale_t_$CHANNEL_$ERA", "shape",
        ch.SystMap("era", "channel")
            (["13TeV"], ["mt"], 1.0)
            (["13TeV"], ["et"], 1.0)
            (["13TeV"], ["tt"], 1.0)
    ]
    self.ele_es_syst_args = [
        "CMS_scale_e_$CHANNEL_$ERA", "shape",
        ch.SystMap("era", "channel")
            (["13TeV"], ["em"], 1.0)
            (["13TeV"], ["et"], 1.0)
    ]
    self.probetau_es_syst_args = [
        "CMS_scale_probetau_$CHANNEL_$ERA", "shape",
        ch.SystMap("era", "channel")
            (["13TeV"], ["et"], 1.0)
    ]
    self.probeele_es_syst_args = [
        "CMS_scale_probeele_$CHANNEL_$ERA", "shape",
        ch.SystMap("era", "channel")
            (["13TeV"], ["et"], 1.0)
    ]
    self.tagele_es_syst_args = [
        "CMS_scale_tagele_$CHANNEL_$ERA", "shape",
        ch.SystMap("era", "channel")
            (["13TeV"], ["et"], 1.0)
    ]
    self.massres_syst_args = [
        "CMS_scale_massRes_$CHANNEL_$ERA", "shape",
        ch.SystMap("era", "channel")
            (["13TeV"], ["et"], 1.0)
    ]
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt1314TeV#s_13_0_TeV
    self.htt_qcd_scale_syst_args = [
        "QCD_scale_$PROCESS", "lnN",
        ch.SystMap("era", "process")
            (["13TeV"], ["ggH"], 1.079)
            (["13TeV"], ["qqH"], 1.007)
            (["13TeV"], ["VH"], 1.015)
            (["13TeV"], ["WH"], 1.015)
            (["13TeV"], ["ZH"], 1.038)
    ]
    self.htt_pdf_scale_syst_args = [
        "PDF_scale_$PROCESS", "lnN",
        ch.SystMap("era", "process")
            (["13TeV"], ["ggH"], 1.071)
            (["13TeV"], ["qqH"], 1.032)
            (["13TeV"], ["VH"], 1.022)
            (["13TeV"], ["WH"], 1.022)
            (["13TeV"], ["ZH"], 1.022)
    ]
    self.ztt_pdf_scale_syst_args = [
        "PDF_scale_$PROCESS", "lnN",
        ch.SystMap("era", "process")
            (["13TeV"], ["ZTT"], 1.015)
    ]
    # CMS AN-13-262 (v8, table 3)
    self.htt_ueps_syst_args = [
        "UEPS", "lnN",
        ch.SystMap("era", "process", "bin")
            (["13TeV"], ["ggH"], ["mt_0jet_high"], 1.060)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_0jet_low"], 1.073)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_1jet_high"], 0.996)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_1jet_low"], 1.007)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_2jet_vbf"], 0.988)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_0jet_high"], 1.060)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_0jet_low"], 1.073)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_1jet_high"], 0.996)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_1jet_low"], 1.007)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_2jet_vbf"], 0.988)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_0jet_high"], 1.063)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_0jet_low"], 1.089)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_1jet_high"], 1.004)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_1jet_low"], 1.000)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_2jet_vbf"], 0.988)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["tt_inclusive"], 1.025)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_0jet_high"], 1.028)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_0jet_low"], 1.018)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_1jet_high"], 0.954)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_1jet_low"], 0.946)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_2jet_vbf"], 0.893)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_0jet_high"], 1.028)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_0jet_low"], 1.018)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_1jet_high"], 0.954)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_1jet_low"], 0.946)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_2jet_vbf"], 0.893)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_0jet_high"], 1.042)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_0jet_low"], 1.035)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_1jet_high"], 0.978)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_1jet_low"], 0.984)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_2jet_vbf"], 0.893)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["tt_inclusive"], 1.025)  # copied from 8TeV
    ]
def main():
    if len(sys.argv) < 2:
        print 'Error! No data filename specified (in json format). Exiting...'
        exit(1)

    cat_translator = {u'28': '0', u'29': '1', u'30': '2'}

    with open(sys.argv[1]) as f:
        data = json.load(f)

    tot_yields = {}
    for name, yields in data.iteritems():
        if 'vtx_sign' not in name:
            continue
        print "name ", name
        sample = name.split('_sig_')[-1]
        cut = [token for token in name.split('_') if 'cut' in token][0]
        print 'cut ', cut
        if '28' not in cut and '29' not in cut and '30' not in cut:
            continue
        if sample not in tot_yields:
            tot_yields[sample] = {}
        cut_num = cut.split('cut')[-1]
        if cut_num not in tot_yields[sample]:
            tot_yields[sample][cut_num] = {}
        tot_yields[sample][cut_num]['A_sig'] = yields["A_sig"]
        tot_yields[sample][cut_num]['B_sig'] = yields["B_sig"]
        tot_yields[sample][cut_num]['C_sig'] = yields["C_sig"]
        tot_yields[sample][cut_num]['D_sig'] = yields["D_sig"]
        # floor the background yields at 0.1 to protect the transfer factors
        # against empty bins
        tot_yields[sample][cut_num]['A_bkg'] = max(yields["A_bkg"], 0.1)
        tot_yields[sample][cut_num]['B_bkg'] = max(yields["B_bkg"], 0.1)
        tot_yields[sample][cut_num]['C_bkg'] = max(yields["C_bkg"], 0.1)
        tot_yields[sample][cut_num]['D_bkg'] = max(yields["D_bkg"], 0.1)
        tot_yields[sample][cut_num]['c1'] = tot_yields[sample][cut_num]['B_bkg'] / tot_yields[sample][cut_num]['A_bkg']
        tot_yields[sample][cut_num]['c2'] = tot_yields[sample][cut_num]['C_bkg'] / tot_yields[sample][cut_num]['A_bkg']

    #! [part1]
    # Define four categories labeled A, B, C and D, and
    # set the observed yields in a map.
    for sample in tot_yields:
        cats = []
        obs_rates = {}
        sig_rates = {}
        for cut in tot_yields[sample]:
            cat = cat_translator[cut] + 'match'
            cats.append('A_' + cat)
            cats.append('B_' + cat)
            cats.append('C_' + cat)
            cats.append('D_' + cat)
            obs_rates['A_' + cat] = tot_yields[sample][cut]['A_bkg']
            obs_rates['B_' + cat] = tot_yields[sample][cut]['B_bkg']
            obs_rates['C_' + cat] = tot_yields[sample][cut]['C_bkg']
            obs_rates['D_' + cat] = tot_yields[sample][cut]['D_bkg']
            sig_rates['A_' + cat] = tot_yields[sample][cut]['A_sig']
            sig_rates['B_' + cat] = tot_yields[sample][cut]['B_sig']
            sig_rates['C_' + cat] = tot_yields[sample][cut]['C_sig']
            sig_rates['D_' + cat] = tot_yields[sample][cut]['D_sig']
        cats_with_number = []
        for num in range(0, len(cats)):
            cats_with_number.append((num, cats[num]))
        #! [part1]

        #! [part2]
        cb = ch.CombineHarvester()
        cb.SetVerbosity(0)
        cb.AddObservations(["*"], [""], ["13TeV"], [""], cats_with_number)
        cb.AddProcesses(["*"], [""], ["13TeV"], [""], ["sig"], cats_with_number, True)
        cb.AddProcesses(["*"], [""], ["13TeV"], [""], ["bkg"], cats_with_number, False)
        cb.cp().ForEachObs(lambda x: x.set_rate(obs_rates[x.bin()]))
        cb.cp().backgrounds().ForEachProc(lambda x: x.set_rate(1))
        cb.cp().signals().ForEachProc(lambda x: x.set_rate(sig_rates[x.bin()]))
        #! [part2]

        #! [part3]
        # Create a unique floating parameter in each bin
        print 'name ', sample
        print 'tot_yields name keys', tot_yields[sample].keys()
        cb.cp().backgrounds().bin(["A_0match", "B_0match", "C_0match", "D_0match"]).AddSyst(cb, "bkgA_norm_0match", "rateParam", ch.SystMap()(tot_yields[sample][u'28']['A_bkg']))
        cb.cp().backgrounds().bin(["B_0match", "D_0match"]).AddSyst(cb, "c1_0match", "rateParam", ch.SystMap()(tot_yields[sample][u'28']['c1']))
        cb.cp().backgrounds().bin(["C_0match", "D_0match"]).AddSyst(cb, "c2_0match", "rateParam", ch.SystMap()(tot_yields[sample][u'28']['c2']))
        cb.cp().backgrounds().bin(["A_1match", "B_1match", "C_1match", "D_1match"]).AddSyst(cb, "bkgA_norm_1match", "rateParam", ch.SystMap()(tot_yields[sample][u'29']['A_bkg']))
        cb.cp().backgrounds().bin(["B_1match", "D_1match"]).AddSyst(cb, "c1_1match", "rateParam", ch.SystMap()(tot_yields[sample][u'29']['c1']))
        cb.cp().backgrounds().bin(["C_1match", "D_1match"]).AddSyst(cb, "c2_1match", "rateParam", ch.SystMap()(tot_yields[sample][u'29']['c2']))
        cb.cp().backgrounds().bin(["A_2match", "B_2match", "C_2match", "D_2match"]).AddSyst(cb, "bkgA_norm_2match", "rateParam", ch.SystMap()(tot_yields[sample][u'30']['A_bkg']))
        cb.cp().backgrounds().bin(["B_2match", "D_2match"]).AddSyst(cb, "c1_2match", "rateParam", ch.SystMap()(tot_yields[sample][u'30']['c1']))
        cb.cp().backgrounds().bin(["C_2match", "D_2match"]).AddSyst(cb, "c2_2match", "rateParam", ch.SystMap()(tot_yields[sample][u'30']['c2']))
        #! [part3]

        #! [part4]
        #cb.PrintAll();
        print ">> Writing datacard for hist: ", sample
        if not os.path.exists("sig_" + sample):
            os.mkdir("sig_" + sample)
        cb.WriteDatacard("sig_" + str(sample) + "/datacard.txt")
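# Quick closure check (added sketch, not in the original script): compare the
# observed D-region background with the ABCD prediction A * c1 * c2 = B * C / A
# for every sample and cut in tot_yields, before trusting the rateParam chain
# built above.
def print_abcd_closure(tot_yields):
    for sample, cuts in tot_yields.iteritems():
        for cut_num, y in cuts.iteritems():
            predicted_D = y['A_bkg'] * y['c1'] * y['c2']  # equals B*C/A
            print sample, cut_num, 'D observed:', y['D_bkg'], 'D predicted:', predicted_D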
def create_datacards(channel, method):
    backgrounds = {"ZTT": "ztt", "VV": "vv", "W": "wj", "QCD": "qcd"}
    backgrounds.update(
        {"TT": "ttj", "ZLL": "zll"} if channel == "em"
        else {"TTT": "ttt", "TTJJ": "ttjj", "ZL": "zl", "ZJ": "zj"})

    ## Combine harvester instance
    cb = ch.CombineHarvester()

    ## Instance for extracting histograms
    sample_settings = samples.Samples()
    config_list = []

    ## weights
    cut_info = yaml.load(open(os.environ["CMSSW_BASE"] + "/src/FlavioOutput/Configs/cuts.yaml", "r"))
    parameter_info = yaml.load(open(os.environ["CMSSW_BASE"] + "/src/FlavioOutput/Configs/parameter.yaml", "r"))

    weights = []
    for index, category in enumerate(["(njetspt30==0)", "(njetspt30==1)", "(njetspt30>1)"]):  #, "(nbtag==2)"]):
        #cut_strings = [parameter_info[param][4] for param in cut_info[index][channel].keys()]
        #cut_values, cut_side = [[entry[index2] for entry in cut_info[index][channel].values()] for index2 in [0, 1]]
        weights.append({
            #"cut_based": "*".join([cut_strings[index2].format(side=side, cut=value) for index2, (side, value) in enumerate(zip(cut_side, cut_values))] + [category]),
            "cut_BDT": "(BDT_forcut_score>0.7)*" + category,
            "cut_Ada_BDT": "(BDT_Ada_forcut_score>0.0)*" + category,
            "BDT": category,
            "Ada_BDT": category
        })

    ## Fill combine harvester with categories/processes
    for category in categories + controlregions:
        ## Add data/signal
        cb.AddObservations(["*"], ["lfv"], ["13TeV"], [channel], [category])
        if "CR" not in category[1]:
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel], ["Z" + channel.upper()], [category], True)

        ## Config for each category
        config = sample_settings.get_config(
            [getattr(samples.Samples, sample) for sample in data.values() + {True: ["z" + channel], False: []}["CR" not in category[1]] + backgrounds.values()],
            channel, None, estimationMethod="new", weight=weights[category[0]][method])
        config.pop("legend_markers")
        config += {
            "filename": "input_" + method + "_nominal_" + category[1],
            "plot_modules": ["ExportRoot"],
            "file_mode": "UPDATE",
            "directories": os.environ["MCPATH"],
            "x_expressions": x[method],
            "x_bins": x_bins[method],
            "output_dir": output_dir + channel,
            "no_cache": True
        }
        config["labels"] = [category[1] + "/" + process for process in data.keys() + {True: ["Z" + channel.upper()], False: []}["CR" not in category[1]] + backgrounds.keys()]
        config_list.append(config)

        for process in backgrounds.keys():
            ## Add background
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel], [process], [category], False)

    ## Fill combine with control regions: each CR gets a floating scale rateParam,
    ## which is also attached to the signal-region categories
    for CR in controlregions:
        cb.cp().channel([channel]).bin([CR[1]]).AddSyst(
            cb, "scale_" + CR[1].replace("_CR", ""), "rateParam", ch.SystMap())
        for category in categories:
            cb.cp().bin([category[1]]).AddSyst(
                cb, "scale_" + CR[1].replace("_CR", ""), "rateParam", ch.SystMapFunc())

    ## Fill combine harvester with systematics
    systematics_list = SystLib.SystematicLibary()
    systematics_factory = systematics.SystematicsFactory()

    for (systematic, process, category) in systematics_list.get_LFV_systs(channel, lnN=True) + systematics_list.get_LFV_systs(channel, shape=True):
        cb.cp().channel([channel]).process(process).AddSyst(cb, *systematic)

        if "W" in process and "QCD" not in process:
            process.append("QCD")
        if "QCD" in process and "W" not in process:
            process.append("W")

        if systematic[1] == "shape":
            ## Config for each systematic shift:
            for category in categories + controlregions:
                if "CR" in category[1] and "Z" + channel.upper() in process:
                    process.remove("Z" + channel.upper())
                for shift in ["Down", "Up"]:
                    config = sample_settings.get_config(
                        [getattr(samples.Samples, dict(signals, **backgrounds)[sample]) for sample in process],
                        channel, None, estimationMethod="new", weight=weights[category[0]][method])
                    config.pop("legend_markers")
                    config += {
                        "filename": "input_" + method + "_" + systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel) + shift + "_" + category[1],
                        "plot_modules": ["ExportRoot"],
                        "file_mode": "UPDATE",
                        "directories": os.environ["MCPATH"],
                        "x_expressions": x[method],
                        "x_bins": x_bins[method],
                        "output_dir": output_dir + channel,
                        "no_cache": True
                    }
                    config["labels"] = [category[1] + "/" + proc + "_" + systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel) + shift for proc in process]
                    if systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel) == "CMS_scale_j_13TeV":
                        systematics_settings = systematics_factory.get(systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel))(config, "Total")
                    else:
                        systematics_settings = systematics_factory.get(systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel))(config)
                    config = systematics_settings.get_config(1 if shift == "Up" else -1)
                    config_list.append(config)

    ## Produce the input histograms in parallel
    pool = Pool(cpu_count())
    for config in config_list:
        pool.apply_async(harry_do_your_job, args=(config,))
    pool.close()
    pool.join()

    os.system("hadd {target}.root {root_files}*.root".format(
        target=output_dir + channel + "/input_" + method,
        root_files=output_dir + channel + "/input_" + method))

    ## Fill combine harvester with the shapes which were extracted before from harry.py
    cb.cp().backgrounds().ExtractShapes(output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS", "$BIN/$PROCESS_$SYSTEMATIC")
    cb.cp().signals().ExtractShapes(output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS", "$BIN/$PROCESS_$SYSTEMATIC")

    ## Write datacard and call combine
    cb.WriteDatacard(output_dir + channel + "/combined_" + method + ".txt",
                     output_dir + channel + "/combined_datacard_" + method + ".root")

    for category in categories:
        cb_copy = cb.cp()
        cb_copy.FilterAll(lambda obj: obj.bin() != category[1])
        cb_copy.WriteDatacard(output_dir + channel + "/" + category[1] + "_" + method + ".txt",
                              output_dir + channel + "/" + category[1] + "_datacard_" + method + ".root")
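# Possible entry point (added sketch): the channel and method strings follow the
# ones used inside create_datacards, but the actual driver loop is an assumption.
if __name__ == "__main__":
    for channel in ["em", "et", "mt"]:
        for method in ["BDT", "Ada_BDT", "cut_BDT", "cut_Ada_BDT"]:
            create_datacards(channel, method)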