예제 #1
0
    def harvestEm(self, channel='wenu', charge='both'):
        """Build mass-morphed datacards for the given channel.

        Parses all '<channel>_mass*.txt' cards found under self.bindir,
        builds a RooMorphing pdf per (bin, signal process) over the mass
        range self.mwrange, attaches the morphed pdfs to the CombineHarvester
        instance, replaces the per-mass rateParams with a single one per bin,
        and writes one card per bin to a charge-specific output directory.

        Args:
            channel: card-name prefix used when globbing for input cards.
            charge: only used to name the output directory.
        """
        cmb = ch.CombineHarvester()

        # Read all the cards.
        # CH stores metadata about each object (Observation, Process, Systematic),
        # this is extracted from the card names with some regex.
        # NOTE: raw string so that '\d' reaches the regex engine intact --
        # a non-raw '\d' is an invalid escape sequence (SyntaxWarning since
        # Python 3.12, slated to become an error).
        for card in glob.glob(self.bindir+('/%s_mass*.txt' % channel)):
            cmb.QuickParseDatacard(card, r"%s_mass(?<MASS>\d+)_$CHANNEL.card.txt" % channel)

        # Need a unique bin name for each plus/minus, pt and eta combination.
        # We extracted this part of the datacard name into the channel variable above,
        # so can just copy it and override the specific bin name that was in all the cards
        cmb.ForEachObj(lambda obj: obj.set_bin(obj.channel()))

        # We'll have three copies of the observation, one for each mass point.
        # Filter all but one copy (keep only the central-mass observation).
        cmb.FilterObs(lambda obj: obj.mass() != '%d' % self.mwcentral)

        # Create workspace to hold the morphing pdfs and the mass
        w = ROOT.RooWorkspace('morph', 'morph')
        mass = w.factory('mw[{mwrange}]'.format(mwrange=self.mwrange))

        # BuildRooMorphing will dump a load of debug plots here
        debug = ROOT.TFile(self.bindir+'/debug.root', 'RECREATE')

        # Run for each bin,process combination (only for signal!)
        for b in cmb.bin_set():
            for p in cmb.cp().bin([b]).signals().process_set():
                morphing.BuildRooMorphing(w, cmb, b, p, mass, verbose=True, file=debug)

        # Just to be safe: the mass is a morphing input here, not a fit parameter.
        mass.setConstant(True)

        # Now the workspace is copied into the CH instance and the pdfs attached to the processes
        # (this relies on us knowing that BuildRooMorphing will name the pdfs in a particular way)
        cmb.AddWorkspace(w, True)
        cmb.cp().process(['W']).ExtractPdfs(cmb, 'morph', '$BIN_$PROCESS_morph', '')

        # Adjust the rateParams a bit - we currently have three for each bin (one for each mass),
        # but we only want one. Easiest to drop the existing ones completely and create new ones
        cmb.syst_type(['rateParam'], False)
        cmb.cp().process(['W']).AddSyst(cmb, 'norm_$BIN', 'rateParam', ch.SystMap()(1.00))

        # Have to set the range by hand
        for sys in cmb.cp().syst_type(['rateParam']).syst_name_set():
            cmb.GetParameter(sys).set_range(0.5, 1.5)

        # Print the contents of the model
        cmb.PrintAll()

        # Write out the cards, one per bin
        outdir = self.bindir+'/wenu_cards_morphed_{charge}'.format(charge=charge)
        writer = ch.CardWriter('$TAG/$BIN.txt', '$TAG/shapes.root')
        writer.SetVerbosity(1)
        writer.WriteCards(outdir, cmb)
예제 #2
0
    def __init__(self, input_filename):
        """Load shapes from *input_filename* and set up a fresh CombineHarvester.

        Args:
            input_filename: path to the input ROOT file holding the shapes.

        Raises:
            Exception: if *input_filename* does not exist on disk.
        """
        if not os.path.exists(input_filename):
            logger.fatal("File %s does not exist.", input_filename)
            # Carry the offending path in the exception itself so the
            # traceback is self-explanatory even if the log line is lost.
            raise Exception("File %s does not exist." % input_filename)
        self._input_filename = input_filename

        # Cache the shapes found in the input file (helper defined elsewhere).
        self._shapes = self._get_shapes()
        logger.info("Found %d shapes in input file %s.",
                    len(self._shapes), self._input_filename)
        self._cb = ch.CombineHarvester()
        # Flipped once shapes have been extracted into the CH instance.
        self._shapes_extracted = False
	def __init__(self, cb=None):
		"""Wrap an existing CombineHarvester instance or create a new one.

		cb -- optional ch.CombineHarvester to reuse; a fresh instance is
		created when None is passed.
		"""
		super(Datacards, self).__init__()

		self.cb = cb
		if self.cb is None:
			self.cb = ch.CombineHarvester()
		# Mirror the module's logging verbosity into CombineHarvester itself.
		if log.isEnabledFor(logging.DEBUG):
			self.cb.SetVerbosity(1)

		self.configs = datacardconfigs.DatacardConfigs()
		
		# Fit options passed to every combine call built from these datacards.
		self.stable_options = r"--robustFit 1 --preFitValue 1.0 --cminDefaultMinimizerType Minuit2 --cminDefaultMinimizerAlgo Minuit2 --cminDefaultMinimizerStrategy 0 --cminFallbackAlgo Minuit2,0:1.0"
예제 #4
0
def main():
    """Build ZA analysis datacards for every (mH, mA) cut point.

    For each cut key in options.cut: collect data/background histograms from
    the per-process input files into shapes.root, fill the signal histograms
    from efficiencies interpolated out of eff.root, register processes and
    systematics in a CombineHarvester instance, and write the cards under
    CARDS/.
    """
    options = options_()
    for cutkey in options.cut:
        print 'cutkey : ', cutkey
        ### get M_A and M_H ###
        mH = float(options.mH_list[cutkey])
        mA = float(options.mA_list[cutkey])
        print mH, mA
        """Main function"""
        # start the timer
        tstart = datetime.now()
        print 'starting...'
        # get the options
        #options = get_options()

        intL = options.lumi  # in pb-1
        #tag = 'v1.2.0+7415-19-g7bbca78_ZAAnalysis_1a69757'
        #path = '/nfs/scratch/fynu/amertens/cmssw/CMSSW_7_4_15/src/cp3_llbb/CommonTools/histFactory/16_01_28_syst/build'
        tag = 'v1.1.0+7415-83-g2a9f912_ZAAnalysis_2ff9261'
        #tag = 'v1.1.0+7415-57-g4bff5ea_ZAAnalysis_b1377a8'
        path = options.path
        CHANNEL = options.CHANNEL
        ERA = options.ERA
        MASS = str(mH) + "_" + str(mA)
        ANALYSIS = options.ANALYSIS
        DEBUG = 0

        c = ch.CombineHarvester()
        # (bin_id, bin_name) categories: signal region and mll background
        # region for the mumu and ee channels.
        cats = [(0, "mmbbSR" + cutkey), (1, "mll_mmbbBR" + cutkey),
                (2, "eebbSR" + cutkey), (3, "mll_eebbBR" + cutkey)]

        bins = {}
        bins['signalregion_mm'] = "mmbbSR" + cutkey
        bins['mll_bkgregion_mm'] = "mll_mmbbBR" + cutkey
        bins['signalregion_ee'] = "eebbSR" + cutkey
        bins['mll_bkgregion_ee'] = "mll_eebbBR" + cutkey

        processes = {}
        p = Process('data_obs')
        #DoubleMuon_Run2015D_v1.1.0+7415-57-g4bff5ea_ZAAnalysis_b1377a8_histos.root
        p.prepare_process(path, 'data_obs', 'DoubleMuon_DoubleEG_Run2015D',
                          tag)
        processes['data_obs'] = p
        if DEBUG: print p
        # define signal
        # define backgrounds
        # zz
        p = Process('zz')
        p.prepare_process(
            path, 'zz',
            'ZZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8_MiniAODv2', tag)
        processes['zz'] = p
        if DEBUG: print p

        # ttbar
        p = Process('ttbar')
        p.prepare_process(path, 'ttbar', 'TTTo2L2Nu_13TeV-powheg_MiniAODv2',
                          tag)
        processes['ttbar'] = p
        # NOTE(review): this re-creates a bare Process('ttbar') right after
        # the prepared one was stored, so the DEBUG print below shows an
        # unprepared object. Looks like a leftover copy/paste line -- confirm
        # before removing.
        p = Process('ttbar')
        if DEBUG: print p
        '''
    # drell-yan
    p = Process('dy1')
    p.prepare_process(path, 'dy1', 'DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX_MiniAODv2', tag)
    processes['dy1'] = p
    if DEBUG: print p
    '''
        p = Process('dy2')
        p.prepare_process(
            path, 'dy2',
            'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX_MiniAODv2', tag)
        processes['dy2'] = p
        if DEBUG: print p

        c.AddObservations([MASS], [ANALYSIS], [ERA], [CHANNEL], cats)
        c.AddProcesses([MASS], [ANALYSIS], [ERA], [CHANNEL], ['ZA'], cats,
                       True)
        c.AddProcesses([MASS], [ANALYSIS], [ERA], [CHANNEL],
                       ['ttbar', 'dy2', 'zz'], cats, False)
        c.cp().process(['ttbar', 'dy2', 'ZA']).AddSyst(
            c, "lumi", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA],
                                                   [0, 1, 2, 3], 1.046))

        c.cp().process(['ttbar', 'dy2', 'ZA']).AddSyst(
            c, "trig", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA],
                                                   [0, 1, 2, 3], 1.04))

        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "btag", "shape",
                                                 ch.SystMap()(1.0))

        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "jec", "shape",
                                                 ch.SystMap()(1.0))

        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "jer", "shape",
                                                 ch.SystMap()(1.0))

        c.cp().process(['ttbar', 'dy2']).AddSyst(c, "pu", "shape",
                                                 ch.SystMap()(1.0))

        c.cp().process(['ttbar']).AddSyst(c, "TTpdf", "shape",
                                          ch.SystMap()(1.0))

        c.cp().process(['dy2']).AddSyst(c, "DYpdf", "shape", ch.SystMap()(1.0))

        c.cp().process(['dy2']).AddSyst(
            c, "DYnorm", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA], [0, 1],
                                                   1.1))

        c.cp().process(['ttbar']).AddSyst(
            c, "TTnorm", "lnN",
            ch.SystMap('channel', 'era', 'bin_id')([CHANNEL], [ERA], [0], 1.1))

        nChannels = len(bins)
        nBackgrounds = len(
            [processes[x] for x in processes if processes[x].type > 0])
        nNuisances = 1

        # Map datacard systematic suffix -> histogram-name suffix in the
        # input files. NOTE(review): '_TTpdf*' and '_DYpdf*' both read the
        # '__pdfup'/'__pdfdown' histograms -- presumably intentional (the
        # nuisances are decorrelated per process); verify against the inputs.
        systematics = {
            '': '',
            '_btagUp': '__btagup',
            '_btagDown': '__btagdown',
            '_jecUp': '__jecup',
            '_jecDown': '__jecdown',
            '_jerUp': '__jerup',
            '_jerDown': '__jerdown',
            '_puUp': '__puup',
            '_puDown': '__pudown',
            '_TTpdfUp': '__pdfup',
            '_TTpdfDown': '__pdfdown',
            '_DYpdfUp': '__pdfup',
            '_DYpdfDown': '__pdfdown'
        }
        outputRoot = "shapes.root"
        # Truncate/create the output file; it is re-opened in update mode below.
        f = TFile(outputRoot, "recreate")
        f.Close()
        for b in bins:
            print b, bins[b]
            for p in processes:
                if p == 'data_obs':
                    file_in = TFile(processes[p].file, "READ")
                    print " Getting ", bins[b], " in file ", processes[p].file
                    h = file_in.Get(bins[b])
                    h.SetDirectory(0)
                    file_in.Close()
                    f = TFile(outputRoot, "update")
                    h.SetName("hist_" + bins[b] + "_" + p)
                    h.Write()
                    f.Write()
                    f.Close()

                else:
                    # One copy per systematic variation (plus the nominal '').
                    for s1, s2 in systematics.iteritems():
                        file_in = TFile(processes[p].file, "READ")
                        print " Getting ", bins[
                            b] + s2, " in file ", processes[p].file
                        h = file_in.Get(bins[b] + s2)
                        h.SetDirectory(0)
                        file_in.Close()
                        f = TFile(outputRoot, "update")
                        h.SetName("hist_" + bins[b] + "_" + p + s1)
                        h.Sumw2()
                        #h.Scale(processes[p].xsection * intL / processes[p].sumW)
                        h.Scale(intL)
                        h.Write()
                        f.Write()
                        f.Close()

        # Fill signal histograms FIXME: read efficiencies from eff.root

        eff_file = TFile("eff.root", "READ")
        effee_hist = eff_file.Get("effee")
        eff_ee = effee_hist.Interpolate(mA, mH)
        effmm_hist = eff_file.Get("effmm")
        eff_mm = effmm_hist.Interpolate(mA, mH)

        print "lumi : ", options.lumifb
        print "eff at ", mA, mH, ":", eff_ee, eff_mm
        print "ZA yields: ", options.lumifb * eff_mm, options.lumifb * eff_ee

        f = TFile(outputRoot, "update")
        # Signal-region histograms: a single bin filled with the expected yield.
        h1 = TH1F("hist_" + bins['signalregion_mm'] + "_ZA",
                  "hist_" + bins['signalregion_mm'] + "_ZA", 1, 0, 1)
        h1.Fill(0.5, options.lumifb * eff_mm)
        h1.Write()

        # Background-region histograms for the signal stay empty (no ZA there).
        h2 = TH1F("hist_" + bins['mll_bkgregion_mm'] + "_ZA",
                  "hist_" + bins['mll_bkgregion_mm'] + "_ZA", 60, 60, 120)
        h2.Write()

        h3 = TH1F("hist_" + bins['signalregion_ee'] + "_ZA",
                  "hist_" + bins['signalregion_ee'] + "_ZA", 1, 0, 1)
        h3.Fill(0.5, options.lumifb * eff_ee)
        h3.Write()

        h4 = TH1F("hist_" + bins['mll_bkgregion_ee'] + "_ZA",
                  "hist_" + bins['mll_bkgregion_ee'] + "_ZA", 60, 60, 120)
        h4.Write()

        f.Write()
        f.Close()

        c.cp().backgrounds().ExtractShapes(outputRoot, "hist_$BIN_$PROCESS",
                                           "hist_$BIN_$PROCESS_$SYSTEMATIC")
        c.cp().signals().ExtractShapes(outputRoot, "hist_$BIN_$PROCESS",
                                       "hist_$BIN_$PROCESS_$SYSTEMATIC")
        writer = ch.CardWriter(
            '$TAG/$MASS/$ANALYSIS_$CHANNEL_$ERA.dat',
            '$TAG/common/$ANALYSIS_$CHANNEL_$MASS.input_$ERA.root')
        writer.WriteCards('CARDS/', c)
예제 #5
0
# Load the combine physics-model library so the workspace classes resolve.
ROOT.gSystem.Load('libHiggsAnalysisCombinedLimit')


### WORKSPACE PATH  [CHANGE BOTH WORKSPACE AND MLFIT ROOT FILE AT THE SAME TIME]
### ======= CONSTRAINED =========
# fin = ROOT.TFile('output/CONSTRAINED-svfit/cmb/wsp.root')
# fin = ROOT.TFile('output/CONSTRAINED-mvis/cmb/wsp.root')

### ======= UNCONSTRAINED =========
# fin = ROOT.TFile('output/LIMITS-svfit/cmb/wsp.root')
fin = ROOT.TFile('workspace.root')

# The RooWorkspace is named 'w' inside the file.
wsp = fin.Get('w')

cmb = ch.CombineHarvester()
# Clone objects into CH instead of sharing pointers with the input workspace.
cmb.SetFlag("workspaces-use-clone", True)
# Populate the CH instance from the workspace's ModelConfig and data_obs.
ch.ParseCombineWorkspace(cmb, wsp, 'ModelConfig', 'data_obs', False)

### MLFIT ROOT FILE PATH [CHANGE BOTH WORKSPACE AND MLFIT ROOT FILE AT THE SAME TIME]
### ======= CONSTRAINED =========
# mlf = ROOT.TFile('output/CONSTRAINED-svfit/cmb/mlfit.Test.root')
# mlf = ROOT.TFile('output/CONSTRAINED-mvis/cmb/mlfit.Test.root')

### ======= UNCONSTRAINED =========
# mlf = ROOT.TFile('output/LIMITS-svfit/cmb/mlfit.Test.root')
mlf = ROOT.TFile('mlfit.root')



예제 #6
0
def prepareShapes(backgrounds, signals, discriminant, discriminantName):
    """Build datacards, shape files and helper scripts for each signal.

    Args:
        backgrounds: list of background process names found in
            processes_mapping. NOTE: mutated in place for non-2016 years
            ('qcd' is added/removed depending on the discriminant), so
            callers should not rely on its contents afterwards.
        signals: list of signal process names ('Hut'/'Hct' couplings).
        discriminant: list of (bin_id, bin_name) category tuples — the
            corresponding entry of the global `discriminants` dictionary.
        discriminantName: name of the discriminant, used in output file names.
    """
    # Backgrounds is a list of string of the considered backgrounds corresponding to entries in processes_mapping
    # Signals is a list of string of the considered signals corresponding to entries in processes_mapping
    # discriminant is the corresponding entry in the dictionary discriminants

    import CombineHarvester.CombineTools.ch as ch
    root_path = options.root_path

    # prepareFile writes the histogram file and reports the shape systematics.
    # (renamed from 'file' to avoid shadowing the builtin)
    shapes_file, systematics = prepareFile(processes_mapping, discriminants,
                                           root_path, discriminantName)
    if options.dataYear != '2016':
        call([
            'python', 'symmetrize.py', options.output, shapes_file, options.dataYear
        ],
             shell=False)

    for signal in signals:
        cb = ch.CombineHarvester()
        cb.AddObservations(['*'], [''], ['_%s' % options.dataYear], [''],
                           discriminant)
        cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                        [signal], discriminant, True)
        #cb.AddProcesses(['*'], [''], ['_%s'%options.dataYear], [''], backgrounds, discriminant, False)
        if options.dataYear == '2016':
            cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                            backgrounds, discriminant, False)
        else:
            # qcd is only modelled in the b2j3 categories.
            if not 'b2j3' in discriminantName:
                try:
                    backgrounds.remove('qcd')
                # Only the "not in list" failure is expected here; a bare
                # except would also hide real bugs.
                except ValueError:
                    pass
            else:
                if not 'qcd' in backgrounds: backgrounds.append('qcd')
            if 'all' in discriminantName:
                # For the combined fit, the b2j3 category gets qcd as an extra
                # background; temporarily pop it from the discriminant list so
                # it is not added twice.
                if signal == 'Hut':
                    discriminant.remove((1, 'DNN_Hut_b2j3'))
                    cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear],
                                    [''], backgrounds + ['qcd'],
                                    [(1, 'DNN_Hut_b2j3')], False)
                else:
                    discriminant.remove((1, 'DNN_Hct_b2j3'))
                    cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear],
                                    [''], backgrounds + ['qcd'],
                                    [(1, 'DNN_Hct_b2j3')], False)
                cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                                backgrounds, discriminant, False)
                if signal == 'Hut': discriminant.append((1, 'DNN_Hut_b2j3'))
                else: discriminant.append((1, 'DNN_Hct_b2j3'))
            else:
                cb.AddProcesses(['*'], [''], ['_%s' % options.dataYear], [''],
                                backgrounds, discriminant, False)

        # Systematics
        if not options.nosys:
            for systematic in systematics:
                # Decide which processes this shape systematic applies to.
                systematic_only_for_SMtt = False
                systematic_only_for_Sig = False

                for systSMtt in options.sysForSMtt:
                    if CMSNamingConvention(systSMtt) == systematic:
                        systematic_only_for_SMtt = True
                for systSig in options.sysForSig:
                    if CMSNamingConvention(systSig) == systematic:
                        systematic_only_for_Sig = True

                if not systematic_only_for_SMtt and not systematic_only_for_Sig:
                    cb.cp().AddSyst(cb, systematic, 'shape',
                                    ch.SystMap()(1.00))
                elif systematic_only_for_SMtt and not systematic_only_for_Sig:
                    cb.cp().AddSyst(cb, systematic, 'shape',
                                    ch.SystMap('process')(smTTlist, 1.00))
                    #if 'hdamp' in systematic:
                    #    for i in xrange(len(discriminant)):
                    #        if 'b2j3' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttlf'], 1.00))
                    #            cb.cp().AddSyst(cb, systematic, 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb','ttcc'], 1.05))
                    #        elif 'b2j4' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], smTTlist, 1.00))
                    #        elif 'b3j3' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttlf'], 1.00))
                    #            cb.cp().AddSyst(cb, systematic, 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb','ttcc'], 1.05))
                    #        elif 'b3j4' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], smTTlist, 1.00))
                    #        elif 'b4j4' in discriminant[i][1]:
                    #            cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.00))
                    #            cb.cp().AddSyst(cb, systematic, 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc','ttlf'], 1.05))
                    #else: cb.cp().AddSyst(cb, systematic, 'shape', ch.SystMap('process')(smTTlist, 1.00))
                elif not systematic_only_for_SMtt and systematic_only_for_Sig:
                    cb.cp().AddSyst(cb, systematic, 'shape',
                                    ch.SystMap('process')([signal], 1.00))
                else:
                    cb.cp().AddSyst(
                        cb, systematic, 'shape',
                        ch.SystMap('process')(smTTlist + [signal], 1.00))

            #Lumi corr. https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM#LumiComb
            #cb.cp().AddSyst(cb, 'CMS_lumi', 'lnN', ch.SystMap()(options.luminosityError))
            if options.dataYear == '2016':
                cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2016', 'lnN',
                                ch.SystMap()(1.01))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_161718', 'lnN',
                                ch.SystMap()(1.006))
                #reproducing 2016
                #cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2016', 'lnN', ch.SystMap()(1.027))
            elif options.dataYear == '2017':
                cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2017', 'lnN',
                                ch.SystMap()(1.02))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_161718', 'lnN',
                                ch.SystMap()(1.009))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_1718', 'lnN',
                                ch.SystMap()(1.006))
            elif options.dataYear == '2018':
                cb.cp().AddSyst(cb, 'CMS_lumi_uncorr_2018', 'lnN',
                                ch.SystMap()(1.015))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_161718', 'lnN',
                                ch.SystMap()(1.02))
                cb.cp().AddSyst(cb, 'CMS_lumi_corr_1718', 'lnN',
                                ch.SystMap()(1.002))

            cb.cp().AddSyst(
                cb, 'tt_xsec', 'lnN',
                ch.SystMap('process')(['ttbb', 'ttcc', 'ttlf'], 1.055))
            cb.cp().AddSyst(cb, 'Other_xsec', 'lnN',
                            ch.SystMap('process')(['other'], 1.1))
            #cb.cp().AddSyst(cb, 'hdamp', 'lnN', ch.SystMap('process')(smTTlist, 1.05))
            #cb.cp().AddSyst(cb, 'TuneCP5', 'lnN', ch.SystMap('process')(smTTlist, 1.03))

            # qcd normalisation uncertainty in the b2j3 categories only.
            for i in xrange(len(discriminant)):
                if 'b2j3' in discriminant[i][1]:
                    cb.cp().AddSyst(cb, '$PROCESS_norm', 'lnN',
                                    ch.SystMap('process')(['qcd'], 1.5))
                #reproducing 2016 ### comment out Other_xsec above!
                #if 'b2j3' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b2j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
                #if 'b2j4' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b2j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
                #if 'b3j3' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b3j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
                #if 'b3j4' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b3j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))
                #if 'b4j4' in discriminant[i][1]: cb.cp().AddSyst(cb, 'Other_xsec_b4j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['other'], 1.1))

            # Per-category ttbb/ttcc normalisation uncertainties; 2016 uses
            # its own (year-tagged) ttbb nuisances.
            if options.dataYear == '2016':
                #reproducing 2016
                #cb.cp().AddSyst(cb, 'hdamp_2016', 'lnN', ch.SystMap('process')(['ttbb', 'ttcc', 'ttlf'], 1.05))
                #cb.cp().AddSyst(cb, 'scale_2016', 'lnN', ch.SystMap('process')(['ttbb', 'ttcc', 'ttlf'], 1.15))
                #for i in xrange(len(discriminant)):
                #    if 'j3' in discriminant[i][1]:
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                #        cb.cp().AddSyst(cb, 'jec_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb', 'ttcc', 'ttlf', 'other', signal], 1.01))
                #    else:
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                #        cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                #        cb.cp().AddSyst(cb, 'jec_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb', 'ttcc', 'ttlf', 'other', signal], 1.05))#1.05 for j4
                for i in xrange(len(discriminant)):
                    if 'b2' in discriminant[i][1]:
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b2_2016', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttbb'], 1.3))
                        #cb.cp().AddSyst(cb, '$PROCESS_norm_b2_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b2', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttcc'], 1.5))
                    elif 'b3' in discriminant[i][1]:
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b3_2016', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttbb'], 1.3))
                        #cb.cp().AddSyst(cb, '$PROCESS_norm_b3_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b3', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttcc'], 1.5))
                    elif 'b4' in discriminant[i][1]:
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b4_2016', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttbb'], 1.3))
                        #cb.cp().AddSyst(cb, '$PROCESS_norm_b4_2016', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b4', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttcc'], 1.5))
            else:
                for i in xrange(len(discriminant)):
                    if 'b2' in discriminant[i][1]:
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b2', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttbb'], 1.2))
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b2', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttcc'], 1.5))
                    elif 'b3' in discriminant[i][1]:
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b3', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttbb'], 1.22))
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b3', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttcc'], 1.5))
                    elif 'b4' in discriminant[i][1]:
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b4', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttbb'], 1.2))
                        cb.cp().AddSyst(
                            cb, '$PROCESS_norm_b4', 'lnN',
                            ch.SystMap('bin', 'process')([discriminant[i][1]],
                                                         ['ttcc'], 1.5))

                #if 'j3' in discriminant[i][1]:
                #    #cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                #    cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                #    cb.cp().AddSyst(cb, '$PROCESS_norm_j3', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))
                #else:
                #    #cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.5))
                #    cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttbb'], 1.3))
                #    cb.cp().AddSyst(cb, '$PROCESS_norm_j4', 'lnN', ch.SystMap('bin', 'process')([discriminant[i][1]], ['ttcc'], 1.5))

        # Import shapes from ROOT file
        cb.cp().backgrounds().ExtractShapes(shapes_file, '$BIN/$PROCESS',
                                            '$BIN/$PROCESS__$SYSTEMATIC')
        cb.cp().signals().ExtractShapes(shapes_file, '$BIN/$PROCESS',
                                        '$BIN/$PROCESS__$SYSTEMATIC')

        #reproducing 2016 - comment out
        if options.dataYear == '2016':
            rebin = ch.AutoRebin().SetBinThreshold(100).SetBinUncertFraction(
                0.1)
            rebin.Rebin(cb.cp(), cb)

        #elif options.dataYear == '2017':
        #    #rebin_b2j3 = ch.AutoRebin().SetBinThreshold(5400)#.SetBinUncertFraction(0.1)
        #    #rebin_b2j3.Rebin(cb.cp().bin(["DNN_Hut_b2j3", "DNN_Hct_b2j3"]), cb)

        #AutoMCStat
        cb.SetAutoMCStats(cb, 0.1)
        #reproducing 2016
        #print "Treating bbb"
        #bbb = ch.BinByBinFactory()
        #bbb.SetAddThreshold(0.0001)
        #bbb.AddBinByBin(cb.cp().backgrounds(), cb)
        #bbb.AddBinByBin(cb.cp().signals(), cb)

        output_prefix = 'FCNC_%s_Discriminant_%s' % (signal, discriminantName)

        output_dir = os.path.join(options.output, '%s' % (signal))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Dummy mass point required by combine's card naming; not fitted.
        fake_mass = '125'

        # Write card
        datacard = os.path.join(output_dir, output_prefix + '.dat')
        cb.cp().mass([fake_mass, "*"]).WriteDatacard(
            os.path.join(output_dir, output_prefix + '.dat'),
            os.path.join(output_dir, output_prefix + '_shapes.root'))

        # Write small script to compute the limit
        workspace_file = os.path.basename(
            os.path.join(output_dir,
                         output_prefix + '_combine_workspace.root'))
        script = """#! /bin/bash

text2workspace.py {datacard} -m {fake_mass} -o {workspace_root}

# Run limit

echo combine -M AsymptoticLimits -n {name} {workspace_root} -S {systematics} #--run blind #-v +2
#combine -M AsymptoticLimits -n {name} {workspace_root} -S {systematics} #--run expected #-v +2
combine -M AsymptoticLimits -n {name} {workspace_root} -S {systematics} #--run blind #-v +2
#combine -H AsymptoticLimits -M HybridNew -n {name} {workspace_root} -S {systematics} --LHCmode LHC-limits --expectedFromGrid 0.5 #for ecpected, use 0.84 and 0.16
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1))
        script_file = os.path.join(output_dir,
                                   output_prefix + '_run_limits.sh')
        with open(script_file, 'w') as f:
            f.write(script)

        # Mark the helper script executable for its owner.
        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)

        # Write small script for datacard checks
        script = """#! /bin/bash

# Run checks
echo combine -M FitDiagnostics -t -1 --expectSignal 0 {datacard} -n fitDiagnostics_{name}_bkgOnly -m 125 --robustHesse 1 --robustFit=1 --rMin -20 --rMax 20 #--plots
echo python ../../../../HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics_{name}_bkgOnly.root -g fitDiagnostics_{name}_bkgOnly_plots.root
combine -M FitDiagnostics -t -1 --expectSignal 0 {datacard} -n _{name}_bkgOnly -m 125 --robustHesse 1 --robustFit=1 --rMin -20 --rMax 20 #--plots
python ../../../../HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics_{name}_bkgOnly.root -g fitDiagnostics_{name}_bkgOnly_plots.root --skipFitS > fitDiagnostics_{name}_bkgOnly.log
python ../../printPulls.py fitDiagnostics_{name}_bkgOnly_plots.root
combine -M FitDiagnostics -t -1 --expectSignal 1 {datacard} -n _{name}_bkgPlusSig -m 125 --robustHesse 1 --robustFit=1 --rMin -20 --rMax 20 #--plots
python ../../../../HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics_{name}_bkgPlusSig.root -g fitDiagnostics_{name}_bkgPlusSig_plots.root --skipFitB > fitDiagnostics_{name}_bkgPlusSig.log
python ../../printPulls.py fitDiagnostics_{name}_bkgPlusSig_plots.root

#print NLL for check
combineTool.py -M FastScan -w {name}_combine_workspace.root:w -o {name}_nll
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1))
        script_file = os.path.join(output_dir,
                                   output_prefix + '_run_closureChecks.sh')
        with open(script_file, 'w') as f:
            f.write(script)

        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)

        # Write small script for impacts
        script = """#! /bin/bash

# Run impacts
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --doInitialFit --robustFit=1 --robustHesse 1 --rMin -20 --rMax 20 -t -1
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --robustFit=1 --robustHesse 1 --doFits --rMin -20 --rMax 20 -t -1 --parallel 32
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 -o {name}_expected_impacts.json --rMin -20 --rMax 20 -t -1
plotImpacts.py -i {name}_expected_impacts.json -o {name}_expected_impacts --per-page 50

combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --doInitialFit --robustFit=1 --robustHesse 1 --rMin -20 --rMax 20
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 --robustFit=1 --doFits --robustHesse 1 --rMin -20 --rMax 20 --parallel 32
combineTool.py -M Impacts -d {name}_combine_workspace.root -m 125 -o {name}_impacts.json --rMin -20 --rMax 20
plotImpacts.py -i {name}_impacts.json -o {name}_impacts --per-page 50
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1))
        script_file = os.path.join(output_dir,
                                   output_prefix + '_run_impacts.sh')
        with open(script_file, 'w') as f:
            f.write(script)

        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)

        # Write small script for postfit shapes
        script = """#! /bin/bash

# Run postfit
echo combine -M FitDiagnostics {datacard} -n _{name}_postfit --saveNormalizations --saveShapes --saveWithUncertainties --preFitValue 0 --rMin -20 --rMax 20 --robustHesse 1 --robustFit=1 -v 1
combine -M FitDiagnostics {datacard} -n _{name}_postfit --saveNormalizations --saveShapes --saveWithUncertainties --preFitValue 0 --rMin -20 --rMax 20 --robustHesse 1 --robustFit=1 -v 1 #--plots
PostFitShapesFromWorkspace -w {name}_combine_workspace.root -d {datacard} -o postfit_shapes_{name}.root -f fitDiagnostics_{name}_postfit.root:fit_b --postfit --sampling
python ../../convertPostfitShapesForPlotIt.py -i postfit_shapes_{name}.root
$CMSSW_BASE/src/UserCode/HEPToolsFCNC/plotIt/plotIt -o postfit_shapes_{name}_forPlotIt ../../postfit_plotIt_config_{coupling}_{year}.yml -y
$CMSSW_BASE/src/UserCode/HEPToolsFCNC/plotIt/plotIt -o postfit_shapes_{name}_forPlotIt ../../postfit_plotIt_config_{coupling}_{year}_qcd.yml -y
""".format(workspace_root=workspace_file,
           datacard=os.path.basename(datacard),
           name=output_prefix,
           fake_mass=fake_mass,
           systematics=(0 if options.nosys else 1),
           coupling=("Hut" if "Hut" in output_prefix else "Hct"),
           year=options.dataYear)
        script_file = os.path.join(output_dir,
                                   output_prefix + '_run_postfit.sh')
        with open(script_file, 'w') as f:
            f.write(script)

        st = os.stat(script_file)
        os.chmod(script_file, st.st_mode | stat.S_IEXEC)
예제 #7
0
def  writeCard(input,theLambda,select,region=-1) :
	print "writing cards"
	variables =[]
	if opt.isResonant : variables.append('HHKin_mass_raw')
	else : variables.append('MT2')

	#out_dir = opt.outDir
	theOutputDir = "{0}{1}{2}".format(theLambda,select,variables[0])
	dname = "_"+opt.channel+opt.outDir
	out_dir = "cards{1}/{0}/".format(theOutputDir,dname)
	print out_dir
	#in_dir = "/grid_mnt/vol__vol_U__u/llr/cms/ortona/diHiggs/CMSSW_7_4_7/src/KLUBAnalysis/combiner/cards_MuTauprova/HHSM2b0jMcutBDTMT2/";
	cmb1 = ch.CombineHarvester()
	cmb1.SetFlag('workspaces-use-clone', True)

	cmd = "mkdir -p {0}".format(out_dir)
	print cmd
	regionName = ["","regB","regC","regD"]
	regionSuffix = ["SR","SStight","OSinviso","SSinviso"]
	status, output = commands.getstatusoutput(cmd)   
	#outFile = opt.outDir+"/chCard{0}{2}_{1}_{3}.txt".format(theLambda,opt.channel,regionName[region+1],select)
	thechannel = "1"
	if opt.channel == "MuTau" : thechannel="2"
	elif opt.channel == "TauTau" : thechannel = "3"

	if "0b0j" in select : theCat = "0"
	if "2b0j" in select : theCat = "2"
	elif "1b1j" in select : theCat = "1"
	elif "boosted" in select : theCat = "3"

	outFile = "hh_{0}_C{1}_L{2}_13TeV.txt".format(thechannel,theCat,theLambda)
	file = open( "temp.txt", "wb")


	#read config
	categories = []
	#for icat in range(len(input.selections)) :
	#	categories.append((icat, input.selections[icat]))
	categories.append((0,select))
	backgrounds=[]
	MCbackgrounds=[]
	processes=[]
	processes.append(lambdaName)
	inRoot = TFile.Open(opt.filename)
	for bkg in input.background:
		#Add protection against empty processes => If I remove this I could build all bins at once instead of looping on the selections
		templateName = "{0}_{1}_SR_{2}".format(bkg,select,variables[0])
		print templateName
		template = inRoot.Get(templateName)
		if template.Integral()>0.000001 :
			backgrounds.append(bkg)
			processes.append(bkg)
			if bkg is not "QCD" :
				MCbackgrounds.append(bkg)

	#print backgrounds
	allQCD = False
	allQCDs = [0,0,0,0]
	for regionsuff in range(len(regionSuffix)) :
		for ichan in range(len(backgrounds)):
			if "QCD" in backgrounds[ichan] :
				fname = "data_obs"
				if regionSuffix[regionsuff] == "SR" :
					fname="QCD"
				templateName = "{0}_{1}_{3}_{2}".format(fname,select,variables[0],regionSuffix[regionsuff])
				template = inRoot.Get(templateName)
				#allQCDs.append(template.Integral())
				allQCDs[regionsuff]= allQCDs[regionsuff]+template.Integral()
				iQCD = ichan
			elif regionSuffix[regionsuff] is not "SR" :
				templateName = "{0}_{1}_{3}_{2}".format(backgrounds[ichan],select,variables[0],regionSuffix[regionsuff])
				template = inRoot.Get(templateName)
				allQCDs[regionsuff] = allQCDs[regionsuff] - template.Integral()

	if allQCDs[0]>0 and allQCDs[1]>0 and allQCDs[2]>0 and allQCDs[3]>0 : allQCD = True
	for i in range(4) : print allQCDs[i]
	#add processes to CH
	#masses->125 
	#analyses->Res/non-Res(HHKin_fit,MT2)
	#eras->13TeV 
	#channels->mutau/tautau/etau 
	#bin->bjet categories
	#print signals, signals[0]
	cmb1.AddObservations([theLambda.replace(lambdaName,"")], variables, ['13TeV'], [opt.channel], categories)
	cmb1.AddProcesses([theLambda.replace(lambdaName,"")], variables, ['13TeV'], [opt.channel], backgrounds, categories, False)
	cmb1.AddProcesses([theLambda.replace(lambdaName,"")], variables, ['13TeV'], [opt.channel], [lambdaName], categories, True) #signals[0]

	if region < 0 :

		#Systematics (I need to add by hand the shape ones)
		#potrei sostituire theLambda con "signal"
		#syst = systReader("../config/systematics.cfg",[theLambda],backgrounds,file)
		syst = systReader("../config/systematics.cfg",[lambdaName],backgrounds,file)
		syst.writeOutput(False)
		syst.verbose(True)
		if(opt.channel == "TauTau" ): 
			syst.addSystFile("../config/systematics_tautau.cfg")
		elif(opt.channel == "MuTau" ): 
			syst.addSystFile("../config/systematics_mutau.cfg")
			#if(opt.isResonant):
			#	syst.addSystFile("../config/systematics_resonant.cfg")
			#else : syst.addSystFile("../config/systematics_nonresonant.cfg")
		elif(opt.channel == "ETau" ): 
			syst.addSystFile("../config/systematics_etau.cfg")
			#if(opt.isResonant):
			#	syst.addSystFile("../config/systematics_resonant.cfg")
			#else : syst.addSystFile("../config/systematics_nonresonant.cfg")
		if opt.theory : syst.addSystFile("../config/syst_th.cfg")
		syst.writeSystematics()

		for isy in range(len(syst.SystNames)) :
			if "CMS_scale_t" in syst.SystNames[isy] or "CMS_scale_j" in syst.SystNames[isy]: continue
			for iproc in range(len(syst.SystProcesses[isy])) :
				if "/" in syst.SystValues[isy][iproc] :
					f = syst.SystValues[isy][iproc].split("/")
					systVal = (float(f[0]),float(f[1]))
				else :
					systVal = float(syst.SystValues[isy][iproc])
				#print isy, iproc, systVal
				print "adding Syst",systVal,syst.SystNames[isy],syst.SystTypes[isy],"to",syst.SystProcesses[isy][iproc]
				cmb1.cp().process([syst.SystProcesses[isy][iproc]]).AddSyst(cmb1, syst.SystNames[isy],syst.SystTypes[isy],ch.SystMap('channel','bin_id')([opt.channel],[0],systVal))
		if opt.shapeUnc > 0:
			jesproc = MCbackgrounds
			jesproc.append(lambdaName)
			if "1b1j" in select and opt.channel == "TauTau" : jesproc.remove("DY0b")
			cmb1.cp().process(jesproc).AddSyst(cmb1, "CMS_scale_j_13TeV","shape",ch.SystMap('channel','bin_id')([opt.channel],[0],1.000))
			cmb1.cp().process(jesproc).AddSyst(cmb1, "CMS_scale_t_13TeV","shape",ch.SystMap('channel','bin_id')([opt.channel],[0],1.000))
			cmb1.cp().process(["TT"]).AddSyst(cmb1, "top","shape",ch.SystMap('channel','bin_id')([opt.channel],[0],1.000))

	    #	$BIN        --> proc.bin()
	    #	$PROCESS    --> proc.process()
	    #	$MASS       --> proc.mass()
	    #	$SYSTEMATIC --> syst.name()
#		cmb1.cp().ExtractShapes(
#			opt.filename,
#			"$PROCESS_$BIN_{1}_{0}".format(variables[0],regionSuffix[region+1]),
#			"$PROCESS_$BIN_{1}_{0}_$SYSTEMATIC".format(variables[0],regionSuffix[region+1]))
		cmb1.cp().backgrounds().ExtractShapes(
			opt.filename,
			"$PROCESS_$BIN_{1}_{0}".format(variables[0],regionSuffix[region+1]),
			"$PROCESS_$BIN_{1}_{0}_$SYSTEMATIC".format(variables[0],regionSuffix[region+1]))
		cmb1.cp().signals().ExtractShapes(
			opt.filename,
			"$PROCESS$MASS_$BIN_{1}_{0}".format(variables[0],regionSuffix[region+1]),
			"$PROCESS$MASS_$BIN_{1}_{0}_$SYSTEMATIC".format(variables[0],regionSuffix[region+1]))

		bbb = ch.BinByBinFactory()
		bbb.SetAddThreshold(0.1).SetMergeThreshold(0.5).SetFixNorm(True)
		bbbQCD = ch.BinByBinFactory()
		bbbQCD.SetAddThreshold(0.0).SetMergeThreshold(0.5).SetFixNorm(True)
		if opt.binbybin : 
			bbb.MergeBinErrors(cmb1.cp().process(MCbackgrounds))
			bbbQCD.MergeBinErrors(cmb1.cp().process(["QCD"]))
			bbbQCD.AddBinByBin(cmb1.cp().process(["QCD"]), cmb1)
			bbb.AddBinByBin(cmb1.cp().process(MCbackgrounds), cmb1)
		#cmb1.cp().PrintProcs().PrintSysts()

		#outroot = TFile.Open(opt.outDir+"/chCard{0}{2}_{1}_{3}.input.root".format(theLambda,opt.channel,regionName[region+1],select),"RECREATE")
		#outtxt = "hh_{0}_C{1}_L{2}_13TeV.txt".format(theChannel,theCat,theHHLambda)
		outroot = TFile.Open(out_dir+"hh_{0}_C{1}_L{2}_13TeV.input.root".format(thechannel,theCat,theLambda),"RECREATE")
		cmb1.WriteDatacard(out_dir+outFile,out_dir+"hh_{0}_C{1}_L{2}_13TeV.input.root".format(thechannel,theCat,theLambda))
		if allQCD :
			file = open( out_dir+outFile, "a")	
			file.write("alpha rateParam {0} QCD (@0*@1/@2) QCD_regB,QCD_regC,QCD_regD".format(select))
	elif allQCD :
		#print thechannel,theCat,theLambda #,regionName2[region+1]
		#outFile = "hh_{0}_C{1}_L{2}_13TeV.txt".format(thechannel,theCat,theLambda)
		#print region, allQCD
		#print regionName2[region+1]
		#print outFile
		#print "hh_"+thechannel#+"_C"+theCat+"_L"+theLambda+"_13TeV_"+regionName[region+1]+".txt"
		#print "hh_"+thechannel+"_C"+theCat#+"_L"+theLambda+"_13TeV_"+regionName[region+1]+".txt"
		#print "hh_"+thechannel+"_C"+theCat+"_L"+theLambda#+"_13TeV_"+regionName[region+1]+".txt"
		#print "hh_"+thechannel+"_C"+theCat+"_L"+theLambda+"_13TeV_"#+regionName[region+1]+".txt"
		#print outFile
		outFile = "hh_{0}_C{1}_L{2}_13TeV_{3}.txt".format(thechannel,theCat,theLambda,regionName[region+1])
		file = open( out_dir+outFile, "wb")

		file.write("imax 1\n")
		file.write("jmax {0}\n".format(len(backgrounds)-1))
		file.write("kmax *\n")

		file.write("------------\n")
		file.write("shapes * * FAKE\n".format(opt.channel,regionName[region+1]))
		file.write("------------\n")

		templateName = "data_obs_{1}_{3}_{2}".format(bkg,select,variables[0],regionSuffix[region+1])
		template = inRoot.Get(templateName)        
		file.write("bin {0} \n".format(select))
		obs = template.GetEntries()
		file.write("observation {0} \n".format(obs))

		file.write("------------\n")

		file.write("bin ")        
		for chan in backgrounds:
			file.write("{0} ".format(select))
		file.write("\n")      

		file.write("process ")
		for chan in backgrounds:
			file.write("{0} ".format(chan))
		#file.write("QCD ")
		file.write("\n")

		file.write("process ")
		for chan in range(len(backgrounds)): #+1 for the QCD
			file.write("{0} ".format(chan+1))
		file.write("\n")

		file.write("rate ")
		rates = []
		iQCD = -1
		totRate = 0
		for ichan in range(len(backgrounds)):
			if "QCD" in backgrounds[ichan] :
				rates.append(-1)
				iQCD = ichan
			else :
				templateName = "{0}_{1}_{3}_{2}".format(backgrounds[ichan],select,variables[0],regionSuffix[region+1])
				template = inRoot.Get(templateName)
				#print templateName
				brate = template.Integral()
				rates.append(brate)
				totRate = totRate + brate
		if iQCD >= 0 : rates[iQCD] = TMath.Max(0.0000001,obs-totRate)
		for ichan in range(len(backgrounds)):
			file.write("{0:.4f} ".format(rates[ichan]))
		file.write("\n")
		file.write("------------\n")
		file.write("QCD_{0} rateParam  {1} QCD 1 \n".format(regionName[region+1],select))
예제 #8
0
def prepareShapesAndCards(options):
    """Build the ttbb full-hadronic datacard, shapes file and fit helper scripts.

    Workflow (all driven by the `options` namespace):
      1. choose the category layout from options.fit_mode ('shape_CR1' or 'abcd'),
      2. process the input shapes into `processed_shapes.root`,
      3. register observations/processes (incl. one QCD "delta" process per bin),
      4. attach rate and shape systematics (never on QCD),
      5. encode the QCD estimate via rateParam lines appended to the datacard,
      6. write the datacard + shapes and a set of executable shell scripts
         (fit, NLL scans, impacts) into options.output.

    Relies on module-level names: ch, defs, utils, os, stat, addRateSystematics.
    """

    cb = ch.CombineHarvester()

    # Category (bin_id, name) pairs depend on the chosen QCD estimation mode
    if options.fit_mode == 'shape_CR1':
        cats = [
            (1, 'SR'),
            (2, 'CR1')
        ]
        print('-- QCD estimation: fit bin-by-bin by assuming shape in CR1 and SR is the same --')

    elif options.fit_mode == 'abcd':
        cats = [
            (1, 'SR'),
            (2, 'CR1'),
            (3, 'VR'),
            (4, 'CR2'),
        ]
        print('-- QCD etimation: bin-by-bin ABCD using the four regions --')

    # object to handle the factorisation of uncertainties among ttbar components
    factTheory = defs.FactorisedTheory(options.fact_theory)
    
    # factorise shape uncertainties for ttbar components
    theory_shape_systs = []
    for procs, syst in defs.theory_shape_systs:
        for newProcs,newSyst in factTheory.getGrouping(procs, syst):
            theory_shape_systs.append((newProcs, newSyst))

    if options.randomise:
        print("-- Will randomise MC predictions according to MC stat uncertainties!")
    
    # Process shapes; returns per-bin QCD ratios in the VR (one entry per bin)
    processed_shapes = os.path.join(options.output, 'processed_shapes.root')
    QCD_VR_ratios = utils.extractShapes(options.input, processed_shapes, defs.tt_bkg + defs.other_bkg, defs.sig_processes, options.data, fact_theory=factTheory, equal_bins=options.equal_bins, sub_folder=options.sub_folder, randomise=options.randomise, rebinSB=options.rebinsb)
    Nbins = len(QCD_VR_ratios)

    cb.AddObservations(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], cats)
    
    cb.AddProcesses(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], defs.sig_processes, cats, True)

    cb.AddProcesses(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], defs.tt_bkg + defs.other_bkg, cats, False)

    ### QCD estimate: add all "delta" templates
    QCD_processes = [ 'QCD_bin_{}'.format(i+1) for i in range(Nbins) ]
    cb.AddProcesses(['*'], ['ttbb'], ['13TeV_2016'], ['FH'], QCD_processes, cats, False)


    ### Systematics
    # Track what was added so nuisance groups can be defined at the end
    added_theory_systs = []
    added_exp_systs = []

    # Modeling systematics, not on QCD! ###
    # (second argument False inverts the regex selection, i.e. keep non-QCD)
    cbWithoutQCD = cb.cp().process_rgx(['QCD.*'], False)
    
    # Theory rate uncertainties from the JSON file
    if options.rate_systs is not None:
        for json_file in options.rate_systs:
            added_theory_systs += addRateSystematics(cb, json_file, options.sub_folder, factTheory)
    
    # Experimental rate uncertainties from the JSON file
    if options.exp_rate is not None:
        for json_file in options.exp_rate:
            added_exp_systs += addRateSystematics(cb, json_file, options.sub_folder)

    # Luminosity
    cbWithoutQCD.AddSyst(cb, 'lumi_$ERA', 'lnN', ch.SystMap('era')(['13TeV_2016'], defs.getLumiUncertainty('13TeV_2016')))
    added_exp_systs.append('lumi_13TeV_2016')

    # Experimental systematics, common for all processes and categories
    for s in defs.exp_systs:
        # If we have added it already as a rate systematics, skip it!
        if s not in added_exp_systs:
            added_exp_systs.append(s)
            cbWithoutQCD.AddSyst(cb, s, 'shape', ch.SystMap()(1.))

    # Theory shape systematics
    for syst in theory_shape_systs:
        if syst[1] not in added_theory_systs:
            added_theory_systs.append(syst[1])
            cbWithoutQCD.cp().process(syst[0]).AddSyst(cb, syst[1], 'shape', ch.SystMap()(1.))
    
    # Theory rate systematics (not taken from JSON)
    for name,syst in defs.theory_rate_systs.items():
        if not name in added_theory_systs:
            added_theory_systs.append(name)
            cbWithoutQCD.AddSyst(cb, name, syst[0], syst[1])

        
    ### QCD systematics: add a lnN for each bin using the ratio QCD_subtr/QCD_est in the VR
    if options.QCD_systs:
        print('-- Will apply bin-by-bin uncertainties on QCD estimate from ratio in VR --')
        if options.fit_mode == 'shape_CR1':
            for i in range(1, Nbins+1):
                # lnN = 1 + abs(1 - QCD_VR_ratios[i-1])
                # Symmetrise the ratio so the lnN is always >= 1
                ratio = QCD_VR_ratios[i-1]
                lnN = ratio if ratio > 1 else 1./ratio
                cb.cp().bin(['SR']).process(['QCD_bin_{}'.format(i)]).AddSyst(cb, 'QCD_shape_bin_{}'.format(i), 'lnN', ch.SystMap()(lnN))
        elif options.fit_mode == 'abcd':
            # using max
            # QCD_VR_ratios = [1.1047956681135658, 1.104982852935791, 1.0103355569221637, 1.0365746040205628, 1.027778957040471, 1.1635257239763037, 1.0604289770126343, 1.0326651334762573, 1.0882024148481384, 1.0879310369491577, 1.2372238755691953, 1.1039656400680542, 1.1208300590515137, 1.1252394914627075, 1.0652162084238805, 1.1746360299507677, 1.1441897907967598, 1.032749056816101, 1.1105864995541361, 1.264707088470459, 1.1289979219436646, 1.1032386479572462, 1.3740112781524658, 1.0779788494110107, 1.0679041983173836, 1.1521316766738892, 1.0189466861549783, 1.1371627554677426, 1.180934637513623, 1.0807719230651855, 1.1220710277557373, 1.2163840919860773, 1.1803903579711914, 1.1331188470149183, 1.2841500043869019, 1.124382576013972, 1.2853591442108154, 1.1161022064238948, 1.0491153764429137, 1.3020191192626953, 1.6365387568006153, 1.3135310411453247, 1.183979775003691, 1.3237843031833378, 1.105936050415039, 1.4582525497144114, 1.2740960121154785, 1.1744883060455322, 1.2689180716203021, 1.5666807889938354, 1.1884409189224243, 1.6787212785213594, 1.1295689911887752, 1.2143068313598633, 1.144478440284729]
            # using geometric average
            # NOTE(review): this hard-coded list shadows the values returned by
            # utils.extractShapes above, and the lnN below is then fixed at
            # 1.05 anyway -- confirm which prescription is intended.
            QCD_VR_ratios = [1.0556093647141687, 1.0658984862062695, 1.0057472468756388, 1.0208612636340562, 1.0185833946498413, 1.1211169739938442, 1.0353973123690785, 1.0258664065695766, 1.0586147959018684, 1.0522305760086619, 1.1354690006073973, 1.072695547895069, 1.0799492240063984, 1.0621373200388462, 1.0593700756267987, 1.1529412209016232, 1.122536304991689, 1.0187320772559685, 1.0972767308832914, 1.175709681780302, 1.0832093989858067, 1.0823283151259013, 1.1831016555993352, 1.054608634579664, 1.0599488955753065, 1.0752925245754967, 1.017269399510584, 1.122209514629158, 1.1702168450787551, 1.0695165830450506, 1.0857979999559528, 1.2041393773004465, 1.1151294041413826, 1.1230274391829085, 1.2502545040629076, 1.1070056845911258, 1.139110776895264, 1.082765772887927, 1.0487710649869804, 1.2332536614187524, 1.4655095128617284, 1.19038044691305, 1.1104215756611893, 1.1838495100927606, 1.0880046566588846, 1.4004062409319984, 1.248629899444753, 1.1411489734003788, 1.1805956668619682, 1.4378115712379096, 1.129952938278906, 1.3437817991926544, 1.0912141233598036, 1.1453139866153634, 1.1135893789689448]
            for i in range(1, Nbins+1):
                lnN = 1.05
                # lnN = QCD_VR_ratios[i-1]
                cb.cp().bin(['SR']).process(['QCD_bin_{}'.format(i)]).AddSyst(cb, 'QCD_shape_bin_{}'.format(i), 'lnN', ch.SystMap()(lnN))
            # cb.cp().process(['ttlf']).AddSyst(cb, 'ttlf_norm', 'lnN', ch.SystMap()(1.2))


    # Extra datacard lines implementing the QCD estimate via rateParams
    extraStrForQCD = ''
    # To define nuisance group with all QCD parameters
    paramListQCD = []
    
    if options.fit_mode == 'shape_CR1':
        ### QCD estimate: fit shape from CR1, normalisation floating
        extraStrForQCD += 'scale_ratio_QCD_CR1_SR extArg 1. [0.,2.]\n'
        paramListQCD.append('scale_ratio_QCD_CR1_SR')
        
        for i in range(1, Nbins+1):
            extraStrForQCD += 'yield_QCD_SR_bin_{0} rateParam SR QCD_bin_{0} 1. [0.,2.]\n'.format(i)
            paramListQCD.append('yield_QCD_SR_bin_{}'.format(i))
    
        for i in range(1, Nbins+1):
            extraStrForQCD += 'yield_QCD_CR1_bin_{0} rateParam CR1 QCD_bin_{0} (@0*@1) scale_ratio_QCD_CR1_SR,yield_QCD_SR_bin_{0}\n'.format(i)
        
        if options.QCD_systs:
            for i in range(1, Nbins+1):
                paramListQCD.append('QCD_shape_bin_{}'.format(i))
    
    elif options.fit_mode == 'abcd':
        ### QCD estimate: add the rate params for each bin in the CR1, CR2 and VR
        ### The yield in the SR is then expressed as CR1*VR/CR2
        for i in range(1, Nbins+1):
            extraStrForQCD += 'yield_QCD_CR1_bin_{0} rateParam CR1 QCD_bin_{0} 1. [0.,5.]\n'.format(i)
            extraStrForQCD += 'yield_QCD_CR2_bin_{0} rateParam CR2 QCD_bin_{0} 1. [0.,5.]\n'.format(i)
            extraStrForQCD += 'yield_QCD_VR_bin_{0} rateParam VR QCD_bin_{0} 1. [0.,5.]\n'.format(i)
        
            extraStrForQCD += 'yield_QCD_SR_bin_{0} rateParam SR QCD_bin_{0} (@0*@1/@2) yield_QCD_VR_bin_{0},yield_QCD_CR1_bin_{0},yield_QCD_CR2_bin_{0}\n'.format(i)
            
            paramListQCD.append('yield_QCD_CR1_bin_{}'.format(i))
            paramListQCD.append('yield_QCD_CR2_bin_{}'.format(i))
            paramListQCD.append('yield_QCD_VR_bin_{}'.format(i))
    
    cb.AddDatacardLineAtEnd(extraStrForQCD)

    # Define systematic groups
    syst_groups = {
            "theory": added_theory_systs,
            "exp": added_exp_systs,
            "QCD": paramListQCD,
            "extern": defs.externalised_nuisances,
        }

    def getNuisanceGroupString(groups):
        # Render each group as a "<name> group = <syst> <syst> ..." line
        m_str = ""
        for g in groups:
            m_str += g + ' group = '
            for sys in groups[g]:
                m_str += sys + ' '
            m_str += '\n'
        return m_str

    cb.AddDatacardLineAtEnd(getNuisanceGroupString(syst_groups))

    cb.cp().ExtractShapes(processed_shapes, '$BIN/$PROCESS', '$BIN/$PROCESS_$SYSTEMATIC')

    if options.bbb:
        print('-- Will add bin-by-bin uncertainties for MC statistics --')
        # MC statistics - has to be done after the shapes have been extracted!
        # bbb_bkg = ch.BinByBinFactory().SetVerbosity(5)
        # bbb_bkg.SetAddThreshold(0.02).SetMergeThreshold(0.5).SetFixNorm(False)
        # bbb_bkg.MergeBinErrors(cbWithoutQCD.cp().backgrounds())
        # bbb_bkg.AddBinByBin(cbWithoutQCD.cp().backgrounds(), cb)

        # bbb_sig = ch.BinByBinFactory().SetVerbosity(5).SetFixNorm(False)
        # bbb_sig.MergeBinErrors(cbWithoutQCD.cp().signals())
        # bbb_sig.AddBinByBin(cbWithoutQCD.cp().signals(), cb)
        
        # bbb = ch.BinByBinFactory().SetVerbosity(5)
        # bbb.SetAddThreshold(0.).SetMergeThreshold(1.).SetFixNorm(False).SetPoissonErrors(True)
        # bbb.MergeBinErrors(cb.cp())
        # bbb.AddBinByBin(cb.cp(), cb)
        
        # bbb_sig = ch.BinByBinFactory().SetVerbosity(5)
        # bbb_sig.SetAddThreshold(0.).SetMergeThreshold(1.).SetFixNorm(False)
        # bbb_sig.MergeBinErrors(cb.cp().signals())
        # bbb_sig.AddBinByBin(cb.cp().signals(), cb)
        # bbb_bkg = ch.BinByBinFactory().SetVerbosity(5)
        # bbb_bkg.SetAddThreshold(0.).SetMergeThreshold(1.).SetFixNorm(False)
        # bbb_bkg.MergeBinErrors(cb.cp().backgrounds())
        # bbb_bkg.AddBinByBin(cb.cp().backgrounds(), cb)

        # Use combine internal BBB (default: BB lite, merging everything for sig & bkg separately?)
        cb.AddDatacardLineAtEnd("* autoMCStats 5000 0 1\n")
        # cb.AddDatacardLineAtEnd("* autoMCStats 10000000 0 1\n")
    
    output_dir = options.output

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datacard = os.path.join(output_dir, 'datacard.dat')
    output_shapes = os.path.join(output_dir, 'shapes.root')
        
    cb.WriteDatacard(datacard, output_shapes)

    # Common preamble for all generated shell scripts: build the workspace
    # once and export the shared fit options.
    initWorkSpace = """
#!/bin/bash

if [[ ! -f workspace.root ]]; then
    text2workspace.py datacard.dat -o workspace.root
fi

RMIN=0.
RMAX=5.0
NPOINTS=50
export FIT_OPT=( --freezeNuisanceGroups=extern --cminDefaultMinimizerStrategy 0 --X-rtd MINIMIZER_MaxCalls=999999999 --X-rtd MINIMIZER_analytic --robustFit 1 --cminDefaultMinimizerPrecision 1E-12 )
"""
    if options.data:
        print("-- WILL USE REAL DATA IN SR")
        initWorkSpace += 'export TOY=""\n'
    else:
        print("-- Will use Asimov toy")
        initWorkSpace += 'export TOY="-t -1"\n'

    def createScript(content, filename):
        # Write preamble + content into output_dir/filename and mark executable
        script_path = os.path.join(output_dir, filename)
        with open(script_path, 'w') as f:
            f.write(initWorkSpace)
            f.write(content)
        # make script executable
        st = os.stat(script_path)
        os.chmod(script_path, st.st_mode | stat.S_IEXEC)

    # Script: simple fit, fit diagnostics, postfit plots, frequentist toys, goodness of fit
    script = """
combine -M MultiDimFit -d workspace.root --rMin $RMIN --rMax $RMAX --expectSignal=1 ${TOY} --saveWorkspace --algo singles --setCrossingTolerance 1E-7 "${FIT_OPT[@]}" 2>&1 | tee fit.log
#combine -M FitDiagnostics -d workspace.root --rMin $RMIN --rMax $RMAX ${TOY} --expectSignal=1 --skipBOnlyFit --saveShapes --saveNormalizations --saveWithUncertainties --plots "${FIT_OPT[@]}" 2>&1 | tee fitDiag.log
#combine -M FitDiagnostics -d workspace.root --rMin $RMIN --rMax $RMAX ${TOY} --expectSignal=1 --skipBOnlyFit --robustHesse 1 "${FIT_OPT[@]}" 2>&1 | tee fitDiag.log
#../plotCovariance.py

# frequentist toys
#combine -M MultiDimFit -d workspace.root --rMin $RMIN --rMax $RMAX --expectSignal $1 -t 1000 -n _freq_$1 --toysFrequentist "${FIT_OPT[@]}" -s -1 > freq_$1.log
#combine -M MultiDimFit -d workspace.root --rMin $RMIN --rMax $RMAX --expectSignal $1 -t 1000 -n _freqNoSyst_$1 --toysNoSystematics "${FIT_OPT[@]}" -s -1 > freqNoSyst_$1.log

# Goodness of fit
#combine -M GoodnessOfFit workspace.root --algo=saturated "${FIT_OPT[@]}" 
#parallel --gnu -j 5 -n0 combine -M GoodnessOfFit workspace.root --algo=saturated "${FIT_OPT[@]}" -t 100 -s -1 --toysFreq ::: {1..10}
#hadd higgsCombineTest.GoodnessOfFit.mH120.toys.root higgsCombineTest.GoodnessOfFit.mH120.*.root

# Postfit plots
#PostFitShapesFromWorkspace -d datacard.dat -w workspace.root -o postfit_shapes.root -m 120 -f fitDiagnostics.root:fit_s --postfit --sampling --print 2>&1 | tee postFitRates.log
"""
    createScript(script, 'do_fit.sh')


    # Script: plots of NLL vs. r for different uncertainties
    script = """
RMIN=0.5
RMAX=3.
combine -M MultiDimFit --algo grid --points $NPOINTS --rMin $RMIN --rMax $RMAX ${TOY} --expectSignal=1 -n _nominal workspace.root "${FIT_OPT[@]}"
# stat-only: get best-fit parameters and dataset from saved workspace (so that it also works for post-fit)
combine -M MultiDimFit --algo grid --points $NPOINTS --rMin $RMIN --rMax $RMAX -n _stat -S 0 --snapshotName "MultiDimFit" -d higgsCombineTest.MultiDimFit.mH120.root "${FIT_OPT[@]}"

plot1DScan.py higgsCombine_nominal.MultiDimFit.mH120.root --others 'higgsCombine_stat.MultiDimFit.mH120.root:Freeze all:2' --breakdown syst,stat
    """
    createScript(script, 'do_DeltaNLL_plot.sh')


    # Script: impacts signal injected
    script = """
folder=impacts
mkdir ${folder}
pushd ${folder}

combineTool.py -M Impacts -d ../workspace.root ${TOY} -m 120 --rMin $RMIN --rMax $RMAX --expectSignal=1 --doInitialFit "${FIT_OPT[@]}"
combineTool.py -M Impacts -d ../workspace.root ${TOY} -m 120 --rMin $RMIN --rMax $RMAX --expectSignal=1 --doFits --parallel 6 "${FIT_OPT[@]}" --setParameterRanges CMS_qg_Weight=-2,2 --cminPreScan
combineTool.py -M Impacts -d ../workspace.root -m 120 -o impacts.json
plotImpacts.py -i impacts.json -o impacts
plotImpacts.py -i impacts.json -o impacts_qcd --groups QCD
plotImpacts.py -i impacts.json -o impacts_no_qcd --groups '!QCD' '!extern'

popd
    """
    createScript(script, 'do_impacts.sh')


    # Script: plots of NLL vs. nuisance parameters
    script = """
function scan_param() {{
    combine -M MultiDimFit --algo grid --points 20 -n _$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3:$1={scan} -P $1 "${{FIT_OPT[@]}}" --floatOtherPOIs 1
    plot1DScan.py higgsCombine_$1.MultiDimFit.mH120.root --output scan_$1 --POI $1
 
    combine -M MultiDimFit --algo grid --points 20 -n _freeze_$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3:$1={scan} -P $1 "${{FIT_OPT[@]}}" -S 0 --floatOtherPOIs 1
    plot1DScan.py higgsCombine_freeze_$1.MultiDimFit.mH120.root --output scan_freeze_$1 --POI $1
    
    combine -M MultiDimFit --algo grid --points 20 -n _freezeQCD_$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3:$1={scan} -P $1 --freezeNuisanceGroups extern,QCD --floatOtherPOIs 1
    plot1DScan.py higgsCombine_freezeQCD_$1.MultiDimFit.mH120.root --output scan_freezeQCD_$1 --POI $1
}}
export -f scan_param # needed for parallel

mkdir scans
pushd scans
SHELL=/bin/bash parallel --gnu -j 6 scan_param ::: {params}
popd
"""
    createScript(script.format(scan="-2,2", params=" ".join(syst_groups['exp'])), 'do_exp_scans.sh')
    createScript(script.format(scan="-2,2", params=" ".join(syst_groups['theory'])), 'do_theory_scans.sh')
    
    script = """
function scan_param() {{
    combine -M MultiDimFit --algo grid --points 20 -n _$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3 -P $1 --autoRange 3 "${{FIT_OPT[@]}}" --floatOtherPOIs 1
    plot1DScan.py higgsCombine_$1.MultiDimFit.mH120.root --output scan_$1 --POI $1
    
    combine -M MultiDimFit --algo grid --points 20 -n _freeze_$1 --snapshotName "MultiDimFit" -d ../higgsCombineTest.MultiDimFit.mH120.root --setParameterRanges r=0,3 -P $1 --autoRange 3 "${{FIT_OPT[@]}}" -S 0 --floatOtherPOIs 1
    plot1DScan.py higgsCombine_freeze_$1.MultiDimFit.mH120.root --output scan_freeze_$1 --POI $1
}}
export -f scan_param # needed for parallel

mkdir scans
pushd scans
SHELL=/bin/bash parallel --gnu -j 6 scan_param ::: {params}
popd
"""
    createScript(script.format(params=" ".join(syst_groups['QCD'])), 'do_QCD_scans.sh')
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import ROOT
ROOT.gSystem.Load("libHiggsAnalysisCombinedLimit")
ROOT.gROOT.SetBatch()
import os
import CombineHarvester.CombineTools.ch as ch
import argparse

# Command-line interface: category, process and datacard path are mandatory.
p = argparse.ArgumentParser(
    "Script to print out systematics of a given process in a given category for a given 'combined.txt.cmb' datacard. This does not included additional uncertainties, that could be added by the signal model used (e.g. by MSSM models)"
)
p.add_argument("--category", required=True, help="Category to be inspected")
p.add_argument("--process", required=True, help="Process to be inspected")
p.add_argument("--datacard",
               required=True,
               help="Path to the datacard 'combined.txt.cmb'")

args = p.parse_args()

# Load the combined datacard into a fresh CombineHarvester instance.
cmb_card = ch.CombineHarvester()
cmb_card.SetFlag("workspaces-use-clone", True)
# Mass point hard-coded to "200" -- presumably a dummy; TODO confirm.
cmb_card.ParseDatacard(args.datacard, "", "", "", 0, "200")
# NOTE(review): calling bin()/process() directly on the harvester filters it
# in place; the usual idiom is cmb_card.cp().bin(...). Harmless here because
# the script only prints and exits, but verify before reusing this pattern.
cmb_card.bin([args.category]).cp().PrintProcs()
cmb_card.bin([args.category]).cp().process([args.process]).PrintSysts()
def main():
    """Build ABCD-method datacards, one per signal sample (all years merged).

    Reads a json file of A/B/C/D region yields (path given as the first
    command-line argument), collects per-year signal/background rates for
    each match category, attaches the ABCD rate parameters plus lnN
    systematics, and writes one CombineHarvester datacard per sample under
    'sig_<sample>/datacard.txt'.
    """
    if len(sys.argv) < 2:
        print 'Error! No data filename specified (in json format). Exiting...'
        # Signal failure to the caller; exit(0) would report success here.
        exit(1)

    # lnN systematics (per year, multiplicative uncertainties).
    lumi_syst = {'2016': 1.025, '2017': 1.023, '2018': 1.025}
    trig_MC_syst = {'2016': 1.01, '2017': 1.01, '2018': 1.01}
    trig_data_syst = {'2016': 1.005, '2017': 1.005, '2018': 1.005}
    JES_syst = {'2016': 1.02, '2017': 1.06, '2018': 1.02}
    JER_syst = {'2016': 1.01, '2017': 1.09, '2018': 1.025}
    #EGM_ID_syst = {'2016': 1.02, '2017': 1.03, '2018': 1.03}
    veto_ID_syst = {'2016': 1.005, '2017': 1.005, '2018': 1.005}
    GM_ID_syst = { # split by match category
            '0': {'2016': 1.000, '2017': 1.000, '2018': 1.000}, # just dummy = 1, no GMs
            '1': {'2016': 1.007, '2017': 1.003, '2018': 1.005},
            '2': {'2016': 1.012, '2017': 1.005, '2018': 1.010}
            }
    dSA_ID_prompt_syst = {'2016': 1.006, '2017': 1.007, '2018': 1.006}
    dSA_ID_displ_syst = {'2016': 1.001, '2017': 1.001, '2018': 1.001}
    dSA_reco_prompt_syst = {'2016': 1.004, '2017': 1.003, '2018': 1.003}

    # background ABCD closure syst, split by lifetime hypothesis (mm) and
    # match category
    closure_syst_1mm = {
            '0': {'2016': 1.25, '2017': 1.25, '2018': 1.25},
            '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05},
            '2': {'2016': 1.80, '2017': 1.80, '2018': 1.80}
            }
    closure_syst_10mm = {
            '0': {'2016': 1.25, '2017': 1.25, '2018': 1.25},
            '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05},
            '2': {'2016': 1.80, '2017': 1.80, '2018': 1.80}
            }
    closure_syst_100mm = {
            '0': {'2016': 1.20, '2017': 1.20, '2018': 1.20},
            '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05},
            '2': {'2016': 1.20, '2017': 1.20, '2018': 1.20}
            }
    closure_syst_1000mm = {
            '0': {'2016': 1.10, '2017': 1.10, '2018': 1.10},
            '1': {'2016': 1.05, '2017': 1.05, '2018': 1.05},
            '2': {'2016': 1.20, '2017': 1.20, '2018': 1.20}
            }

    # cut number (as it appears in the histogram name) -> match category
    cat_translator = {u'29': '0', u'30': '1', u'31': '2'}

    with open(sys.argv[1]) as f:
        data = json.load(f)

    # First pass: collect the A/B/C/D yields per sample and cut number.
    tot_yields = {}
    for name,yields in data.iteritems():
        
        sample = name.split('_sig_')[-1]

        cut = [token for token in name.split('_') if 'cut' in token][0]
        if '29' not in cut and '30' not in cut and '31' not in cut:
            continue

        # Cuts 29/30 use the 'vxy_zoom' histograms, cut 31 the
        # 'vxy_zoomzoom' ones; skip every other histogram variant.
        if '29' in cut or '30' in cut:
            if 'vxy_zoom_' not in name:
                continue
        else:
            if 'vxy_zoomzoom_' not in name:
                continue

        print "name ", name

        if yields['A_sig'] == 0.0 and yields['B_sig'] == 0.0 and yields['C_sig'] == 0.0 and yields['D_sig'] == 0.0:
            continue

        if sample not in tot_yields:
            tot_yields[sample] = {}

        cut_num = cut.split('cut')[-1]
        if cut_num not in tot_yields[sample]:
            tot_yields[sample][cut_num] = {}

        tot_yields[sample][cut_num]['A_sig'] = yields["A_sig"]
        tot_yields[sample][cut_num]['B_sig'] = yields["B_sig"]
        tot_yields[sample][cut_num]['C_sig'] = yields["C_sig"]
        tot_yields[sample][cut_num]['D_sig'] = yields["D_sig"]
        # Floor the background yields at 0.1 to avoid zero rates (and
        # divisions by zero in the transfer factors below).
        tot_yields[sample][cut_num]['A_bkg'] = max(yields["A_bkg"], 0.1)
        tot_yields[sample][cut_num]['B_bkg'] = max(yields["B_bkg"], 0.1)
        tot_yields[sample][cut_num]['C_bkg'] = max(yields["C_bkg"], 0.1)
        tot_yields[sample][cut_num]['D_bkg'] = max(yields["D_bkg"], 0.1)
        # ABCD transfer factors: c1 = B/A, c2 = C/A.
        tot_yields[sample][cut_num]['c1'] = tot_yields[sample][cut_num]['B_bkg'] / tot_yields[sample][cut_num]['A_bkg']
        tot_yields[sample][cut_num]['c2'] = tot_yields[sample][cut_num]['C_bkg'] / tot_yields[sample][cut_num]['A_bkg']

    # Transfer factors are taken from the year-combined ('161718') samples
    # and shared between the per-year bins below.
    c1s = {}
    c2s = {}
    for sample in tot_yields:
        if '161718' not in sample: continue
        basename_no_year = sample.split('_161718')[0]
        c1s[basename_no_year] = {}
        c2s[basename_no_year] = {}
        for cut_num,props in tot_yields[sample].items():
            c1s[basename_no_year][cut_num] = props['c1']
            c2s[basename_no_year][cut_num] = props['c2']

    #! [part1]
    # Define four categories labeled A, B, C and D, and
    # set the observed yields in a map.
    cats = OrderedDict()
    obs_rates = OrderedDict()
    sig_rates = OrderedDict()
    systs = OrderedDict()
    names = OrderedDict()
    for sample in tot_yields:
        if '161718' in sample: continue
        if '2016' in sample:
            year = '2016'
        elif '2017' in sample:
            year = '2017'
        elif '2018' in sample:
            year = '2018'
        else:
            # Skip samples whose year cannot be determined; 'year' would
            # otherwise be unbound below.
            continue

        basename_no_year = sample.split('_'+year)[0]

        if basename_no_year not in cats:
            cats[basename_no_year] = []
            obs_rates[basename_no_year] = {}
            sig_rates[basename_no_year] = {}
            systs[basename_no_year] = {}
            names[basename_no_year] = basename_no_year

        for cut in tot_yields[sample]:
            cat_temp = cat_translator[cut] + 'match' + '_' + year
            index = 0 if len(cats[basename_no_year]) == 0 else cats[basename_no_year][-1][0] + 1
            cats[basename_no_year].append((index, 'A_' + cat_temp))
            cats[basename_no_year].append((index, 'B_' + cat_temp))
            cats[basename_no_year].append((index, 'C_' + cat_temp))
            cats[basename_no_year].append((index, 'D_' + cat_temp))
            obs_rates[basename_no_year]['A_' + cat_temp] = tot_yields[sample][cut]['A_bkg']
            obs_rates[basename_no_year]['B_' + cat_temp] = tot_yields[sample][cut]['B_bkg']
            obs_rates[basename_no_year]['C_' + cat_temp] = tot_yields[sample][cut]['C_bkg']
            obs_rates[basename_no_year]['D_' + cat_temp] = tot_yields[sample][cut]['D_bkg']
            sig_rates[basename_no_year]['A_' + cat_temp] = tot_yields[sample][cut]['A_sig']
            sig_rates[basename_no_year]['B_' + cat_temp] = tot_yields[sample][cut]['B_sig']
            sig_rates[basename_no_year]['C_' + cat_temp] = tot_yields[sample][cut]['C_sig']
            sig_rates[basename_no_year]['D_' + cat_temp] = tot_yields[sample][cut]['D_sig']
            systs[basename_no_year]["bkgA_norm_" + cat_temp] = tot_yields[sample][cut]['A_bkg']
            #systs[basename_no_year]["c1_" + cat_temp] = tot_yields[sample][cut]['c1']
            #systs[basename_no_year]["c2_" + cat_temp] = tot_yields[sample][cut]['c2']
            systs[basename_no_year]["c1_" + cat_temp] = c1s[basename_no_year][cut]
            systs[basename_no_year]["c2_" + cat_temp] = c2s[basename_no_year][cut]
            # Pick the closure systematic matching the sample's lifetime.
            closure_syst = {}
            if '_1_' in sample:
                closure_syst = closure_syst_1mm
            elif '_10_' in sample:
                closure_syst = closure_syst_10mm
            elif '_100_' in sample:
                closure_syst = closure_syst_100mm
            elif '_1000_' in sample:
                closure_syst = closure_syst_1000mm
            systs[basename_no_year]["closure_" + cat_temp] = closure_syst[cat_translator[cut]][year]

            systs[basename_no_year]["lumi_" + year] = lumi_syst[year]
            systs[basename_no_year]["trig_MC_" + year] = trig_MC_syst[year]
            systs[basename_no_year]["trig_data_" + year] = trig_data_syst[year]
            systs[basename_no_year]["JES_" + year] = JES_syst[year]
            systs[basename_no_year]["JER_" + year] = JER_syst[year]
            #systs[basename_no_year]["EGM_ID_" + year] = EGM_ID_syst[year]
            systs[basename_no_year]["veto_ID_" + year] = veto_ID_syst[year]
            systs[basename_no_year]["GM_ID_" + cat_temp] = GM_ID_syst[cat_translator[cut]][year]
            systs[basename_no_year]["dSA_ID_prompt_" + year] = dSA_ID_prompt_syst[year]
            systs[basename_no_year]["dSA_ID_displ_" + year] = dSA_ID_displ_syst[year]
            systs[basename_no_year]["dSA_reco_prompt_" + year] = dSA_reco_prompt_syst[year]

    #! [part1]

    for name, cat, obs_rate, sig_rate, syst in zip(names.values(), cats.values(), obs_rates.values(), sig_rates.values(), systs.values()):

        cb = ch.CombineHarvester()
        cb.SetVerbosity(0)

        #! [part2]
        print "cat ", cat
        cb.AddObservations(["*"], [""], ["13TeV"], [""],          cat)
        cb.AddProcesses(   ["*"], [""], ["13TeV"], [""], ["sig"], cat, True)
        cb.AddProcesses(   ["*"], [""], ["13TeV"], [""], ["bkg"], cat, False)

        cb.cp().ForEachObs(lambda x: x.set_rate(obs_rate[x.bin()]))
        cb.cp().backgrounds().ForEachProc(lambda x: x.set_rate(1))
        cb.cp().signals().ForEachProc(lambda x: x.set_rate(sig_rate[x.bin()]))
        #! [part2]

        #! [part3]
        # Create a unqiue floating parameter in each bin
        for y in ['2016', '2017', '2018']:
            for m in ['0', '1', '2']:
                if 'A_'+m+'match_'+y not in sig_rate:
                    continue
                cb.cp().backgrounds().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'bkgA_norm_'+m+'match_'+y, "rateParam", ch.SystMap()(syst['bkgA_norm_'+m+'match_'+y]))
                # c1/c2 rate parameters carry no year suffix on purpose:
                # the transfer factors are shared across years.
                cb.cp().backgrounds().bin(['B_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'c1_'+m+'match', "rateParam", ch.SystMap()(syst['c1_'+m+'match_'+y]))
                cb.cp().backgrounds().bin(['C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'c2_'+m+'match', "rateParam", ch.SystMap()(syst['c2_'+m+'match_'+y]))
                # background systs
                cb.cp().backgrounds().bin(['C_'+m+'match_'+y]).AddSyst(cb, 'closure_'+m+'match', 'lnN', ch.SystMap()(syst['closure_'+m+'match_'+y]))

                # signal systs
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'lumi_'+y, 'lnN', ch.SystMap()(syst['lumi_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'trig_MC', 'lnN', ch.SystMap()(syst['trig_MC_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'trig_data', 'lnN', ch.SystMap()(syst['trig_data_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'JES', 'lnN', ch.SystMap()(syst['JES_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'JER_'+y, 'lnN', ch.SystMap()(syst['JER_'+y]))
                #cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'EGM_ID', 'lnN', ch.SystMap()(syst['EGM_ID_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'veto_ID', 'lnN', ch.SystMap()(syst['veto_ID_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'GM_ID_'+m+'match', 'lnN', ch.SystMap()(syst['GM_ID_'+m+'match_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'dSA_ID_prompt', 'lnN', ch.SystMap()(syst['dSA_ID_prompt_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'dSA_ID_displ', 'lnN', ch.SystMap()(syst['dSA_ID_displ_'+y]))
                cb.cp().signals().bin(['A_'+m+'match_'+y, 'B_'+m+'match_'+y, 'C_'+m+'match_'+y, 'D_'+m+'match_'+y]).AddSyst(cb, 'dSA_reco_prompt', 'lnN', ch.SystMap()(syst['dSA_reco_prompt_'+y]))
        #! [part3]

        #! [part4]
        #cb.PrintAll();
        print ">> Writing datacard for hist: ", name
        # Check for the directory that is actually created ('sig_' prefix);
        # checking plain 'name' made mkdir raise when 'sig_<name>' existed.
        if not os.path.exists("sig_" + name):
            os.mkdir("sig_" + name)
        cb.WriteDatacard("sig_" + str(name) + "/datacard.txt")
# Example #11 (score: 0)
    def __init__(self, cb=None):
        """Initialise the datacard builder.

        cb -- an existing ch.CombineHarvester instance to reuse; a fresh
              one is created when None (the default).

        All common systematic uncertainties are stored as
        [name, type, SystMap] argument lists, ready to be passed to
        CombineHarvester.AddSyst.
        """
        super(Datacards, self).__init__()

        self.cb = cb
        if self.cb is None:
            self.cb = ch.CombineHarvester()
        if log.isEnabledFor(logging.DEBUG):
            self.cb.SetVerbosity(1)

        self.configs = datacardconfigs.DatacardConfigs()

        # common systematics
        self.lumi_syst_args = [
            "lumi_$ERA", "lnN",
            ch.SystMap("era")(["7TeV", "8TeV"], 1.026)(["13TeV"], 1.046)
        ]
        self.electron_efficiency_syst_args = [
            "CMS_eff_e",
            "lnN",
            ch.SystMap("era")(["7TeV", "8TeV"], 1.02)(["13TeV"],
                                                      1.05)  # copied from 8TeV
        ]
        self.muon_efficiency_syst_args = [
            "CMS_eff_m",
            "lnN",
            ch.SystMap("era")(["7TeV", "8TeV"], 1.02)(["13TeV"],
                                                      1.05)  # copied from 8TeV
        ]
        self.tau_efficiency_corr_syst_args = [
            "CMS_eff_t_$ERA",
            "lnN",
            ch.SystMap("era",
                       "channel")(["7TeV", "8TeV"], ["mt", "et"],
                                  1.08)(["7TeV", "8TeV"], ["tt"],
                                        1.19)(["13TeV"], ["mt", "et", "tt"],
                                              1.05)  # copied from 8TeV
        ]
        self.tau_efficiency_syst_args = [
            "CMS_eff_t_$CHANNEL_$ERA",
            "lnN",
            ch.SystMap("era",
                       "channel")(["7TeV", "8TeV"], ["mt", "et"],
                                  1.08)(["7TeV", "8TeV"], ["tt"],
                                        1.19)(["13TeV"], ["mt", "et", "tt"],
                                              1.03)  # copied from 8TeV
        ]
        self.btag_efficiency_syst_args = [
            "CMS_eff_b_$ERA",
            "lnN",
            ch.SystMap("era", "channel")(["13TeV"], ["mt"],
                                         0.96)  # copied from 8TeV
            (["13TeV"], ["et"], 0.96)  # copied from 8TeV
            (["13TeV"], ["em"], 0.93)  # copied from 8TeV
            (["13TeV"], ["tt"], 0.93)  # copied from 8TeV
        ]
        self.met_scale_syst_args = [
            "CMS_$ANALYSIS_scale_met_$ERA",
            "lnN",
            ch.SystMap("era",
                       "process")(["13TeV"], ["ggH", "qqH", "WH", "ZH", "VH"],
                                  0.98)  # copied from 8TeV
            (["13TeV"],
             ["ZTT", "ZLL", "ZL", "ZJ", "TTJ", "TT", "VV", "WJ", "W"],
             1.03)  # copied from 8TeV
        ]

        # cross-section / extrapolation normalisation uncertainties
        self.ztt_cross_section_syst_args = [
            "CMS_$ANALYSIS_zttNorm_$ERA", "lnN",
            ch.SystMap("era",
                       "process")(["7TeV", "8TeV"], ["ZTT", "ZLL", "ZL", "ZJ"],
                                  1.03)(["13TeV"], ["ZTT", "ZLL", "ZL", "ZJ"],
                                        1.04)
        ]
        self.ttj_cross_section_syst_args = [
            "CMS_$ANALYSIS_ttjNorm_$ERA",
            "lnN",
            ch.SystMap("era", "process")(["7TeV"], ["TTJ"],
                                         1.08)(["8TeV"], ["TTJ"],
                                               1.1)(["13TeV"], ["TTJ", "TT"],
                                                    1.06)  # copied from 8TeV
        ]
        self.ttj_extrapol_syst_args = [
            "CMS_$ANALYSIS_ttjExtrapol_$ERA",
            "lnN",
            ch.SystMap("era", "process")(["7TeV"], ["TTJ"],
                                         1.08)(["8TeV"], ["TTJ"],
                                               1.1)(["13TeV"], ["TTJ", "TT"],
                                                    1.10)  # copied from 8TeV
        ]

        #self.singlet_cross_section_syst_args = [
        #"CMS_$ANALYSIS_singletNorm_$ERA",
        #"lnN",
        #ch.SystMap("era", "process")
        #(["13TeV"], [], 1.04)

        self.vv_cross_section_syst_args = [
            "CMS_$ANALYSIS_vvNorm_$ERA",
            "lnN",
            ch.SystMap("era", "process")(["7TeV", "8TeV"], ["VV"],
                                         1.15)(["13TeV"], ["VV"],
                                               1.10)  # copied from 8TeV
        ]
        self.wj_cross_section_syst_args = [
            "CMS_$ANALYSIS_wjNorm_$CHANNEL_$ERA",
            "lnN",
            ch.SystMap("era", "process",
                       "channel")(["7TeV", "8TeV"], ["WJ"], ["mt", "et"],
                                  1.2)(["13TeV"], ["WJ", "W"], ["mt", "et"],
                                       1.04)  # copied from 8TeV
        ]
        self.wj_extrapol_syst_args = [
            "CMS_$ANALYSIS_wjExtrapol_$CHANNEL_$ERA",
            "lnN",
            ch.SystMap("era", "process",
                       "channel")(["7TeV", "8TeV"], ["WJ"], ["mt", "et"],
                                  1.2)(["13TeV"], ["WJ", "W"], ["mt", "et"],
                                       1.2)  # copied from 8TeV
        ]

        self.qcd_syst_args = [
            "CMS_$ANALYSIS_qcdSyst_$BIN_$ERA",
            "lnN",
            ch.SystMap("era", "process",
                       "bin")(["13TeV"], ["QCD"],
                              ["mt_inclusive", "et_inclusive"],
                              1.06)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_0jet_high"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_0jet_low"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_1jet_high"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_1jet_low"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["mt_2jet_vbf"], 1.3)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_0jet_high"], 1.06)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_0jet_low"], 1.06)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_1jet_high"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_1jet_low"], 1.1)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["et_2jet_vbf"], 1.3)  # copied from 8TeV
            (["13TeV"], ["QCD"], ["tt_inclusive"], 1.35)  # copied from 8TeV
        ]
        self.zllFakeTau_syst_args = [
            "CMS_$ANALYSIS_eFakeTau_$CHANNEL_$ERA", "lnN",
            ch.SystMap("era", "process",
                       "channel")(["7TeV", "8TeV"], ["ZLL"], ["mt", "et"],
                                  1.30)(["13TeV"], ["ZLL", "ZL", "ZJ"],
                                        ["mt", "tt"],
                                        1.15)(["13TeV"], ["ZLL", "ZL", "ZJ"],
                                              ["et"], 1.30)
        ]

        self.zee_norm_syst_args = [
            "CMS_$ANALYSIS_zeeNorm_$ERA", "lnN",
            ch.SystMap("era", "process")(["13TeV"], ["ZLL", "ZL"], 1.03)
        ]

        # shape systematics (energy scales and resolution)
        self.jec_syst_args = [
            "CMS_scale_j_$ERA", "shape",
            ch.SystMap("era")(["13TeV"], 1.0)
        ]
        self.tau_es_syst_args = [
            "CMS_scale_t_$CHANNEL_$ERA", "shape",
            ch.SystMap("era", "channel")(["13TeV"], ["mt"],
                                         1.0)(["13TeV"], ["et"],
                                              1.0)(["13TeV"], ["tt"], 1.0)
        ]
        self.ele_es_syst_args = [
            "CMS_scale_e_$CHANNEL_$ERA", "shape",
            ch.SystMap("era", "channel")(["13TeV"], ["em"], 1.0)(["13TeV"],
                                                                 ["et"], 1.0)
        ]

        self.probetau_es_syst_args = [
            "CMS_scale_probetau_$CHANNEL_$ERA", "shape",
            ch.SystMap("era", "channel")(["13TeV"], ["et"], 1.0)
        ]
        self.probeele_es_syst_args = [
            "CMS_scale_probeele_$CHANNEL_$ERA", "shape",
            ch.SystMap("era", "channel")(["13TeV"], ["et"], 1.0)
        ]
        self.tagele_es_syst_args = [
            "CMS_scale_tagele_$CHANNEL_$ERA", "shape",
            ch.SystMap("era", "channel")(["13TeV"], ["et"], 1.0)
        ]

        self.massres_syst_args = [
            "CMS_scale_massRes_$CHANNEL_$ERA", "shape",
            ch.SystMap("era", "channel")(["13TeV"], ["et"], 1.0)
        ]

        # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt1314TeV#s_13_0_TeV
        self.htt_qcd_scale_syst_args = [
            "QCD_scale_$PROCESS", "lnN",
            ch.SystMap("era",
                       "process")(["13TeV"], ["ggH"],
                                  1.079)(["13TeV"], ["qqH"],
                                         1.007)(["13TeV"], ["VH"],
                                                1.015)(["13TeV"], ["WH"],
                                                       1.015)(["13TeV"],
                                                              ["ZH"], 1.038)
        ]
        self.htt_pdf_scale_syst_args = [
            "PDF_scale_$PROCESS", "lnN",
            ch.SystMap("era",
                       "process")(["13TeV"], ["ggH"],
                                  1.071)(["13TeV"], ["qqH"],
                                         1.032)(["13TeV"], ["VH"],
                                                1.022)(["13TeV"], ["WH"],
                                                       1.022)(["13TeV"],
                                                              ["ZH"], 1.022)
        ]
        # NOTE(review): this attribute was previously assigned twice with
        # identical values; the redundant second assignment was removed.
        self.ztt_pdf_scale_syst_args = [
            "PDF_scale_$PROCESS", "lnN",
            ch.SystMap("era", "process")(["13TeV"], ["ZTT"], 1.015)
        ]

        # CMS AN-13-262 (v8, table 3)
        self.htt_ueps_syst_args = [
            "UEPS",
            "lnN",
            ch.SystMap("era", "process",
                       "bin")(["13TeV"], ["ggH"], ["mt_0jet_high"],
                              1.060)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_0jet_low"], 1.073)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_1jet_high"], 0.996)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_1jet_low"], 1.007)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["mt_2jet_vbf"], 0.988)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_0jet_high"], 1.060)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_0jet_low"], 1.073)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_1jet_high"], 0.996)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_1jet_low"], 1.007)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["et_2jet_vbf"], 0.988)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_0jet_high"], 1.063)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_0jet_low"], 1.089)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_1jet_high"], 1.004)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_1jet_low"], 1.000)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["em_2jet_vbf"], 0.988)  # copied from 8TeV
            (["13TeV"], ["ggH"], ["tt_inclusive"], 1.025)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_0jet_high"],
             1.028)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_0jet_low"],
             1.018)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_1jet_high"],
             0.954)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_1jet_low"],
             0.946)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["mt_2jet_vbf"],
             0.893)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_0jet_high"],
             1.028)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_0jet_low"],
             1.018)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_1jet_high"],
             0.954)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_1jet_low"],
             0.946)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["et_2jet_vbf"],
             0.893)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_0jet_high"],
             1.042)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_0jet_low"],
             1.035)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_1jet_high"],
             0.978)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_1jet_low"],
             0.984)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["em_2jet_vbf"],
             0.893)  # copied from 8TeV
            (["13TeV"], ["qqH", "WH", "ZH", "VH"], ["tt_inclusive"],
             1.025)  # copied from 8TeV
        ]
def main():
    """Build ABCD-method datacards, one per signal sample.

    Reads a json file of A/B/C/D region yields (path given as the first
    command-line argument), builds the four ABCD categories per match
    category, attaches the ABCD rate parameters, and writes one
    CombineHarvester datacard per sample under 'sig_<sample>/datacard.txt'.
    """
    if len(sys.argv) < 2:
        print 'Error! No data filename specified (in json format). Exiting...'
        # Bail out: without this the script would crash on sys.argv[1] below.
        exit(1)

    # cut number (as it appears in the histogram name) -> match category
    cat_translator = {u'28': '0', u'29': '1', u'30': '2'}

    with open(sys.argv[1]) as f:
        data = json.load(f)

    # First pass: collect the A/B/C/D yields per sample and cut number.
    tot_yields = {}
    for name,yields in data.iteritems():
        if 'vtx_sign' not in name: continue

        print "name ", name
        
        sample = name.split('_sig_')[-1]

        cut = [token for token in name.split('_') if 'cut' in token][0]
        print 'cut ', cut
        if '28' not in cut and '29' not in cut and '30' not in cut: continue

        if sample not in tot_yields:
            tot_yields[sample] = {}

        cut_num = cut.split('cut')[-1]

        if cut_num not in tot_yields[sample]:
            tot_yields[sample][cut_num] = {}

        tot_yields[sample][cut_num]['A_sig'] = yields["A_sig"]
        tot_yields[sample][cut_num]['B_sig'] = yields["B_sig"]
        tot_yields[sample][cut_num]['C_sig'] = yields["C_sig"]
        tot_yields[sample][cut_num]['D_sig'] = yields["D_sig"]
        # Floor the background yields at 0.1 to avoid zero rates (and
        # divisions by zero in the transfer factors below).
        tot_yields[sample][cut_num]['A_bkg'] = max(yields["A_bkg"], 0.1)
        tot_yields[sample][cut_num]['B_bkg'] = max(yields["B_bkg"], 0.1)
        tot_yields[sample][cut_num]['C_bkg'] = max(yields["C_bkg"], 0.1)
        tot_yields[sample][cut_num]['D_bkg'] = max(yields["D_bkg"], 0.1)
        # ABCD transfer factors: c1 = B/A, c2 = C/A.
        tot_yields[sample][cut_num]['c1'] = tot_yields[sample][cut_num]['B_bkg'] / tot_yields[sample][cut_num]['A_bkg']
        tot_yields[sample][cut_num]['c2'] = tot_yields[sample][cut_num]['C_bkg'] / tot_yields[sample][cut_num]['A_bkg']

    #! [part1]
    # Define four categories labeled A, B, C and D, and
    # set the observed yields in a map.
    for sample in tot_yields:
        cats = []
        obs_rates = {}
        sig_rates = {}
        for cut in tot_yields[sample]:
            cat = cat_translator[cut] + 'match'
            cats.append('A_' + cat)
            cats.append('B_' + cat)
            cats.append('C_' + cat)
            cats.append('D_' + cat)
            obs_rates['A_' + cat] = tot_yields[sample][cut]['A_bkg']
            obs_rates['B_' + cat] = tot_yields[sample][cut]['B_bkg']
            obs_rates['C_' + cat] = tot_yields[sample][cut]['C_bkg']
            obs_rates['D_' + cat] = tot_yields[sample][cut]['D_bkg']
            sig_rates['A_' + cat] = tot_yields[sample][cut]['A_sig']
            sig_rates['B_' + cat] = tot_yields[sample][cut]['B_sig']
            sig_rates['C_' + cat] = tot_yields[sample][cut]['C_sig']
            sig_rates['D_' + cat] = tot_yields[sample][cut]['D_sig']
        # CombineHarvester expects (index, bin-name) pairs.
        cats_with_number = []
        for num in range(0, len(cats)):
            cats_with_number.append((num, cats[num]))
        #! [part1]

        #! [part2]
        cb = ch.CombineHarvester()
        cb.SetVerbosity(0)

        cb.AddObservations(["*"], [""], ["13TeV"], [""],          cats_with_number)
        cb.AddProcesses(   ["*"], [""], ["13TeV"], [""], ["sig"], cats_with_number, True)
        cb.AddProcesses(   ["*"], [""], ["13TeV"], [""], ["bkg"], cats_with_number, False)

        # Observations carry the (floored) background yields; the background
        # process rate is 1 so the rateParams below define it entirely.
        cb.cp().ForEachObs(lambda x: x.set_rate(obs_rates[x.bin()]))
        cb.cp().backgrounds().ForEachProc(lambda x: x.set_rate(1))
        cb.cp().signals().ForEachProc(lambda x: x.set_rate(sig_rates[x.bin()]))
        #! [part2]

        #! [part3]
        # Create a unqiue floating parameter in each bin
        print 'name ', sample
        print 'tot_yields name keys', tot_yields[sample].keys()
        cb.cp().backgrounds().bin(["A_0match", "B_0match", "C_0match", "D_0match"]).AddSyst(cb, "bkgA_norm_0match", "rateParam", ch.SystMap()(tot_yields[sample][u'28']['A_bkg']))
        cb.cp().backgrounds().bin(["B_0match", "D_0match"]).AddSyst(cb, "c1_0match", "rateParam", ch.SystMap()(tot_yields[sample][u'28']['c1']))
        cb.cp().backgrounds().bin(["C_0match", "D_0match"]).AddSyst(cb, "c2_0match", "rateParam", ch.SystMap()(tot_yields[sample][u'28']['c2']))
        cb.cp().backgrounds().bin(["A_1match", "B_1match", "C_1match", "D_1match"]).AddSyst(cb, "bkgA_norm_1match", "rateParam", ch.SystMap()(tot_yields[sample][u'29']['A_bkg']))
        cb.cp().backgrounds().bin(["B_1match", "D_1match"]).AddSyst(cb, "c1_1match", "rateParam", ch.SystMap()(tot_yields[sample][u'29']['c1']))
        cb.cp().backgrounds().bin(["C_1match", "D_1match"]).AddSyst(cb, "c2_1match", "rateParam", ch.SystMap()(tot_yields[sample][u'29']['c2']))
        cb.cp().backgrounds().bin(["A_2match", "B_2match", "C_2match", "D_2match"]).AddSyst(cb, "bkgA_norm_2match", "rateParam", ch.SystMap()(tot_yields[sample][u'30']['A_bkg']))
        cb.cp().backgrounds().bin(["B_2match", "D_2match"]).AddSyst(cb, "c1_2match", "rateParam", ch.SystMap()(tot_yields[sample][u'30']['c1']))
        cb.cp().backgrounds().bin(["C_2match", "D_2match"]).AddSyst(cb, "c2_2match", "rateParam", ch.SystMap()(tot_yields[sample][u'30']['c2']))
        #! [part3]

        #! [part4]
        #cb.PrintAll();
        print ">> Writing datacard for hist: ", sample
        # Check for the directory that is actually created ('sig_' prefix);
        # checking plain 'sample' made mkdir raise when 'sig_<sample>' existed.
        if not os.path.exists("sig_" + sample):
            os.mkdir("sig_" + sample)
        cb.WriteDatacard("sig_" + str(sample) + "/datacard.txt")
# Example #13 (score: 0)
def create_datacards(channel, method):
    """Create CombineHarvester datacards for one LFV channel and method.

    Builds the observation/process structure in CombineHarvester, produces
    harry.py configs for the nominal and systematically shifted histograms
    (executed in a process pool), wires control-region scale factors in as
    rateParams, extracts the resulting shapes, and finally writes one
    combined datacard plus one datacard per category.

    Args:
        channel: analysis channel string (e.g. "em", "et", "mt").
        method:  key selecting the discriminating variable / selection
                 weight ("cut_BDT", "cut_Ada_BDT", "BDT", "Ada_BDT").

    Side effects: writes ROOT files and datacards under
    ``output_dir + channel`` and calls ``hadd`` via ``os.system``.

    NOTE(review): relies on module-level globals defined elsewhere in this
    file: categories, controlregions, data, signals, x, x_bins, output_dir,
    harry_do_your_job, and the imported ch/samples/SystLib/systematics
    modules. Each ``category`` appears to be a (weight-index, bin-name)
    tuple — confirm against the module-level definition.
    """
    # Map datacard process names to sample-database keys.
    backgrounds = {"ZTT": "ztt", "VV": "vv", "W": "wj", "QCD": "qcd"}
    # "em" uses merged ttbar / Z-leptonic processes; other channels split
    # them by gen-matching.
    backgrounds.update({
        "TT": "ttj",
        "ZLL": "zll"
    } if channel == "em" else {
        "TTT": "ttt",
        "TTJJ": "ttjj",
        "ZL": "zl",
        "ZJ": "zj"
    })

    ##Combine harvester instance
    cb = ch.CombineHarvester()

    #Instance for extracting histograms
    sample_settings = samples.Samples()
    config_list = []

    ##weights
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; for plain config files yaml.safe_load is
    # the safer choice (left unchanged to avoid breaking custom tags).
    cut_info = yaml.load(
        open(os.environ["CMSSW_BASE"] + "/src/FlavioOutput/Configs/cuts.yaml",
             "r"))
    parameter_info = yaml.load(
        open(
            os.environ["CMSSW_BASE"] +
            "/src/FlavioOutput/Configs/parameter.yaml", "r"))

    weights = []

    # One weight dictionary per jet-multiplicity category; the method key
    # selects which selection string is used later.
    for index, category in enumerate(
        ["(njetspt30==0)", "(njetspt30==1)",
         "(njetspt30>1)"]):  #, "(nbtag==2)"]):
        #cut_strings = [parameter_info[param][4] for param in cut_info[index][channel].keys()]
        #cut_values, cut_side = [[entry[index2] for entry in cut_info[index][channel].values()] for index2 in [0,1]]

        weights.append(
            {  #"cut_based":	"*".join([cut_strings[index2].format(side = side, cut = value) for index2, (side, value) in enumerate(zip(cut_side, cut_values))] + [category]),
                "cut_BDT": "(BDT_forcut_score>0.7)*" + category,
                "cut_Ada_BDT": "(BDT_Ada_forcut_score>0.0)*" + category,
                "BDT": category,
                "Ada_BDT": category
            })

    ##Fill combine harvester with categories/processes
    for category in categories + controlregions:
        ##Add data/signal
        cb.AddObservations(["*"], ["lfv"], ["13TeV"], [channel], [category])

        # Signal is only defined in the signal-region categories.
        if not "CR" in category[1]:
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel],
                            ["Z" + channel.upper()], [category], True)

        ##Config for each category
        # NOTE(review): dict.values()/keys() list concatenation below is
        # Python-2-only; under Python 3 these are views and would need
        # list(...) wrappers.
        config = sample_settings.get_config(
            [
                getattr(samples.Samples, sample)
                for sample in data.values() + {
                    True: ["z" + channel],
                    False: []
                }["CR" not in category[1]] + backgrounds.values()
            ],
            channel,
            None,
            estimationMethod="new",
            weight=weights[category[0]][method])
        config.pop("legend_markers")
        # NOTE(review): ``config`` supports ``+=`` with a dict, so it is
        # presumably a harry.py config object rather than a plain dict —
        # confirm against sample_settings.get_config.
        config += {
            "filename": "input_" + method + "_nominal_" + category[1],
            "plot_modules": ["ExportRoot"],
            "file_mode": "UPDATE",
            "directories": os.environ["MCPATH"],
            "x_expressions": x[method],
            "x_bins": x_bins[method],
            "output_dir": output_dir + channel,
            "no_cache": True
        }
        config["labels"] = [
            category[1] + "/" + process for process in data.keys() + {
                True: ["Z" + channel.upper()],
                False: []
            }["CR" not in category[1]] + backgrounds.keys()
        ]
        config_list.append(config)

        for process in backgrounds.keys():
            ##Add background
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel], [process],
                            [category], False)

    ##Fill combine with control regions
    # FIX(review): the original body referenced the stale loop variable
    # ``category`` (left over from the loop above) instead of ``CR``, the
    # misspelled name ``catogories`` (NameError), and called the
    # non-existent str.remove() (AttributeError). Corrected to use the
    # current control-region tuple and str.replace("_CR", "").
    for CR in controlregions:
        # Free-floating normalization for the control region itself.
        cb.cp().channel([channel]).bin([CR[1]]).AddSyst(
            cb, "scale_" + CR[1].replace("_CR", ""), "rateParam",
            ch.SystMap())

        for category in categories:
            # Tie each signal-region category to the same control-region
            # scale factor via a SystMapFunc rateParam.
            # FIX(review): the original passed category[0] (an integer
            # weight index) as the bin name; bins are addressed by their
            # string name category[1] everywhere else — confirm against
            # the categories structure.
            cb.cp().bin([category[1]]).AddSyst(
                cb, "scale_" + CR[1].replace("_CR", ""), "rateParam",
                ch.SystMapFunc())

    ##Fill combine harvester with systematics
    systematics_list = SystLib.SystematicLibary()
    systematics_factory = systematics.SystematicsFactory()

    for (systematic, process, category) in systematics_list.get_LFV_systs(
            channel, lnN=True) + systematics_list.get_LFV_systs(channel,
                                                                shape=True):
        cb.cp().channel([channel]).process(process).AddSyst(cb, *systematic)

        # W and QCD estimations are correlated, so shifts applied to one
        # must also be propagated to the other.
        if "W" in process and "QCD" not in process:
            process.append("QCD")

        if "QCD" in process and "W" not in process:
            process.append("W")

        if systematic[1] == "shape":
            ##Config for each systematic shift:
            for category in categories + controlregions:

                # No signal process in the control regions.
                if "CR" in category[1] and "Z" + channel.upper() in process:
                    process.remove("Z" + channel.upper())

                for shift in ["Down", "Up"]:
                    config = sample_settings.get_config(
                        [
                            getattr(samples.Samples,
                                    dict(signals, **backgrounds)[sample])
                            for sample in process
                        ],
                        channel,
                        None,
                        estimationMethod="new",
                        weight=weights[category[0]][method])
                    config.pop("legend_markers")
                    config += {
                        "filename":
                        "input_" + method + "_" + systematic[0].replace(
                            "$ERA", "13TeV").replace("$CHANNEL", channel) +
                        shift + "_" + category[1],
                        "plot_modules": ["ExportRoot"],
                        "file_mode":
                        "UPDATE",
                        "directories":
                        os.environ["MCPATH"],
                        "x_expressions":
                        x[method],
                        "x_bins":
                        x_bins[method],
                        "output_dir":
                        output_dir + channel,
                        "no_cache":
                        True
                    }
                    config["labels"] = [
                        category[1] + "/" + proc + "_" + systematic[0].replace(
                            "$ERA", "13TeV").replace("$CHANNEL", channel) +
                        shift for proc in process
                    ]

                    # JEC uses the "Total" source; all other shape
                    # systematics take the default constructor.
                    if systematic[0].replace("$ERA", "13TeV").replace(
                            "$CHANNEL", channel) == "CMS_scale_j_13TeV":
                        systematics_settings = systematics_factory.get(
                            systematic[0].replace("$ERA", "13TeV").replace(
                                "$CHANNEL", channel))(config, "Total")

                    else:
                        systematics_settings = systematics_factory.get(
                            systematic[0].replace("$ERA", "13TeV").replace(
                                "$CHANNEL", channel))(config)

                    config = systematics_settings.get_config(1 if shift ==
                                                             "Up" else -1)
                    config_list.append(config)

    # Produce all histograms in parallel, then merge them into one file.
    pool = Pool(cpu_count())
    for config in config_list:
        pool.apply_async(harry_do_your_job, args=(config, ))

    pool.close()
    pool.join()

    os.system("hadd {target}.root {root_files}*.root".format(
        target=output_dir + channel + "/input_" + method,
        root_files=output_dir + channel + "/input_" + method))

    ##Fill combine harvester with the shapes which were extracted before from harry.py
    cb.cp().backgrounds().ExtractShapes(
        output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS",
        "$BIN/$PROCESS_$SYSTEMATIC")
    cb.cp().signals().ExtractShapes(
        output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS",
        "$BIN/$PROCESS_$SYSTEMATIC")

    #Write datacard and call combine
    cb.WriteDatacard(
        output_dir + channel + "/combined_" + method + ".txt",
        output_dir + channel + "/combined_datacard_" + method + ".root")

    # Also write one standalone datacard per category.
    for category in categories:
        cb_copy = cb.cp()
        cb_copy.FilterAll(lambda obj: obj.bin() != category[1])
        cb_copy.WriteDatacard(
            output_dir + channel + "/" + category[1] + "_" + method + ".txt",
            output_dir + channel + "/" + category[1] + "_datacard_" + method +
            ".root")