Ejemplo n.º 1
0
class BTagCorrector:
    """Event-level b-tag weights from scale factors and MC efficiencies.

    The constructor loads a working-point value, a ``BTagScaleFactor`` reader
    and a dense efficiency lookup; ``btag_weight`` combines the latter two.
    """

    def __init__(self, tagger, year, workingpoint):
        """Load scale factors and the efficiency map for one configuration.

        Args:
            tagger: tagger key, ``'deepflav'`` or ``'deepcsv'``.
            year: data-taking year as a string key (``'2016'``, ``'2017'``,
                ``'2018'``).
            workingpoint: working-point name; passed to ``BTagScaleFactor``
                and used to pick the ``'wp'`` bin of the efficiency histogram.

        Raises:
            KeyError: if ``tagger`` or ``year`` is not one of the known keys.
        """
        self._year = year
        self._wp = load('data/common.coffea')['btagWPs'][tagger][year][workingpoint]

        # Scale-factor CSV shipped for each tagger and year.
        sf_csv_by_tagger = {
            'deepflav': {
                '2016': 'DeepJet_2016LegacySF_V1.csv',
                '2017': 'DeepFlavour_94XSF_V4_B_F.csv',
                '2018': 'DeepJet_102XSF_V1.csv'
            },
            'deepcsv': {
                '2016': 'DeepCSV_2016LegacySF_V1.csv',
                '2017': 'DeepCSV_94XSF_V5_B_F.csv',
                '2018': 'DeepCSV_102XSF_V1.csv'
            }
        }
        self.sf = BTagScaleFactor('data/' + sf_csv_by_tagger[tagger][year],
                                  workingpoint)

        # Efficiency histograms; note that 2016 reuses the 2017 file.
        eff_hist_by_year = {
            '2016': 'btag2017.merged',
            '2017': 'btag2017.merged',
            '2018': 'btag2018.merged',
        }
        tagger_hist = load('hists/' + eff_hist_by_year[year])[tagger]

        # Sum over datasets and select the requested working point once,
        # then read the passing and the total counts from that selection.
        selected = tagger_hist.integrate('dataset').integrate('wp', workingpoint)
        n_pass = selected.integrate('btag', 'pass').values()[()]
        n_all = selected.integrate('btag').values()[()]

        # Clamp the denominator at 1 so empty bins give 0 instead of NaN.
        efficiency = n_pass / np.maximum(n_all, 1.)
        binning = [axis.edges() for axis in tagger_hist.axes()[3:]]
        self.eff = lookup_tools.dense_lookup.dense_lookup(efficiency, binning)

    def btag_weight(self, pt, eta, flavor, tag):
        """Return the (nominal, up, down) b-tag weights.

        Args:
            pt: jet transverse momenta.
            eta: jet pseudorapidities (the absolute value is used).
            flavor: jet flavours, forwarded to the lookups.
            tag: region label; when it contains the substring ``'-1'`` the
                weight is built from ``1 - zero-tag`` terms, otherwise from
                the zero-tag terms themselves.

        Returns:
            Tuple ``(nom, up, down)`` with NaNs replaced via ``np.nan_to_num``.
        """
        abseta = abs(eta)

        #https://twiki.cern.ch/twiki/bin/viewauth/CMS/BTagSFMethods#1b_Event_reweighting_using_scale
        def prob_no_tag(eff):
            return (1 - eff).prod()

        eff_mc = self.eff(flavor, pt, abseta)
        p0_mc = prob_no_tag(eff_mc)
        complement = '-1' in tag

        weights = []
        for variation in ('central', 'up', 'down'):
            scale = self.sf.eval(variation, flavor, abseta, pt)
            # A data efficiency above 1 is unphysical, so cap it.
            p0_data = prob_no_tag(np.minimum(1., scale * eff_mc))
            if complement:
                weight = (1 - p0_data) / (1 - p0_mc)
            else:
                weight = p0_data / p0_mc
            weights.append(np.nan_to_num(weight))

        return tuple(weights)
Ejemplo n.º 2
0
def validate_btag(filename, btagtype, etamax):
    """Print the largest coffea-vs-ROOT scale-factor difference per systematic.

    Random jets are generated and evaluated twice: once with coffea's
    ``BTagScaleFactor`` and once with a ``ROOT.BTagCalibrationReader`` wrapped
    by ``makesf``. For every systematic the maximum absolute difference is
    printed. Two configurations are checked: the reshaping ``iterativefit``
    measurement on b jets only, then the tight working point with
    ``comb``/``mujets``/``incl`` on a mix of flavours.

    Args:
        filename: path to the scale-factor CSV.
        btagtype: tagger name handed to ``ROOT.BTagCalibration``.
        etamax: upper edge of the uniform |eta| sampling range.
    """
    calibration = ROOT.BTagCalibration(btagtype, filename)
    n_jets = 10000

    # Random jet kinematics; the order of the RNG calls is kept as-is.
    flavor = numpy.full(n_jets, 5)
    abseta = numpy.random.uniform(0, etamax, size=n_jets)
    pt = numpy.random.exponential(50, size=n_jets) + numpy.random.exponential(20, size=n_jets)
    pt = numpy.maximum(20.1, pt)
    discr = numpy.random.rand(n_jets)

    def report_max_diff(coffea_reader, root_eval, variations, jet_flavor):
        # One printed number per systematic: max |coffea - ROOT|.
        for variation in variations:
            from_coffea = coffea_reader.eval(variation, jet_flavor, abseta, pt, discr)
            from_root = root_eval(variation, jet_flavor, abseta, pt, discr)
            print(abs(from_coffea - from_root).max())

    # Discriminator reshaping, b jets only.
    coffea_sf = BTagScaleFactor(filename, BTagScaleFactor.RESHAPE, 'iterativefit', keep_df=True)
    reader = ROOT.BTagCalibrationReader(ROOT.BTagEntry.OP_RESHAPING, 'central', stdvec(['up_jes', 'down_jes']))
    reader.load(calibration, ROOT.BTagEntry.FLAV_B, 'iterativefit')
    report_max_diff(coffea_sf, makesf(reader), ['central', 'up_jes', 'down_jes'], flavor)

    # Tight working point with one measurement type per flavour.
    flavor = numpy.random.choice([0, 4, 5], size=n_jets)
    coffea_sf = BTagScaleFactor(filename, BTagScaleFactor.TIGHT, 'comb,mujets,incl', keep_df=True)
    reader = ROOT.BTagCalibrationReader(ROOT.BTagEntry.OP_TIGHT, 'central', stdvec(['up', 'down']))
    for root_flavor, method in ((ROOT.BTagEntry.FLAV_B, 'comb'),
                                (ROOT.BTagEntry.FLAV_C, 'mujets'),
                                (ROOT.BTagEntry.FLAV_UDSG, 'incl')):
        reader.load(calibration, root_flavor, method)
    report_max_diff(coffea_sf, makesf(reader), ['central', 'up', 'down'], flavor)
Ejemplo n.º 3
0
class btag_scalefactor:
    '''
    DeepJet medium working-point scale factors plus per-flavour MC tagging
    efficiencies, combined into an event weight by Method1a.

    Attributes set by the constructor (only for configured years):
        btag_sf: coffea BTagScaleFactor reader
        effs: dict with keys 'b', 'c', 'light' holding 1D efficiency histograms
    '''

    def __init__(self, year, UL=True):
        '''
        year: data-taking year as an int (2016, 2017 or 2018)
        UL: for 2018, choose the UL inputs (True) or the Autumn18 ones (False);
            ignored for 2017, which only has UL inputs configured
        '''
        self.year = year

        if self.year == 2016:
            # No 2016 inputs are configured: btag_sf and effs stay unset, so
            # Method1a raises AttributeError for such an instance.
            pass
        elif self.year == 2017:
            self._load_inputs('DeepJet_106XUL17SF_WPonly_V2.csv', 'Summer20UL17')
        elif self.year == 2018 and UL:
            self._load_inputs('DeepJet_106XUL18SF_WPonly.csv', 'Summer20UL18')
        elif self.year == 2018 and not UL:
            self._load_inputs('DeepJet_102XSF_V2.csv', 'Autumn18')

    def _load_inputs(self, sf_csv, eff_campaign):
        '''Load the SF reader and the b/c/light efficiencies of one campaign.'''
        data_dir = '$TWHOME/data/btag/'
        self.btag_sf = BTagScaleFactor(os.path.expandvars(data_dir + sf_csv), "medium", keep_df=False)

        # and load the efficiencies
        self.effs = {
            flav: Hist1D.from_json(os.path.expandvars(
                data_dir + '{}_{}_eff_deepJet.json'.format(eff_campaign, flav)))
            for flav in ('b', 'c', 'light')
        }

    def _effs_and_sfs(self, jets, b_direction, c_direction):
        '''Return (efficiency, scale factor) per jet for one jet collection.'''
        import numpy as np

        pt = jets.pt
        flavour = jets.hadronFlavour
        abseta = abs(jets.eta)

        # Efficiency looked up in pt, kept only for jets of the matching flavour.
        eff_b = yahist_1D_lookup(self.effs['b'], pt)*(flavour==5)
        eff_c = yahist_1D_lookup(self.effs['c'], pt)*(flavour==4)
        eff_light = yahist_1D_lookup(self.effs['light'], pt)*(flavour==0)

        sf_b_dir = self.btag_sf.eval(b_direction, flavour, abseta, pt)
        # NOTE(review): light jets are evaluated with c_direction as well,
        # exactly as before - confirm that this correlation is intended.
        sf_c_dir = self.btag_sf.eval(c_direction, flavour, abseta, pt)

        def keep_where_eff_nonzero(eff, sf):
            # eff/eff is 1 where the efficiency is non-zero and NaN (0/0)
            # elsewhere; the NaNs are then replaced by 0.
            masked = (eff/eff)*sf
            return np.where(np.isnan(masked), 0, masked)

        sfs = (keep_where_eff_nonzero(eff_b, sf_b_dir)
               + keep_where_eff_nonzero(eff_c, sf_c_dir)
               + keep_where_eff_nonzero(eff_light, sf_c_dir))
        return eff_b+eff_c+eff_light, sfs

    def Method1a(self, tagged, untagged, b_direction='central', c_direction='central'):
        '''
        Event weight following BTV method 1a.

        tagged: jet collection of tagged jets
        untagged: jet collection of untagged jets
        b_direction: systematic passed to the SF reader for b jets
        c_direction: systematic passed to the SF reader for c and light jets

        Uses self.effs (tagging efficiencies, 1D yahist objects) and
        self.btag_sf (coffea b-tag SF object).

        Returns num/denom, where denom = prod(eff) * prod(1 - eff) and
        num = prod(SF*eff) * prod(1 - SF*eff), the first product running over
        tagged and the second over untagged jets (axis=1).
        '''
        tagged_all, tagged_SFs = self._effs_and_sfs(tagged, b_direction, c_direction)
        untagged_all, untagged_SFs = self._effs_and_sfs(untagged, b_direction, c_direction)

        denom = ak.prod(tagged_all, axis=1) * ak.prod((1-untagged_all), axis=1)
        num = ak.prod(tagged_all*tagged_SFs, axis=1) * ak.prod((1-untagged_all*untagged_SFs), axis=1)
        return num/denom
Ejemplo n.º 4
0
class BTagCorrector:
    """Adds DeepCSV b-tag event weights and their variations to a weights object."""

    def __init__(self, year, workingpoint):
        """Load the scale-factor reader and the efficiency lookups.

        Args:
            year: year as a string key (``'2016'``, ``'2017'``, ``'2018'``).
            workingpoint: working-point name, used both to pick the
                discriminator threshold and to configure ``BTagScaleFactor``.
        """
        self._year = year
        self._wp = BTagEfficiency.btagWPs[year][workingpoint]
        data_dir = os.path.join(os.path.dirname(__file__), 'data')

        sf_csv = {
            '2016': 'DeepCSV_Moriond17_B_H.csv.gz',
            '2017': 'DeepCSV_94XSF_V5_B_F.csv.gz',
            '2018': 'DeepCSV_102XSF_V1.csv.gz',
        }[year]
        self.sf = BTagScaleFactor(os.path.join(data_dir, sf_csv), workingpoint)

        # Every year currently points at the same 2017 efficiency file.
        eff_file = {
            '2016': 'btagQCD2017.coffea',
            '2017': 'btagQCD2017.coffea',
            '2018': 'btagQCD2017.coffea',
        }[year]
        counts = util.load(os.path.join(data_dir, eff_file))

        n_pass = counts.integrate('btag', 'pass').values()[()]
        n_all = counts.integrate('btag').values()[()]
        # Clamp the denominator at 1 so empty bins give 0 instead of NaN.
        ratio = n_pass / numpy.maximum(n_all, 1.)
        lower, upper = hist.clopper_pearson_interval(n_pass, n_all)

        def binning():
            # A fresh edge list per lookup, skipping the first ('btag') axis.
            return [axis.edges() for axis in counts.axes()[1:]]

        self.eff = dense_lookup(ratio, binning())
        self.eff_statUp = dense_lookup(upper, binning())
        self.eff_statDn = dense_lookup(lower, binning())

    def addBtagWeight(self, weights, jets):
        """Register ``btagWeight`` and ``btagEffStat`` on ``weights``.

        Args:
            weights: object with an ``add(name, weight, weightUp, weightDown)``
                method.
            jets: jet collection exposing ``eta``, ``pt``, ``hadronFlavour``
                and ``btagDeepB``.

        Returns:
            The nominal event weight.
        """
        abseta = abs(jets.eta)
        is_tagged = jets.btagDeepB > self._wp
        flavour = jets.hadronFlavour
        pt = jets.pt

        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
        def event_weight(eff, sf):
            # tagged jets contribute SF*eff / eff = SF
            tagged_term = sf[is_tagged].prod()
            # untagged jets contribute (1 - SF*eff) / (1 - eff)
            untagged_term = ((1 - sf * eff) / (1 - eff))[~is_tagged].prod()
            return tagged_term * untagged_term

        eff_nom, eff_statUp, eff_statDn = (
            lookup(flavour, pt, abseta)
            for lookup in (self.eff, self.eff_statUp, self.eff_statDn))
        sf_nom, sf_systUp, sf_systDn = (
            self.sf.eval(variation, flavour, abseta, pt)
            for variation in ('central', 'up', 'down'))

        nom = event_weight(eff_nom, sf_nom)
        weights.add('btagWeight',
                    nom,
                    weightUp=event_weight(eff_nom, sf_systUp),
                    weightDown=event_weight(eff_nom, sf_systDn))
        # The efficiency statistics only enter as a relative variation.
        weights.add('btagEffStat',
                    numpy.ones_like(nom),
                    weightUp=event_weight(eff_statUp, sf_nom) / nom,
                    weightDown=event_weight(eff_statDn, sf_nom) / nom)

        # Log at most four events whose weight looks implausible.
        suspicious = (nom < 0.01) | (nom > 10) | numpy.isnan(nom)
        for i in numpy.where(suspicious)[0][:4]:
            event_jets = jets[i]
            logger.info("Strange weight for event: %r", nom[i])
            logger.info("    jet pts: %r", event_jets.pt)
            logger.info("    jet etas: %r", event_jets.eta)
            logger.info("    jet flavors: %r", event_jets.hadronFlavour)
            logger.info("    jet btags: %r", event_jets.btagDeepB)
            logger.info("    result eff: %r up %r down %r", eff_nom[i],
                        eff_statUp[i], eff_statDn[i])
            logger.info("    result sf: %r", sf_nom[i])
        return nom
Ejemplo n.º 5
0
def test_BTagScalefactor():
    """Check ``BTagScaleFactor`` construction and ``eval`` against reference values.

    Four readers are built (a medium working point, two discriminator
    reshaping readers and a tight working point) and evaluated on flat numpy
    inputs and, at the end, on fields of a jagged awkward array. Two of the
    evaluations are compared with hard-coded reference arrays; two others are
    required to raise ``ValueError``.
    """
    sf1 = BTagScaleFactor('tests/samples/testBTagSF.btag.csv', 'medium')
    sf2 = BTagScaleFactor('tests/samples/DeepCSV_102XSF_V1.btag.csv.gz',
                          BTagScaleFactor.RESHAPE, 'iterativefit')
    sf3 = BTagScaleFactor('tests/samples/DeepCSV_102XSF_V1.btag.csv.gz',
                          BTagScaleFactor.TIGHT)
    sf4 = BTagScaleFactor(
        'tests/samples/DeepCSV_2016LegacySF_V1_TuneCP5.btag.csv.gz',
        BTagScaleFactor.RESHAPE,
        'iterativefit',
        keep_df=True)

    # NOTE(review): the reference arrays below are fixed, so the random inputs
    # must be reproducible - presumably dummy_jagged_eta_pt seeds the numpy
    # RNG; confirm. The order of the RNG calls must not change.
    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_flavor = numpy.random.choice([0, 4, 5], size=len(test_eta))
    test_allb = numpy.ones_like(test_flavor) * 5
    test_discr = numpy.random.rand(len(test_eta))
    # Turn the per-event jet counts into list offsets and wrap the flat
    # arrays into one jagged record array.
    offsets = numpy.zeros(len(counts) + 1)
    offsets[1:] = numpy.cumsum(counts)
    test_jets = ak.Array(
        ak.layout.ListOffsetArray64(
            ak.layout.Index64(offsets),
            ak.zip(
                {
                    "pt": test_pt,
                    "eta": test_eta,
                    "flavor": test_flavor,
                    "btag": test_discr,
                },
                highlevel=False)))

    # Reference output of sf1 (medium working point, central value).
    expected = numpy.array([
        0.93724101, 0.89943609, 1.0671185, 1.06846618, 0.94530984, 1.06645614,
        0.91862676, 1.06645614, 0.94372127, 0.94505261, 1.06645614, 1.06645614,
        1.06645614, 1.06645614, 0.91385676, 1.06738093, 0.89943609, 0.92593492,
        1.06960044, 0.89943609, 1.06645614, 1.06645614, 0.94290361, 1.06892548,
        0.92440686, 0.92046542, 1.06645614, 0.93676041, 0.93392431, 0.91694353,
        0.89943609, 0.89943609, 0.89943609, 0.89943609, 0.89943609, 0.89943609,
        0.89943609, 0.89943609, 0.93371251, 0.89943609, 0.89943609, 0.89943609,
        0.94767034, 1.06645614, 1.0670672, 1.07136352, 0.89943609, 0.90445481,
        0.89943609, 1.06645614, 0.89943609, 0.89943609, 0.93745389, 0.90949125,
        0.91778825, 1.06645614, 1.06645614, 0.89943609, 0.89943609, 1.06645614,
        1.06645614, 1.06645614
    ])
    result = sf1.eval('central', test_flavor, test_eta, test_pt, test_discr)
    assert numpy.allclose(result, expected)

    # Smoke checks: these calls only have to succeed or fail as stated.
    sf1.eval('up', test_flavor, test_eta, test_pt)
    sf2.eval('central', test_allb, test_eta, test_pt, test_discr)
    # The reshaping reader is expected to reject a call without discriminator.
    with pytest.raises(ValueError):
        sf2.eval('up', test_allb, test_eta, test_pt)
    sf3.eval('central', test_flavor, test_eta, test_pt, test_discr)
    sf3.eval('up', test_flavor, test_eta, test_pt)
    # sf4 is expected to reject the mixed-flavour input; the all-b input
    # further down is accepted.
    with pytest.raises(ValueError):
        sf4.eval('central', test_flavor, test_eta, test_pt, test_discr)

    # Reference output of sf4 (reshaping, central value) on all-b jets.
    expected = numpy.array([
        1.2185781, 1.03526095, 1.14997077, 0.91933821, 1.2185781, 1.08865945,
        0.99422718, 1.01943199, 1.01025089, 1.20312875, 0.84198391, 0.91726759,
        0.93501452, 1.31649974, 1.14997077, 1.02107876, 1.06150099, 1.06063444,
        0.90508972, 1.20768481, 0.8484613, 0.99217259, 0.98333802, 1.31302575,
        1.0104926, 1.00474285, 1.24375693, 1.20949677, 0.91714979, 0.99533782,
        1.14997077, 1.02871797, 0.99619147, 0.97543142, 1.31518787, 1.30700837,
        1.14997077, 0.99879282, 0.98961045, 1.14997077, 0.88343516, 0.9930647,
        1.17767042, 1.14997077, 1.30594256, 0.91888068, 1.04737201, 1.03583147,
        1.02833176, 0.99527427, 0.98546895, 1.14997077, 1.04815223, 1.28007547,
        1.1970858, 1.12892238, 1.14997077, 1.14997077, 1.01656481, 0.84198391,
        1.2996388, 1.14997077
    ])
    result = sf4.eval('central', test_allb, test_eta, test_pt, test_discr)
    assert numpy.allclose(result, expected)

    # Jagged (awkward) inputs must be accepted as well.
    sf1.eval('down', test_jets.flavor, test_jets.eta, test_jets.pt)
Ejemplo n.º 6
0
    systs = [
        "jes", "lfstats1", "lfstats2", "hfstats1", "hfstats2", "cferr1",
        "cferr2", "lf", "hf"
    ]

    #BTagScaleFactor from coffea
    t0 = time.time()
    sf = BTagScaleFactor(sf_file,
                         BTagScaleFactor.RESHAPE,
                         'iterativefit,iterativefit,iterativefit',
                         keep_df=True)
    for tsys in systs:
        for sdir in ["up", "down"]:
            tsys_name = sdir + '_' + tsys
            sf.eval(tsys_name, arr_flav[:1], arr_abs_eta[:1], arr_pt[:1],
                    arr_discr[:1], True)
    t1 = time.time()
    print("init_py", t1 - t0)

    #BTagCalibrationStandalone from POG
    t0 = time.time()
    libhmm = LibHMuMu()
    print("loading BTagCalibration")
    systs_sdir = []
    for sdir in ["up", "down"]:
        for syst in systs:
            systs_sdir += [sdir + "_" + syst]
    b = BTagCalibration(libhmm, "DeepCSV", sf_file, systs_sdir)
    t1 = time.time()
    print("init_C", t1 - t0)
Ejemplo n.º 7
0
class btag_scalefactor:
    '''
    DeepJet medium working-point scale factors and MC tagging efficiencies;
    inputs are only configured for 2018.
    '''

    def __init__(self, year):
        '''
        year: data-taking year as an int; for any year other than 2018 no
              inputs are loaded and btag_sf / effs stay unset
        '''
        self.year = year

        # 2016 and 2017 are recognised but have no inputs yet.
        if self.year != 2018:
            return

        self.btag_sf = BTagScaleFactor(
            os.path.expandvars('$TWHOME/data/btag/DeepJet_102XSF_V2.csv'),
            "medium",
            keep_df=False)

        # and load the efficiencies
        self.effs = {
            flav: Hist1D.from_json(
                os.path.expandvars(
                    "$TWHOME/data/btag/Autumn18_{}_eff_deepJet.json".format(
                        flav)))
            for flav in ('b', 'c', 'light')
        }

    def Method1a(self, tagged, untagged):
        '''
        Event weight: prod(SF*eff) * prod(1 - SF*eff) over the tagged and
        untagged jets, divided by the same expression with SF = 1.

        tagged: jet collection of tagged jets
        untagged: jet collection of untagged jets

        Uses self.effs (tagging efficiencies, 1D yahist objects) and
        self.btag_sf (coffea b-tag SF object).
        '''
        flavour_codes = (('b', 5), ('c', 4), ('light', 0))

        def mc_eff(jets):
            # pt-dependent efficiency of each jet's own flavour
            per_flavour = [
                yahist_1D_lookup(self.effs[name], jets.pt) *
                (jets.hadronFlavour == code) for name, code in flavour_codes
            ]
            return per_flavour[0] + per_flavour[1] + per_flavour[2]

        def central_sf(jets):
            return self.btag_sf.eval('central', jets.hadronFlavour,
                                     abs(jets.eta), jets.pt)

        eff_tagged = mc_eff(tagged)
        sf_tagged = central_sf(tagged)
        eff_untagged = mc_eff(untagged)
        sf_untagged = central_sf(untagged)

        p_mc = ak.prod(eff_tagged, axis=1) * ak.prod(
            (1 - eff_untagged), axis=1)
        p_data = ak.prod(eff_tagged * sf_tagged, axis=1) * ak.prod(
            (1 - eff_untagged * sf_untagged), axis=1)
        return p_data / p_mc
Ejemplo n.º 8
0
# Copy the mask_* columns from cms_events into out_events unchanged.
out_events["mask_topmunu2b"] = cms_events["mask_topmunu2b"]
out_events["mask_topmunu1b"] = cms_events["mask_topmunu1b"]
out_events["mask_wenu1b"] = cms_events["mask_wenu1b"]
out_events["mask_wenu2b"] = cms_events["mask_wenu2b"]
out_events["mask_wmunu1b"] = cms_events["mask_wmunu1b"]
out_events["mask_wmunu2b"] = cms_events["mask_wmunu2b"]

####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####
####  ####   the following code that reads the SFs can be moved into another Python file   ####  ####
####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####  ####
## b-tagging scale factors
from coffea.btag_tools import BTagScaleFactor

# One central "medium" scale-factor column (btagsfN) per jet slot N, evaluated
# from that slot's jetflavN, |jetetaN| and jetptN columns.
# NOTE(review): presumably these are the flavour, eta and pt of the N-th jet
# of each event - confirm against the code that fills out_events.
btag_sf = BTagScaleFactor("data/DeepCSV_2016LegacySF_V1.csv.gz", "medium")
out_events["btagsf0"] = btag_sf.eval("central", out_events.jetflav0,
                                     abs(out_events.jeteta0),
                                     out_events.jetpt0)
out_events["btagsf1"] = btag_sf.eval("central", out_events.jetflav1,
                                     abs(out_events.jeteta1),
                                     out_events.jetpt1)
out_events["btagsf2"] = btag_sf.eval("central", out_events.jetflav2,
                                     abs(out_events.jeteta2),
                                     out_events.jetpt2)
out_events["btagsf3"] = btag_sf.eval("central", out_events.jetflav3,
                                     abs(out_events.jeteta3),
                                     out_events.jetpt3)
out_events["btagsf4"] = btag_sf.eval("central", out_events.jetflav4,
                                     abs(out_events.jeteta4),
                                     out_events.jetpt4)
out_events["btagsf5"] = btag_sf.eval("central", out_events.jetflav5,
                                     abs(out_events.jeteta5),