Example #1
0
def theory_weights_monojet(weights, df, evaluator, gen_v_pt):
    """Add the 'theory' weight, chosen according to the sample flags in df.

    * ``is_lo_w`` / ``is_lo_z``: QCD NLO factor times the EWK NLO factor.
      For 2016 the QCD factor is read from ``evaluator``, for other years
      it is computed with ``fitfun``.
    * ``is_nlo_w`` / ``is_nlo_z``: EWK NLO factor only.
    * ``is_lo_g``: ``fitfun`` factor times the 'ewk_nlo_g' factor.
    * none of the above: unit weights of length ``df.size``.

    Entries whose ``gen_v_pt`` is non-positive, infinite or NaN get weight 1.

    :param weights: Object with an ``add(name, values)`` method
    :param df: Data frame providing the flags, 'dataset' and ``size``
    :param evaluator: Mapping from correction name to a callable of pt
    :param gen_v_pt: Array of generator-level pt values
    :return: The ``weights`` object that was passed in
    """
    def qcd_nlo_factor(key_2016, fit_parameters):
        # 2016 uses the correction stored in the evaluator,
        # every other year the fitted function.
        if extract_year(df['dataset']) == 2016:
            return evaluator[key_2016](gen_v_pt)
        return fitfun(gen_v_pt, *fit_parameters)

    if df['is_lo_w']:
        qcd = qcd_nlo_factor("qcd_nlo_w_2016", (1.053, 3.163e-3, 0.746))
        sf = qcd * evaluator["ewk_nlo_w"](gen_v_pt)
    elif df['is_lo_z']:
        qcd = qcd_nlo_factor("qcd_nlo_z_2016", (1.434, 2.210e-3, 0.443))
        sf = qcd * evaluator["ewk_nlo_z"](gen_v_pt)
    elif df['is_nlo_w']:
        sf = evaluator["ewk_nlo_w"](gen_v_pt)
    elif df['is_nlo_z']:
        sf = evaluator["ewk_nlo_z"](gen_v_pt)
    elif df['is_lo_g']:
        qcd = fitfun(gen_v_pt, 1.159, 1.944e-3, 1.0)
        sf = qcd * evaluator["ewk_nlo_g"](gen_v_pt)
    else:
        sf = np.ones(df.size)

    # Neutralize the weight wherever the input pt is unusable
    bad_pt = (gen_v_pt <= 0) | np.isinf(gen_v_pt) | np.isnan(gen_v_pt)
    sf[bad_pt] = 1

    weights.add('theory', sf)
    return weights
Example #2
0
def scale_xs_lumi(histogram, scale_lumi=True):
    """Scale the MC datasets of a histogram so they can be compared to data.

    Every non-data dataset found on the 'dataset' axis is scaled by
    ``1e3 * xs``, multiplied by the luminosity of the dataset's year when
    ``scale_lumi`` is true. A dataset without a known cross section
    produces a printed warning and is scaled by 0. The scaling is applied
    through ``histogram.scale``; nothing is returned.

    :param histogram: Histogram to normalize
    :type histogram: coffea Hist
    :param scale_lumi: Whether to multiply by the luminosity as well
    :type scale_lumi: bool
    """
    # Collect the dataset names and keep only the MC ones
    all_names = [str(ident) for ident in histogram.axis('dataset').identifiers()]
    mc_names = [name for name in all_names if not is_data(name)]

    cross_sections = load_xs()

    def lookup_xs(name):
        # The cross-section table is keyed by the name with the
        # '_new_*pmx' pattern stripped.
        try:
            return cross_sections[re.sub('_new_*pmx', '', name)]
        except KeyError:
            print(
                f"WARNING: Cross section not found for dataset {name}. Using 0.")
            return 0

    xs_of = {name: lookup_xs(name) for name in mc_names}

    # Normalize to XS * lumi
    scale_factors = {}
    for name in mc_names:
        lumi_factor = lumi(extract_year(name)) if scale_lumi else 1
        scale_factors[name] = 1e3 * xs_of[name] * lumi_factor

    histogram.scale(scale_factors, axis='dataset')
Example #3
0
def photon_trigger_sf(weights, photons, df):
    """MC-to-data photon trigger scale factor.

    The scale factor is obtained by separately fitting the
    trigger turn-on with a sigmoid function in data and MC.
    The scale factor is then the ratio of the two sigmoid
    functions as a function of the photon pt.

    For 2016 the scale factor is 1 for every event. Entries that
    evaluate to NaN or infinity are replaced by 1 before the weight
    is stored under the name "trigger_photon".

    :param weights: Weights object to write information into
    :type weights: WeightsContainer
    :param photons: Photon candidates
    :type photons: JaggedCandidateArray
    :param df: Data frame
    :type df: LazyDataFrame
    :raises ValueError: If the dataset year is not 2016, 2017 or 2018
    """
    year = extract_year(df['dataset'])
    # Highest photon pt in each event
    x = photons.pt.max()
    if year == 2016:
        sf = np.ones(df.size)
    elif year == 2017:
        sf = sigmoid(x, 0.335, 217.91, 0.065, 0.996) / sigmoid(
            x, 0.244, 212.34, 0.050, 1.000)
    elif year == 2018:
        sf = sigmoid(x, 1.022, 218.39, 0.086, 0.999) / sigmoid(
            x, 0.301, 212.83, 0.062, 1.000)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on 'sf'; fail with a clear message instead.
        raise ValueError(
            f"No photon trigger scale factor defined for year {year}.")

    # Must be an assignment: the original used '==', which only compared
    # and left NaN / inf scale factors in place.
    sf[np.isnan(sf) | np.isinf(sf)] = 1

    weights.add("trigger_photon", sf)
Example #4
0
def candidate_weights(weights, df, evaluator, muons, electrons, photons):
    """Add the lepton and photon scale-factor weights to ``weights``.

    Each weight is the per-event product (``.prod()``) of the values the
    matching evaluator returns for the selected candidates.

    :param weights: Object with an ``add(name, values)`` method
    :param df: Data frame providing 'dataset' and the ``is_tight_*`` masks
    :param evaluator: Mapping from scale-factor name to callable
    :param muons: Muon candidates
    :param electrons: Electron candidates
    :param photons: Photon candidates
    :return: The ``weights`` object that was passed in
    """
    # Muon ID and isolation, tight and loose WP: function of pT, eta (Order!)
    tight_muons = muons[df['is_tight_muon']]
    loose_muons = muons[~df['is_tight_muon']]
    muon_selections = (
        ("muon_id_tight", tight_muons),
        ("muon_iso_tight", tight_muons),
        ("muon_id_loose", loose_muons),
        ("muon_iso_loose", loose_muons),
    )
    for key, selected in muon_selections:
        weights.add(key, evaluator[key](selected.pt, selected.abseta).prod())

    # Electron reco and ID: function of eta, pT (other way round relative to muons!)
    electron_selections = (
        ("ele_reco", electrons),
        ("ele_id_tight", electrons[df['is_tight_electron']]),
        ("ele_id_loose", electrons[~df['is_tight_electron']]),
    )
    for key, selected in electron_selections:
        weights.add(key, evaluator[key](selected.eta, selected.pt).prod())

    # Photon ID
    tight_photons = photons[df['is_tight_photon']]
    weights.add("photon_id_tight", evaluator['photon_id_tight'](tight_photons.eta, tight_photons.pt).prod())

    # Photon electron veto: the evaluator inputs depend on the year
    year = extract_year(df['dataset'])
    if year == 2018:
        csev = evaluator['photon_csev'](photons.pt, photons.eta).prod()
        # A scale factor of exactly zero is replaced by one
        csev[csev==0] = 1
        weights.add("photon_csev", csev)
    elif year in [2016,2017]:
        # Single lookup index built from the barrel flag and the r9 category
        sf_index = 0.5 * photons.barrel + 3.5 * ~photons.barrel + 1 * (photons.r9 > 0.94) + 2 * (photons.r9 <= 0.94)
        weights.add("photon_csev", evaluator['photon_csev'](sf_index).prod())

    return weights
def setup_photons(df):
    """Build the photon candidates from df and apply the selection.

    The ID branch is 'Photon_cutBased' for 2016 and
    'Photon_cutBasedBitmap' otherwise. Only photons with pt > 200 that
    are in the barrel (|eta| < 1.479) and pass the electron veto are kept.

    :param df: Data frame providing 'dataset' and the Photon_* branches
    :return: The selected photon candidates
    """
    is_2016 = extract_year(df['dataset']) == 2016
    id_branch = 'Photon_cutBased' if is_2016 else 'Photon_cutBasedBitmap'

    abs_eta = np.abs(df['Photon_eta'])
    electron_veto = df['Photon_electronVeto']

    candidates = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        abseta=abs_eta,
        phi=df['Photon_phi'],
        mass=0 * df['Photon_pt'],
        mediumId=(df[id_branch] >= 2) & electron_veto,
        r9=df['Photon_r9'],
        barrel=abs_eta < 1.479,
        vid=df['Photon_vidNestedWPBitmap'],
        eleveto=electron_veto,
        sieie=df['Photon_sieie'],
    )

    keep = (candidates.pt > 200) & candidates.barrel & candidates.eleveto
    return candidates[keep]
Example #6
0
    def _configure(self, df):
        """Store the dataset year on self and reload cfg for that era.

        The year is extracted from ``df['dataset']``; the configuration is
        then reloaded from config/monojet.yaml with the environment
        ``era<year>``.
        """
        self._year = extract_year(df['dataset'])

        # Reload config based on year
        settings = (
            ("DYNACONF_WORKS", "merge_configs"),
            ("MERGE_ENABLED_FOR_DYNACONF", True),
            ("SETTINGS_FILE_FOR_DYNACONF", bucoffea_path("config/monojet.yaml")),
            ("ENV_FOR_DYNACONF", f"era{self._year}"),
        )
        for option, value in settings:
            setattr(cfg, option, value)
        cfg.reload()
Example #7
0
    def _configure(self, df=None):
        """Reload cfg from config/vbfhinv.yaml, optionally for a given era.

        When ``df`` is truthy, the year is extracted from ``df['dataset']``,
        stored in ``self._year`` and the environment ``era<year>`` is
        selected. Otherwise (including the default ``None``) the "default"
        environment is used and ``self._year`` is left untouched.
        """
        cfg.DYNACONF_WORKS = "merge_configs"
        cfg.MERGE_ENABLED_FOR_DYNACONF = True
        cfg.SETTINGS_FILE_FOR_DYNACONF = bucoffea_path("config/vbfhinv.yaml")

        # Pick the environment: era-specific if a data frame is available
        if not df:
            environment = "default"
        else:
            self._year = extract_year(df['dataset'])
            environment = f"era{self._year}"

        cfg.ENV_FOR_DYNACONF = environment
        cfg.reload()
Example #8
0
def candidate_weights(weights, df, evaluator, muons, electrons, photons, cfg):
    """Add the lepton and photon scale-factor weights to ``weights``.

    Each weight is the per-event product (``.prod()``) of the values the
    matching evaluator returns for the selected candidates. When the
    corresponding ``USE_AVERAGE`` switch in ``cfg.SF`` is on, events with
    exactly one pair of tight muons (electrons) use
    ``0.5 * (L0 * T1 + L1 * T0)`` in place of the plain tight product.

    :param weights: Object with an ``add(name, values)`` method
    :param df: Data frame providing 'dataset' and the ``is_tight_*`` masks
    :param evaluator: Mapping from scale-factor name to callable
    :param muons: Muon candidates
    :param electrons: Electron candidates
    :param photons: Photon candidates
    :param cfg: Configuration object holding the ``SF`` switches
    :return: The ``weights`` object that was passed in
    """
    year = extract_year(df['dataset'])

    def muon_id_iso(id_key, iso_key, leg):
        # ID x isolation scale factor of one leg; function of pT, eta (Order!)
        return (evaluator[id_key](leg.pt, leg.abseta)
                * evaluator[iso_key](leg.pt, leg.abseta)).prod()

    def electron_id(key, selected):
        # Function of eta, pT (other way round relative to muons!)
        return evaluator[key](selected.etasc, selected.pt).prod()

    # --- Muons: tight working point
    tight_muons = muons[df['is_tight_muon']]
    tight_id = evaluator['muon_id_tight'](tight_muons.pt,
                                          tight_muons.abseta).prod()
    tight_iso = evaluator['muon_iso_tight'](tight_muons.pt,
                                            tight_muons.abseta).prod()
    muon_tight_sf = tight_id * tight_iso

    if cfg.SF.DIMUO_ID_SF.USE_AVERAGE:
        mu_pairs = tight_muons.distincts()
        t0 = muon_id_iso('muon_id_tight', 'muon_iso_tight', mu_pairs.i0)
        t1 = muon_id_iso('muon_id_tight', 'muon_iso_tight', mu_pairs.i1)
        l0 = muon_id_iso('muon_id_loose', 'muon_iso_loose', mu_pairs.i0)
        l1 = muon_id_iso('muon_id_loose', 'muon_iso_loose', mu_pairs.i1)
        pair_average = 0.5 * (l0 * t1 + l1 * t0)
        # Use the averaged value only for events with exactly one tight pair
        muon_tight_sf = (muon_tight_sf * (mu_pairs.counts != 1)
                         + pair_average * (mu_pairs.counts == 1))
    weights.add("muon_id_iso_tight", muon_tight_sf)

    # --- Muons: loose working point (everything that is not tight)
    loose_muons = muons[~df['is_tight_muon']]
    for key in ("muon_id_loose", "muon_iso_loose"):
        weights.add(
            key, evaluator[key](loose_muons.pt, loose_muons.abseta).prod())

    # --- Electron reco: for 2017 the SF is split below/above 20 GeV
    if year == 2017:
        above_20 = electrons.pt > 20
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc[above_20],
                                            electrons.pt[above_20]).prod()
        ele_reco_sf *= evaluator['ele_reco_pt_lt_20'](
            electrons.etasc[~above_20], electrons.pt[~above_20]).prod()
    else:
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc,
                                            electrons.pt).prod()
    weights.add("ele_reco", ele_reco_sf)

    # --- Electron ID: not split in pt
    tight_electrons = electrons[df['is_tight_electron']]
    electron_tight_sf = electron_id('ele_id_tight', tight_electrons)
    if cfg.SF.DIELE_ID_SF.USE_AVERAGE:
        # in case of 2 tight electrons, apply 0.5*(T1L2+T2L1) instead of T1T2
        ele_pairs = tight_electrons.distincts()
        l0 = electron_id('ele_id_loose', ele_pairs.i0)
        t0 = electron_id('ele_id_tight', ele_pairs.i0)
        l1 = electron_id('ele_id_loose', ele_pairs.i1)
        t1 = electron_id('ele_id_tight', ele_pairs.i1)
        pair_average = 0.5 * (l0 * t1 + l1 * t0)
        electron_tight_sf = (electron_tight_sf * (ele_pairs.counts != 1)
                             + pair_average * (ele_pairs.counts == 1))
    weights.add("ele_id_tight", electron_tight_sf)
    weights.add(
        "ele_id_loose",
        electron_id('ele_id_loose', electrons[~df['is_tight_electron']]))

    # --- Photon ID
    tight_photons = photons[df['is_tight_photon']]
    if cfg.SF.PHOTON.USETNP:
        photon_id_sf = evaluator['photon_id_tight_tnp'](
            np.abs(tight_photons.eta)).prod()
    else:
        photon_id_sf = evaluator['photon_id_tight'](tight_photons.eta,
                                                    tight_photons.pt).prod()
    weights.add("photon_id_tight", photon_id_sf)

    # --- Photon electron veto: the evaluator inputs depend on the year
    if year == 2016:
        csev_weight = evaluator["photon_csev"](photons.abseta,
                                               photons.pt).prod()
    elif year == 2017:
        # Single lookup index built from the barrel flag and the r9 category
        sf_index = 0.5 * photons.barrel + 3.5 * ~photons.barrel + 1 * (
            photons.r9 > 0.94) + 2 * (photons.r9 <= 0.94)
        csev_weight = evaluator['photon_csev'](sf_index).prod()
    elif year == 2018:
        csev_weight = evaluator['photon_csev'](photons.pt,
                                               photons.abseta).prod()
    # A scale factor of exactly zero is replaced by one
    csev_weight[csev_weight == 0] = 1
    weights.add("photon_csev", csev_weight)

    return weights
Example #9
0
def setup_candidates(df, cfg):
    """Build and pre-filter the object collections from the branches in df.

    Muons, electrons, taus, photons, AK4 jets, b jets and AK8 jets are
    created with ``JaggedCandidateArray.candidatesfromcounts`` and then
    filtered with the cuts and overlap-cleaning switches found in ``cfg``.
    The collections are cleaned in sequence, so later ones are cleaned
    against the already-filtered earlier ones.

    Side effect: writes the per-event boolean ``df['hemveto']``.

    :param df: Data frame providing the branches by name, plus
               'is_data' and 'dataset'
    :param cfg: Configuration object with cut values, branch names and
                overlap-cleaning switches
    :return: Tuple ``(met_pt, met_phi, ak4, bjets, ak8, muons, electrons,
             taus, photons)``
    """
    # Choose the branch-name suffix for jet quantities (jes_suffix)
    # and for MET (jes_suffix_met)
    if df['is_data'] and extract_year(df['dataset']) != 2018:
        # 2016, 2017 data
        jes_suffix = ''
        jes_suffix_met = ''
    elif df['is_data']:
        # 2018 data
        jes_suffix = '_nom'
        jes_suffix_met = '_nom'
    else:
        # MC, all years
        jes_suffix = '_nom'
        if cfg.MET.JER:
            jes_suffix_met = '_jer'
        else:
            jes_suffix_met = '_nom'

    # Muon candidates; the mass is set to zero
    muons = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'],
        eta=df['Muon_eta'],
        abseta=np.abs(df['Muon_eta']),
        phi=df['Muon_phi'],
        mass=0 * df['Muon_pt'],
        charge=df['Muon_charge'],
        looseId=df['Muon_looseId'],
        iso=df["Muon_pfRelIso04_all"],
        tightId=df['Muon_tightId'],
        dxy=df['Muon_dxy'],
        dz=df['Muon_dz'])

    # All muons must be at least loose
    muons = muons[muons.looseId \
                    & (muons.iso < cfg.MUON.CUTS.LOOSE.ISO) \
                    & (muons.pt > cfg.MUON.CUTS.LOOSE.PT) \
                    & (muons.abseta<cfg.MUON.CUTS.LOOSE.ETA) \
                    ]

    # Electron candidates; etasc / absetasc add Electron_deltaEtaSC to eta,
    # and 'barrel' is defined as |etasc| <= 1.4442
    electrons = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'],
        eta=df['Electron_eta'],
        abseta=np.abs(df['Electron_eta']),
        etasc=df['Electron_eta'] + df['Electron_deltaEtaSC'],
        absetasc=np.abs(df['Electron_eta'] + df['Electron_deltaEtaSC']),
        phi=df['Electron_phi'],
        mass=0 * df['Electron_pt'],
        charge=df['Electron_charge'],
        looseId=(df[cfg.ELECTRON.BRANCH.ID] >= 1),
        tightId=(df[cfg.ELECTRON.BRANCH.ID] == 4),
        dxy=np.abs(df['Electron_dxy']),
        dz=np.abs(df['Electron_dz']),
        barrel=np.abs(df['Electron_eta'] + df['Electron_deltaEtaSC']) <=
        1.4442)
    # All electrons must be at least loose
    # The dxy / dz requirements use separate thresholds for barrel and endcap
    pass_dxy = (electrons.barrel & (np.abs(electrons.dxy) < cfg.ELECTRON.CUTS.LOOSE.DXY.BARREL)) \
    | (~electrons.barrel & (np.abs(electrons.dxy) < cfg.ELECTRON.CUTS.LOOSE.DXY.ENDCAP))

    pass_dz = (electrons.barrel & (np.abs(electrons.dz) < cfg.ELECTRON.CUTS.LOOSE.DZ.BARREL)) \
    | (~electrons.barrel & (np.abs(electrons.dz) < cfg.ELECTRON.CUTS.LOOSE.DZ.ENDCAP))

    electrons = electrons[electrons.looseId \
                                    & (electrons.pt>cfg.ELECTRON.CUTS.LOOSE.PT) \
                                    & (electrons.absetasc<cfg.ELECTRON.CUTS.LOOSE.ETA) \
                                    & pass_dxy \
                                    & pass_dz
                                    ]

    # NOTE(review): object_overlap is defined elsewhere; it is used here as a
    # selection mask, presumably keeping objects farther than dr from the
    # second collection -- confirm against its definition.
    if cfg.OVERLAP.ELECTRON.MUON.CLEAN:
        electrons = electrons[object_overlap(electrons,
                                             muons,
                                             dr=cfg.OVERLAP.ELECTRON.MUON.DR)]

    # Tau candidates; the decay-mode and isolation branches are configurable
    taus = JaggedCandidateArray.candidatesfromcounts(
        df['nTau'],
        pt=df['Tau_pt'],
        eta=df['Tau_eta'],
        abseta=np.abs(df['Tau_eta']),
        phi=df['Tau_phi'],
        mass=0 * df['Tau_pt'],
        decaymode=df[cfg.TAU.BRANCH.ID],
        iso=df[cfg.TAU.BRANCH.ISO])

    # For MC, add the matched gen-particle info for checking
    if not df['is_data']:
        kwargs = {'genpartflav': df['Tau_genPartFlav']}
        taus.add_attributes(**kwargs)

    # decaymode is used directly as a mask; the iso value must have the
    # bit with value 2 set
    taus = taus[ (taus.decaymode) \
                & (taus.pt > cfg.TAU.CUTS.PT)\
                & (taus.abseta < cfg.TAU.CUTS.ETA) \
                & ((taus.iso&2)==2)]

    if cfg.OVERLAP.TAU.MUON.CLEAN:
        taus = taus[object_overlap(taus, muons, dr=cfg.OVERLAP.TAU.MUON.DR)]
    if cfg.OVERLAP.TAU.ELECTRON.CLEAN:
        taus = taus[object_overlap(taus,
                                   electrons,
                                   dr=cfg.OVERLAP.TAU.ELECTRON.DR)]

    # choose the right branch name for photon ID bitmap depending on the actual name in the file (different between nano v5 and v7)
    if cfg.PHOTON.BRANCH.ID in df.keys():
        PHOTON_BRANCH_ID = cfg.PHOTON.BRANCH.ID
    else:
        PHOTON_BRANCH_ID = cfg.PHOTON.BRANCH.IDV7
    # Photon candidates; both ID flags also require Photon_electronVeto
    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        abseta=np.abs(df['Photon_eta']),
        phi=df['Photon_phi'],
        mass=0 * df['Photon_pt'],
        looseId=(df[PHOTON_BRANCH_ID] >= 1) & df['Photon_electronVeto'],
        mediumId=(df[PHOTON_BRANCH_ID] >= 2) & df['Photon_electronVeto'],
        r9=df['Photon_r9'],
        barrel=df['Photon_isScEtaEB'],
    )
    # All photons must be at least loose
    photons = photons[photons.looseId \
              & (photons.pt > cfg.PHOTON.CUTS.LOOSE.pt) \
              & (photons.abseta < cfg.PHOTON.CUTS.LOOSE.eta)
              ]

    if cfg.OVERLAP.PHOTON.MUON.CLEAN:
        photons = photons[object_overlap(photons,
                                         muons,
                                         dr=cfg.OVERLAP.PHOTON.MUON.DR)]
    if cfg.OVERLAP.PHOTON.ELECTRON.CLEAN:
        photons = photons[object_overlap(photons,
                                         electrons,
                                         dr=cfg.OVERLAP.PHOTON.ELECTRON.DR)]

    # AK4 jets. For MC with cfg.AK4.JER switched off, the pt is divided by
    # Jet_corr_JER; the same pt expression is repeated inside 'puid', which
    # passes if Jet_puId has the bit with value 2 set or the pt exceeds 50.
    # NOTE(review): looseId and tightId are computed with the identical
    # expression ((Jet_jetId & 2) == 2) -- confirm this is intended.
    ak4 = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df[f'Jet_pt{jes_suffix}'] if
        (df['is_data'] or cfg.AK4.JER) else df[f'Jet_pt{jes_suffix}'] /
        df['Jet_corr_JER'],
        eta=df['Jet_eta'],
        abseta=np.abs(df['Jet_eta']),
        phi=df['Jet_phi'],
        mass=np.zeros_like(df['Jet_pt']),
        looseId=(
            df['Jet_jetId']
            & 2) == 2,  # bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto
        tightId=(
            df['Jet_jetId']
            & 2) == 2,  # bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto
        puid=((df['Jet_puId'] & 2 > 0) |
              ((df[f'Jet_pt{jes_suffix}'] if
                (df['is_data'] or cfg.AK4.JER) else df[f'Jet_pt{jes_suffix}'] /
                df['Jet_corr_JER']) > 50)),  # medium pileup jet ID
        csvv2=df["Jet_btagCSVV2"],
        deepcsv=df['Jet_btagDeepB'],
        nef=df['Jet_neEmEF'],
        nhf=df['Jet_neHEF'],
        chf=df['Jet_chHEF'],
        ptraw=df['Jet_pt'] * (1 - df['Jet_rawFactor']),
        nconst=df['Jet_nConstituents'],
        hadflav=0 * df['Jet_pt'] if df['is_data'] else df['Jet_hadronFlavour'])

    # Before cleaning, apply HEM veto
    # hemveto is True for events with no jet of pt > 30 inside
    # -3.0 < eta < -1.3 and -1.57 < phi < -0.87
    hem_ak4 = ak4[(ak4.pt > 30) & (-3.0 < ak4.eta) & (ak4.eta < -1.3) &
                  (-1.57 < ak4.phi) & (ak4.phi < -0.87)]
    df['hemveto'] = hem_ak4.counts == 0

    # B jets have their own overlap cleaning,
    # so deal with them before applying filtering to jets
    # The discriminator attribute and its threshold are both picked via cfg.BTAG
    btag_discriminator = getattr(ak4, cfg.BTAG.algo)
    btag_cut = cfg.BTAG.CUTS[cfg.BTAG.algo][cfg.BTAG.wp]
    bjets = ak4[
        (ak4.pt > cfg.BTAG.PT) \
        & (ak4.abseta < cfg.BTAG.ETA) \
        & (btag_discriminator > btag_cut)
    ]

    if cfg.OVERLAP.BTAG.MUON.CLEAN:
        bjets = bjets[object_overlap(bjets, muons,
                                     dr=cfg.OVERLAP.BTAG.MUON.DR)]
    if cfg.OVERLAP.BTAG.ELECTRON.CLEAN:
        bjets = bjets[object_overlap(bjets,
                                     electrons,
                                     dr=cfg.OVERLAP.BTAG.ELECTRON.DR)]
    if cfg.OVERLAP.BTAG.PHOTON.CLEAN:
        bjets = bjets[object_overlap(bjets,
                                     photons,
                                     dr=cfg.OVERLAP.BTAG.PHOTON.DR)]

    # From here on, only jets passing looseId are kept in ak4
    ak4 = ak4[ak4.looseId]

    if cfg.OVERLAP.AK4.MUON.CLEAN:
        ak4 = ak4[object_overlap(ak4, muons, dr=cfg.OVERLAP.AK4.MUON.DR)]
    if cfg.OVERLAP.AK4.ELECTRON.CLEAN:
        ak4 = ak4[object_overlap(ak4,
                                 electrons,
                                 dr=cfg.OVERLAP.AK4.ELECTRON.DR)]
    if cfg.OVERLAP.AK4.PHOTON.CLEAN:
        ak4 = ak4[object_overlap(ak4, photons, dr=cfg.OVERLAP.AK4.PHOTON.DR)]

    # Soft-drop mass used as the AK8 mass. For MC it is divided by the
    # JMR and JMS correction branches, and additionally by FatJet_corr_JER
    # when cfg.AK8.JER is switched off.
    if df['is_data']:
        msd = df[f'FatJet_msoftdrop{jes_suffix}']
    else:
        msd = df[f'FatJet_msoftdrop{jes_suffix}'] / (
            df['FatJet_msoftdrop_corr_JMR'] * df['FatJet_msoftdrop_corr_JMS'])
        if not cfg.AK8.JER:
            msd = msd / df['FatJet_corr_JER']

    # AK8 jets; the pt follows the same JER convention as for AK4,
    # controlled by cfg.AK8.JER
    ak8 = JaggedCandidateArray.candidatesfromcounts(
        df['nFatJet'],
        pt=df[f'FatJet_pt{jes_suffix}'] if
        (df['is_data'] or cfg.AK8.JER) else df[f'FatJet_pt{jes_suffix}'] /
        df['FatJet_corr_JER'],
        eta=df['FatJet_eta'],
        abseta=np.abs(df['FatJet_eta']),
        phi=df['FatJet_phi'],
        mass=msd,
        tightId=(df['FatJet_jetId'] & 2) == 2,  # Tight
        csvv2=df["FatJet_btagCSVV2"],
        deepcsv=df['FatJet_btagDeepB'],
        tau1=df['FatJet_tau1'],
        tau2=df['FatJet_tau2'],
        tau21=df['FatJet_tau2'] / df['FatJet_tau1'],
        wvsqcd=df['FatJet_deepTag_WvsQCD'],
        wvsqcdmd=df['FatJet_deepTagMD_WvsQCD'],
        zvsqcd=df['FatJet_deepTag_ZvsQCD'],
        zvsqcdmd=df['FatJet_deepTagMD_ZvsQCD'],
        tvsqcd=df['FatJet_deepTag_TvsQCD'],
        tvsqcdmd=df['FatJet_deepTagMD_TvsQCD'],
        wvstqcd=df['FatJet_deepTag_WvsQCD'] *
        (1 - df['FatJet_deepTag_TvsQCD']) /
        (1 - df['FatJet_deepTag_WvsQCD'] * df['FatJet_deepTag_TvsQCD']),
        wvstqcdmd=df['FatJet_deepTagMD_WvsQCD'] *
        (1 - df['FatJet_deepTagMD_TvsQCD']) /
        (1 - df['FatJet_deepTagMD_WvsQCD'] * df['FatJet_deepTagMD_TvsQCD']),
    )
    # AK8 cleaning is unconditional and passes no dr, so object_overlap's
    # default is used
    ak8 = ak8[ak8.tightId & object_overlap(ak8, muons)
              & object_overlap(ak8, electrons) & object_overlap(ak8, photons)]

    # 2017 reads MET from the METFixEE2017 branches instead of MET
    if extract_year(df['dataset']) == 2017:
        met_branch = 'METFixEE2017'
    else:
        met_branch = 'MET'

    met_pt = df[f'{met_branch}_pt{jes_suffix_met}']
    met_phi = df[f'{met_branch}_phi{jes_suffix_met}']

    return met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons
Example #10
0
def data_driven_qcd_dataset(dataset):
    """Return the data-driven QCD dataset name for the year of ``dataset``.

    :param dataset: Dataset name the year is extracted from
    :return: String of the form ``QCD_data_<year>``
    """
    return f"QCD_data_{extract_year(dataset)}"
Example #11
0
def candidate_weights(weights, df, evaluator, muons, electrons, photons):
    """Add the lepton and photon scale-factor weights to ``weights``.

    Each weight is the per-event product (``.prod()``) of the values the
    matching evaluator returns for the selected candidates.

    :param weights: Object with an ``add(name, values)`` method
    :param df: Data frame providing 'dataset' and the ``is_tight_*`` masks
    :param evaluator: Mapping from scale-factor name to callable
    :param muons: Muon candidates
    :param electrons: Electron candidates
    :param photons: Photon candidates
    :return: The ``weights`` object that was passed in
    """
    year = extract_year(df['dataset'])

    # Muon ID and isolation, tight and loose WP: function of pT, eta (Order!)
    tight_muons = muons[df['is_tight_muon']]
    loose_muons = muons[~df['is_tight_muon']]
    muon_selections = (
        ("muon_id_tight", tight_muons),
        ("muon_iso_tight", tight_muons),
        ("muon_id_loose", loose_muons),
        ("muon_iso_loose", loose_muons),
    )
    for key, selected in muon_selections:
        weights.add(key, evaluator[key](selected.pt, selected.abseta).prod())

    # Electron reco: function of eta, pT (other way round relative to muons!)
    # For 2017, the reco SF is split below/above 20 GeV
    if year == 2017:
        above_20 = electrons.pt > 20
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc[above_20],
                                            electrons.pt[above_20]).prod()
        ele_reco_sf *= evaluator['ele_reco_pt_lt_20'](
            electrons.etasc[~above_20], electrons.pt[~above_20]).prod()
    else:
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc,
                                            electrons.pt).prod()
    weights.add("ele_reco", ele_reco_sf)

    # Electron ID/iso SF is not split
    electron_selections = (
        ("ele_id_tight", electrons[df['is_tight_electron']]),
        ("ele_id_loose", electrons[~df['is_tight_electron']]),
    )
    for key, selected in electron_selections:
        weights.add(key, evaluator[key](selected.etasc, selected.pt).prod())

    # Photon ID
    tight_photons = photons[df['is_tight_photon']]
    weights.add(
        "photon_id_tight",
        evaluator['photon_id_tight'](tight_photons.eta,
                                     tight_photons.pt).prod())

    # Photon electron veto: the evaluator inputs depend on the year
    if year == 2016:
        csev_weight = evaluator["photon_csev"](photons.abseta,
                                               photons.pt).prod()
    elif year == 2017:
        # Single lookup index built from the barrel flag and the r9 category
        sf_index = 0.5 * photons.barrel + 3.5 * ~photons.barrel + 1 * (
            photons.r9 > 0.94) + 2 * (photons.r9 <= 0.94)
        csev_weight = evaluator['photon_csev'](sf_index).prod()
    elif year == 2018:
        csev_weight = evaluator['photon_csev'](photons.pt,
                                               photons.abseta).prod()
    # A scale factor of exactly zero is replaced by one
    csev_weight[csev_weight == 0] = 1
    weights.add("photon_csev", csev_weight)

    return weights
Example #12
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_v_jet'] = has_v_jet(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if not df['is_data']:
            gen = setup_gen_candidates(df)
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']:
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max()

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg)

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index=ak4.pt.argmax()

        elejet_pairs = ak4[:,:1].cross(electrons)
        df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:,:1].cross(muons)
        df['dRMuonJet'] = np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min()

        # Photons
        # Angular distance leading photon - leading jet
        phojet_pairs = ak4[:,:1].cross(photons[:,:1])
        df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons)
        df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4)
        selection = processor.PackedSelection()



        # Triggers
        pass_all = np.ones(df.size)==1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)
        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts==0)
        selection.add('veto_muo', muons.counts==0)
        selection.add('veto_photon', photons.counts==0)
        selection.add('veto_tau', taus.counts==0)
        selection.add('veto_b', bjets.counts==0)
        selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL)

        if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC):
            selection.add('hemveto', df['hemveto'])
        else:
            selection.add('hemveto', np.ones(df.size)==1)

        # AK4 Jet
        leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \
                         & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any()
        selection.add('leadak4_pt_eta', leadak4_pt_eta)

        selection.add('leadak4_id',(ak4.tightId[leadak4_index] \
                                                    & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \
                                                    & (ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any())

        # AK8 Jet
        leadak8_index=ak8.pt.argmax()
        leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \
                         & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any()
        selection.add('leadak8_pt_eta', leadak8_pt_eta)

        selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any())

        # Mono-V selection
        selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any())
        selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \
                                    & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any())
        selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD)
                                    & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any())
        selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any())
        selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE)
                                    & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any())
        selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any())

        selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass"))
        selection.add('only_one_ak8', ak8.counts==1)

        # Dimuon CR
        leadmuon_index=muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge==0).any())
        selection.add('two_muons', muons.counts==2)

        # Single muon CR
        selection.add('one_muon', muons.counts==1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index=electrons.pt.argmax()


        selection.add('one_electron', electrons.counts==1)
        selection.add('two_electrons', electrons.counts==2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge==0).any())
        selection.add('two_electrons', electrons.counts==2)

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index=photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId \
                         & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts==1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if gen_v_pt is not None:
            output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight'])

        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons, electrons, photons)
            weights = pileup_weights(weights, df, evaluator, cfg)
            if not (gen_v_pt is None):
                weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt)

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask].flatten()]
                output['kinematics']['met_phi'] += [met_phi[mask].flatten()]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()]
                output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()]
                output['kinematics']['leadbtag'] += [ak4.pt.max()<0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()]
                output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()]
                output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()]
                output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()]
                output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()]
                output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()]
                output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()]
                output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()]


        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] +=  df['genEventSumw']
            output['sumw2'][dataset] +=  df['genEventSumw2']
            output['sumw_pileup'][dataset] +=  weights.partial_weight(include=['pileup']).sum()

        regions = monojet_regions(cfg)

        for region, cuts in regions.items():
            region_weights = copy.deepcopy(weights)
            if not df['is_data']:
                if re.match(r'cr_(\d+)e.*', region):
                    region_weights.add('trigger', np.ones(df.size))
                elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region):
                    region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt']))
                elif re.match(r'cr_g.*', region):
                    region_weights.add('trigger', np.ones(df.size))

            if not df['is_data']:
                genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)]
                leadak8 = ak8[ak8.pt.argmax()]
                leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8)
                matched_leadak8 = leadak8[leadak8_matched_mask]
                unmatched_leadak8 = leadak8[~leadak8_matched_mask]
                for wp in ['loose','loosemd','tight','tightmd']:
                    if re.match(r'.*_{wp}_v.*', region):

                        if (wp == 'tight') or ('nomistag' in region): # no mistag SF available for tight cut
                            matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod()
                        else:
                            matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \
                                    * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod()

                        region_weights.add('wtag_{wp}', matched_weights)



            # Blinding
            if(self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all']+=df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum()

            mask = selection.all(*cuts)


            if cfg.RUN.SAVE.TREE:
                def fill_tree(variable, values):
                    """Store ``values`` in the tree output of the current region.

                    The values are wrapped in a column accumulator and filed
                    under the output key ``tree_{region}_{variable}``, indexed
                    by the current dataset. If the dataset already has an
                    entry, the new column is added onto it; otherwise the
                    entry is created.

                    ``region``, ``dataset`` and ``output`` are read from the
                    enclosing scope at call time.

                    :param variable: Name of the variable, used in the output key
                    :param values: Values to wrap in the column accumulator
                    """
                    column = processor.column_accumulator(values)
                    branch = output[f'tree_{region}_{variable}']
                    if dataset not in branch.keys():
                        # First column seen for this dataset
                        branch[dataset] = column
                    else:
                        branch[dataset] += column
                if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']:
                    fill_tree('recoil',df['recoil_pt'][mask].flatten())
                    fill_tree('weight',region_weights.weight()[mask].flatten())
                    if gen_v_pt is not None:
                        fill_tree('gen_v_pt',gen_v_pt[mask].flatten())
                    else:
                        fill_tree('gen_v_pt', -1 * np.ones(sum(mask)))
            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])


            # Multiplicities
            def fill_mult(name, candidates):
                """Fill the multiplicity histogram ``name`` for the current region.

                The multiplicity is the ``counts`` of ``candidates`` after
                applying the region's event mask, weighted by the total
                region weight for the same events.

                ``output``, ``dataset``, ``region``, ``mask`` and
                ``region_weights`` are read from the enclosing scope at
                call time.

                :param name: Key of the histogram in ``output``
                :param candidates: Candidate collection to count per event
                """
                histogram = output[name]
                n_selected = candidates[mask].counts
                event_weight = region_weights.weight()[mask]
                histogram.fill(dataset=dataset,
                               region=region,
                               multiplicity=n_selected,
                               weight=event_weight)

            fill_mult('ak8_mult', ak8)
            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult',bjets)
            fill_mult('loose_ele_mult',electrons)
            fill_mult('tight_ele_mult',electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult',muons)
            fill_mult('tight_muo_mult',muons[df['is_tight_muon']])
            fill_mult('tau_mult',taus)
            fill_mult('photon_mult',photons)

            def ezfill(name, **kwargs):
                """Fill histogram ``name``, supplying dataset and region automatically.

                All keyword arguments are forwarded unchanged to the
                histogram's ``fill`` call. ``output``, ``dataset`` and
                ``region`` are read from the enclosing scope at call time.

                :param name: Key of the histogram in ``output``
                :param kwargs: Axis values and weights passed on to ``fill``
                """
                histogram = output[name]
                histogram.fill(dataset=dataset, region=region, **kwargs)
            # Monitor weights
            for wname, wvalue in region_weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask])

            ezfill('ak4_eta',    jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi',    jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_pt',     jetpt=ak4[mask].pt.flatten(),   weight=w_alljets)

            # Leading ak4
            w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask])
            ezfill('ak4_eta0',   jeteta=ak4[leadak4_index].eta[mask].flatten(),    weight=w_leadak4)
            ezfill('ak4_phi0',   jetphi=ak4[leadak4_index].phi[mask].flatten(),    weight=w_leadak4)
            ezfill('ak4_pt0',    jetpt=ak4[leadak4_index].pt[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_ptraw0',    jetpt=ak4[leadak4_index].ptraw[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_chf0',    frac=ak4[leadak4_index].chf[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_nhf0',    frac=ak4[leadak4_index].nhf[mask].flatten(),      weight=w_leadak4)

            ezfill('drelejet',    dr=df['dREleJet'][mask],      weight=region_weights.weight()[mask])
            ezfill('drmuonjet',    dr=df['dRMuonJet'][mask],      weight=region_weights.weight()[mask])
            ezfill('drphotonjet',    dr=df['dRPhotonJet'][mask],  weight=region_weights.weight()[mask])

            # AK8 jets
            if region=='inclusive' or region.endswith('v'):
                # All
                w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask])

                ezfill('ak8_eta',    jeteta=ak8[mask].eta.flatten(), weight=w_allak8)
                ezfill('ak8_phi',    jetphi=ak8[mask].phi.flatten(), weight=w_allak8)
                ezfill('ak8_pt',     jetpt=ak8[mask].pt.flatten(),   weight=w_allak8)
                ezfill('ak8_mass',   mass=ak8[mask].mass.flatten(),  weight=w_allak8)

                # Leading
                w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask])

                ezfill('ak8_eta0',       jeteta=ak8[leadak8_index].eta[mask].flatten(),    weight=w_leadak8)
                ezfill('ak8_phi0',       jetphi=ak8[leadak8_index].phi[mask].flatten(),    weight=w_leadak8)
                ezfill('ak8_pt0',        jetpt=ak8[leadak8_index].pt[mask].flatten(),      weight=w_leadak8 )
                ezfill('ak8_mass0',      mass=ak8[leadak8_index].mass[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_tau210',     tau21=ak8[leadak8_index].tau21[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_wvsqcd0',    tagger=ak8[leadak8_index].wvsqcd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_wvsqcdmd0',  tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_zvsqcd0',    tagger=ak8[leadak8_index].zvsqcd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_zvsqcdmd0',  tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(),     weight=w_leadak8)

                # histogram with only gen-matched lead ak8 pt
                if not df['is_data']:
                    w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask])
                    ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(),      weight=w_matchedleadak8 )


                # Dimuon specifically for deepak8 mistag rate measurement
                if 'inclusive_v' in region:
                    ezfill('ak8_passloose_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtight_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloosemd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtightmd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloose_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtight_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )

            # MET
            ezfill('dpfcalo',            dpfcalo=df["dPFCalo"][mask],       weight=region_weights.weight()[mask] )
            ezfill('met',                met=met_pt[mask],            weight=region_weights.weight()[mask] )
            ezfill('met_phi',            phi=met_phi[mask],            weight=region_weights.weight()[mask] )
            ezfill('recoil',             recoil=df["recoil_pt"][mask],      weight=region_weights.weight()[mask] )
            ezfill('recoil_phi',         phi=df["recoil_phi"][mask],      weight=region_weights.weight()[mask] )
            ezfill('recoil_nopog',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask])
            ezfill('recoil_nopref',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['prefire'])[mask])
            ezfill('recoil_nopu',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('recoil_notrg',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['trigger'])[mask])
            ezfill('ak4_pt0_over_recoil',    ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask],      weight=region_weights.weight()[mask])
            ezfill('dphijm',             dphi=df["minDPhiJetMet"][mask],    weight=region_weights.weight()[mask] )
            ezfill('dphijr',             dphi=df["minDPhiJetRecoil"][mask],    weight=region_weights.weight()[mask] )

            if 'noveto' in region:
                continue

            # Muons
            if '_1m_' in region or '_2m_' in region:
                w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask])
                ezfill('muon_pt',   pt=muons.pt[mask].flatten(),    weight=w_allmu )
                ezfill('muon_mt',   mt=df['MT_mu'][mask],           weight=region_weights.weight()[mask])
                ezfill('muon_eta',  eta=muons.eta[mask].flatten(),  weight=w_allmu)
                ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu)
                ezfill('muon_phi',  phi=muons.phi[mask].flatten(),  weight=w_allmu)
                ezfill('muon_dxy',  dxy=muons.dxy[mask].flatten(),  weight=w_allmu)
                ezfill('muon_dz',  dz=muons.dz[mask].flatten(),  weight=w_allmu)

                # Leading muon
                w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask])
                ezfill('muon_pt0',   pt=muons[leadmuon_index].pt[mask].flatten(),    weight=w_leadmu )
                ezfill('muon_eta0',  eta=muons[leadmuon_index].eta[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_phi0',  phi=muons[leadmuon_index].phi[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_dxy0',  dxy=muons[leadmuon_index].dxy[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_dz0',  dz=muons[leadmuon_index].dz[mask].flatten(),  weight=w_leadmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask])

                ezfill('dimuon_pt',     pt=dimuons.pt[mask].flatten(),              weight=w_dimu)
                ezfill('dimuon_eta',    eta=dimuons.eta[mask].flatten(),            weight=w_dimu)
                ezfill('dimuon_mass',   dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu )
                ezfill('dimuon_dr',   dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu )

                ezfill('muon_pt1',   pt=muons[~leadmuon_index].pt[mask].flatten(),    weight=w_leadmu )
                ezfill('muon_eta1',  eta=muons[~leadmuon_index].eta[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_phi1',  phi=muons[~leadmuon_index].phi[mask].flatten(),  weight=w_leadmu)

            # Electrons
            if '_1e_' in region or '_2e_' in region:
                w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask])
                ezfill('electron_pt',   pt=electrons.pt[mask].flatten(),    weight=w_allel)
                ezfill('electron_mt',   mt=df['MT_el'][mask],               weight=region_weights.weight()[mask])
                ezfill('electron_eta',  eta=electrons.eta[mask].flatten(),  weight=w_allel)
                ezfill('electron_phi',  phi=electrons.phi[mask].flatten(),  weight=w_allel)
                ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel)
                ezfill('electron_dz',  dz=electrons.dz[mask].flatten(),  weight=w_allel)
                ezfill('electron_dxy',  dxy=electrons.dxy[mask].flatten(),  weight=w_allel)

                w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask])
                ezfill('electron_pt0',   pt=electrons[leadelectron_index].pt[mask].flatten(),    weight=w_leadel)
                ezfill('electron_eta0',  eta=electrons[leadelectron_index].eta[mask].flatten(),  weight=w_leadel)
                ezfill('electron_phi0',  phi=electrons[leadelectron_index].phi[mask].flatten(),  weight=w_leadel)

                w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask])
                ezfill('electron_tightid1',  id=electrons[~leadelectron_index].tightId[mask].flatten(),  weight=w_trailel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask])
                ezfill('dielectron_pt',     pt=dielectrons.pt[mask].flatten(),                  weight=w_diel)
                ezfill('dielectron_eta',    eta=dielectrons.eta[mask].flatten(),                weight=w_diel)
                ezfill('dielectron_mass',   dilepton_mass=dielectrons.mass[mask].flatten(),     weight=w_diel)
                ezfill('dielectron_dr',   dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel )

                ezfill('electron_pt1',   pt=electrons[~leadelectron_index].pt[mask].flatten(),    weight=w_leadel)
                ezfill('electron_eta1',  eta=electrons[~leadelectron_index].eta[mask].flatten(),  weight=w_leadel)
                ezfill('electron_phi1',  phi=electrons[~leadelectron_index].phi[mask].flatten(),  weight=w_leadel)
            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask]);
                ezfill('photon_pt0',              pt=photons[leadphoton_index].pt[mask].flatten(),    weight=w_leading_photon)
                ezfill('photon_eta0',             eta=photons[leadphoton_index].eta[mask].flatten(),  weight=w_leading_photon)
                ezfill('photon_phi0',             phi=photons[leadphoton_index].phi[mask].flatten(),  weight=w_leading_photon)
                ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], region_weights.weight()[mask])

            # PV
            ezfill('npv', nvtx=df['PV_npvs'][mask], weight=region_weights.weight()[mask])
            ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask])

            ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])

            ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.weight()[mask])
            ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask])
            ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
        return output
Example #13
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
        df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
            'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
                'is_lo_w_ewk'] | df['is_lo_z_ewk']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
                'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
            gen = setup_gen_candidates(df)
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_dress']
        elif df['is_lo_g']:
            gen = setup_gen_candidates(df)
            gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

        # Generator-level leading dijet mass
        if df['has_lhe_v_pt']:
            genjets = setup_lhe_cleaned_genjets(df)
            digenjet = genjets[:, :2].distincts()
            df['mjj_gen'] = digenjet.mass.max()

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
            df, cfg)

        # Filtering ak4 jets according to pileup ID
        ak4 = ak4[ak4.puid]
        bjets = bjets[bjets.puid]

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts == 1) *
                       mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) *
                       mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index = ak4.pt.argmax()

        elejet_pairs = ak4[:, :1].cross(electrons)
        df['dREleJet'] = np.hypot(
            elejet_pairs.i0.eta - elejet_pairs.i1.eta,
            dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:, :1].cross(muons)
        df['dRMuonJet'] = np.hypot(
            muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
            dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                                   muons, photons)
        df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30,
                                                  etamax=4.7)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               met_phi,
                                               njet=4,
                                               ptmin=30,
                                               etamax=4.7)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size) == 1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)

        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',
                      np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

        if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018
                and not cfg.RUN.SYNC):
            selection.add('hemveto', df['hemveto'])
        else:
            selection.add('hemveto', np.ones(df.size) == 1)

        # AK4 dijet
        diak4 = ak4[:, :2].distincts()
        leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
            np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
        trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
            np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
        hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
        has_track0 = np.abs(diak4.i0.eta) <= 2.5
        has_track1 = np.abs(diak4.i1.eta) <= 2.5

        leadak4_id = diak4.i0.tightId & (has_track0 * (
            (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
            (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
        trailak4_id = has_track1 * (
            (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
            (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
        df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

        selection.add('two_jets', diak4.counts > 0)
        selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
        selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
        selection.add('hemisphere', hemisphere)
        selection.add('leadak4_id', leadak4_id.any())
        selection.add('trailak4_id', trailak4_id.any())
        selection.add('mjj',
                      df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
        selection.add(
            'dphijj',
            df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
        selection.add(
            'detajj',
            df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

        # Divide into three categories for trigger study
        if cfg.RUN.TRIGGER_STUDY:
            two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
                diak4.i1.eta) <= 2.4)
            two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
                diak4.i1.eta) > 2.4)
            one_jet_forward_one_jet_central = (~two_central_jets) & (
                ~two_forward_jets)
            selection.add('two_central_jets', two_central_jets.any())
            selection.add('two_forward_jets', two_forward_jets.any())
            selection.add('one_jet_forward_one_jet_central',
                          one_jet_forward_one_jet_central.any())

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())
        selection.add('two_electrons', electrons.counts == 2)

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index = photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId \
                         & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig',
                      photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if df['has_lhe_v_pt']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        if 'LHE_Njets' in df:
            output['lhe_njets'].fill(dataset=dataset,
                                     multiplicity=df['LHE_Njets'])
        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
        if 'LHE_HTIncoming' in df:
            output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons,
                                        electrons, photons)
            weights = pileup_weights(weights, df, evaluator, cfg)
            if not (gen_v_pt is None):
                weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                             df['mjj_gen'])

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask]]
                output['kinematics']['met_phi'] += [met_phi[mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[df['is_tight_muon']].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[df['is_tight_electron']].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[df['is_tight_photon']].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
            output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

        regions = vbfhinv_regions(cfg)
        for region, cuts in regions.items():
            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all'] += df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' + region][cutname] += selection.all(
                        *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                output[name].fill(dataset=dataset,
                                  region=region,
                                  multiplicity=candidates[mask].counts,
                                  weight=weights.weight()[mask])

            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Helper function to make filling easier."""
                output[name].fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
                ezfill("weights_wide",
                       weight_type=wname,
                       weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, weights.weight()[mask])
            w_alljets_nopref = weight_shape(
                ak4[mask].eta,
                weights.partial_weight(exclude=['prefire'])[mask])

            ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            ezfill('ak4_eta_nopref',
                   jeteta=ak4[mask].eta.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_phi_nopref',
                   jetphi=ak4[mask].phi.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_pt_nopref',
                   jetpt=ak4[mask].pt.flatten(),
                   weight=w_alljets_nopref)

            # Leading ak4
            w_diak4 = weight_shape(diak4.pt[mask], weights.weight()[mask])
            ezfill('ak4_eta0',
                   jeteta=diak4.i0.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi0',
                   jetphi=diak4.i0.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt0',
                   jetpt=diak4.i0.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw0',
                   jetpt=diak4.i0.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf0',
                   frac=diak4.i0.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf0',
                   frac=diak4.i0.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst0',
                   nconst=diak4.i0.nconst[mask].flatten(),
                   weight=w_diak4)

            # Trailing ak4
            ezfill('ak4_eta1',
                   jeteta=diak4.i1.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi1',
                   jetphi=diak4.i1.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt1',
                   jetpt=diak4.i1.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw1',
                   jetpt=diak4.i1.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf1',
                   frac=diak4.i1.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf1',
                   frac=diak4.i1.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst1',
                   nconst=diak4.i1.nconst[mask].flatten(),
                   weight=w_diak4)

            # B tag discriminator
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], weights.weight()[mask])
            ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo',
                   dpfcalo=df["dPFCalo"][mask],
                   weight=weights.weight()[mask])
            ezfill('met', met=met_pt[mask], weight=weights.weight()[mask])
            ezfill('met_phi', phi=met_phi[mask], weight=weights.weight()[mask])
            ezfill('recoil',
                   recoil=df["recoil_pt"][mask],
                   weight=weights.weight()[mask])
            ezfill('recoil_phi',
                   phi=df["recoil_phi"][mask],
                   weight=weights.weight()[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=weights.weight()[mask])
            ezfill('dphijr',
                   dphi=df["minDPhiJetRecoil"][mask],
                   weight=weights.weight()[mask])

            ezfill('dphijj',
                   dphi=df["dphijj"][mask],
                   weight=weights.weight()[mask])
            ezfill('detajj',
                   deta=df["detajj"][mask],
                   weight=weights.weight()[mask])
            ezfill('mjj', mjj=df["mjj"][mask], weight=weights.weight()[mask])

            # Two dimensional
            ezfill('recoil_mjj',
                   recoil=df["recoil_pt"][mask],
                   mjj=df["mjj"][mask],
                   weight=weights.weight()[mask])

            # Muons
            if '_1m_' in region or '_2m_' in region:
                w_allmu = weight_shape(muons.pt[mask], weights.weight()[mask])
                ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
                ezfill('muon_mt',
                       mt=df['MT_mu'][mask],
                       weight=weights.weight()[mask])
                ezfill('muon_eta',
                       eta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_phi',
                       phi=muons.phi[mask].flatten(),
                       weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], weights.weight()[mask])
                ezfill('muon_pt0',
                       pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1',
                       pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0',
                       eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1',
                       eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0',
                       phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1',
                       phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt',
                       pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta',
                       eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

            # Electrons
            if '_1e_' in region or '_2e_' in region:
                w_allel = weight_shape(electrons.pt[mask],
                                       weights.weight()[mask])
                ezfill('electron_pt',
                       pt=electrons.pt[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_mt',
                       mt=df['MT_el'][mask],
                       weight=weights.weight()[mask])
                ezfill('electron_eta',
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_phi',
                       phi=electrons.phi[mask].flatten(),
                       weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask],
                                      weights.weight()[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(
                    photons[leadphoton_index].pt[mask],
                    weights.weight()[mask])
                ezfill('photon_pt0',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_eta0',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_phi0',
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_pt0_recoil',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       recoil=df['recoil_pt'][mask
                                              & (leadphoton_index.counts > 0)],
                       weight=w_leading_photon)
                ezfill('photon_eta_phi',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], weights.weight()[mask])

            # PV
            ezfill('npv',
                   nvtx=df['PV_npvs'][mask],
                   weight=weights.weight()[mask])
            ezfill('npvgood',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=weights.weight()[mask])

            ezfill('npv_nopu',
                   nvtx=df['PV_npvs'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('npvgood_nopu',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])

            ezfill('rho_all',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=weights.weight()[mask])
            ezfill('rho_central',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=weights.weight()[mask])
            ezfill('rho_all_nopu',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('rho_central_nopu',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
        return output
    def process(self, df):
        """Fill photon sigma-ieta-ieta histograms for one chunk of events.

        Events must pass the MET filters, the lumi mask (data only), the
        single-photon trigger (HLT_Photon175 for 2016, HLT_Photon200
        otherwise), have MET_pt < 60 and contain at least one selected jet
        (tight ID, pt > 100, |eta| < 2.4, passing the ``object_overlap``
        mask against the photons).

        The ``sieie`` histogram is filled for photons with pt > 200 in the
        barrel, once per ID category: ``medium``, ``medium_nosieie`` and
        ``medium_nosieie_invertiso``.

        :param df: Event chunk; must provide ``dataset``, ``MET_pt``, the
                   trigger branch and, for data, ``run``/``luminosityBlock``
                   or, for MC, ``Generator_weight``/``genEventSumw``/
                   ``genEventSumw2``.
        :return: Accumulator holding the filled ``sieie`` histogram and, for
                 MC, the ``sumw``/``sumw2`` sums for this dataset.
        :raises ValueError: If a data set belongs to a year for which no
                            certification JSON is listed here.
        """
        self._configure(df)
        output = self.accumulator.identity()
        dataset = df['dataset']
        data = is_data(dataset)

        # Lumi mask
        year = extract_year(dataset)
        if data:
            certified_lumi_files = {
                2016: 'data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt',
                2017: 'data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt',
                2018: 'data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
            }
            try:
                lumi_file = certified_lumi_files[year]
            except KeyError:
                # Previously this fell through and failed later with an
                # unbound local variable; fail early with a clear message.
                raise ValueError(
                    f"No certification JSON known for year {year} "
                    f"(dataset {dataset}).") from None
            lumi_mask = LumiMask(bucoffea_path(lumi_file))(
                df['run'], df['luminosityBlock'])
        else:
            # MC: keep every event
            lumi_mask = np.ones(df.size) == 1

        # MET filters
        if data:
            filt_met = mask_and(df, cfg.FILTERS.DATA)
        else:
            filt_met = mask_and(df, cfg.FILTERS.MC)

        trigger = 'HLT_Photon175' if year == 2016 else 'HLT_Photon200'

        photons = setup_photons(df)

        ak4 = setup_jets(df)
        ak4 = ak4[
                  object_overlap(ak4, photons) \
                  & ak4.tightId \
                  & (ak4.pt > 100) \
                  & (ak4.abseta < 2.4)
                  ]

        event_mask = filt_met \
                     & lumi_mask \
                     & (ak4.counts > 0) \
                     & df[trigger] \
                     & (df['MET_pt'] < 60)

        # Generator weight
        weights = processor.Weights(size=df.size, storeIndividual=True)

        if data:
            weights.add('gen', np.ones(df.size))
        else:
            weights.add('gen', df['Generator_weight'])

        photon_kinematics = (photons.pt > 200) & (photons.barrel)

        def fill_sieie(cat, id_mask):
            """Fill the sieie histogram for photons passing kinematics and ``id_mask``."""
            selected = photons[photon_kinematics & id_mask]
            vals = selected.sieie[event_mask]
            pt = selected.pt[event_mask]
            # The per-entry weight keyword is ``weight`` (as used by the
            # other histogram fills in this file); the former ``weights=``
            # spelling meant the event weights were not applied.
            output['sieie'].fill(dataset=dataset,
                                 cat=cat,
                                 sieie=vals.flatten(),
                                 pt=pt.flatten(),
                                 weight=weight_shape(
                                     vals,
                                     weights.weight()[event_mask]))

        # Medium
        fill_sieie('medium', photons.mediumId)

        # No Sieie
        fill_sieie('medium_nosieie', medium_id_no_sieie(photons))

        # No Sieie, inverted isolation
        fill_sieie('medium_nosieie_invertiso',
                   medium_id_no_sieie_inv_iso(photons))

        # Keep track of weight sum
        if not data:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
        return output
Example #15
0
def met_trigger_sf(weights, diak4, df, apply_categorized=True):
    '''
    Data/MC SF for the MET trigger, determined as the ratio of
    two sigmoid functions which are fit to data and MC efficiencies.

    The SF is evaluated as a function of df['recoil_pt'] and added to
    `weights` in place under the name "trigger_met"; nothing is returned.

    For 2016 the SF is 1 for every event. For other years, if
    apply_categorized is True, events where both leading jets have
    |eta| < 2.5 get the 'two_central_jets' SF and all remaining events
    get the 'mixed' SF. Otherwise the single 'inclusive' SF is applied.

    Non-finite SF values (NaN or +-inf) are replaced by 1.

    Raises KeyError if the year is neither 2016 nor one of the years
    for which fit parameters are tabulated below (2017, 2018).
    '''
    year = extract_year(df['dataset'])
    x = df['recoil_pt']

    # Sigmoid fit parameters, passed positionally to sigmoid3
    data_params = {
        'two_central_jets': {
            2017: (0.044, 164.881, 0.990),
            2018: (0.045, 176.266, 0.993)
        },
        'mixed': {
            2017: (0.039, 173.351, 0.986),
            2018: (0.041, 182.607, 0.990)
        },
        'inclusive': {
            2017: (0.043, 167.896, 0.99),
            2018: (0.044, 178.364, 0.992)
        }
    }

    mc_params = {
        'two_central_jets': {
            2017: (0.046, 144.881, 0.994),
            2018: (0.052, 152.838, 0.993)
        },
        'mixed': {
            2017: (0.039, 154.035, 0.992),
            2018: (0.048, 159.329, 0.992)
        },
        'inclusive': {
            2017: (0.044, 147.932, 0.994),
            2018: (0.051, 155.016, 0.993)
        }
    }

    def efficiency_ratio(category):
        '''Data efficiency divided by MC efficiency for one category.'''
        return sigmoid3(x, *data_params[category][year]) / sigmoid3(
            x, *mc_params[category][year])

    if year == 2016:
        sf = np.ones(df.size)
    elif apply_categorized:
        # Two categories: Two central jets & others
        two_central_jets = (diak4.i0.abseta < 2.5) & (diak4.i1.abseta < 2.5)
        sf = np.where(two_central_jets,
                      efficiency_ratio('two_central_jets'),
                      efficiency_ratio('mixed'))
    else:
        sf = efficiency_ratio('inclusive')

    # Guard against division artefacts. This has to be an assignment; the
    # previous `== 1` comparison left invalid values in place.
    sf[~np.isfinite(sf)] = 1
    weights.add("trigger_met", sf)
Example #16
0
def get_veto_weights(df, evaluator, electrons, muons, taus, do_variations=False):
    """
    Build per-event lepton veto weights.

    For every variation, the weight is

        w = product(1-SF)

    taken over the electrons, muons and taus passed in. Weights whose
    absolute value exceeds 5 are reset to 1.

    :param df: Event chunk; ``df['dataset']`` and ``df.size`` are read
    :param evaluator: Mapping from scale-factor name to a callable
    :param electrons: Electron candidates (``etasc`` and ``pt`` are used)
    :param muons: Muon candidates (``pt`` and ``abseta`` are used)
    :param taus: Tau candidates (``pt`` is used)
    :param do_variations: If True, also store the up/down variations of
                          each scale factor next to "nominal"
    :return: Weights object holding one entry per variation
    """
    veto_weights = processor.Weights(size=df.size, storeIndividual=True)

    variations = ["nominal"]
    if do_variations:
        variations += [
            'ele_reco_up', 'ele_reco_dn',
            'ele_id_up', 'ele_id_dn',
            'muon_id_up', 'muon_id_dn',
            'muon_iso_up', 'muon_iso_dn',
            'tau_id_up', 'tau_id_dn',
        ]

    for variation in variations:
        def varied_weight(sfname, *args):
            '''Evaluate SF `sfname`, shifted by its error if the current variation targets it'''
            central = evaluator[sfname](*args)

            # Nominal, or a variation that does not concern this SF:
            # hand back the central value untouched
            targets_this_sf = re.sub('_(up|dn)', '', variation) in sfname
            if 'nominal' in variation or not targets_this_sf:
                return central

            sgn = 1 if variation.endswith("up") else -1
            return central + sgn * evaluator[f"{sfname}_error"](*args)

        ### Electrons
        if extract_year(df['dataset']) == 2017:
            # 2017 has a dedicated reco SF below 20 GeV
            above_threshold = electrons.pt>20
            below_threshold = ~above_threshold

            soft_args = (electrons.etasc[below_threshold], electrons.pt[below_threshold])
            reco_soft = varied_weight('ele_reco_pt_lt_20', *soft_args)
            id_soft = varied_weight("ele_id_loose", *soft_args)

            hard_args = (electrons.etasc[above_threshold], electrons.pt[above_threshold])
            reco_hard = varied_weight("ele_reco", *hard_args)
            id_hard = varied_weight("ele_id_loose", *hard_args)

            veto_soft = (1 - reco_soft*id_soft).prod()
            veto_hard = (1 - reco_hard*id_hard).prod()
            veto_weight_ele = veto_soft * veto_hard
        else:
            # Other years: a single reco SF for all electrons
            ele_args = (electrons.etasc, electrons.pt)
            reco_all = varied_weight("ele_reco", *ele_args)
            id_all = varied_weight("ele_id_loose", *ele_args)
            veto_weight_ele = (1 - id_all*reco_all).prod()

        ### Muons
        muon_args = (muons.pt, muons.abseta)
        muon_id = varied_weight("muon_id_loose", *muon_args)
        muon_iso = varied_weight("muon_iso_loose", *muon_args)
        veto_weight_muo = (1 - muon_id*muon_iso).prod()

        ### Taus
        # The tau variations live under their own evaluator keys, so the
        # key is picked by name rather than through varied_weight
        tau_sf_name = "tau_id"
        if "tau_id" in variation:
            direction = variation.split("_")[-1]
            tau_sf_name = f"tau_id_{direction}"
        veto_weight_tau = (1 - evaluator[tau_sf_name](taus.pt)).prod()

        ### Combine
        total = veto_weight_ele * veto_weight_muo * veto_weight_tau

        # Cap weights just in case
        too_large = np.abs(total)>5
        total[too_large] = 1
        veto_weights.add(variation, total)

    return veto_weights
Example #17
0
def setup_candidates(df, cfg):
    """
    Build the selected object collections and the MET for one event chunk.

    Side effect: stores the boolean per-event column ``df['hemveto']``,
    which is True when no jet with pt > 30 lies in the window
    -3.0 < eta < -1.3, -1.57 < phi < -0.87 (evaluated before jet cleaning).

    :param df: Event chunk with the input branches
    :param cfg: Configuration providing cut values, branch names and
                overlap-cleaning settings
    :return: Tuple (met_pt, met_phi, ak4, bjets, ak8, muons, electrons,
             taus, photons)
    """
    def remove_overlap(candidates, others, overlap_cfg):
        """Keep candidates passing the object_overlap mask, if cleaning is enabled."""
        if overlap_cfg.CLEAN:
            return candidates[object_overlap(candidates, others, dr=overlap_cfg.DR)]
        return candidates

    # Branch name suffixes for jet pt / MET
    if not df['is_data']:
        # MC, all years
        jes_suffix, jes_suffix_met = '_nom', '_jer'
    elif extract_year(df['dataset']) == 2018:
        # 2018 data
        jes_suffix, jes_suffix_met = '_nom', '_nom'
    else:
        # 2016, 2017 data
        jes_suffix, jes_suffix_met = '', ''

    # ---- Muons ----
    muons = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'],
        eta=df['Muon_eta'],
        abseta=np.abs(df['Muon_eta']),
        phi=df['Muon_phi'],
        mass=0 * df['Muon_pt'],
        charge=df['Muon_charge'],
        looseId=df['Muon_looseId'],
        iso=df["Muon_pfRelIso04_all"],
        tightId=df['Muon_tightId'],
        dxy=df['Muon_dxy'],
        dz=df['Muon_dz']
    )

    # Only loose-or-better muons are kept
    loose_muon = muons.looseId \
        & (muons.iso < cfg.MUON.CUTS.LOOSE.ISO) \
        & (muons.pt > cfg.MUON.CUTS.LOOSE.PT) \
        & (muons.abseta < cfg.MUON.CUTS.LOOSE.ETA)
    muons = muons[loose_muon]

    # ---- Electrons ----
    electrons = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'],
        eta=df['Electron_eta'],
        abseta=np.abs(df['Electron_eta']),
        phi=df['Electron_phi'],
        mass=0 * df['Electron_pt'],
        charge=df['Electron_charge'],
        looseId=(df[cfg.ELECTRON.BRANCH.ID] >= 1),
        tightId=(df[cfg.ELECTRON.BRANCH.ID] == 4),
        dxy=np.abs(df['Electron_dxy']),
        dz=np.abs(df['Electron_dz']),
        barrel=np.abs(df['Electron_eta']) <= 1.479
    )

    # Impact parameter cuts differ between barrel and endcap
    in_barrel = electrons.barrel
    abs_dxy = np.abs(electrons.dxy)
    abs_dz = np.abs(electrons.dz)
    pass_dxy = (in_barrel & (abs_dxy < cfg.ELECTRON.CUTS.LOOSE.DXY.BARREL)) \
        | (~in_barrel & (abs_dxy < cfg.ELECTRON.CUTS.LOOSE.DXY.ENDCAP))
    pass_dz = (in_barrel & (abs_dz < cfg.ELECTRON.CUTS.LOOSE.DZ.BARREL)) \
        | (~in_barrel & (abs_dz < cfg.ELECTRON.CUTS.LOOSE.DZ.ENDCAP))

    # Only loose-or-better electrons are kept
    loose_electron = electrons.looseId \
        & (electrons.pt > cfg.ELECTRON.CUTS.LOOSE.PT) \
        & (electrons.abseta < cfg.ELECTRON.CUTS.LOOSE.ETA) \
        & pass_dxy \
        & pass_dz
    electrons = electrons[loose_electron]
    electrons = remove_overlap(electrons, muons, cfg.OVERLAP.ELECTRON.MUON)

    # ---- Taus ----
    taus = JaggedCandidateArray.candidatesfromcounts(
        df['nTau'],
        pt=df['Tau_pt'],
        eta=df['Tau_eta'],
        abseta=np.abs(df['Tau_eta']),
        phi=df['Tau_phi'],
        mass=0 * df['Tau_pt'],
        decaymode=df['Tau_idDecayMode'],
        iso=df['Tau_idMVAoldDM2017v2'],
    )

    good_tau = (taus.decaymode) \
        & (taus.pt > cfg.TAU.CUTS.PT) \
        & (taus.abseta < cfg.TAU.CUTS.ETA) \
        & ((taus.iso & 2) == 2)
    taus = taus[good_tau]
    taus = remove_overlap(taus, muons, cfg.OVERLAP.TAU.MUON)
    taus = remove_overlap(taus, electrons, cfg.OVERLAP.TAU.ELECTRON)

    # ---- Photons ----
    passes_electron_veto = df['Photon_electronVeto']
    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        abseta=np.abs(df['Photon_eta']),
        phi=df['Photon_phi'],
        mass=0 * df['Photon_pt'],
        looseId=(df[cfg.PHOTON.BRANCH.ID] >= 1) & passes_electron_veto,
        mediumId=(df[cfg.PHOTON.BRANCH.ID] >= 2) & passes_electron_veto,
        r9=df['Photon_r9'],
        barrel=np.abs(df['Photon_eta']) < 1.479,
    )

    loose_photon = photons.looseId \
        & (photons.pt > cfg.PHOTON.CUTS.LOOSE.pt) \
        & (photons.abseta < cfg.PHOTON.CUTS.LOOSE.eta)
    photons = photons[loose_photon]
    photons = remove_overlap(photons, muons, cfg.OVERLAP.PHOTON.MUON)
    photons = remove_overlap(photons, electrons, cfg.OVERLAP.PHOTON.ELECTRON)

    # ---- AK4 jets ----
    jet_pt = df[f'Jet_pt{jes_suffix}']
    # Jet ID bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto.
    # Both looseId and tightId below test the tight bit.
    jet_tight_bit = (df['Jet_jetId'] & 2) == 2
    ak4 = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=jet_pt,
        eta=df['Jet_eta'],
        abseta=np.abs(df['Jet_eta']),
        phi=df['Jet_phi'],
        mass=np.zeros_like(df['Jet_pt']),
        looseId=jet_tight_bit,
        tightId=jet_tight_bit,
        # medium pileup jet ID, only required up to pt of 50
        puid=(((df['Jet_puId'] & 2) > 0) | (jet_pt > 50)),
        csvv2=df["Jet_btagCSVV2"],
        deepcsv=df['Jet_btagDeepB'],
        nhf=df['Jet_neHEF'],
        chf=df['Jet_chHEF'],
        ptraw=df['Jet_pt'] * (1 - df['Jet_rawFactor']),
        nconst=df['Jet_nConstituents']
    )

    # HEM veto is evaluated on the jets before any cleaning
    in_hem_window = (ak4.pt > 30) \
        & (ak4.eta > -3.0) \
        & (ak4.eta < -1.3) \
        & (ak4.phi > -1.57) \
        & (ak4.phi < -0.87)
    df['hemveto'] = ak4[in_hem_window].counts == 0

    # ---- B jets ----
    # Selected from the uncleaned jets because they have their own
    # overlap-cleaning settings
    btag_algo = cfg.BTAG.algo
    btag_discriminator = getattr(ak4, btag_algo)
    btag_cut = cfg.BTAG.CUTS[btag_algo][cfg.BTAG.wp]
    is_bjet = (ak4.looseId) \
        & (ak4.pt > cfg.BTAG.PT) \
        & (ak4.abseta < cfg.BTAG.ETA) \
        & (btag_discriminator > btag_cut)
    bjets = ak4[is_bjet]
    bjets = remove_overlap(bjets, muons, cfg.OVERLAP.BTAG.MUON)
    bjets = remove_overlap(bjets, electrons, cfg.OVERLAP.BTAG.ELECTRON)
    bjets = remove_overlap(bjets, photons, cfg.OVERLAP.BTAG.PHOTON)

    # Now filter and clean the jets themselves
    ak4 = ak4[ak4.looseId]
    ak4 = remove_overlap(ak4, muons, cfg.OVERLAP.AK4.MUON)
    ak4 = remove_overlap(ak4, electrons, cfg.OVERLAP.AK4.ELECTRON)
    ak4 = remove_overlap(ak4, photons, cfg.OVERLAP.AK4.PHOTON)

    # ---- AK8 jets ----
    ak8 = JaggedCandidateArray.candidatesfromcounts(
        df['nFatJet'],
        pt=df[f'FatJet_pt{jes_suffix}'],
        eta=df['FatJet_eta'],
        abseta=np.abs(df['FatJet_eta']),
        phi=df['FatJet_phi'],
        mass=df[f'FatJet_msoftdrop{jes_suffix}'],
        tightId=(df['FatJet_jetId'] & 2) == 2,  # Tight
        csvv2=df["FatJet_btagCSVV2"],
        deepcsv=df['FatJet_btagDeepB'],
        tau1=df['FatJet_tau1'],
        tau2=df['FatJet_tau2'],
        tau21=df['FatJet_tau2'] / df['FatJet_tau1'],
        wvsqcd=df['FatJet_deepTag_WvsQCD'],
        wvsqcdmd=df['FatJet_deepTagMD_WvsQCD'],
        zvsqcd=df['FatJet_deepTag_ZvsQCD'],
        zvsqcdmd=df['FatJet_deepTagMD_ZvsQCD']
    )

    # Fat jets are always cleaned against all leptons and photons,
    # using the default settings of object_overlap
    good_ak8 = ak8.tightId
    for others in (muons, electrons, photons):
        good_ak8 = good_ak8 & object_overlap(ak8, others)
    ak8 = ak8[good_ak8]

    # ---- MET ----
    # 2017 reads a dedicated MET branch
    met_branch = 'METFixEE2017' if extract_year(df['dataset']) == 2017 else 'MET'

    met_pt = df[f'{met_branch}_pt{jes_suffix_met}']
    met_phi = df[f'{met_branch}_phi{jes_suffix_met}']

    return met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons