def qcdewk_weight(ev, source, nsig, parent, input_paths, variations, input_df):
    """Evaluate a per-event correction weight from a binned lookup table.

    Args:
        ev: Event object; ``ev.size`` and ``ev.GenPartBoson(ev, 'pt')`` are
            read.
        source: Systematic name. ``source + "Up"`` / ``source + "Down"`` pick
            the variation arrays handed to ``weight_numba``.
        nsig: Forwarded unchanged to ``weight_numba``.
        parent: Looked up in ``input_paths`` to decide whether the table
            applies at all.
        input_paths: Container tested with ``parent in input_paths``.
        variations: Sequence of variation column names; the first entry is
            skipped because the nominal ``""`` column is handled separately.
        input_df: Table whose index has ``bin_min`` / ``bin_max`` levels and
            whose columns are ``""`` (nominal) plus one per variation.

    Returns:
        Whatever ``weight_numba(central, nsig, up, down)`` returns.
    """
    if parent in input_paths:
        # Look up the table row whose [bin_min, bin_max] bin holds each event.
        bin_index = input_df.index
        rows = get_bin_indices(
            [ev.GenPartBoson(ev, 'pt')],
            [bin_index.get_level_values("bin_min").values],
            [bin_index.get_level_values("bin_max").values],
            1,
        )[:, 0]
        per_event = input_df.iloc[rows]
        nominal_column = per_event[""]

        table = {"": nominal_column.values.astype(np.float32)}
        for label in variations[1:]:
            # Variations are stored relative to the nominal value.
            ratio = (per_event[label] / nominal_column).values
            table[label] = (ratio - 1.).astype(np.float32)
    else:
        # No correction for this parent: unit weight, zero variations.
        table = {"": np.ones(ev.size, dtype=np.float32)}
        for label in variations[1:]:
            table[label] = np.zeros(ev.size, dtype=np.float32)

    central = table[""]
    up_key = source + "Up"
    down_key = source + "Down"
    if up_key in table and down_key in table:
        up = table[up_key]
        down = table[down_key]
    else:
        # The requested source does not vary this weight.
        up = np.zeros_like(central, dtype=np.float32)
        down = np.zeros_like(central, dtype=np.float32)
    return weight_numba(central, nsig, up, down)
def jet_pt_res(ev, jers):
    """Evaluate ``jer_formula`` per jet using parameters looked up in ``jers``.

    Rows of ``jers`` are selected by binning ``|Jet_eta|`` and the event-level
    ``fixedGridRhoFastjetAll`` (expanded to one value per jet) into the
    ``eta_low``/``eta_high`` and ``rho_low``/``rho_high`` columns. The jet pt
    (``Jet_ptJESOnly``) is limited to the ``pt_low``..``pt_high`` range of the
    selected row before it is passed to ``jer_formula``.

    Args:
        ev: Event object providing ``Jet_eta``, ``Jet_ptJESOnly`` and
            ``fixedGridRhoFastjetAll``.
        jers: Table with the bin-edge columns above, ``pt_low``, ``pt_high``
            and ``param0``..``param3``.

    Returns:
        ``awk.JaggedArray`` with the same starts/stops as ``Jet_ptJESOnly``.
    """
    abs_eta = np.abs(ev.Jet_eta.content)
    rho_per_jet = event_to_object_var(
        ev.fixedGridRhoFastjetAll,
        ev.Jet_ptJESOnly.starts,
        ev.Jet_ptJESOnly.stops,
    )
    low_edges = [jers["eta_low"].values, jers["rho_low"].values]
    high_edges = [jers["eta_high"].values, jers["rho_high"].values]
    rows = get_bin_indices([abs_eta, rho_per_jet], low_edges, high_edges, 1)[:, 0]

    matched = jers.iloc[rows]
    param_names = ["param0", "param1", "param2", "param3"]
    # Transposing yields the same column views as params[:, i].
    par0, par1, par2, par3 = matched[param_names].values.astype(np.float32).T
    pt_range = matched[["pt_low", "pt_high"]].values
    pt_floor = pt_range[:, 0]
    pt_ceiling = pt_range[:, 1]

    # Keep pt inside the range covered by the selected row.
    bounded_pt = np.maximum(ev.Jet_ptJESOnly.content, pt_floor)
    bounded_pt = np.minimum(bounded_pt, pt_ceiling).astype(np.float32)

    resolution = jer_formula(bounded_pt, par0, par1, par2, par3)
    return awk.JaggedArray(
        ev.Jet_ptJESOnly.starts,
        ev.Jet_ptJESOnly.stops,
        resolution,
    )
Example #3
0
def get_efficiencies(event, source, nsig, obj, selection, effmap):
    """Look up efficiency values and errors for the selected objects.

    Args:
        event: Event object exposing ``<obj>_eta`` and a callable
            ``<obj>_ptShift(event, source, nsig)``.
        source: Systematic name forwarded to ``<obj>_ptShift``.
        nsig: Forwarded to ``<obj>_ptShift``.
        obj: Object collection prefix used to build the attribute names.
        selection: Index/mask applied to both jagged arrays before flattening.
        effmap: Table with ``xlow``/``xupp`` (matched against eta),
            ``ylow``/``yupp`` (matched against pt), ``content`` and ``error``.

    Returns:
        Tuple ``(content, error)`` of arrays, one entry per selected object.
    """
    eta_values = getattr(event, "{}_eta".format(obj))[selection].content
    pt_shift = getattr(event, "{}_ptShift".format(obj))
    pt_values = pt_shift(event, source, nsig)[selection].content.astype(
        np.float32)

    low_edges = [effmap["xlow"].values, effmap["ylow"].values]
    high_edges = [effmap["xupp"].values, effmap["yupp"].values]
    rows = get_bin_indices(
        [eta_values, pt_values], low_edges, high_edges, 1,
    )[:, 0]

    matched = effmap.iloc[rows]
    return matched["content"].values, matched["error"].values
def evaluate_pu(ev, source, nsig, var, corrs):
    """Evaluate the per-event weight looked up from ``corrs`` by ``ev.<var>``.

    Each row of ``corrs`` covers the bin from ``nTrueInt`` to ``nTrueInt + 1``;
    the first and last bins are opened to -inf / +inf so every event lands in
    a bin.

    Args:
        ev: Event object; ``getattr(ev, var)`` supplies the binned variable.
        source: Systematic name; the up/down variations are only non-zero
            when it equals ``"pileup"``.
        nsig: Forwarded to ``weight_numba``.
        var: Name of the event attribute to bin.
        corrs: Table with ``nTrueInt``, ``corr``, ``corr_up``, ``corr_down``.

    Returns:
        ``weight_numba(nominal, nsig, up, down)`` cast to float32.
    """
    lower = corrs["nTrueInt"].values.astype(np.float32)
    upper = lower + 1
    # Open-ended outer bins act as underflow/overflow.
    lower[0] = -np.inf
    upper[-1] = np.inf

    rows = get_bin_indices([getattr(ev, var)], [lower], [upper], 1)[:, 0]
    matched = corrs.iloc[rows]

    nominal = matched["corr"].values
    is_pileup = (source == "pileup")
    # Relative shifts, zeroed unless this is the pileup systematic.
    up = (matched["corr_up"].values / nominal - 1.) * is_pileup
    down = (matched["corr_down"].values / nominal - 1.) * is_pileup

    weights = weight_numba(nominal, nsig, up, down)
    return weights.astype(np.float32)
def jes_corr(ev, source, nsig, jesuncs):
    """Return the per-jet JES term for ``source`` shifted by ``nsig``.

    If ``source`` is not one of ``ev.JesSources`` or ``nsig`` is zero, an
    array of ones shaped like ``ev.Jet_pt`` is returned. Otherwise the
    ``Jet_JEC<source>Up`` / ``Jet_JEC<source>Down`` attributes are computed
    and stored on ``ev`` (unless ``ev.hasbranch`` reports the requested one
    already exists) and the one matching the sign of ``nsig`` is returned.

    Args:
        ev: Event object with ``JesSources``, ``Jet_pt``, ``Jet_eta``,
            ``Jet_ptJESOnly`` and ``hasbranch``.
        source: Systematic name; a leading ``"jes"`` is stripped when
            matching the ``source`` column of ``jesuncs``.
        nsig: Only its sign and whether it is zero matter here.
        jesuncs: Table with ``source``, ``eta_low``, ``eta_high`` and
            per-row sequences ``pt``, ``corr_up``, ``corr_down``.

    Returns:
        ``awk.JaggedArray`` of float32 values, one per jet.
    """
    if source not in ev.JesSources or nsig == 0.:
        # Nothing to vary: unit values with the jet-pt jagged structure.
        return awk.JaggedArray(
            ev.Jet_pt.starts,
            ev.Jet_pt.stops,
            np.ones_like(ev.Jet_pt.content, dtype=np.float32),
        )

    direction = "Up" if nsig > 0. else "Down"
    branch = "Jet_JEC{}".format(source + direction)

    if not ev.hasbranch(branch):
        table_key = source[3:] if source.startswith("jes") else source
        table = jesuncs[jesuncs["source"] == table_key]

        rows = get_bin_indices(
            [ev.Jet_eta.content],
            [table["eta_low"].values],
            [table["eta_high"].values],
            1,
        )[:, 0]
        matched = table.iloc[rows]

        # Each cell holds a sequence; stack them into one array per column.
        pt_grid = np.array(list(matched["pt"].values))
        up_grid = np.array(list(matched["corr_up"].values))
        down_grid = np.array(list(matched["corr_down"].values))

        shift_up = interpolate(
            ev.Jet_ptJESOnly.content, pt_grid, up_grid,
        ).astype(np.float32)
        shift_down = interpolate(
            ev.Jet_ptJESOnly.content, pt_grid, down_grid,
        ).astype(np.float32)

        starts = ev.Jet_eta.starts
        stops = ev.Jet_eta.stops
        up_array = awk.JaggedArray(starts, stops, shift_up)
        setattr(ev, "Jet_JEC{}Up".format(source), up_array)
        # The down variation is stored with its sign flipped.
        down_array = awk.JaggedArray(starts, stops, -1. * shift_down)
        setattr(ev, "Jet_JEC{}Down".format(source), down_array)

    return getattr(ev, branch)
def evaluate_btagsf(ev, source, nsig, df, attrs, h2f):
    """Evaluate per-jet b-tag scale-factor weights.

    Each jet is matched to three rows of ``df`` (central, down, up) by
    binning its mapped flavour and the attributes listed in ``attrs``.
    ``btag_formula`` is evaluated on the shifted jet pt for each matched row,
    and the up/down results are turned into relative shifts that are only
    non-zero when ``source == "btagSF"``.

    Args:
        ev: Event object; ``ev.Jet.hadronFlavour``, ``ev.Jet.ptShift`` and
            the attributes named in ``attrs`` are read.
        source: Systematic name, forwarded to callable jet attributes.
        nsig: Forwarded to callable jet attributes and ``weight_numba``.
        df: Scale-factor table with ``jetFlavor`` and, for each entry of
            ``attrs``, ``<df_attr>Min`` / ``<df_attr>Max`` columns.
        attrs: Iterable of ``(jet_attr, df_attr)`` pairs naming additional
            binning variables.
        h2f: Mapping applied to the hadron flavour via ``dict_apply``.

    Returns:
        ``awk.JaggedArray`` with the starts/stops of the shifted jet pt.
    """
    jet_flavour = dict_apply(h2f, ev.Jet.hadronFlavour.content)

    # First binning dimension: flavour, with bin edges jetFlavor and
    # jetFlavor + 1.
    event_attrs = [jet_flavour.astype(np.float32)]
    mins = [df["jetFlavor"].values.astype(np.float32)]
    maxs = [(df["jetFlavor"].values + 1).astype(np.float32)]

    # Remaining binning dimensions come from the configured attributes.
    for jet_attr, df_attr in attrs:
        obj_attr = getattr(ev.Jet, jet_attr)
        if callable(obj_attr):
            obj_attr = obj_attr(ev, source, nsig)
        event_attrs.append(obj_attr.content.astype(np.float32))
        mins.append(df[df_attr + "Min"].values.astype(np.float32))
        maxs.append(df[df_attr + "Max"].values.astype(np.float32))

    # Three matching rows are requested per jet: central, down, up.
    indices = get_bin_indices(event_attrs, mins, maxs, 3)
    idx_central = indices[:, 0]
    idx_down = indices[:, 1]
    idx_up = indices[:, 2]

    jpt = ev.Jet.ptShift(ev, source, nsig)
    sf = btag_formula(jpt.content, df.iloc[idx_central])
    sf_up = btag_formula(jpt.content, df.iloc[idx_up])
    sf_down = btag_formula(jpt.content, df.iloc[idx_down])

    # Relative shifts, zeroed unless this is the b-tag systematic.
    is_btag_source = (source == "btagSF")
    sf_up = is_btag_source * (sf_up / sf - 1.)
    sf_down = is_btag_source * (sf_down / sf - 1.)
    return awk.JaggedArray(
        jpt.starts,
        jpt.stops,
        weight_numba(sf, nsig, sf_up, sf_down),
    )
# Holds the jitted kernel so it is built once per process. Decorating a nested
# function with nb.njit on every call creates a fresh dispatcher each time,
# which forces numba to recompile on every evaluate_object_weights call.
_OBJECT_WEIGHT_KERNELS = {}


def _get_weighted_mean_kernel():
    """Return the jitted weighted-mean kernel, creating it on first use.

    The kernel is built lazily rather than at import time so that ``nb`` is
    only looked up when evaluate_object_weights is actually called.
    """
    kernel = _OBJECT_WEIGHT_KERNELS.get("weighted_mean")
    if kernel is None:
        @nb.njit
        def weighted_mean_numba(
            objattr,
            w,
            k,
            statup,
            statdown,
            systup,
            systdown,
            addsyst,
            nweight,
        ):
            wsum = np.zeros_like(objattr, dtype=np.float32)
            wksum = np.zeros_like(objattr, dtype=np.float32)
            wdkstatupsum = np.zeros_like(objattr, dtype=np.float32)
            wdkstatdownsum = np.zeros_like(objattr, dtype=np.float32)
            wdksystupsum = np.zeros_like(objattr, dtype=np.float32)
            wdksystdownsum = np.zeros_like(objattr, dtype=np.float32)

            # Object idx owns the nweight consecutive entries starting at
            # nweight * idx.
            for idx in range(objattr.shape[0]):
                for subidx in range(nweight * idx, nweight * (idx + 1)):
                    wsum[idx] += w[subidx]
                    wksum[idx] += w[subidx] * k[subidx]
                    wdkstatupsum[idx] += (w[subidx] * statup[subidx])**2
                    wdkstatdownsum[idx] += (w[subidx] * statdown[subidx])**2
                    wdksystupsum[idx] += (w[subidx] * systup[subidx])**2
                    wdksystdownsum[idx] += (w[subidx] * systdown[subidx])**2

            mean = wksum / wsum
            # Weighted uncertainties combined in quadrature with addsyst;
            # the down variations are returned negative.
            stat_up = np.sqrt((wdkstatupsum / wsum**2) + addsyst**2)
            stat_down = -1. * np.sqrt((wdkstatdownsum / wsum**2) + addsyst**2)
            syst_up = np.sqrt((wdksystupsum / wsum**2) + addsyst**2)
            syst_down = -1. * np.sqrt((wdksystdownsum / wsum**2) + addsyst**2)
            return (
                mean.astype(np.float32),
                stat_up.astype(np.float32),
                stat_down.astype(np.float32),
                syst_up.astype(np.float32),
                syst_down.astype(np.float32),
            )

        kernel = weighted_mean_numba
        _OBJECT_WEIGHT_KERNELS["weighted_mean"] = kernel
    return kernel


def evaluate_object_weights(
    ev,
    source,
    nsig,
    df,
    bins_vars,
    add_syst,
    name,
):
    """Evaluate per-object weights as a weighted mean over matched table rows.

    Every object is matched to ``n`` rows of ``df``, where ``n`` is the number
    of distinct values in ``df["weight"]``. The ``corr`` values of those rows
    are averaged with the ``weight`` column as weights, and the stat/syst
    uncertainties are propagated alongside.

    Args:
        ev: Event object passed to the callables in ``bins_vars`` and to
            ``add_syst``.
        source: Systematic name. If it contains ``"syst"`` (case-insensitive)
            the systematic variations are used, otherwise the statistical
            ones.
        nsig: Variation size. It is only applied when ``name`` matches
            ``source`` with ``"stat"`` / ``"syst"`` removed (case-insensitive);
            otherwise ``0.`` is passed to ``weight_numba``.
        df: Table with ``bin<i>_low`` / ``bin<i>_upp`` for each binning
            variable plus ``weight``, ``corr``, ``stat_up``, ``stat_down``,
            ``syst_up`` and ``syst_down``.
        bins_vars: Callables ``v(ev, source, nsig)`` returning the jagged
            binning variables, in the order of the ``bin<i>`` columns.
        add_syst: Callable ``add_syst(ev, source, nsig)`` returning a jagged
            array added in quadrature to every uncertainty.
        name: Name of this weight, compared against ``source``.

    Returns:
        ``awk.JaggedArray`` with the starts/stops of the first binning
        variable.

    Note:
        NaN entries of the binning variables are replaced by 0 in place, so
        the arrays returned by ``bins_vars`` are modified.
    """
    event_vars = [v(ev, source, nsig) for v in bins_vars]
    for v in event_vars:
        v.content[np.isnan(v.content)] = 0.

    nvars = len(event_vars)
    # Number of table rows matched per object; used for both the lookup and
    # the grouping inside the kernel.
    nweight = df["weight"].unique().shape[0]

    indices = get_bin_indices(
        [v.content.astype(np.float32) for v in event_vars],
        [
            df["bin{}_low".format(idx)].values.astype(np.float32)
            for idx in range(nvars)
        ],
        [
            df["bin{}_upp".format(idx)].values.astype(np.float32)
            for idx in range(nvars)
        ],
        nweight,
    ).ravel()
    dfw = df.iloc[indices]

    weighted_mean_numba = _get_weighted_mean_kernel()
    sf, sf_statup, sf_statdown, sf_systup, sf_systdown = weighted_mean_numba(
        event_vars[0].content,
        dfw["weight"].values,
        dfw["corr"].values,
        dfw["stat_up"].values,
        dfw["stat_down"].values,
        dfw["syst_up"].values,
        dfw["syst_down"].values,
        add_syst(ev, source, nsig).content,
        nweight,
    )

    source_lower = source.lower()
    use_syst = "syst" in source_lower
    sfup = sf_systup if use_syst else sf_statup
    sfdown = sf_systdown if use_syst else sf_statdown

    # Only shift this weight when the requested source refers to it.
    source_target = source_lower.replace("stat", "").replace("syst", "")
    applied_nsig = nsig if name.lower() == source_target else 0.
    return awk.JaggedArray(
        event_vars[0].starts, event_vars[0].stops,
        weight_numba(sf, applied_nsig, sfup, sfdown))
def jer_corr(ev, source, nsig, jersfs, maxdr_jets_with_genjets,
             ndpt_jets_with_genjets):
    """Return the per-jet JER term, computing and caching it on ``ev``.

    On the first call (when ``ev.hasbranch`` reports the requested attribute
    is missing) three attributes are written to ``ev``:

    * ``Jet_JECjerSF``: the nominal factor, clipped at zero.
    * ``Jet_JECjerSFUp`` / ``Jet_JECjerSFDown``: the varied factors expressed
      relative to the nominal one (``varied / nominal - 1``), or zero where
      the nominal factor is zero.

    Jets with a matched gen jet and jets without one are smeared differently;
    both use ``np.random.normal``, so results depend on NumPy's global random
    state.

    Args:
        ev: Event object with ``Jet_eta``, ``Jet_ptJESOnly``, ``GenJet_pt``,
            ``Jet_ptResolution`` and ``hasbranch``.
        source: Systematic name; only ``"jerSF"`` selects the Up/Down array.
        nsig: Only its sign and whether it is zero matter here.
        jersfs: Table with ``eta_low``, ``eta_high``, ``corr``, ``corr_up``
            and ``corr_down``.
        maxdr_jets_with_genjets: Forwarded to ``match_jets_from_genjets``.
        ndpt_jets_with_genjets: Forwarded to ``match_jets_from_genjets``.

    Returns:
        The ``Jet_JECjerSF``, ``Jet_JECjerSFUp`` or ``Jet_JECjerSFDown``
        attribute of ``ev``, depending on ``source`` and ``nsig``.
    """
    flavour = "jerSF"
    if source == "jerSF" and nsig != 0.:
        updown = "Up" if nsig > 0. else "Down"
        flavour += updown

    if not ev.hasbranch("Jet_JEC{}".format(flavour)):
        # Scale factors (nominal, up, down) for the eta bin of each jet.
        indices = get_bin_indices(
            [ev.Jet_eta.content],
            [jersfs["eta_low"].values],
            [jersfs["eta_high"].values],
            1,
        )[:, 0]
        ressfs = jersfs.iloc[indices][["corr", "corr_up", "corr_down"]].values
        cjer = np.ones_like(ev.Jet_ptJESOnly.content, dtype=np.float32)
        cjer_up = np.ones_like(ev.Jet_ptJESOnly.content, dtype=np.float32)
        cjer_down = np.ones_like(ev.Jet_ptJESOnly.content, dtype=np.float32)

        # match gen jets
        gidx = match_jets_from_genjets(
            ev,
            maxdr_jets_with_genjets,
            ndpt_jets_with_genjets,
        )
        # A non-negative index marks a jet with a matched gen jet.
        mask = (gidx >= 0)
        # Adding the per-event starts turns the matched indices into flat
        # positions in GenJet_pt.content (assumes gidx is event-local --
        # TODO confirm against match_jets_from_genjets).
        indices = (ev.GenJet_pt.starts + gidx[mask]).content
        gpt_matched = ev.GenJet_pt.content[indices]
        mask = mask.content

        # Matched jets: Gaussian draw whose width is the relative difference
        # between the gen pt and the reconstructed pt.
        gen_var = np.abs(1. - gpt_matched / ev.Jet_ptJESOnly.content[mask])
        gaus_var = np.random.normal(0., gen_var)
        cjer[mask] = 1. + (ressfs[mask, 0] - 1.) * gaus_var
        cjer_up[mask] = 1. + (ressfs[mask, 1] - 1.) * gaus_var
        cjer_down[mask] = 1. + (ressfs[mask, 2] - 1.) * gaus_var

        # unmatched gen jets
        # Gaussian draw whose width is Jet_ptResolution, scaled by
        # sqrt(max(sf**2 - 1, 0)). The same draw is reused for the nominal,
        # up and down factors.
        gaus_var = np.random.normal(0., ev.Jet_ptResolution(ev).content[~mask])
        ressfs_mod = ressfs[~mask]**2 - 1.
        ressfs_mod[ressfs_mod < 0.] = 0.
        cjer[~mask] = 1. + gaus_var * np.sqrt(ressfs_mod[:, 0])
        cjer_up[~mask] = 1. + gaus_var * np.sqrt(ressfs_mod[:, 1])
        cjer_down[~mask] = 1. + gaus_var * np.sqrt(ressfs_mod[:, 2])

        # negative checks
        cjer[cjer < 0.] = 0.
        cjer_up[cjer_up < 0.] = 0.
        cjer_down[cjer_down < 0.] = 0.

        # Convert the variations to shifts relative to the nominal factor.
        # The division runs over the whole array before masking, so entries
        # with cjer == 0 can emit divide warnings; they are set to 0 below.
        cjer_up[cjer > 0.] = (cjer_up / cjer - 1.)[cjer > 0.]
        cjer_up[cjer == 0.] = 0.
        cjer_down[cjer > 0.] = (cjer_down / cjer - 1.)[cjer > 0.]
        cjer_down[cjer == 0.] = 0.

        # write to event
        starts, stops = ev.Jet_ptJESOnly.starts, ev.Jet_ptJESOnly.stops
        ev.Jet_JECjerSF = awk.JaggedArray(starts, stops, cjer)
        ev.Jet_JECjerSFUp = awk.JaggedArray(starts, stops, cjer_up)
        ev.Jet_JECjerSFDown = awk.JaggedArray(starts, stops, cjer_down)

    return getattr(ev, "Jet_JEC{}".format(flavour))