Beispiel #1
0
def fill_gen_jets(df, output):
    """Store the first two lepton-separated generator jets and their sum.

    A generator jet is kept only when its ``delta_r`` distance to every
    generator particle with |pdgId| equal to 11, 13 or 15 is larger than 0.3.
    The jets found at subentry 0 and 1 of the surviving collection are written
    to ``output`` as ``gjet1_*`` / ``gjet2_*``, their ``p4_sum`` as ``gjj_*``,
    and the three ``delta_r`` results between them as ``gjj_dEta``,
    ``gjj_dPhi`` and ``gjj_dR``.

    Args:
        df: Event container exposing the ``GenJet`` and ``GenPart`` collections.
        output: Table that receives the new columns (modified in place);
            assumed to be a pandas DataFrame indexed by event entry.

    Returns:
        The same ``output`` object that was passed in.
    """
    kinematics = ["pt", "eta", "phi", "mass"]

    gjets = df.GenJet
    # Evaluate |pdgId| once instead of once per comparison.
    abs_pdg_id = abs(df.GenPart.pdgId)
    gleptons = df.GenPart[(abs_pdg_id == 13)
                          | (abs_pdg_id == 11)
                          | (abs_pdg_id == 15)]

    # For every jet, build the nested list of (jet, lepton) combinations so
    # that the reduction over the last axis yields one flag per jet.
    gl_pair = ak.cartesian({
        "jet": gjets,
        "lepton": gleptons
    },
                           axis=1,
                           nested=True)
    _, _, dr_gl = delta_r(
        gl_pair["jet"].eta,
        gl_pair["lepton"].eta,
        gl_pair["jet"].phi,
        gl_pair["lepton"].phi,
    )
    isolated = ak.all((dr_gl > 0.3), axis=-1)

    # Convert the filtered jets to pandas a single time; the original code
    # repeated this conversion for each of the two jets.
    isolated_jets = ak.to_pandas(gjets[isolated])
    gjet1 = isolated_jets.loc[pd.IndexSlice[:, 0], kinematics]
    gjet2 = isolated_jets.loc[pd.IndexSlice[:, 1], kinematics]

    # Drop the per-event jet counter so both frames are indexed by entry only.
    gjet1.index = gjet1.index.droplevel("subentry")
    gjet2.index = gjet2.index.droplevel("subentry")

    gjsum = p4_sum(gjet1, gjet2)
    for var in kinematics:
        output[f"gjet1_{var}"] = gjet1[var]
        output[f"gjet2_{var}"] = gjet2[var]
        output[f"gjj_{var}"] = gjsum[var]

    output["gjj_dEta"], output["gjj_dPhi"], output["gjj_dR"] = delta_r(
        output.gjet1_eta, output.gjet2_eta, output.gjet1_phi, output.gjet2_phi)
    return output
Beispiel #2
0
def fill_muons(processor, output, mu1, mu2, is_mc):
    """Fill single-muon and dimuon columns of ``output``.

    All listed columns are first created with the placeholder 0.0, then
    overwritten with values taken from ``mu1`` / ``mu2``, from their
    ``p4_sum`` and from the helper functions ``delta_r``, ``mass_resolution``
    and ``cs_variables``.

    Args:
        processor: Object providing ``evaluator`` and ``year``, both forwarded
            to ``mass_resolution``.
        output: Table that receives the columns (modified in place); assumed
            to be a pandas DataFrame.
        mu1: First muon; must provide ``pt``, ``ptErr``, ``eta``, ``phi`` and
            ``pfRelIso04_all``.
        mu2: Second muon with the same fields as ``mu1``.
        is_mc: Flag forwarded unchanged to ``mass_resolution``.

    Returns:
        None. ``output`` is updated in place.
    """
    mu1_variable_names = [
        "mu1_pt", "mu1_pt_over_mass", "mu1_eta", "mu1_phi", "mu1_iso"
    ]
    mu2_variable_names = [
        "mu2_pt", "mu2_pt_over_mass", "mu2_eta", "mu2_phi", "mu2_iso"
    ]
    dimuon_variable_names = [
        "dimuon_mass",
        "dimuon_ebe_mass_res",
        "dimuon_ebe_mass_res_rel",
        "dimuon_pt",
        "dimuon_pt_log",
        "dimuon_eta",
        "dimuon_phi",
        "dimuon_dEta",
        "dimuon_dPhi",
        "dimuon_dR",
        "dimuon_rap",
        "dimuon_cos_theta_cs",
        "dimuon_phi_cs",
    ]
    v_names = mu1_variable_names + mu2_variable_names + dimuon_variable_names

    # Initialize columns for muon variables
    for n in v_names:
        output[n] = 0.0

    # Fill single muon variables
    for v in ["pt", "ptErr", "eta", "phi"]:
        output[f"mu1_{v}"] = mu1[v]
        output[f"mu2_{v}"] = mu2[v]

    output["mu1_iso"] = mu1.pfRelIso04_all
    output["mu2_iso"] = mu2.pfRelIso04_all

    # Fill dimuon variables
    mm = p4_sum(mu1, mu2)
    for v in ["pt", "eta", "phi", "mass", "rap"]:
        name = f"dimuon_{v}"
        output[name] = mm[v]
        output[name] = output[name].fillna(-999.0)

    # The pt/mass ratios must be computed only after dimuon_mass holds the
    # real value: right after initialization it is the 0.0 placeholder, so
    # computing the ratio earlier divides by zero.
    output["mu1_pt_over_mass"] = output.mu1_pt / output.dimuon_mass
    output["mu2_pt_over_mass"] = output.mu2_pt / output.dimuon_mass

    output["dimuon_pt_log"] = np.log(output.dimuon_pt)

    mm_deta, mm_dphi, mm_dr = delta_r(mu1.eta, mu2.eta, mu1.phi, mu2.phi)

    output["dimuon_dEta"] = mm_deta
    output["dimuon_dPhi"] = mm_dphi
    output["dimuon_dR"] = mm_dr

    # mass_resolution receives the whole table, so it runs after the
    # single-muon and dimuon columns above have been filled.
    output["dimuon_ebe_mass_res"] = mass_resolution(is_mc, processor.evaluator,
                                                    output, processor.year)
    output[
        "dimuon_ebe_mass_res_rel"] = output.dimuon_ebe_mass_res / output.dimuon_mass

    output["dimuon_cos_theta_cs"], output["dimuon_phi_cs"] = cs_variables(
        mu1, mu2)
Beispiel #3
0
def gen_jet_pair_mass(df):
    """Return the mass of the sum of the first two lepton-separated gen jets.

    A generator jet is kept only when its ``delta_r`` distance to every
    generator particle with |pdgId| equal to 11, 13 or 15 is larger than 0.3.
    The jets at subentry 0 and 1 of the surviving collection are combined with
    ``p4_sum``.

    Args:
        df: Event container exposing the ``GenJet`` and ``GenPart`` collections.

    Returns:
        The ``mass`` attribute of the ``p4_sum`` result, or ``None`` when no
        jet survives the separation requirement.
    """
    kinematics = ["pt", "eta", "phi", "mass"]

    gjets = df.GenJet
    # Evaluate |pdgId| once instead of once per comparison.
    abs_pdg_id = abs(df.GenPart.pdgId)
    gleptons = df.GenPart[(abs_pdg_id == 13)
                          | (abs_pdg_id == 11)
                          | (abs_pdg_id == 15)]

    # Nested pairing keeps one list of leptons per jet, so reducing over the
    # last axis gives a single flag per jet.
    gl_pair = ak.cartesian({
        "jet": gjets,
        "lepton": gleptons
    },
                           axis=1,
                           nested=True)
    _, _, dr_gl = delta_r(
        gl_pair["jet"].eta,
        gl_pair["lepton"].eta,
        gl_pair["jet"].phi,
        gl_pair["lepton"].phi,
    )
    isolated = ak.all((dr_gl > 0.3), axis=-1)

    # Apply the mask once and reuse the result for the emptiness check and
    # for the pandas conversion.
    isolated_jets = gjets[isolated]
    if not ak.count(isolated_jets, axis=None) > 0:
        return None

    # TODO: convert only relevant fields!
    # A single conversion serves both jets; the original converted twice.
    jets_df = ak.to_pandas(isolated_jets)
    gjet1 = jets_df.loc[pd.IndexSlice[:, 0], kinematics]
    gjet2 = jets_df.loc[pd.IndexSlice[:, 1], kinematics]

    # Drop the per-event jet counter so both frames are indexed by entry only.
    gjet1.index = gjet1.index.droplevel("subentry")
    gjet2.index = gjet2.index.droplevel("subentry")

    return p4_sum(gjet1, gjet2).mass
Beispiel #4
0
def fill_muons(output, mu1, mu2):
    """Fill single-muon and dimuon columns of ``output``.

    Every column is first created with the placeholder 0.0 and then
    overwritten where a value is available. ``mu1`` and ``mu2`` are modified
    as a side effect: both receive a constant ``mass`` entry.

    Args:
        output: Table that receives the columns (modified in place).
        mu1: First muon; must provide ``pt``, ``eta``, ``phi``, ``charge``.
        mu2: Second muon with the same fields as ``mu1``.

    Returns:
        None. ``output`` is updated in place.
    """
    single_muon_suffixes = [
        "pt", "pt_over_mass", "eta", "phi", "iso", "charge"
    ]
    dimuon_suffixes = [
        "mass", "pt", "pt_log", "eta", "phi", "dEta", "dPhi", "dR", "rap",
        "cos_theta_cs", "phi_cs"
    ]
    columns = ([f"mu1_{suffix}" for suffix in single_muon_suffixes] +
               [f"mu2_{suffix}" for suffix in single_muon_suffixes] +
               [f"dimuon_{suffix}" for suffix in dimuon_suffixes])

    # Create every column up front with a neutral placeholder.
    # NOTE(review): the *_pt_over_mass and *_iso columns are never overwritten
    # in this function and therefore keep the placeholder - confirm intended.
    for column in columns:
        output[column] = 0.0

    # Per-muon quantities copied straight from the inputs.
    for field in ("pt", "eta", "phi", "charge"):
        output[f"mu1_{field}"] = mu1[field]
        output[f"mu2_{field}"] = mu2[field]

    # Presumably the muon mass in GeV - TODO confirm units.
    muon_mass = 0.10566
    mu1["mass"] = muon_mass
    mu2["mass"] = muon_mass

    # Quantities of the two-muon system; missing values become -999.0.
    dimuon = p4_sum(mu1, mu2)
    for field in ("pt", "eta", "phi", "mass", "rap"):
        column = f"dimuon_{field}"
        output[column] = dimuon[field]
        output[column] = output[column].fillna(-999.0)

    output["dimuon_pt_log"] = np.log(output.dimuon_pt)

    (output["dimuon_dEta"], output["dimuon_dPhi"],
     output["dimuon_dR"]) = delta_r(mu1.eta, mu2.eta, mu1.phi, mu2.phi)

    cos_theta_cs, phi_cs = cs_variables(mu1, mu2)
    output["dimuon_cos_theta_cs"] = cos_theta_cs
    output["dimuon_phi_cs"] = phi_cs
Beispiel #5
0
def fill_softjets(df, output, variables, cutoff):
    """Store soft-activity jet count and HT in ``variables``, with overlap removal.

    Soft-activity jets within ``delta_r`` < 0.4 of either muon or either of
    the two jets are flagged. For events marked ``to_correct`` the jet count
    is replaced by the number of unflagged jets above ``cutoff`` and the HT is
    reduced by the summed pt of the flagged jets above ``cutoff`` (floored at
    zero). Other events keep the values read from ``df``.

    Args:
        df: Event container exposing ``SoftActivityJet`` and the fields
            ``SoftActivityJetNjets{cutoff}`` / ``SoftActivityJetHT{cutoff}``.
        output: Table providing ``two_muons``, ``mu1_eta``, ``mu1_phi``,
            ``mu2_eta`` and ``mu2_phi``.
        variables: Table providing ``njets``, ``jet1_eta``, ``jet1_phi``,
            ``jet2_eta`` and ``jet2_phi``; receives ``nsoftjets{cutoff}`` and
            ``htsoft{cutoff}`` (modified in place).
        cutoff: pt threshold; also the suffix of the field names read from
            ``df`` and of the columns written to ``variables``.

    Returns:
        None. ``variables`` is updated in place.
    """
    softjets = ak.to_pandas(df.SoftActivityJet)
    softjets["mass"] = 0.0
    njets_col = f"SoftActivityJetNjets{cutoff}"
    ht_col = f"SoftActivityJetHT{cutoff}"
    summary = ak.to_pandas(df[[njets_col, ht_col]])

    summary["to_correct"] = output.two_muons | (variables.njets > 0)

    # (flag column, reference eta, reference phi) for each object whose
    # neighbourhood is excluded from the soft activity.
    references = [
        ("dR_m1", output.mu1_eta, output.mu1_phi),
        ("dR_m2", output.mu2_eta, output.mu2_phi),
        ("dR_j1", variables.jet1_eta, variables.jet1_phi),
        ("dR_j2", variables.jet2_eta, variables.jet2_phi),
    ]
    distances = {}
    for flag, ref_eta, ref_phi in references:
        _, _, distances[flag] = delta_r(softjets.eta, ref_eta, softjets.phi,
                                        ref_phi)
    for flag, _, _ in references:
        softjets[flag] = distances[flag] < 0.4

    dr_cols = ["dR_m1", "dR_m2", "dR_j1", "dR_j2"]
    softjets[dr_cols] = softjets[dr_cols].fillna(False)
    # A soft jet is removed as soon as it overlaps with any reference object.
    softjets["to_remove"] = softjets[dr_cols].sum(axis=1).astype(bool)

    above_cutoff = softjets.pt > cutoff
    kept = softjets[(~softjets.to_remove) & above_cutoff]
    removed = softjets[(softjets.to_remove) & above_cutoff]

    kept_per_entry = kept.reset_index().groupby("entry")["subentry"].nunique()
    summary["njets_corrected"] = kept_per_entry
    summary["njets_corrected"] = (
        summary["njets_corrected"].fillna(0).astype(int))

    summary["footprint"] = removed.pt.groupby(level=[0]).sum()
    summary["footprint"] = summary["footprint"].fillna(0.0)
    summary["ht_corrected"] = summary[ht_col] - summary.footprint
    summary.loc[summary.ht_corrected < 0, "ht_corrected"] = 0.0

    # Only events flagged for correction take the recomputed values.
    summary.loc[summary.to_correct, njets_col] = summary.loc[
        summary.to_correct, "njets_corrected"]
    summary.loc[summary.to_correct, ht_col] = summary.loc[summary.to_correct,
                                                          "ht_corrected"]

    variables[f"nsoftjets{cutoff}"] = summary[njets_col]
    variables[f"htsoft{cutoff}"] = summary[ht_col]
Beispiel #6
0
def fill_gen_jets(df, output):
    """Store the first two lepton-separated generator jets and their sum.

    This variant reads the upper-case fields ``PT``, ``Eta``, ``Phi`` and
    ``Mass`` and uses ``df.MuonMedium`` as the lepton collection. A jet is
    kept only when its ``delta_r`` distance to every such lepton is larger
    than 0.3. The jets at subentry 0 and 1 of the surviving collection are
    written to ``output`` as ``gjet1_*`` / ``gjet2_*``, their ``p4_sum`` as
    ``gjj_*``, and the ``delta_r`` results between them as ``gjj_dEta``,
    ``gjj_dPhi`` and ``gjj_dR``.

    Args:
        df: Event container exposing the ``GenJet`` and ``MuonMedium``
            collections.
        output: Table that receives the new columns (modified in place);
            assumed to be a pandas DataFrame indexed by event entry.

    Returns:
        The same ``output`` object that was passed in.
    """
    # Maps the lower-case names used downstream to the input field names.
    feat_map = {"pt": "PT", "eta": "Eta", "phi": "Phi", "mass": "Mass"}
    features = list(feat_map.values())

    gjets = df.GenJet[features]
    gleptons = df.MuonMedium

    # Nested pairing keeps one list of leptons per jet, so reducing over the
    # last axis gives a single flag per jet.
    gl_pair = ak.cartesian({"jet": gjets, "lepton": gleptons}, axis=1, nested=True)
    _, _, dr_gl = delta_r(
        gl_pair["jet"].Eta,
        gl_pair["lepton"].Eta,
        gl_pair["jet"].Phi,
        gl_pair["lepton"].Phi,
    )
    isolated = ak.all((dr_gl > 0.3), axis=-1)

    # Convert the filtered jets to pandas a single time; the original code
    # repeated this conversion for each of the two jets.
    isolated_jets = ak.to_pandas(gjets[isolated])
    gjet1 = isolated_jets.loc[pd.IndexSlice[:, 0], features]
    gjet2 = isolated_jets.loc[pd.IndexSlice[:, 1], features]

    # Drop the per-event jet counter so both frames are indexed by entry only.
    gjet1.index = gjet1.index.droplevel("subentry")
    gjet2.index = gjet2.index.droplevel("subentry")

    # Add lower-case aliases next to the original columns for p4_sum and for
    # the output column names below.
    for var, source in feat_map.items():
        gjet1[var] = gjet1[source]
        gjet2[var] = gjet2[source]
    gjsum = p4_sum(gjet1, gjet2)

    for var in feat_map:
        output[f"gjet1_{var}"] = gjet1[var]
        output[f"gjet2_{var}"] = gjet2[var]
        output[f"gjj_{var}"] = gjsum[var]
    output["gjj_dEta"], output["gjj_dPhi"], output["gjj_dR"] = delta_r(
        output.gjet1_eta, output.gjet2_eta, output.gjet1_phi, output.gjet2_phi
    )
    return output
Beispiel #7
0
def fill_jets(output, jet1, jet2):
    """Fill jet, dijet and dimuon-dijet columns of ``output``.

    All listed columns are first created with the placeholder -999.0 and then
    overwritten; any value that is still missing at the end is set back to
    -999.0.

    Args:
        output: Table that receives the columns (modified in place). It must
            already contain ``dimuon_pt``, ``dimuon_eta``, ``dimuon_phi``,
            ``dimuon_mass`` and ``dimuon_rap``.
        jet1: First jet; must provide ``pt``, ``eta``, ``phi`` and whatever
            ``rapidity`` and ``p4_sum`` read.
        jet2: Second jet with the same fields as ``jet1``.

    Returns:
        None. ``output`` is updated in place.
    """
    variable_names = [
        "jet1_pt",
        "jet1_eta",
        "jet1_rap",
        "jet1_phi",
        "jet2_pt",
        "jet2_eta",
        "jet2_rap",
        "jet2_phi",
        "jj_mass",
        "jj_mass_log",
        "jj_pt",
        "jj_eta",
        "jj_phi",
        "jj_dEta",
        "jj_dPhi",
        "mmj1_dEta",
        "mmj1_dPhi",
        "mmj1_dR",
        "mmj2_dEta",
        "mmj2_dPhi",
        "mmj2_dR",
        "mmj_min_dEta",
        "mmj_min_dPhi",
        "mmjj_pt",
        "mmjj_eta",
        "mmjj_phi",
        "mmjj_mass",
        "rpt",
        "zeppenfeld",
        "ll_zstar_log",
    ]

    for v in variable_names:
        output[v] = -999.0

    # Fill single jet variables
    for v in ["pt", "eta", "phi"]:
        output[f"jet1_{v}"] = jet1[v]
        output[f"jet2_{v}"] = jet2[v]

    output["jet1_rap"] = rapidity(jet1)
    output["jet2_rap"] = rapidity(jet2)

    # Fill dijet variables
    jj = p4_sum(jet1, jet2)
    for v in ["pt", "eta", "phi", "mass"]:
        output[f"jj_{v}"] = jj[v]

    output["jj_mass_log"] = np.log(output.jj_mass)

    output["jj_dEta"], output["jj_dPhi"], _ = delta_r(
        output.jet1_eta, output.jet2_eta, output.jet1_phi, output.jet2_phi
    )

    # Fill dimuon-dijet system variables
    mm_columns = ["dimuon_pt", "dimuon_eta", "dimuon_phi", "dimuon_mass"]
    jj_columns = ["jj_pt", "jj_eta", "jj_phi", "jj_mass"]

    dimuons = output.loc[:, mm_columns]
    dijets = output.loc[:, jj_columns]

    # Derive the short names from the labels actually present instead of
    # assigning a fixed positional list: a hard-coded order silently swaps
    # pt/eta/phi/mass whenever the selected columns come back in a different
    # order than assumed. get_level_values(0) also covers multi-level columns.
    dimuons.columns = [
        label[len("dimuon_"):] for label in dimuons.columns.get_level_values(0)
    ]
    dijets.columns = [
        label[len("jj_"):] for label in dijets.columns.get_level_values(0)
    ]

    mmjj = p4_sum(dimuons, dijets)
    for v in ["pt", "eta", "phi", "mass"]:
        output[f"mmjj_{v}"] = mmjj[v]

    output["zeppenfeld"] = output.dimuon_eta - 0.5 * (
        output.jet1_eta + output.jet2_eta
    )
    output["rpt"] = output.mmjj_pt / (
        output.dimuon_pt + output.jet1_pt + output.jet2_pt
    )
    ll_ystar = output.dimuon_rap - (output.jet1_rap + output.jet2_rap) / 2
    ll_zstar = abs(ll_ystar / (output.jet1_rap - output.jet2_rap))

    output["ll_zstar_log"] = np.log(ll_zstar)

    output["mmj1_dEta"], output["mmj1_dPhi"], output["mmj1_dR"] = delta_r(
        output.dimuon_eta, output.jet1_eta, output.dimuon_phi, output.jet1_phi
    )

    output["mmj2_dEta"], output["mmj2_dPhi"], output["mmj2_dR"] = delta_r(
        output.dimuon_eta, output.jet2_eta, output.dimuon_phi, output.jet2_phi
    )

    # Smaller of the two dimuon-jet separations. np.where takes
    # (condition, value_if_true, value_if_false); the earlier argument order
    # used a value as the condition and the boolean mask as a result.
    for diff in ("dEta", "dPhi"):
        to_jet1 = output[f"mmj1_{diff}"]
        to_jet2 = output[f"mmj2_{diff}"]
        output[f"mmj_min_{diff}"] = np.where(to_jet1 < to_jet2, to_jet1, to_jet2)

    output[variable_names] = output[variable_names].fillna(-999.0)
Beispiel #8
0
def fill_jets(output, variables, jet1, jet2):
    """Fill jet, dijet and dimuon-dijet columns of ``variables``.

    All listed columns are first created in ``variables`` with the
    placeholder -999.0 and then overwritten where a value is computed here.
    The soft-jet columns and ``selection`` are only initialized in this
    function.

    Args:
        output: Table providing ``dimuon_pt``, ``dimuon_eta``, ``dimuon_phi``,
            ``dimuon_mass`` and ``dimuon_rap``; it is only read.
        variables: Table that receives the columns (modified in place).
        jet1: First jet; must provide ``pt``, ``eta``, ``phi``, ``qgl``,
            ``jetId``, ``puId`` and whatever ``rapidity`` and ``p4_sum`` read.
        jet2: Second jet with the same fields as ``jet1``.

    Returns:
        None. ``variables`` is updated in place.
    """
    variable_names = [
        "jet1_pt",
        "jet1_eta",
        "jet1_rap",
        "jet1_phi",
        "jet1_qgl",
        "jet1_jetId",
        "jet1_puId",
        "jet2_pt",
        "jet2_eta",
        "jet2_rap",
        "jet2_phi",
        "jet2_qgl",
        "jet2_jetId",
        "jet2_puId",
        "jj_mass",
        "jj_mass_log",
        "jj_pt",
        "jj_eta",
        "jj_phi",
        "jj_dEta",
        "jj_dPhi",
        "mmj1_dEta",
        "mmj1_dPhi",
        "mmj1_dR",
        "mmj2_dEta",
        "mmj2_dPhi",
        "mmj2_dR",
        "mmj_min_dEta",
        "mmj_min_dPhi",
        "mmjj_pt",
        "mmjj_eta",
        "mmjj_phi",
        "mmjj_mass",
        "rpt",
        "zeppenfeld",
        "ll_zstar_log",
        "nsoftjets2",
        "nsoftjets5",
        "htsoft2",
        "htsoft5",
        "selection",
    ]

    for v in variable_names:
        variables[v] = -999.0

    # Fill single jet variables
    for v in ["pt", "eta", "phi", "qgl", "jetId", "puId"]:
        variables[f"jet1_{v}"] = jet1[v]
        variables[f"jet2_{v}"] = jet2[v]

    variables["jet1_rap"] = rapidity(jet1)
    variables["jet2_rap"] = rapidity(jet2)

    # Fill dijet variables
    jj = p4_sum(jet1, jet2)
    for v in ["pt", "eta", "phi", "mass"]:
        variables[f"jj_{v}"] = jj[v]

    variables["jj_mass_log"] = np.log(variables.jj_mass)

    variables["jj_dEta"], variables["jj_dPhi"], _ = delta_r(
        variables.jet1_eta, variables.jet2_eta, variables.jet1_phi,
        variables.jet2_phi)

    # Fill dimuon-dijet system variables
    mm_columns = ["dimuon_pt", "dimuon_eta", "dimuon_phi", "dimuon_mass"]
    jj_columns = ["jj_pt", "jj_eta", "jj_phi", "jj_mass"]

    dimuons = output.loc[:, mm_columns]
    dijets = variables.loc[:, jj_columns]

    # Derive the short names from the labels actually present instead of
    # assigning a fixed positional list: a hard-coded order silently swaps
    # pt/eta/phi/mass whenever the selected columns come back in a different
    # order than assumed. get_level_values(0) also covers multi-level columns.
    dimuons.columns = [
        label[len("dimuon_"):] for label in dimuons.columns.get_level_values(0)
    ]
    dijets.columns = [
        label[len("jj_"):] for label in dijets.columns.get_level_values(0)
    ]

    mmjj = p4_sum(dimuons, dijets)
    for v in ["pt", "eta", "phi", "mass"]:
        variables[f"mmjj_{v}"] = mmjj[v]

    variables["zeppenfeld"] = output.dimuon_eta - 0.5 * (variables.jet1_eta +
                                                         variables.jet2_eta)

    variables["rpt"] = variables.mmjj_pt / (
        output.dimuon_pt + variables.jet1_pt + variables.jet2_pt)

    ll_ystar = output.dimuon_rap - (variables.jet1_rap +
                                    variables.jet2_rap) / 2

    ll_zstar = abs(ll_ystar / (variables.jet1_rap - variables.jet2_rap))

    variables["ll_zstar_log"] = np.log(ll_zstar)

    (variables["mmj1_dEta"], variables["mmj1_dPhi"],
     variables["mmj1_dR"]) = delta_r(output.dimuon_eta, variables.jet1_eta,
                                     output.dimuon_phi, variables.jet1_phi)

    (variables["mmj2_dEta"], variables["mmj2_dPhi"],
     variables["mmj2_dR"]) = delta_r(output.dimuon_eta, variables.jet2_eta,
                                     output.dimuon_phi, variables.jet2_phi)

    # Smaller of the two dimuon-jet separations. np.where takes
    # (condition, value_if_true, value_if_false); the earlier argument order
    # used a value as the condition and the boolean mask as a result.
    for diff in ("dEta", "dPhi"):
        to_jet1 = variables[f"mmj1_{diff}"]
        to_jet2 = variables[f"mmj2_{diff}"]
        variables[f"mmj_min_{diff}"] = np.where(to_jet1 < to_jet2, to_jet1,
                                                to_jet2)