Exemple #1
0
def _string_equal(one, two):
    nplike = ak.nplike.of(one, two)
    behavior = ak._util.behaviorof(one, two)

    one, two = ak.without_parameters(one).layout, ak.without_parameters(
        two).layout

    # first condition: string lengths must be the same
    counts1 = nplike.asarray(one.count(axis=-1))
    counts2 = nplike.asarray(two.count(axis=-1))

    out = counts1 == counts2

    # only compare characters in strings that are possibly equal (same length)
    possible = nplike.logical_and(out, counts1)
    possible_counts = counts1[possible]

    if len(possible_counts) > 0:
        onepossible = one[possible]
        twopossible = two[possible]

        reduced = ak.all(ak.Array(onepossible) == ak.Array(twopossible),
                         axis=-1).layout

        # update same-length strings with a verdict about their characters
        out[possible] = reduced

    return ak._util.wrap(ak.layout.NumpyArray(out), behavior)
def jer_smear(
    variation,
    forceStochastic,
    pt_gen,
    jetPt,
    etaJet,
    jet_energy_resolution,
    jet_resolution_rand_gauss,
    jet_energy_resolution_scale_factor,
):
    pt_gen = pt_gen if not forceStochastic else None

    if not isinstance(jetPt, awkward.highlevel.Array):
        raise Exception("'jetPt' must be an awkward array of some kind!")

    if forceStochastic:
        pt_gen = awkward.without_parameters(awkward.zeros_like(jetPt))

    jersmear = jet_energy_resolution * jet_resolution_rand_gauss
    jersf = jet_energy_resolution_scale_factor[:, variation]
    deltaPtRel = (jetPt - pt_gen) / jetPt
    doHybrid = (pt_gen > 0) & (numpy.abs(deltaPtRel) <
                               3 * jet_energy_resolution)

    detSmear = 1 + (jersf - 1) * deltaPtRel
    stochSmear = 1 + numpy.sqrt(numpy.maximum(jersf**2 - 1, 0)) * jersmear

    min_jet_pt = _MIN_JET_ENERGY / numpy.cosh(etaJet)
    min_jet_pt_corr = min_jet_pt / jetPt

    smearfact = awkward.where(doHybrid, detSmear, stochSmear)
    smearfact = awkward.where((smearfact * jetPt) < min_jet_pt,
                              min_jet_pt_corr, smearfact)

    def getfunction(layout, depth):
        if isinstance(layout, awkward.layout.NumpyArray) or not isinstance(
                layout,
            (awkward.layout.Content, awkward.partition.PartitionedArray)):
            return lambda: awkward.layout.NumpyArray(smearfact)
        return None

    smearfact = awkward._util.recursively_apply(
        awkward.operations.convert.to_layout(jetPt), getfunction)
    smearfact = awkward._util.wrap(smearfact, awkward._util.behaviorof(jetPt))
    return smearfact
Exemple #3
0
def test_corrected_jets_factory():
    import os
    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack

    events = None
    from coffea.nanoevents import NanoEventsFactory

    factory = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root"))
    events = factory.events()

    jec_stack_names = [
        "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi",
        "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi",
        "Spring16_25nsV10_MC_SF_AK4PFPuppi",
    ]
    for key in evaluator.keys():
        if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key:
            jec_stack_names.append(key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    name_map = jec_stack.blank_name_map
    name_map["JetPt"] = "pt"
    name_map["JetMass"] = "mass"
    name_map["JetEta"] = "eta"
    name_map["JetA"] = "area"

    jets = events.Jet

    jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
    jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
    jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0),
                                      np.float32)
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]
    name_map["ptGenJet"] = "pt_gen"
    name_map["ptRaw"] = "pt_raw"
    name_map["massRaw"] = "mass_raw"
    name_map["Rho"] = "rho"

    jec_cache = cachetools.Cache(np.inf)

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()

    print("setup corrected jets time =", toc - tic)

    tic = time.time()
    prof = pyinstrument.Profiler()
    prof.start()
    corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
    prof.stop()
    toc = time.time()

    print("corrected_jets build time =", toc - tic)

    print(prof.output_text(unicode=True, color=True, show_all=True))

    tic = time.time()
    print("Generated jet pt:", corrected_jets.pt_gen)
    print("Original jet pt:", corrected_jets.pt_orig)
    print("Raw jet pt:", jets.pt_raw)
    print("Corrected jet pt:", corrected_jets.pt)
    print("Original jet mass:", corrected_jets.mass_orig)
    print("Raw jet mass:", jets["mass_raw"])
    print("Corrected jet mass:", corrected_jets.mass)
    print("jet eta:", jets.eta)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()

    print("build all jet variations =", toc - tic)

    # Test that the corrections were applied correctly
    from coffea.jetmet_tools import (
        FactorizedJetCorrector,
        JetResolution,
        JetResolutionScaleFactor,
    )

    scalar_form = ak.without_parameters(jets["pt_raw"]).layout.form
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name]
           for name in jec_stack_names[0:4]})
    corrs = corrector.getCorrection(JetEta=jets["eta"],
                                    Rho=jets["rho"],
                                    JetPt=jets["pt_raw"],
                                    JetA=jets["area"])
    reso = JetResolution(
        **{name: evaluator[name]
           for name in jec_stack_names[4:5]})
    jets["jet_energy_resolution"] = reso.getResolution(
        JetEta=jets["eta"],
        Rho=jets["rho"],
        JetPt=jets["pt_raw"],
        form=scalar_form,
        lazy_cache=jec_cache,
    )
    resosf = JetResolutionScaleFactor(
        **{name: evaluator[name]
           for name in jec_stack_names[5:6]})
    jets["jet_energy_resolution_scale_factor"] = resosf.getScaleFactor(
        JetEta=jets["eta"], lazy_cache=jec_cache)

    # Filter out the non-deterministic (no gen pt) jets
    def smear_factor(jetPt, pt_gen, jersf):
        return (ak.full_like(jetPt, 1.0) +
                (jersf[:, 0] - ak.full_like(jetPt, 1.0)) *
                (jetPt - pt_gen) / jetPt)

    test_gen_pt = ak.concatenate(
        [corrected_jets.pt_gen[0, :-2], corrected_jets.pt_gen[-1, :-1]])
    test_raw_pt = ak.concatenate([jets.pt_raw[0, :-2], jets.pt_raw[-1, :-1]])
    test_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    test_eta = ak.concatenate([jets.eta[0, :-2], jets.eta[-1, :-1]])
    test_jer = ak.concatenate([
        jets.jet_energy_resolution[0, :-2], jets.jet_energy_resolution[-1, :-1]
    ])
    test_jer_sf = ak.concatenate([
        jets.jet_energy_resolution_scale_factor[0, :-2],
        jets.jet_energy_resolution_scale_factor[-1, :-1],
    ])
    test_jec = ak.concatenate([corrs[0, :-2], corrs[-1, :-1]])
    test_corrected_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    test_corr_pt = test_raw_pt * test_jec
    test_pt_smear_corr = test_corr_pt * smear_factor(test_corr_pt, test_gen_pt,
                                                     test_jer_sf)

    # Print the results of the "by-hand" calculations and confirm that the values match the expected values
    print("\nConfirm the CorrectedJetsFactory values:")
    print("Jet pt (gen)", test_gen_pt.tolist())
    print("Jet pt (raw)", test_raw_pt.tolist())
    print("Jet pt (nano):", test_pt.tolist())
    print("Jet eta:", test_eta.tolist())
    print("Jet energy resolution:", test_jer.tolist())
    print("Jet energy resolution sf:", test_jer_sf.tolist())
    print("Jet energy correction:", test_jec.tolist())
    print("Corrected jet pt (ref)", test_corr_pt.tolist())
    print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist())
    print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n")
    assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6)

    name_map["METpt"] = "pt"
    name_map["METphi"] = "phi"
    name_map["JetPhi"] = "phi"
    name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX"
    name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY"

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()

    print("setup corrected MET time =", toc - tic)

    met = events.MET
    tic = time.time()
    # prof = pyinstrument.Profiler()
    # prof.start()
    corrected_met = met_factory.build(met,
                                      corrected_jets,
                                      lazy_cache=jec_cache)
    # prof.stop()
    toc = time.time()

    # print(prof.output_text(unicode=True, color=True, show_all=True))

    print("corrected_met build time =", toc - tic)

    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    prof = pyinstrument.Profiler()
    prof.start()
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    prof.stop()
    toc = time.time()

    print("build all met variations =", toc - tic)

    print(prof.output_text(unicode=True, color=True, show_all=True))
    def build(self, jets, lazy_cache):
        if lazy_cache is None:
            raise Exception(
                "CorrectedJetsFactory requires a awkward-array cache to function correctly."
            )
        lazy_cache = awkward._util.MappingProxy.maybe_wrap(lazy_cache)
        if not isinstance(jets, awkward.highlevel.Array):
            raise Exception(
                "'jets' must be an awkward > 1.0.0 array of some kind!")
        fields = awkward.fields(jets)
        if len(fields) == 0:
            raise Exception(
                "Empty record, please pass a jet object with at least {self.real_sig} defined!"
            )
        out = awkward.flatten(jets)
        wrap = partial(awkward_rewrap,
                       like_what=jets,
                       gfunc=rewrap_recordarray)
        scalar_form = awkward.without_parameters(
            out[self.name_map["ptRaw"]]).layout.form

        in_dict = {field: out[field] for field in fields}
        out_dict = dict(in_dict)

        # take care of nominal JEC (no JER if available)
        out_dict[self.name_map["JetPt"] +
                 "_orig"] = out_dict[self.name_map["JetPt"]]
        out_dict[self.name_map["JetMass"] +
                 "_orig"] = out_dict[self.name_map["JetMass"]]
        if self.treat_pt_as_raw:
            out_dict[self.name_map["ptRaw"]] = out_dict[self.name_map["JetPt"]]
            out_dict[self.name_map["massRaw"]] = out_dict[
                self.name_map["JetMass"]]

        jec_name_map = dict(self.name_map)
        jec_name_map["JetPt"] = jec_name_map["ptRaw"]
        jec_name_map["JetMass"] = jec_name_map["massRaw"]
        if self.jec_stack.jec is not None:
            jec_args = {
                k: out_dict[jec_name_map[k]]
                for k in self.jec_stack.jec.signature
            }
            out_dict[
                "jet_energy_correction"] = self.jec_stack.jec.getCorrection(
                    **jec_args, form=scalar_form, lazy_cache=lazy_cache)
        else:
            out_dict["jet_energy_correction"] = awkward.without_parameters(
                awkward.ones_like(out_dict[self.name_map["JetPt"]]))

        # finally the lazy binding to the JEC
        init_pt = partial(
            awkward.virtual,
            operator.mul,
            args=(out_dict["jet_energy_correction"],
                  out_dict[self.name_map["ptRaw"]]),
            cache=lazy_cache,
        )
        init_mass = partial(
            awkward.virtual,
            operator.mul,
            args=(
                out_dict["jet_energy_correction"],
                out_dict[self.name_map["massRaw"]],
            ),
            cache=lazy_cache,
        )

        out_dict[self.name_map["JetPt"]] = init_pt(length=len(out),
                                                   form=scalar_form)
        out_dict[self.name_map["JetMass"]] = init_mass(length=len(out),
                                                       form=scalar_form)

        out_dict[self.name_map["JetPt"] +
                 "_jec"] = out_dict[self.name_map["JetPt"]]
        out_dict[self.name_map["JetMass"] +
                 "_jec"] = out_dict[self.name_map["JetMass"]]

        # in jer we need to have a stash for the intermediate JEC products
        has_jer = False
        if self.jec_stack.jer is not None and self.jec_stack.jersf is not None:
            has_jer = True
            jer_name_map = dict(self.name_map)
            jer_name_map["JetPt"] = jer_name_map["JetPt"] + "_jec"
            jer_name_map["JetMass"] = jer_name_map["JetMass"] + "_jec"

            jerargs = {
                k: out_dict[jer_name_map[k]]
                for k in self.jec_stack.jer.signature
            }
            out_dict[
                "jet_energy_resolution"] = self.jec_stack.jer.getResolution(
                    **jerargs, form=scalar_form, lazy_cache=lazy_cache)

            jersfargs = {
                k: out_dict[jer_name_map[k]]
                for k in self.jec_stack.jersf.signature
            }
            out_dict[
                "jet_energy_resolution_scale_factor"] = self.jec_stack.jersf.getScaleFactor(
                    **jersfargs, form=_JERSF_FORM, lazy_cache=lazy_cache)

            seeds = numpy.array(out_dict[self.name_map["JetPt"] +
                                         "_orig"])[[0, -1]].view("i4")
            out_dict["jet_resolution_rand_gauss"] = awkward.virtual(
                rand_gauss,
                args=(
                    out_dict[self.name_map["JetPt"] + "_orig"],
                    numpy.random.Generator(numpy.random.PCG64(seeds)),
                ),
                cache=lazy_cache,
                length=len(out),
                form=scalar_form,
            )

            init_jerc = partial(
                awkward.virtual,
                jer_smear,
                args=(
                    0,
                    self.forceStochastic,
                    out_dict[jer_name_map["ptGenJet"]],
                    out_dict[jer_name_map["JetPt"]],
                    out_dict[jer_name_map["JetEta"]],
                    out_dict["jet_energy_resolution"],
                    out_dict["jet_resolution_rand_gauss"],
                    out_dict["jet_energy_resolution_scale_factor"],
                ),
                cache=lazy_cache,
            )
            out_dict["jet_energy_resolution_correction"] = init_jerc(
                length=len(out), form=scalar_form)

            init_pt_jer = partial(
                awkward.virtual,
                operator.mul,
                args=(
                    out_dict["jet_energy_resolution_correction"],
                    out_dict[jer_name_map["JetPt"]],
                ),
                cache=lazy_cache,
            )
            init_mass_jer = partial(
                awkward.virtual,
                operator.mul,
                args=(
                    out_dict["jet_energy_resolution_correction"],
                    out_dict[jer_name_map["JetMass"]],
                ),
                cache=lazy_cache,
            )
            out_dict[self.name_map["JetPt"]] = init_pt_jer(length=len(out),
                                                           form=scalar_form)
            out_dict[self.name_map["JetMass"]] = init_mass_jer(
                length=len(out), form=scalar_form)

            out_dict[self.name_map["JetPt"] +
                     "_jer"] = out_dict[self.name_map["JetPt"]]
            out_dict[self.name_map["JetMass"] +
                     "_jer"] = out_dict[self.name_map["JetMass"]]

            # JER systematics
            jerc_up = partial(
                awkward.virtual,
                jer_smear,
                args=(
                    1,
                    self.forceStochastic,
                    out_dict[jer_name_map["ptGenJet"]],
                    out_dict[jer_name_map["JetPt"]],
                    out_dict[jer_name_map["JetEta"]],
                    out_dict["jet_energy_resolution"],
                    out_dict["jet_resolution_rand_gauss"],
                    out_dict["jet_energy_resolution_scale_factor"],
                ),
                cache=lazy_cache,
            )
            up = awkward.flatten(jets)
            up["jet_energy_resolution_correction"] = jerc_up(length=len(out),
                                                             form=scalar_form)
            init_pt_jer = partial(
                awkward.virtual,
                operator.mul,
                args=(
                    up["jet_energy_resolution_correction"],
                    out_dict[jer_name_map["JetPt"]],
                ),
                cache=lazy_cache,
            )
            init_mass_jer = partial(
                awkward.virtual,
                operator.mul,
                args=(
                    up["jet_energy_resolution_correction"],
                    out_dict[jer_name_map["JetMass"]],
                ),
                cache=lazy_cache,
            )
            up[self.name_map["JetPt"]] = init_pt_jer(length=len(out),
                                                     form=scalar_form)
            up[self.name_map["JetMass"]] = init_mass_jer(length=len(out),
                                                         form=scalar_form)

            jerc_down = partial(
                awkward.virtual,
                jer_smear,
                args=(
                    2,
                    self.forceStochastic,
                    out_dict[jer_name_map["ptGenJet"]],
                    out_dict[jer_name_map["JetPt"]],
                    out_dict[jer_name_map["JetEta"]],
                    out_dict["jet_energy_resolution"],
                    out_dict["jet_resolution_rand_gauss"],
                    out_dict["jet_energy_resolution_scale_factor"],
                ),
                cache=lazy_cache,
            )
            down = awkward.flatten(jets)
            down["jet_energy_resolution_correction"] = jerc_down(
                length=len(out), form=scalar_form)
            init_pt_jer = partial(
                awkward.virtual,
                operator.mul,
                args=(
                    down["jet_energy_resolution_correction"],
                    out_dict[jer_name_map["JetPt"]],
                ),
                cache=lazy_cache,
            )
            init_mass_jer = partial(
                awkward.virtual,
                operator.mul,
                args=(
                    down["jet_energy_resolution_correction"],
                    out_dict[jer_name_map["JetMass"]],
                ),
                cache=lazy_cache,
            )
            down[self.name_map["JetPt"]] = init_pt_jer(length=len(out),
                                                       form=scalar_form)
            down[self.name_map["JetMass"]] = init_mass_jer(length=len(out),
                                                           form=scalar_form)
            out_dict["JER"] = awkward.zip({
                "up": up,
                "down": down
            },
                                          depth_limit=1,
                                          with_name="JetSystematic")

        if self.jec_stack.junc is not None:
            juncnames = {}
            juncnames.update(self.name_map)
            if has_jer:
                juncnames["JetPt"] = juncnames["JetPt"] + "_jer"
                juncnames["JetMass"] = juncnames["JetMass"] + "_jer"
            else:
                juncnames["JetPt"] = juncnames["JetPt"] + "_jec"
                juncnames["JetMass"] = juncnames["JetMass"] + "_jec"
            juncargs = {
                k: out_dict[juncnames[k]]
                for k in self.jec_stack.junc.signature
            }
            juncs = self.jec_stack.junc.getUncertainty(**juncargs)

            def junc_smeared_val(uncvals, up_down, variable):
                return awkward.materialized(uncvals[:, up_down] * variable)

            def build_variation(unc, jetpt, jetpt_orig, jetmass, jetmass_orig,
                                updown):
                var_dict = dict(in_dict)
                var_dict[jetpt] = awkward.virtual(
                    junc_smeared_val,
                    args=(
                        unc,
                        updown,
                        jetpt_orig,
                    ),
                    length=len(out),
                    form=scalar_form,
                    cache=lazy_cache,
                )
                var_dict[jetmass] = awkward.virtual(
                    junc_smeared_val,
                    args=(
                        unc,
                        updown,
                        jetmass_orig,
                    ),
                    length=len(out),
                    form=scalar_form,
                    cache=lazy_cache,
                )
                return awkward.zip(
                    var_dict,
                    depth_limit=1,
                    parameters=out.layout.parameters,
                    behavior=out.behavior,
                )

            def build_variant(unc, jetpt, jetpt_orig, jetmass, jetmass_orig):
                up = build_variation(unc, jetpt, jetpt_orig, jetmass,
                                     jetmass_orig, 0)
                down = build_variation(unc, jetpt, jetpt_orig, jetmass,
                                       jetmass_orig, 1)
                return awkward.zip({
                    "up": up,
                    "down": down
                },
                                   depth_limit=1,
                                   with_name="JetSystematic")

            for name, func in juncs:
                out_dict[f"jet_energy_uncertainty_{name}"] = func
                out_dict[f"JES_{name}"] = build_variant(
                    func,
                    self.name_map["JetPt"],
                    out_dict[juncnames["JetPt"]],
                    self.name_map["JetMass"],
                    out_dict[juncnames["JetMass"]],
                )

        out_parms = out.layout.parameters
        out_parms["corrected"] = True
        out = awkward.zip(out_dict,
                          depth_limit=1,
                          parameters=out_parms,
                          behavior=out.behavior)

        return wrap(out)
Exemple #5
0
 def mass(self):
     return awkward.without_parameters(awkward.zeros_like(self.pt))