def test_flatten_RecordArray():
    array = ak.Array(
        [
            {"x": [], "y": [[3, 3, 3]]},
            {"x": [[1]], "y": [[2, 2]]},
            {"x": [[2], [2]], "y": [[1]]},
            {"x": [[3], [3], [3]], "y": [[]]},
        ]
    )
    assert ak.to_list(ak.flatten(array, axis=2)) == [
        {"x": [], "y": [3, 3, 3]},
        {"x": [1], "y": [2, 2]},
        {"x": [2, 2], "y": [1]},
        {"x": [3, 3, 3], "y": []},
    ]
    assert ak.to_list(ak.flatten(array[1:], axis=2)) == [
        {"x": [1], "y": [2, 2]},
        {"x": [2, 2], "y": [1]},
        {"x": [3, 3, 3], "y": []},
    ]
    assert ak.to_list(ak.flatten(array[:, 1:], axis=2)) == [
        {"x": [], "y": []},
        {"x": [], "y": []},
        {"x": [2], "y": []},
        {"x": [3, 3], "y": []},
    ]
Example #2
def find_permutations(jets, leptons, MET, btagWP):
    '''
    Inputs:
        Jets, leptons, MET, and if jets pass btag WP
    Returns:
        List of (jet assignment ordering, associated neutrino solutions)
    '''

    jets_inputs = np.stack((ak.to_numpy(ak.flatten(jets.px)), ak.to_numpy(ak.flatten(jets.py)),
                            ak.to_numpy(ak.flatten(jets.pz)), ak.to_numpy(ak.flatten(jets.energy)),
                            ak.to_numpy(ak.flatten(jets[btagWP]))), axis=1).astype('float64')  # one row has (px, py, pz, E, btag)
    lepton_inputs = np.stack((ak.to_numpy(ak.flatten(leptons.px)), ak.to_numpy(ak.flatten(leptons.py)),
                              ak.to_numpy(ak.flatten(leptons.pz)), ak.to_numpy(ak.flatten(leptons.energy))),
                             axis=1).astype('float64')  # one row has (px, py, pz, E)
    met_inputs = np.stack((ak.to_numpy(MET.px), ak.to_numpy(MET.py)), axis=1).astype('float64')  # one row has (px, py)
    p_ordering, p_nu = get_test_permutations(njets_array=ak.num(jets), jets=jets_inputs, leptons=lepton_inputs, met=met_inputs)

    p_ordering = ak.from_iter(p_ordering)  # convert once instead of once per field
    p_nu = ak.from_iter(p_nu)
    test_perms = ak.Array({
        'blepIdx': p_ordering[:, :, 0],
        'bhadIdx': p_ordering[:, :, 1],
        'wjaIdx': p_ordering[:, :, 2],
        'wjbIdx': p_ordering[:, :, 3],
        'Nu': ak.Array({
            'px': p_nu[:, :, 0],
            'py': p_nu[:, :, 1],
            'pz': p_nu[:, :, 2],
            'chi2': p_nu[:, :, 3],
        })
    })
    return test_perms
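The function above follows a common pattern: flatten jagged awkward arrays into flat NumPy inputs for an external routine, then rebuild a jagged result. A minimal sketch of that round trip, not part of the original code and using hypothetical values; ak.unflatten with the per-event counts from ak.num is the generic inverse of the flatten step:

import awkward as ak
import numpy as np

jets_pt = ak.Array([[10.0, 20.0], [], [30.0]])  # hypothetical jagged input
counts = ak.num(jets_pt)                        # jets per event: [2, 0, 1]
flat = ak.to_numpy(ak.flatten(jets_pt))         # np.array([10., 20., 30.])
scaled = flat * 1.1                             # stand-in for the external routine
result = ak.unflatten(scaled, counts)           # jagged again: [[11., 22.], [], [33.]]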
Example #3
    def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
        # if the input is jagged, save the counts and flatten everything;
        # this is needed for the ternary (awkward.where) conditions later
        abseta = abs(eta)
        kData = self._kRes[s][m][1](abseta)  # type 1 is data
        kMC = self._kRes[s][m][0](abseta)  # type 0 is MC
        mask = kData > kMC
        x = awkward.zeros_like(kpt)
        sigma = self._sigma(kpt, eta, nl, s, m)
        # Rochester cbA = beta, cbN = m, as well as cbM (always 0?) = loc and cbS = scale to transform y = (x-loc)/scale in the pdf method
        cbA = self._cbA[s][m](abseta, nl)
        cbN = self._cbN[s][m](abseta, nl)
        cbS = self._cbS[s][m](abseta, nl)
        counts = awkward.num(u)
        u_flat = awkward.flatten(u)
        loc = awkward.zeros_like(u_flat)
        cbA_flat = awkward.flatten(cbA)
        cbN_flat = awkward.flatten(cbN)
        cbS_flat = awkward.flatten(cbS)

        invcdf = awkward.unflatten(
            doublecrystalball.ppf(u_flat, cbA_flat, cbA_flat, cbN_flat,
                                  cbN_flat, loc, cbS_flat),
            counts,
        )

        x = awkward.where(
            mask,
            (numpy.sqrt(kData * kData - kMC * kMC) * sigma * invcdf),
            x,
        )
        result = awkward.where(x > -1, 1.0 / (1.0 + x), awkward.ones_like(kpt))
        if isinstance(kpt, numpy.ndarray):
            result = numpy.array(result)
        return result
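The flatten, evaluate, unflatten pattern in _kExtra works for any distribution whose ppf only accepts flat arrays. A sketch of just that pattern, with scipy.stats.norm standing in for coffea's internal doublecrystalball (an assumption for illustration only):

import awkward
from scipy.stats import norm

u = awkward.Array([[0.1, 0.5], [0.9]])   # jagged uniform draws
counts = awkward.num(u)                  # [2, 1]
u_flat = awkward.to_numpy(awkward.flatten(u))
invcdf = awkward.unflatten(norm.ppf(u_flat), counts)  # jagged quantiles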
Example #4
def plot_distributions(obj):
    global tree
    blacklist = ["hitIdx","simTrkIdx","layer","pt","eta","phi","sim_pt","sim_eta","sim_phi","type", "ring", "moduleType_binary","layer_binary","isFake","isDuplicate"]
    print("object = ",obj)
    quantities = []
    for name in tree.keys():
        if name.startswith(obj) and name not in {"{}_{}".format(obj, x) for x in blacklist}:
            quantities.append(name)
    matchedMask = tree["{}_isFake".format(obj)].array() == 0

    layers = np.array(list(map(process_layers,ak.flatten(tree["{}_layer_binary".format(obj)].array()))))
    #moduleTypes = np.array(list(map(process_moduleTypes,ak.flatten(tree["{}_moduleType_binary".format(obj)].array()))))
    layerTypes = np.array(list(map(process_layerType,layers)))
#    layerTypes = np.array(list(map(process_numbers, layers)))
#    print(layerTypes)
    unique_layerTypes = np.unique(layerTypes, axis = 0)
    unique_layerTypes = np.append(unique_layerTypes,"")
    print(unique_layerTypes)
    #Generic
    for layerType in unique_layerTypes:
        print("layerType = {}".format(layerType))
        for quantity in quantities:
            print("quantity = {}".format(quantity))
            if layerType == "":
                qArray = ak.flatten(tree[quantity].array())
                qArraySimTrackMatched = qArray[ak.flatten(matchedMask)]
            else:
                qArray = ak.flatten(tree[quantity].array())[layerTypes == layerType]
                qArraySimTrackMatched = qArray[ak.flatten(matchedMask)[layerTypes == layerType]]


            if all(qArray == -999):
                continue
            make_plots(qArray,qArraySimTrackMatched,quantity,layerType)
Example #5
    def fill_genp_hists(self, accumulator, dname, genp_type, flag, obj,
                        evt_weights):
        accumulator["pt"].fill(dataset=dname,
                               objtype=genp_type,
                               flag=flag,
                               pt=ak.flatten(obj.pt, axis=None),
                               weight=evt_weights)
        accumulator["eta"].fill(dataset=dname,
                                objtype=genp_type,
                                flag=flag,
                                eta=ak.flatten(obj.eta, axis=None),
                                weight=evt_weights)
        accumulator["phi"].fill(dataset=dname,
                                objtype=genp_type,
                                flag=flag,
                                phi=ak.flatten(obj.phi, axis=None),
                                weight=evt_weights)
        accumulator["mass"].fill(dataset=dname,
                                 objtype=genp_type,
                                 flag=flag,
                                 mass=ak.flatten(obj.mass, axis=None),
                                 weight=evt_weights)
        accumulator["energy"].fill(dataset=dname,
                                   objtype=genp_type,
                                   flag=flag,
                                   energy=ak.flatten(obj.energy, axis=None),
                                   weight=evt_weights)

        return accumulator
Example #6
def get_root_rest_energies(roots, energies, pxs, pys, pzs):
    """
    Find the energies (of anything really, but presumably jets) in the rest frame
    of particles identified as root particles
    
    Parameters
    ----------
    roots : array of bool
        mask identifying the root particles
    energies : array like of floats
        the energies of the particles
    pxs : array like of floats
        the momentum in the x direction of the particles
    pys : array like of floats
        the momentum in the y direction of the particles
    pzs : array like of floats
        the momentum in the z direction of the particles

    Returns
    -------
    energies : array like of floats
        the energies of the particles in the rest frame of the root
    """
    # if we are to use the roots as indices they must have this form
    energies = ak.to_numpy(energies)
    masses2 = energies**2 - pxs**2 - pys**2 - pzs**2
    pxs = pxs - ak.flatten(pxs[roots])
    pys = pys - ak.flatten(pys[roots])
    pzs = pzs - ak.flatten(pzs[roots])
    energies = np.sqrt(masses2 + pxs**2 + pys**2 + pzs**2)
    return energies
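Note that the subtraction zeroes the root's own momentum, so the root's returned energy is its invariant mass. A quick numeric check with hypothetical values, assuming the function above is in scope:

import awkward as ak

roots = ak.Array([[True, False]])
energies = ak.Array([[5.0, 4.0]])
pxs = ak.Array([[3.0, 1.0]])
pys = ak.Array([[0.0, 2.0]])
pzs = ak.Array([[0.0, 0.0]])
out = get_root_rest_energies(roots, energies, pxs, pys, pzs)
# the root has m = sqrt(5**2 - 3**2) = 4, so out[0][0] == 4.0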
Example #7
    def process(self, events):
        output = self.accumulator.identity()

        dataset = events.metadata["dataset"]
        print(events.metadata)
        if "checkusermeta" in events.metadata:
            metaname, metavalue = self.expected_usermeta[dataset]
            assert metavalue == events.metadata[metaname]

        mapping = events.behavior["__events_factory__"]._mapping
        muon_pt = events.Muon.pt
        if isinstance(mapping, nanoevents.mapping.CachedMapping):
            keys_in_cache = list(mapping.cache.cache.keys())
            has_canaries = any(canary in keys_in_cache
                               for canary in self._canaries)
            if has_canaries:
                try:
                    from distributed import get_worker

                    worker = get_worker()
                    output["worker"].add(worker.name)
                except ValueError:
                    pass

        dimuon = ak.combinations(events.Muon, 2)
        dimuon = dimuon["0"] + dimuon["1"]

        output["pt"].fill(dataset=dataset, pt=ak.flatten(muon_pt))
        output["mass"].fill(dataset=dataset, mass=ak.flatten(dimuon.mass))
        output["cutflow"]["%s_pt" % dataset] += sum(ak.num(events.Muon))
        output["cutflow"]["%s_mass" % dataset] += sum(ak.num(dimuon))

        return output
Example #8
    def process(self, df):
        ak.behavior.update(vector.behavior)
        output = self.accumulator.identity()

        dataset = df.metadata["dataset"]
        print(df.metadata)
        if "checkusermeta" in df.metadata:
            metaname, metavalue = self.expected_usermeta[dataset]
            assert metavalue == df.metadata[metaname]

        muon = ak.zip(
            {
                "pt": df.Muon_pt,
                "eta": df.Muon_eta,
                "phi": df.Muon_phi,
                "mass": df.Muon_mass,
            },
            with_name="PtEtaPhiMLorentzVector",
        )

        dimuon = ak.combinations(muon, 2)
        dimuon = dimuon["0"] + dimuon["1"]

        output["pt"].fill(dataset=dataset, pt=ak.flatten(muon.pt))
        output["mass"].fill(dataset=dataset, mass=ak.flatten(dimuon.mass))
        output["cutflow"]["%s_pt" % dataset] += np.sum(ak.num(muon))
        output["cutflow"]["%s_mass" % dataset] += np.sum(ak.num(dimuon))

        return output
Example #9
def calcGeometricOffset(rCone, E, f_id, mu, mucut):
    E = ak.to_numpy(ak.flatten(E)).reshape(len(E), nEta)[mu > mucut]
    f_id = ak.to_numpy(ak.flatten(f_id)).reshape(len(f_id), nEta)[mu > mucut]
    if len(f_id) != len(E):
        print("Error: f_id and E have different lengths")
    area = 2 * np.pi * (etabins[1:] - etabins[:-1])
    return E * f_id * np.pi * rCone * rCone / 255. / np.cosh(etaC) / area
Example #10
    def _evaluate(self, *args):
        """ jec/jer = f(args) """
        bin_vals = {
            argname: args[self._dim_args[argname]]
            for argname in self._dim_order
        }
        eval_vals = {
            argname: args[self._eval_args[argname]]
            for argname in self._eval_vars
        }

        # lookup the bins that we care about
        dim1_name = self._dim_order[0]
        dim1_indices = numpy.clip(
            numpy.searchsorted(
                self._bins[dim1_name], bin_vals[dim1_name], side="right") - 1,
            0,
            self._bins[dim1_name].size - 2,
        )
        bin_indices = [dim1_indices]
        for binname in self._dim_order[1:]:
            bin_indices.append(
                masked_bin_eval(bin_indices[0], self._bins[binname],
                                bin_vals[binname]))

        bin_tuple = tuple(bin_indices)

        # get clamp values and clip the inputs
        eval_values = []
        for eval_name in self._eval_vars:
            clamp_mins = None
            if len(awkward.flatten(self._eval_clamp_mins[eval_name])) == 1:
                clamp_mins = awkward.flatten(
                    self._eval_clamp_mins[eval_name])[0]
            else:
                clamp_mins = numpy.array(
                    self._eval_clamp_mins[eval_name][bin_tuple]).squeeze()

            clamp_maxs = None
            if len(awkward.flatten(self._eval_clamp_maxs[eval_name])) == 1:
                clamp_maxs = awkward.flatten(
                    self._eval_clamp_maxs[eval_name])[0]
            else:
                clamp_maxs = numpy.array(
                    self._eval_clamp_maxs[eval_name][bin_tuple]).squeeze()

            eval_values.append(
                numpy.clip(eval_vals[eval_name], clamp_mins, clamp_maxs))

        # get parameter values
        parm_values = []
        if len(self._parms) > 0:
            parm_values = [
                numpy.array(parm[bin_tuple]).squeeze() for parm in self._parms
            ]

        return self._formula(*tuple(parm_values + eval_values))
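The first-dimension lookup above is a standard NumPy binning idiom: searchsorted(..., side="right") - 1 gives the bin containing each value, and the clip pushes underflow and overflow into the first and last bins. The same idiom in isolation, with hypothetical bin edges:

import numpy

bins = numpy.array([0.0, 1.3, 2.5, 5.0])  # hypothetical bin edges
vals = numpy.array([-1.0, 0.5, 2.5, 99.0])
idx = numpy.clip(numpy.searchsorted(bins, vals, side="right") - 1, 0, bins.size - 2)
# idx == [0, 0, 2, 2]: out-of-range values are clamped into the edge bins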
Example #11
def test():
    a = ak.layout.NumpyArray(np.empty(0))
    idx = ak.layout.Index64([])
    a = ak.layout.IndexedOptionArray64(idx, a)
    idx = ak.layout.Index64([0])
    a = ak.layout.ListOffsetArray64(idx, a)
    idx = ak.layout.Index64([175990832])
    a = ak.layout.ListOffsetArray64(idx, a)
    assert ak.flatten(a, axis=2).tolist() == []
    assert str(ak.flatten(a, axis=2).type) == "0 * var * ?float64"
Example #12
def filter_with_mask(eventWise, mask_name, append=True):
    new_name, jet_name = get_filtered_name(mask_name)
    eventWise.selected_event = None

    # copy the hyper parameters
    hyperparameters = {}
    # not all jets have all input parameters,
    # so filter for what really exists
    for suffix in FormJets.get_jet_input_params():
        name = jet_name + '_' + suffix
        if name in eventWise.hyperparameter_columns:
            hyperparameters[new_name + '_' + suffix] = getattr(eventWise, name)

    # now copy the contents
    ps_mask = getattr(eventWise, mask_name)
    jet_ints = ['_' + name for name in FormJets.Clustering.int_columns]
    label_column = FormJets.Clustering.int_columns.index("Label")
    jet_floats = ['_' + name for name in FormJets.Clustering.float_columns]
    child_ps_mask = ps_mask * getattr(eventWise, jet_name + "_Child1") == -1
    # don't cut corners, make it using proper jets....
    content = {
        new_name + suffix: [[] for _ in child_ps_mask]
        for suffix in jet_ints + jet_floats
    }
    for event_n, event_mask in enumerate(child_ps_mask):
        eventWise.selected_event = event_n
        # these are only the floats that have passed the mask
        event_floats = [
            ak.to_numpy(
                ak.flatten(getattr(eventWise, jet_name + suffix)[event_mask]))
            for suffix in jet_floats
        ]
        event_floats = np.vstack(event_floats).T
        # the labels need to be preserved
        event_ints = -np.ones(
            (len(event_floats), len(FormJets.Clustering.int_columns)))
        event_labels = getattr(eventWise, jet_name + "_Label")[event_mask]
        event_ints[:, label_column] = ak.flatten(event_labels)
        # now make a partitional jet of these values
        jets = FormJets.ManualPartitional((event_ints, event_floats))
        # and make it cluster like the real jets
        for jet_n, labels in enumerate(event_labels):
            jets.create_jet(ak.to_list(labels))
        # split the partitional object, and read out the created values
        for jet in jets.split():
            mask = jet.Label != -1
            for suffix in jet_ints + jet_floats:
                new_content = getattr(jet, suffix[1:])[mask]
                content[new_name + suffix][event_n].append(new_content)
    eventWise.selected_event = None

    if append:
        eventWise.append_hyperparameters(**hyperparameters)
        eventWise.append(**content)
    return hyperparameters, content
Example #13
def convert_junc_txt_component(juncFilePath, uncFile):
    (
        name,
        layout,
        pars,
        nBinnedVars,
        nBinColumns,
        nEvalVars,
        formula,
        nParms,
        columns,
        dtypes,
    ) = _parse_jme_formatted_file(juncFilePath,
                                  interpolatedFunc=True,
                                  parmsFromColumns=True,
                                  jme_f=uncFile)

    temp = _build_standard_jme_lookup(
        name,
        layout,
        pars,
        nBinnedVars,
        nBinColumns,
        nEvalVars,
        formula,
        nParms,
        columns,
        dtypes,
        interpolatedFunc=True,
    )
    wrapped_up = {}
    for key, val in temp.items():
        newkey = (key[0], "jec_uncertainty_lookup")
        vallist = list(val)
        vals, names = vallist[-1]
        knots = vals[0:len(vals):3]
        downs = vals[1:len(vals):3]
        ups = vals[2:len(vals):3]
        downs = numpy.array(
            [numpy.array(awkward.flatten(down)) for down in downs])
        ups = numpy.array([numpy.array(awkward.flatten(up)) for up in ups])
        for knotv in knots:
            knot = numpy.unique(numpy.array(awkward.flatten(knotv)))
            if knot.size != 1:
                raise Exception("Multiple bin low edges found")
        knots = numpy.array(
            [numpy.unique(numpy.array(awkward.flatten(k)))[0] for k in knots])
        vallist[2] = ({
            "knots": knots,
            "ups": ups.T,
            "downs": downs.T
        }, vallist[2][-1])
        vallist = vallist[:-1]
        wrapped_up[newkey] = tuple(vallist)
    return wrapped_up
Example #14
def test_jet_resolution():
    from coffea.jetmet_tools import JetResolution

    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_Rho = np.full_like(test_eta, 10.0)

    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)
    test_Rho_jag = ak.unflatten(test_Rho, counts)

    jer_names = ["Spring16_25nsV10_MC_PtResolution_AK4PFPuppi"]
    reso = JetResolution(**{name: evaluator[name] for name in jer_names})

    print(reso)

    resos = reso.getResolution(JetEta=test_eta, Rho=test_Rho, JetPt=test_pt)
    resos_jag = reso.getResolution(JetEta=test_eta_jag,
                                   Rho=test_Rho_jag,
                                   JetPt=test_pt_jag)
    assert ak.all(np.abs(resos - ak.flatten(resos_jag)) < 1e-6)

    test_pt_jag = test_pt_jag[0:3]
    test_eta_jag = test_eta_jag[0:3]
    test_Rho_jag = test_Rho_jag[0:3]
    test_Rho_jag = ak.concatenate(
        [test_Rho_jag[:-1], [ak.concatenate([test_Rho_jag[-1, :-1], 100.0])]])
    counts = counts[0:3]
    print("Raw jet values:")
    print("pT:", test_pt_jag)
    print("eta:", test_eta_jag)
    print("rho:", test_Rho_jag, "\n")

    resos_jag_ref = ak.unflatten(
        np.array([
            0.21974642,
            0.32421591,
            0.33702479,
            0.27420327,
            0.13940689,
            0.48134521,
            0.26564994,
            1.0,
        ]),
        counts,
    )
    resos_jag = reso.getResolution(JetEta=test_eta_jag,
                                   Rho=test_Rho_jag,
                                   JetPt=test_pt_jag)
    print("Reference Resolution (jagged):", resos_jag_ref)
    print("Resolution (jagged):", resos_jag)
    # NB: 5e-4 tolerance was agreed upon by lgray and aperloff, if the differences get bigger over time
    #     we need to agree upon how these numbers are evaluated (double/float conversion is kinda random)
    assert ak.all(
        np.abs(ak.flatten(resos_jag_ref) - ak.flatten(resos_jag)) < 5e-4)
Example #15
def test_jet_correction_uncertainty():
    from coffea.jetmet_tools import JetCorrectionUncertainty

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)

    junc_names = ["Summer16_23Sep2016V3_MC_Uncertainty_AK4PFPuppi"]
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name]
           for name in junc_names})

    print(junc)

    juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)

    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))

    for i, (level, corrs) in enumerate(juncs):
        assert corrs.shape[0] == test_eta.shape[0]
        assert ak.all(corrs == ak.flatten(juncs_jag[i][1]))

    test_pt_jag = test_pt_jag[0:3]
    test_eta_jag = test_eta_jag[0:3]
    counts = counts[0:3]
    print("Raw jet values:")
    print("pT:", test_pt_jag.tolist())
    print("eta:", test_eta_jag.tolist(), "\n")

    juncs_jag_ref = ak.unflatten(
        np.array([
            [1.053504214, 0.946495786],
            [1.033343349, 0.966656651],
            [1.065159157, 0.934840843],
            [1.033140127, 0.966859873],
            [1.016858652, 0.983141348],
            [1.130199999, 0.869800001],
            [1.039968468, 0.960031532],
            [1.033100002, 0.966899998],
        ]),
        counts,
    )
    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))

    for i, (level, corrs) in enumerate(juncs_jag):
        print("Index:", i)
        print("Correction level:", level)
        print("Reference Uncertainties (jagged):", juncs_jag_ref)
        print("Uncertainties (jagged):", corrs)
        assert ak.all(
            np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6)
Example #16
def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    rf = uproot.open(path)

    gh = hashlib.sha256()

    for tree_name in sorted(rf.keys()):
        gh.update(tree_name.encode("utf8"))

        try:
            tree = rf[tree_name]
            if not isinstance(tree, uproot.TTree):
                continue
        except NotImplementedError:
            continue
        keys = list(sorted(tree.keys()))

        branches = tree.arrays(library="ak")

        if not ordering_invariant:

            h = hashlib.sha256()
            for name in keys:
                h.update(name.encode("utf8"))
                arr = branches[name]
                arr = ak.flatten(arr, axis=None)
                arr = np.array(arr)
                h.update(arr.tobytes())
            gh.update(h.digest())

        else:
            items = np.array([])

            for row in zip(*[branches[b] for b in keys]):
                h = hashlib.md5()
                for obj in row:
                    if isinstance(obj, ak.highlevel.Array):
                        if obj.ndim == 1:
                            h.update(ak.to_numpy(obj).tobytes())
                        else:
                            arr = ak.to_numpy(ak.flatten(obj, axis=None))
                            h.update(arr.tobytes())
                    else:
                        h.update(np.array([obj]).tobytes())
                items = np.append(items, h.digest())

            items.sort()

            h = hashlib.sha256()
            h.update("".join(keys).encode("utf8"))
            h.update(items.tobytes())

            gh.update(h.digest())
    return gh.hexdigest()
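The ordering-invariant branch works because each row is hashed separately and the digests are sorted before the final hash, so any permutation of rows yields the same result. A minimal sketch of that property, independent of ROOT/uproot:

import hashlib
import numpy as np

def order_invariant_digest(rows):
    # hash each row, then sort the digests so row order cannot matter
    digests = sorted(hashlib.md5(row.tobytes()).digest() for row in rows)
    h = hashlib.sha256()
    for d in digests:
        h.update(d)
    return h.hexdigest()

rows = [np.array([1.0]), np.array([2.0])]
assert order_invariant_digest(rows) == order_invariant_digest(rows[::-1])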
Example #17
    def process(self, events):
        return (
            hist.Hist.new
            .Reg(100, 0, 200, name="ptj", label="Jet $p_{T}$ [GeV]")
            .Reg(100, -5, 5, name="etaj", label=r"Jet $\eta$")
            .Double()
            .fill(ak.flatten(events.Jet.pt), ak.flatten(events.Jet.eta))
        )
Example #18
def test():
    assert ak.flatten(ak.Array([[1, 2, 3], [], [4, 5]]), axis=0).tolist() == [
        [1, 2, 3],
        [],
        [4, 5],
    ]
    assert ak.flatten(ak.Array([1, 2, 3, 4, 5]), axis=0).tolist() == [1, 2, 3, 4, 5]
    assert ak.flatten(ak.Array([[1, 2, 3], [], [4, 5]]), axis=-2).tolist() == [
        [1, 2, 3],
        [],
        [4, 5],
    ]
    assert ak.flatten(ak.Array([1, 2, 3, 4, 5]), axis=-1).tolist() == [1, 2, 3, 4, 5]
Example #19
def get_event(ew, jet_name):
    """ Get the kinematics of a single event"""
    roots = getattr(ew, jet_name + "_Parent") == -1
    pts = ak.flatten(getattr(ew, jet_name + "_PT")[roots])
    top_4 = np.argsort(pts)[-4:]
    values = []
    variables = ["Energy", "Px", "Py", "Pz"]
    for variable in variables:
        vals = ak.flatten(getattr(ew, jet_name + "_" + variable)[roots])[top_4]
        values.append(vals)
        
    mass = np.sqrt(values[0]**2 - values[1]**2 - values[2]**2 - values[3]**2)
    values = [mass] + values
    return values
Example #20
def test_ByteMaskedArray_flatten():
    content = ak.from_iter(
        [
            [[0.0, 1.1, 2.2], [], [3.3, 4.4]],
            [],
            [[5.5]],
            [[6.6, 7.7, 8.8, 9.9]],
            [[], [10.0, 11.1, 12.2]],
        ],
        highlevel=False,
    )
    mask = ak.layout.Index8(np.array([0, 0, 1, 1, 0], dtype=np.int8))
    array = ak.Array(ak.layout.ByteMaskedArray(mask, content,
                                               valid_when=False))
    assert ak.to_list(array) == [
        [[0.0, 1.1, 2.2], [], [3.3, 4.4]],
        [],
        None,
        None,
        [[], [10.0, 11.1, 12.2]],
    ]
    assert ak.to_list(ak.flatten(array, axis=1)) == [
        [0.0, 1.1, 2.2],
        [],
        [3.3, 4.4],
        [],
        [10.0, 11.1, 12.2],
    ]
    assert ak.to_list(ak.flatten(array, axis=-2)) == [
        [0.0, 1.1, 2.2],
        [],
        [3.3, 4.4],
        [],
        [10.0, 11.1, 12.2],
    ]
    assert ak.to_list(ak.flatten(array, axis=2)) == [
        [0.0, 1.1, 2.2, 3.3, 4.4],
        [],
        None,
        None,
        [10.0, 11.1, 12.2],
    ]
    assert ak.to_list(ak.flatten(array, axis=-1)) == [
        [0.0, 1.1, 2.2, 3.3, 4.4],
        [],
        None,
        None,
        [10.0, 11.1, 12.2],
    ]
Example #21
def get_all_vars(varsIn, varSet, normMean, normStd):
    dSets = []
    dataSet = pd.DataFrame()
    for var in varSet:
        inputArr = varsIn[var][0]
        if variables[var][4] == 2:
            inputArr = np.repeat(ak.to_numpy(inputArr),
                                 ak.to_numpy(varsIn["njetsAK8"][0]))
        if variables[var][5] == 1:
            inputArr = ak.flatten(inputArr)
        elif variables[var][5] == 2:
            inputArr = ak.flatten(inputArr)
        dataSet[var] = inputArr
    dataSet = normalize(dataSet, normMean, normStd)
    return dataSet
Example #22
def test_upper_layers():
    # will need an eventwise with Parents, Children, MCPID
    # idx:    0   1  2  3   4  5  6   7  8  9  10
    # layer: -1   0  1  1  -1  2  2   3  3  3  -1
    children = [[], [2, 3], [5], [6, 5], [], [], [7, 8, 9], [], [], [], []]
    parents = [[], [], [1], [1], [], [2, 3], [3], [6], [6], [6], []]
    mcpid = [4, 5, 5, 3, 2, 1, -5, -1, 7, 11, 12]
    expected = [2, 6]
    labeler = PDGNames.IDConverter()
    with TempTestDir("tst") as dir_name:
        eventWise = Components.EventWise(os.path.join(dir_name, "tmp.parquet"))
        eventWise.append(Children=[ak.from_iter(children)],
                         Parents=[ak.from_iter(parents)],
                         MCPID=[ak.from_iter(mcpid)])
        eventWise.selected_event = 0
        expected_particle_idx = [0, 1, 2, 3, 4, 10]
        expected_children = ak.from_iter(
            [c for i in expected_particle_idx for c in children[i]])
        expected_parents = ak.from_iter(
            [p for i in expected_particle_idx for p in parents[i]])
        expected_labels = [labeler[mcpid[i]] for i in expected_particle_idx]
        shower = FormShower.upper_layers(eventWise, n_layers=2)
        order = np.argsort(shower.particle_idxs)
        tst.assert_allclose(shower.particle_idxs[order], expected_particle_idx)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.children[order])),
                            expected_children)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.parents[order])),
                            expected_parents)
        for a, b in zip(shower.labels[order], expected_labels):
            assert a == b
        # try with capture pids
        expected_particle_idx = [0, 1, 2, 3, 4, 5, 6, 10]
        expected_children = ak.from_iter(
            [c for i in expected_particle_idx for c in children[i]])
        expected_parents = ak.from_iter(
            [p for i in expected_particle_idx for p in parents[i]])
        expected_labels = [labeler[mcpid[i]] for i in expected_particle_idx]
        shower = FormShower.upper_layers(eventWise,
                                         n_layers=2,
                                         capture_pids=[1])
        order = np.argsort(shower.particle_idxs)
        tst.assert_allclose(shower.particle_idxs[order], expected_particle_idx)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.children[order])),
                            expected_children)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.parents[order])),
                            expected_parents)
        for a, b in zip(shower.labels[order], expected_labels):
            assert a == b
Example #23
def apply_roccor(df, rochester, is_mc):
    if is_mc:
        hasgen = ~np.isnan(ak.fill_none(df.Muon.matched_gen.pt, np.nan))
        mc_rand = np.random.rand(*ak.to_numpy(ak.flatten(df.Muon.pt)).shape)
        mc_rand = ak.unflatten(mc_rand, ak.num(df.Muon.pt, axis=1))

        corrections = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        errors = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        mc_kspread = rochester.kSpreadMC(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )

        mc_ksmear = rochester.kSmearMC(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )

        errspread = rochester.kSpreadMCerror(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )
        errsmear = rochester.kSmearMCerror(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )
        hasgen_flat = np.array(ak.flatten(hasgen))
        corrections[hasgen_flat] = np.array(ak.flatten(mc_kspread))
        corrections[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
        errors[hasgen_flat] = np.array(ak.flatten(errspread))
        errors[~hasgen_flat] = np.array(ak.flatten(errsmear))

        corrections = ak.unflatten(corrections, ak.num(df.Muon.pt, axis=1))
        errors = ak.unflatten(errors, ak.num(df.Muon.pt, axis=1))

    else:
        corrections = rochester.kScaleDT(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)
        errors = rochester.kScaleDTerror(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)

    df["Muon", "pt_roch"] = df.Muon.pt * corrections
    df["Muon", "pt_roch_up"] = df.Muon.pt_roch + df.Muon.pt * errors
    df["Muon", "pt_roch_down"] = df.Muon.pt_roch - df.Muon.pt * errors
Example #24
    def calculate_selection(self, syst_tag, events):
        """

        """
        electrons = events.ele
        electrons["label"] = -1 * awkward.ones_like(electrons.pt)
        if not self.is_data:
            electrons["label"] = awkward.where(
                electrons.genPartFlav == 1, awkward.ones_like(electrons.label),
                electrons.label)
            electrons["label"] = awkward.where(
                (electrons.genPartFlav == 3) | (electrons.genPartFlav == 4) |
                (electrons.genPartFlav == 5),
                awkward.zeros_like(electrons.label), electrons.label)

        fields = [x for x in events.fields if x not in ["ele", "Electron"]]
        for x in fields:
            if x == "ZCand":
                electrons[x] = awkward.firsts(events[x])
            else:
                electrons[x] = events[x]

        electrons = awkward.flatten(electrons)

        dummy_cut = electrons.pt >= 0
        return dummy_cut, electrons
Example #25
def _ak_to_numpy(ak_array, fields):
    """
    Convert the given awkward array to a numpy table.

    Parameters
    ----------
    ak_array : awkward.Array
        The awkward array, 2D or 3D.
    fields : List
        The column names of the last axis of the array.

    Returns
    -------
    np_branch : tuple
        Numpy-fied awkward array. See output of _branch_to_numpy.

    """
    n_dims = ak_array.ndim - 1
    if n_dims == 1:
        n_items = np.ones(len(ak_array), dtype="int64")
    elif n_dims == 2:
        n_items = ak.num(ak_array).to_numpy()
        ak_array = ak.flatten(ak_array)
    else:
        raise ValueError("Can not process array")

    filled = np.ma.filled(
        ak.pad_none(ak_array, target=len(fields), axis=-1).to_numpy(),
        fill_value=np.nan,
    )
    return {fields[i]: filled[:, i] for i in range(len(fields))}, n_items
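A short usage sketch of the padding step, with hypothetical input: ak.pad_none makes the last axis rectangular, .to_numpy() then yields a masked array, and np.ma.filled turns the missing slots into NaN:

import awkward as ak
import numpy as np

arr = ak.Array([[1.0, 2.0], [3.0]])  # last axis shorter than the field list
fields = ["a", "b"]
padded = ak.pad_none(arr, target=len(fields), axis=-1)
filled = np.ma.filled(padded.to_numpy(), fill_value=np.nan)
# filled == [[1., 2.], [3., nan]]; column i becomes the fields[i] branch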
Example #26
def test_jet_correction_regrouped_uncertainty_sources():
    from coffea.jetmet_tools import JetCorrectionUncertainty

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)

    junc_names = []
    levels = []
    for name in dir(evaluator):
        if 'Regrouped_Fall17_17Nov2017_V32_MC_UncertaintySources_AK4PFchs' in name:
            junc_names.append(name)
            if len(name.split('_')) == 9:
                levels.append("_".join(name.split('_')[-2:]))
            else:
                levels.append(name.split('_')[-1])
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name]
           for name in junc_names})

    print(junc)

    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))

    for i, tpl in enumerate(
            list(junc.getUncertainty(JetEta=test_eta, JetPt=test_pt))):
        assert (tpl[0] in levels)
        assert (tpl[1].shape[0] == test_eta.shape[0])
        assert (ak.all(tpl[1] == ak.flatten(juncs_jag[i][1])))
Example #27
def test_jet_correction_uncertainty_sources():
    from coffea.jetmet_tools import JetCorrectionUncertainty

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)

    junc_names = []
    levels = []
    for name in dir(evaluator):
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in name:
            junc_names.append(name)
            levels.append(name.split('_')[-1])
        #test for underscore in dataera
        if 'Fall17_17Nov2017_V6_MC_UncertaintySources_AK4PFchs_AbsoluteFlavMap' in name:
            junc_names.append(name)
            levels.append(name.split('_')[-1])
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name]
           for name in junc_names})

    print(junc)

    juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)

    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))

    for i, (level, corrs) in enumerate(juncs):
        assert (level in levels)
        assert (corrs.shape[0] == test_eta.shape[0])
        tic = time.time()
        assert (ak.all(corrs == ak.flatten(juncs_jag[i][1])))
        toc = time.time()
Example #28
def flatten_idxs(idx_in, jaggedarray):
    """
    This provides a faster way to convert between tuples of
    jagged indices and flat indices in a jagged array's contents
    """
    if len(idx_in) == 0:
        return numpy.array([], dtype=numpy.int64)  # numpy.int was removed in NumPy >= 1.24
    idx_out = jaggedarray.starts[idx_in[0]]
    if len(idx_in) == 1:
        pass
    elif len(idx_in) == 2:
        idx_out += idx_in[1]
    else:
        raise Exception(
            "jme_standard_function only works for two binning dimensions!")

    flattened = awkward.flatten(jaggedarray)
    good_idx = idx_out < len(flattened)
    if (~good_idx).any():
        input_idxs = tuple([idx_out[~good_idx]] +
                           [idx_in[i][~good_idx] for i in range(len(idx_in))])
        raise Exception("Calculated invalid index {} for"
                        " array with length {}".format(
                            numpy.vstack(input_idxs), len(flattened)))

    return idx_out
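jaggedarray.starts above is the awkward0-style JaggedArray interface; the same flat index can be derived from per-event counts in plain NumPy. A sketch under that assumption, with hypothetical list lengths:

import numpy

counts = numpy.array([2, 0, 3])  # lengths of [[a, b], [], [c, d, e]]
starts = numpy.concatenate(([0], numpy.cumsum(counts)[:-1]))  # [0, 2, 2]
event_idx = numpy.array([0, 2, 2])
sub_idx = numpy.array([1, 0, 2])
flat_idx = starts[event_idx] + sub_idx  # [1, 2, 4] -> b, c, e in the flattened contents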
Example #29
def test():
    array = ak.Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert ak.unflatten(array, 5).tolist() == [[0, 1, 2, 3, 4],
                                               [5, 6, 7, 8, 9]]
    assert ak.unflatten(array, [3, 0, 2, 1, 4]).tolist() == [
        [0, 1, 2],
        [],
        [3, 4],
        [5],
        [6, 7, 8, 9],
    ]
    assert ak.unflatten(array, [3, None, 2, 1, 4]).tolist() == [
        [0, 1, 2],
        None,
        [3, 4],
        [5],
        [6, 7, 8, 9],
    ]

    original = ak.Array([[0, 1, 2], [], [3, 4], [5], [6, 7, 8, 9]])
    counts = ak.num(original)
    array = ak.flatten(original)
    assert counts.tolist() == [3, 0, 2, 1, 4]
    assert array.tolist() == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    assert ak.unflatten(array, counts).tolist() == [
        [0, 1, 2],
        [],
        [3, 4],
        [5],
        [6, 7, 8, 9],
    ]
Example #30
def test_root_scalefactors():
    extractor = lookup_tools.extractor()
    extractor.add_weight_sets([
        "testSF2d scalefactors_Tight_Electron tests/samples/testSF2d.histo.root"
    ])

    extractor.finalize(reduce_list=["testSF2d"])

    evaluator = extractor.make_evaluator()

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # test flat eval
    test_out = evaluator["testSF2d"](test_eta, test_pt)

    # print it
    print(evaluator["testSF2d"])

    # test structured eval
    test_eta_jagged = ak.unflatten(test_eta, counts)
    test_pt_jagged = ak.unflatten(test_pt, counts)
    test_out_jagged = evaluator["testSF2d"](test_eta_jagged, test_pt_jagged)

    assert ak.all(ak.num(test_out_jagged) == counts)
    assert ak.all(ak.flatten(test_out_jagged) == test_out)

    print(test_out)

    diff = np.abs(test_out - _testSF2d_expected_output)
    print("Max diff: %.16f" % diff.max())
    print("Median diff: %.16f" % np.median(diff))
    print("Diff over threshold rate: %.1f %%" %
          (100 * (diff >= 1.0e-8).sum() / diff.size))
    assert (diff < 1.0e-8).all()