Example #1
def run_deltar_matching(store,
                        target,
                        drname='deltaR',
                        radius=0.4,
                        unique=False,
                        sort=False):
    """
  Running a delta R matching of some object collection "store" of dimension NxS
  with some target collection "target" of dimension NxT, The return object will
  have dimension NxSxT' where objects in the T' contain all "target" objects
  within the delta R radius. The delta R between the store and target object will
  be stored in the field `deltaR`. If the unique flag is turned on, then objects
  in the target collection will only be associated to the closest object. If the
  sort flag is turned on, then the target collection will be sorted according to
  the computed `deltaR`.
  """
    _, target = ak.unzip(ak.cartesian([store.eta, target], nested=True))
    target[drname] = delta_r(store, target)
    if unique:  # Additional filtering
        t_index = ak.argmin(target[drname], axis=-2)
        s_index = ak.local_index(store.eta, axis=-1)
        _, t_index = ak.unzip(ak.cartesian([s_index, t_index], nested=True))
        target = target[s_index == t_index]

    # Cutting on the computed delta R
    target = target[target[drname] < radius]

    # Sorting according to the computed delta R
    if sort:
        idx = ak.argsort(target[drname], axis=-1)
        target = target[idx]
    return target
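A minimal usage sketch (not from the original source): run_deltar_matching expects a delta_r helper in scope, so a toy one is defined here purely for illustration.

import awkward as ak
import numpy as np

def delta_r(a, b):
    # Toy delta-R helper assumed by run_deltar_matching:
    # sqrt(deta^2 + dphi^2) with dphi wrapped into [-pi, pi).
    deta = a.eta - b.eta
    dphi = (a.phi - b.phi + np.pi) % (2 * np.pi) - np.pi
    return np.sqrt(deta**2 + dphi**2)

jets = ak.zip({"eta": ak.Array([[0.0, 1.0]]), "phi": ak.Array([[0.0, 2.0]])})
gens = ak.zip({"eta": ak.Array([[0.1, 3.0]]), "phi": ak.Array([[0.05, -2.0]])})
matched = run_deltar_matching(jets, gens, radius=0.4)
# matched is N x S x T': per jet, the list of gen objects within deltaR < 0.4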
Example #2
def get_taus(self, apply_selection=True):
    events = self.get_events()
    taus_dict = {"e": events.tau_e, "pt": events.tau_pt, "eta": events.tau_eta, "phi": events.tau_phi,
                 "looseIsoAbs": events.tau_looseIsoAbs, "looseIsoRel": events.tau_looseIsoRel,
                 "mediumIsoAbs": events.tau_mediumIsoAbs, "mediumIsoRel": events.tau_mediumIsoRel,
                 "tightIsoAbs": events.tau_tightIsoAbs, "tightIsoRel": events.tau_tightIsoRel,
                 "gen_e": events.gen_tau_e, "gen_pt": events.gen_tau_pt, "gen_eta": events.gen_tau_eta,
                 "gen_phi": events.gen_tau_phi,
                 "lepton_gen_match": events.lepton_gen_match, "deepTau_VSjet": events.deepTau_VSjet}
    if not self.is_old:
        taus_dict["vz"] = events.tau_vz
    taus = ak.zip(taus_dict)
    index = ak.argsort(taus.pt, ascending=False)
    taus = taus[index]
    tau_1, tau_2 = ak.unzip(ak.combinations(taus, 2, axis=1))
    if not self.is_old and apply_selection:
        L1taus = ak.zip({"e": events.L1tau_e, "pt": events.L1tau_pt, "eta": events.L1tau_eta,
                         "phi": events.L1tau_phi})
        # apply L1 seed correction in case Pt28 and Pt30 seeds are considered
        L1taus, taus = L1seed_correction(L1taus, taus)
        # match taus with L1 taus
        taus = L1THLTTauMatching(L1taus, taus)
        tau_1, tau_2 = HLTJetPairDzMatchFilter(taus)
    # Return all possible pairs of taus which pass the preselection
    return tau_1, tau_2
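A toy illustration (hypothetical values) of the pair-building step above: sort by descending pt, then take all unique pairs per event.

import awkward as ak

taus = ak.zip({"pt": ak.Array([[30.0, 50.0, 40.0]])})
taus = taus[ak.argsort(taus.pt, ascending=False)]
tau_1, tau_2 = ak.unzip(ak.combinations(taus, 2, axis=1))
# tau_1.pt -> [[50.0, 50.0, 40.0]], tau_2.pt -> [[40.0, 30.0, 30.0]]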
Example #3
import awkward as ak
import pytest


def test():
    a = ak.Array([{"this": 100}])
    b = ak.Array([{"this": 90, "that": 100}])
    c = ak.concatenate((a, b))

    with pytest.raises(ValueError):
        ak.unzip(c)

    a = ak.Array([{"this": 100}])
    b = ak.Array([{"this": 90}])
    c = ak.concatenate((a, b))

    (tmp, ) = ak.unzip(c)

    assert tmp.tolist() == [100, 90]
Example #4
    def metric_table(self,
                     other,
                     axis=1,
                     metric=lambda a, b: a.delta_r(b),
                     return_combinations=False):
        """Return a list of a metric evaluated between this object and another.

        The two arrays should be broadcast-compatible on all axes other than the
        specified axis, which will be used to form a cartesian product. If
        axis=None, the arrays are broadcast directly. The return shape will be
        that of ``self`` with a new axis of the shape of ``other`` appended at
        the specified axis depth.

        Parameters
        ----------
            other : awkward.Array
                Another array with same shape in all but ``axis``
            axis : int, optional
                The axis to form the cartesian product (default 1). If None, the metric
                is directly evaluated on the input arrays (i.e. they should broadcast)
            metric : callable
                A function of two arguments, returning a scalar. The default metric is `delta_r`.
            return_combinations : bool
                If True, also return the combinations of inputs as an unzipped tuple
        """
        if axis is None:
            a, b = self, other
        else:
            a, b = awkward.unzip(
                awkward.cartesian([self, other], axis=axis, nested=True))
        mval = metric(a, b)
        if return_combinations:
            return mval, (a, b)
        return mval
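A small sketch (toy arrays) of what the axis=1 path computes: a nested cartesian product is unzipped into broadcastable pairs, and the metric is applied elementwise.

import awkward as ak

a = ak.zip({"eta": ak.Array([[0.0, 1.0], [2.0]])})
b = ak.zip({"eta": ak.Array([[0.5], [1.5, 2.5]])})
pa, pb = ak.unzip(ak.cartesian([a, b], axis=1, nested=True))
mval = abs(pa.eta - pb.eta)  # shape: events x len(a) x len(b)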
Example #5
def get_gen_taus(self):
    events = self.get_gen_events()
    gen_taus = ak.zip({"gen_e": events.gen_tau_e, "gen_pt": events.gen_tau_pt, "gen_eta": events.gen_tau_eta,
                       "gen_phi": events.gen_tau_phi,
                       "lepton_gen_match": events.lepton_gen_match})
    index = ak.argsort(gen_taus.gen_pt, ascending=False)
    gen_taus = gen_taus[index]
    gen_tau_1, gen_tau_2 = ak.unzip(ak.combinations(gen_taus, 2, axis=1))
    return gen_tau_1, gen_tau_2
Example #6
def L1THLTTauMatching(L1taus, taus):
    dR_matching = 0.5
    tau_inpair, L1_inpair = ak.unzip(ak.cartesian([taus, L1taus], nested=True))
    dR = delta_r(tau_inpair, L1_inpair)
    # keep only taus that are matched to at least one L1 tau within dR_matching
    mask = ak.sum(dR < dR_matching, axis=-1) > 0
    L2taus = taus[mask]
    return L2taus
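A toy check (assumed values) of the mask logic above: dR has shape events x taus x L1taus, so summing over the last axis counts the L1 matches per tau.

import awkward as ak

dR = ak.Array([[[0.1, 0.9], [0.8, 0.7]]])  # 1 event, 2 taus x 2 L1 taus
mask = ak.sum(dR < 0.5, axis=-1) > 0       # [[True, False]]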
Example #7
def HLTJetPairDzMatchFilter(L2taus):
    jetMinPt = 20.0
    jetMaxEta = 2.1
    jetMinDR = 0.5
    jetMaxDZ = 0.2
    L2taus = L2taus[reco_tau_selection(L2taus,
                                       minPt=jetMinPt,
                                       maxEta=jetMaxEta)]
    # Take all possible pairs of L2 taus
    L2tau_1, L2tau_2 = ak.unzip(ak.combinations(L2taus, 2, axis=1))
    dr2 = delta_r2(L2tau_1, L2tau_2)
    dz = delta_z(L2tau_1, L2tau_2)
    pair_mask = (dr2 >= jetMinDR * jetMinDR) & (abs(dz) <= jetMaxDZ)
    # ev_mask = ak.sum(pair_mask, axis=1) > 0

    return L2tau_1[pair_mask], L2tau_2[pair_mask]
Example #8
def eval_model(model, dataset, config, outdir):

    ibatch = 0

    jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)

    for elem in tqdm(dataset, desc="Evaluating model"):
        y_pred = model.predict(elem["X"], verbose=False)

        np_outfile = "{}/pred_batch{}.npz".format(outdir, ibatch)

        ygen = unpack_target(elem["ygen"],
                             config["dataset"]["num_output_classes"], config)
        ycand = unpack_target(elem["ycand"],
                              config["dataset"]["num_output_classes"], config)

        outs = {}

        for key in y_pred.keys():
            outs["gen_{}".format(key)] = ygen[key].numpy()
            outs["cand_{}".format(key)] = ycand[key].numpy()
            outs["pred_{}".format(key)] = y_pred[key]

        jets_coll = {}
        jets_const = {}
        for typ in ["gen", "cand", "pred"]:
            cls_id = np.argmax(outs["{}_cls".format(typ)], axis=-1)
            valid = cls_id != 0
            pt = awkward.from_iter(
                [y[m][:, 0] for y, m in zip(outs["{}_pt".format(typ)], valid)])
            eta = awkward.from_iter([
                y[m][:, 0] for y, m in zip(outs["{}_eta".format(typ)], valid)
            ])

            phi = np.arctan2(outs["{}_sin_phi".format(typ)],
                             outs["{}_cos_phi".format(typ)])
            phi = awkward.from_iter([y[m][:, 0] for y, m in zip(phi, valid)])
            e = awkward.from_iter([
                y[m][:, 0]
                for y, m in zip(outs["{}_energy".format(typ)], valid)
            ])

            vec = vector.arr({"pt": pt, "eta": eta, "phi": phi, "e": e})

            cluster = fastjet.ClusterSequence(vec.to_xyzt(), jetdef)

            jets = cluster.inclusive_jets()
            jet_constituents = cluster.constituent_index()
            jets_coll[typ] = jets[jets.pt > 5.0]
            jets_const[typ] = jet_constituents[jets.pt > 5.0]

        for key in ["pt", "eta", "phi", "energy"]:
            outs["jets_gen_{}".format(key)] = awkward.to_numpy(
                awkward.flatten(getattr(jets_coll["gen"], key)))
            outs["jets_cand_{}".format(key)] = awkward.to_numpy(
                awkward.flatten(getattr(jets_coll["cand"], key)))
            outs["jets_pred_{}".format(key)] = awkward.to_numpy(
                awkward.flatten(getattr(jets_coll["pred"], key)))

        # DeltaR match between genjets and PF/MLPF jets
        cart = awkward.cartesian([jets_coll["gen"], jets_coll["pred"]],
                                 nested=True)
        jets_a, jets_b = awkward.unzip(cart)
        drs = deltar(jets_a, jets_b)
        match_gen_to_pred = [awkward.where(d < 0.1) for d in drs]
        m0 = awkward.from_iter([m[0] for m in match_gen_to_pred])
        m1 = awkward.from_iter([m[1] for m in match_gen_to_pred])
        j1s = jets_coll["gen"][m0]
        j2s = jets_coll["pred"][m1]

        outs["jets_pt_gen_to_pred"] = np.stack([
            awkward.to_numpy(awkward.flatten(j1s.pt)),
            awkward.to_numpy(awkward.flatten(j2s.pt))
        ],
                                               axis=-1)

        cart = awkward.cartesian([jets_coll["gen"], jets_coll["cand"]],
                                 nested=True)
        jets_a, jets_b = awkward.unzip(cart)
        drs = deltar(jets_a, jets_b)
        match_gen_to_cand = [awkward.where(d < 0.1) for d in drs]
        m0 = awkward.from_iter([m[0] for m in match_gen_to_cand])
        m1 = awkward.from_iter([m[1] for m in match_gen_to_cand])
        j1s = jets_coll["gen"][m0]
        j2s = jets_coll["cand"][m1]

        outs["jets_pt_gen_to_cand"] = np.stack([
            awkward.to_numpy(awkward.flatten(j1s.pt)),
            awkward.to_numpy(awkward.flatten(j2s.pt))
        ],
                                               axis=-1)

        np.savez(np_outfile, X=elem["X"], **outs)

        ibatch += 1
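A toy illustration (assumed values) of the matching pattern above: for a per-event gen x pred deltaR matrix, awkward.where on a boolean mask behaves like np.nonzero, yielding the matched gen and pred indices.

import awkward

drs = awkward.Array([[[0.05, 1.2], [0.9, 0.02]]])  # 1 event: 2 gen x 2 pred jets
matches = [awkward.where(d < 0.1) for d in drs]
m0 = awkward.from_iter([m[0] for m in matches])  # matched gen-jet indices per event
m1 = awkward.from_iter([m[1] for m in matches])  # matched pred-jet indices per event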
Example #9
def parse_to_parquet(base_output_filename: Union[Path, str], store_only_necessary_columns: bool,
                     input_filename: Union[Path, str], events_per_chunk: int, parser: str = "pandas",
                     max_chunks: int = -1, compression: str = "zstd", compression_level: Optional[int] = None) -> None:
    """ Parse the JETSCAPE ASCII and convert it to parquet, (potentially) storing only the minimum necessary columns.

    Args:
        base_output_filename: Basic output filename. Should include the entire path.
        store_only_necessary_columns: If True, store only the necessary columns, rather than all of them.
        input_filename: Filename of the input JETSCAPE ASCII file.
        events_per_chunk: Number of events to be read per chunk.
        parser: Name of the parser. Default: "pandas".
        max_chunks: Maximum number of chunks to read. Default: -1.
        compression: Compression algorithm for parquet. Default: "zstd". Options include: ["snappy", "gzip", "zstd"].
            "gzip" is slightly better for storage, but slower. See the compression tests and parquet docs for more.
        compression_level: Compression level for parquet. Default: `None`, which lets parquet choose the best value.
    Returns:
        None. The parsed events are stored in parquet files.
    """
    # Validation
    base_output_filename = Path(base_output_filename)
    # Setup the base output directory
    base_output_filename.parent.mkdir(parents=True, exist_ok=True)
    # We will check which fields actually exist when writing.
    possible_fields_containing_floats = ["event_plane_angle", "event_weight", "cross_section", "cross_section_error", "px", "py", "pz", "E"]

    for i, arrays in enumerate(read(filename=input_filename, events_per_chunk=events_per_chunk, parser=parser)):
        # Reduce to the minimum required data.
        if store_only_necessary_columns:
            arrays = full_events_to_only_necessary_columns_E_px_py_pz(arrays)
        else:
            # To match the steps taken when reducing the columns, we re-zip with the depth limited to 1.
            # As of April 2021, I'm not certain this is truly required anymore, but it may be needed
            # for parquet writing to succeed (apparently parquet couldn't handle lists of structs at
            # some point in 2020; the status in April 2021 is unclear, but not worth digging into now).
            arrays = ak.zip(
                dict(zip(ak.fields(arrays), ak.unzip(arrays))),
                depth_limit = 1
            )

        # If converting in chunks, add an index to the output file so the chunks don't overwrite each other.
        if events_per_chunk > 0:
            suffix = base_output_filename.suffix
            output_filename = (base_output_filename.parent / f"{base_output_filename.stem}_{i:02}").with_suffix(suffix)
        else:
            output_filename = base_output_filename

        # Optimize the output
        # Additional parquet options are based on https://stackoverflow.com/a/66854439/12907985
        # byte_stream_fields apparently only works for float fields. Other fields should be handled
        # by use_dictionary. Apparently it can't handle this automatically, so we have to define it
        # ourselves. This is a bit brittle if fields change, but they don't change so often, and
        # it's simpler than parsing field types, so it should be fine for now.
        byte_stream_fields = [field for field in ak.fields(arrays) if field in possible_fields_containing_floats]
        dict_fields = [field for field in ak.fields(arrays) if field not in possible_fields_containing_floats]
        # logger.debug(f"dict_fields: {dict_fields}")
        # logger.debug(f"byte_stream_fields: {byte_stream_fields}")

        # Parquet with zlib seems to do about the same as ascii tar.gz when we drop unneeded columns.
        # And it should load much faster!
        ak.to_parquet(
            arrays, output_filename,
            compression=compression, compression_level=compression_level,
            explode_records=False,
            # Additional parquet options are based on https://stackoverflow.com/a/66854439/12907985
            #use_dictionary=True,
            #use_byte_stream_split=True,
            use_dictionary=dict_fields,
            use_byte_stream_split=byte_stream_fields,
        )

        # Break now so we don't have to read the next chunk.
        if (i + 1) == max_chunks:
            break
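A minimal sketch (toy record array) of the re-zip step above: depth_limit=1 keeps the fields as parallel top-level arrays rather than records nested inside lists.

import awkward as ak

arrays = ak.Array([{"px": [1.0, 2.0], "py": [0.5, 0.1]}])
flat = ak.zip(dict(zip(ak.fields(arrays), ak.unzip(arrays))), depth_limit=1)
# flat has the same fields, but zipped only at the top level (no list-of-struct nesting)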
Example #10
def process(self, events):
    output = self._accumulator.identity()
    jets = events.Jet
    jetSel = (jets.pt > 30) & (abs(jets.eta) < 2.4)
    tightJet = jets[jetSel]
    bJet = tightJet[tightJet.btagDeepFlavB > 0.642]
    muons = events.Muon
    muonSel = (muons.pt > 30) & (abs(muons.eta) < 2.4)
    tightMuon = muons[muonSel]
    ele = events.Electron
    eleSel = (ele.pt > 35) & (abs(ele.eta) < 2.4)
    tightEle = ele[eleSel]
    eventSel = (((ak.num(tightMuon) == 1) | (ak.num(tightEle) == 1)) &
                (ak.num(tightJet) >= 3) & (ak.num(bJet) >= 1))
    final = events[eventSel]

    ##### GENPART MATCHING #####

    genPart = final.GenPart
    tops = genPart[abs(genPart.pdgId) == 6]
    # The isLastCopy flag filters out copied GenParticles:
    tops = tops[tops.hasFlags('isLastCopy')]
    tDecay = tops.distinctChildren
    tDecay = tDecay[tDecay.hasFlags('isLastCopy')]
    t_Events = tDecay[abs(tDecay.pdgId) == 5]
    W = tDecay[abs(tDecay.pdgId) == 24]
    W = W[W.hasFlags('isLastCopy')]
    WDecay = W.distinctChildren
    WDecay = WDecay[WDecay.hasFlags('isLastCopy')]
    # t_Events holds the lone bottom quark; W_Events holds the W -> two-jet decays
    # select the hadronically decaying W
    W_Events = ak.flatten(WDecay[ak.all(abs(WDecay.pdgId) <= 8, axis=-1)], axis=3)
    # hadW is a mask for quark-decaying W bosons
    hadW = ak.num(W_Events, axis=2) == 2
    # keep t_Events whose W boson decays hadronically
    hadB = t_Events[hadW]
    hadB = ak.flatten(hadB, axis=2)
    W_quarks = W_Events[hadW]
    W_quarks = ak.flatten(W_quarks, axis=2)
    # concatenating these two arrays makes an array of events with the correctly decaying GenParticles
    qqb = ak.concatenate([hadB, W_quarks], axis=1)

    ##### GEN JET MATCHING #####
    final = final[(ak.count(qqb.pdgId, axis=1) == 3)]
    finaljets = final.Jet
    qqb = qqb[(ak.count(qqb.pdgId, axis=1) == 3)]
    # Implement tight jet cuts on the training data
    finaljetSel = (abs(finaljets.eta) < 2.4) & (finaljets.pt > 30)
    finalJets = finaljets[finaljetSel]
    # Match gen particles to gen jets
    matchedGenJets = qqb.nearest(final.GenJet)
    # Match gen to reco
    matchedJets = matchedGenJets.nearest(finalJets)

    ### VALIDATION ###
    test = matchedJets.genJetIdx
    combs = ak.combinations(finalJets, 3, replacement=False)
    t1 = ((combs['0'].genJetIdx == test[:, 0]) | (combs['0'].genJetIdx == test[:, 1]) |
          (combs['0'].genJetIdx == test[:, 2]))
    t2 = ((combs['1'].genJetIdx == test[:, 0]) | (combs['1'].genJetIdx == test[:, 1]) |
          (combs['1'].genJetIdx == test[:, 2]))
    t3 = ((combs['2'].genJetIdx == test[:, 0]) | (combs['2'].genJetIdx == test[:, 1]) |
          (combs['2'].genJetIdx == test[:, 2]))
    t = t1 & t2 & t3

    trutharray = ak.flatten(t)
    jetcombos = ak.flatten(combs)
    j1, j2, j3 = ak.unzip(jetcombos)
    output["dR12"] += processor.column_accumulator(ak.to_numpy(j1.delta_r(j2)))
    output["dR13"] += processor.column_accumulator(ak.to_numpy(j1.delta_r(j3)))
    output["dR23"] += processor.column_accumulator(ak.to_numpy(j2.delta_r(j3)))
    output["j1btag"] += processor.column_accumulator(ak.to_numpy(j1.btagCSVV2))
    output["j2btag"] += processor.column_accumulator(ak.to_numpy(j2.btagCSVV2))
    output["j3btag"] += processor.column_accumulator(ak.to_numpy(j3.btagCSVV2))
    output["j1area"] += processor.column_accumulator(ak.to_numpy(j1.area))
    output["j2area"] += processor.column_accumulator(ak.to_numpy(j2.area))
    output["j3area"] += processor.column_accumulator(ak.to_numpy(j3.area))
    output["j12deta"] += processor.column_accumulator(ak.to_numpy(j1.eta - j2.eta))
    output["j23deta"] += processor.column_accumulator(ak.to_numpy(j2.eta - j3.eta))
    output["j13deta"] += processor.column_accumulator(ak.to_numpy(j1.eta - j3.eta))
    output["j12dphi"] += processor.column_accumulator(ak.to_numpy(j1.phi - j2.phi))
    output["j23dphi"] += processor.column_accumulator(ak.to_numpy(j2.phi - j3.phi))
    output["j13dphi"] += processor.column_accumulator(ak.to_numpy(j1.phi - j3.phi))
    output["j1j2mass"] += processor.column_accumulator(ak.to_numpy(j1.mass + j2.mass))
    output["j2j3mass"] += processor.column_accumulator(ak.to_numpy(j2.mass + j3.mass))
    output["j1j3mass"] += processor.column_accumulator(ak.to_numpy(j1.mass + j3.mass))
    output["j1pt"] += processor.column_accumulator(ak.to_numpy(j1.pt))
    output["j1phi"] += processor.column_accumulator(ak.to_numpy(j1.phi))
    output["j1eta"] += processor.column_accumulator(ak.to_numpy(abs(j1.eta)))
    output["j1mass"] += processor.column_accumulator(ak.to_numpy(j1.mass))
    output["j2pt"] += processor.column_accumulator(ak.to_numpy(j2.pt))
    output["j2phi"] += processor.column_accumulator(ak.to_numpy(j2.phi))
    output["j2eta"] += processor.column_accumulator(ak.to_numpy(abs(j2.eta)))
    output["j2mass"] += processor.column_accumulator(ak.to_numpy(j2.mass))
    output["j3pt"] += processor.column_accumulator(ak.to_numpy(j3.pt))
    output["j3phi"] += processor.column_accumulator(ak.to_numpy(j3.phi))
    output["j3eta"] += processor.column_accumulator(ak.to_numpy(abs(j3.eta)))
    output["j3mass"] += processor.column_accumulator(ak.to_numpy(j3.mass))
    output["event"] += processor.column_accumulator(ak.to_numpy(ak.flatten(ak.broadcast_arrays(final.event, combs['0'].pt)[0])))
    output["truth"] += processor.column_accumulator(ak.to_numpy(trutharray).astype(int))

    return output
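A toy illustration (hypothetical jets) of the combination pattern used in the validation step above: ak.combinations yields records with string fields '0', '1', '2' that can be indexed directly or unzipped.

import awkward as ak

jets = ak.Array([[{"pt": 50.0}, {"pt": 40.0}, {"pt": 30.0}, {"pt": 20.0}]])
combs = ak.combinations(jets, 3, replacement=False)
first = combs['0']             # the first jet of each triplet
j1, j2, j3 = ak.unzip(combs)   # or unzip all three at once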
Example #11
Kst_parent_cut_pi = abs(Kst["pi"]["pdg"]) == abs(lp.Kst_892_0.pdgid)
Kst_parent_cut_k = abs(Kst["k"]["pdg"]) == abs(lp.Kst_892_0.pdgid)
Kst_parent_cut_all = np.logical_and(Kst_parent_cut_pi, Kst_parent_cut_k)
Kst = Kst[Kst_parent_cut_all]
#Ensure kaon and pion come from the same K*
Kst_parent_cut_kpi = Kst["pi","MC_parentindex"] == Kst["k","MC_parentindex"]
Kst = Kst[Kst_parent_cut_kpi]

for comp in ["x","y","z"]:
    Kst[f"p{comp}"] = Kst["k",f"p{comp}"] + Kst["pi",f"p{comp}"]
Kst["p"] = np.sqrt(Kst["px"]**2 + Kst["py"]**2 + Kst["pz"]**2)
Kst["e"] = np.sqrt(Kst["p"]**2 + Kst["mass"]**2)


tau = ak.combinations(pions,3)
tau["pi_1"], tau["pi_2"], tau["pi_3"] = ak.unzip(tau)

#Charge cut on the pions
tau_charge_cut_1 = np.sign(tau["pi_1","charge"]) != np.sign(tau["pi_3","charge"])
tau_charge_cut_2 = np.sign(tau["pi_2","charge"]) != np.sign(tau["pi_3","charge"])
tau_charge_cut = np.logical_and(tau_charge_cut_1, tau_charge_cut_2)

tau = tau[tau_charge_cut]

PDG_pi_m = lp.pi_plus.mass/1000.
tau["mass"] = kinematics_flat.mass([tau["pi_1"], tau["pi_2"], tau["pi_3"]], [PDG_pi_m, PDG_pi_m, PDG_pi_m])
tau_m_low_cut_val = 3*PDG_pi_m
tau_m_high_cut_val = lp.tau_plus.mass/1000.
tau_m_low_cut = tau["mass"] >= tau_m_low_cut_val
tau_m_high_cut = tau["mass"] <= lp.tau_plus.mass/1000.
tau_m_cut = np.logical_and(tau_m_low_cut, tau_m_high_cut)
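A minimal sketch (toy pions) of the naming trick used above: the unzipped combination fields are written back into the same record array under readable names.

import awkward as ak

pions = ak.Array([[{"charge": 1}, {"charge": -1}, {"charge": 1}]])
tau = ak.combinations(pions, 3)
tau["pi_1"], tau["pi_2"], tau["pi_3"] = ak.unzip(tau)
# tau now has fields '0', '1', '2', 'pi_1', 'pi_2', 'pi_3'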
Example #12
def test_zip():
    x = ak.Array([[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]])
    y = ak.Array([1.1, 2.2, 3.3, 4.4, 5.5])

    one = ak.zip({"x": x, "y": y})
    two = ak.zip({"x": x, "y": y}, depth_limit=1)
    xx, yy = ak.unzip(two)
    assert isinstance(one.layout, ak.layout.Content)
    assert isinstance(two.layout, ak.layout.Content)
    assert isinstance(xx.layout, ak.layout.Content)
    assert isinstance(yy.layout, ak.layout.Content)
    assert ak.to_list(one) == [
        [{
            "x": 1,
            "y": 1.1
        }, {
            "x": 2,
            "y": 1.1
        }, {
            "x": 3,
            "y": 1.1
        }],
        [],
        [{
            "x": 4,
            "y": 3.3
        }, {
            "x": 5,
            "y": 3.3
        }],
        [{
            "x": 6,
            "y": 4.4
        }],
        [
            {
                "x": 7,
                "y": 5.5
            },
            {
                "x": 8,
                "y": 5.5
            },
            {
                "x": 9,
                "y": 5.5
            },
            {
                "x": 10,
                "y": 5.5
            },
        ],
    ]
    assert ak.to_list(two) == [
        {
            "x": [1, 2, 3],
            "y": 1.1
        },
        {
            "x": [],
            "y": 2.2
        },
        {
            "x": [4, 5],
            "y": 3.3
        },
        {
            "x": [6],
            "y": 4.4
        },
        {
            "x": [7, 8, 9, 10],
            "y": 5.5
        },
    ]
    if not ak._util.py27 and not ak._util.py35:
        assert ak.to_list(xx) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
        assert ak.to_list(yy) == [1.1, 2.2, 3.3, 4.4, 5.5]

    x = ak.repartition(x, 3)
    assert isinstance(x.layout, ak.partition.PartitionedArray)
    assert ak.to_list(x) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]

    one = ak.zip({"x": x, "y": y})
    two = ak.zip({"x": x, "y": y}, depth_limit=1)
    xx, yy = ak.unzip(two)
    assert isinstance(one.layout, ak.partition.PartitionedArray)
    assert isinstance(two.layout, ak.partition.PartitionedArray)
    assert isinstance(xx.layout, ak.partition.PartitionedArray)
    assert isinstance(yy.layout, ak.partition.PartitionedArray)
    assert ak.to_list(one) == [
        [{
            "x": 1,
            "y": 1.1
        }, {
            "x": 2,
            "y": 1.1
        }, {
            "x": 3,
            "y": 1.1
        }],
        [],
        [{
            "x": 4,
            "y": 3.3
        }, {
            "x": 5,
            "y": 3.3
        }],
        [{
            "x": 6,
            "y": 4.4
        }],
        [
            {
                "x": 7,
                "y": 5.5
            },
            {
                "x": 8,
                "y": 5.5
            },
            {
                "x": 9,
                "y": 5.5
            },
            {
                "x": 10,
                "y": 5.5
            },
        ],
    ]
    assert ak.to_list(two) == [
        {
            "x": [1, 2, 3],
            "y": 1.1
        },
        {
            "x": [],
            "y": 2.2
        },
        {
            "x": [4, 5],
            "y": 3.3
        },
        {
            "x": [6],
            "y": 4.4
        },
        {
            "x": [7, 8, 9, 10],
            "y": 5.5
        },
    ]
    if not ak._util.py27 and not ak._util.py35:
        assert ak.to_list(xx) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
        assert ak.to_list(yy) == [1.1, 2.2, 3.3, 4.4, 5.5]

    y = ak.repartition(y, 2)
    assert isinstance(x.layout, ak.partition.PartitionedArray)
    assert ak.to_list(y) == [1.1, 2.2, 3.3, 4.4, 5.5]

    one = ak.zip({"x": x, "y": y})
    two = ak.zip({"x": x, "y": y}, depth_limit=1)
    xx, yy = ak.unzip(two)
    assert isinstance(one.layout, ak.partition.PartitionedArray)
    assert isinstance(two.layout, ak.partition.PartitionedArray)
    assert isinstance(xx.layout, ak.partition.PartitionedArray)
    assert isinstance(yy.layout, ak.partition.PartitionedArray)
    assert ak.to_list(one) == [
        [{
            "x": 1,
            "y": 1.1
        }, {
            "x": 2,
            "y": 1.1
        }, {
            "x": 3,
            "y": 1.1
        }],
        [],
        [{
            "x": 4,
            "y": 3.3
        }, {
            "x": 5,
            "y": 3.3
        }],
        [{
            "x": 6,
            "y": 4.4
        }],
        [
            {
                "x": 7,
                "y": 5.5
            },
            {
                "x": 8,
                "y": 5.5
            },
            {
                "x": 9,
                "y": 5.5
            },
            {
                "x": 10,
                "y": 5.5
            },
        ],
    ]
    assert ak.to_list(two) == [
        {
            "x": [1, 2, 3],
            "y": 1.1
        },
        {
            "x": [],
            "y": 2.2
        },
        {
            "x": [4, 5],
            "y": 3.3
        },
        {
            "x": [6],
            "y": 4.4
        },
        {
            "x": [7, 8, 9, 10],
            "y": 5.5
        },
    ]
    if not ak._util.py27 and not ak._util.py35:
        assert ak.to_list(xx) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
        assert ak.to_list(yy) == [1.1, 2.2, 3.3, 4.4, 5.5]

    x = ak.repartition(x, None)
    assert isinstance(x.layout, ak.layout.Content)
    assert ak.to_list(x) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]

    one = ak.zip({"x": x, "y": y})
    two = ak.zip({"x": x, "y": y}, depth_limit=1)
    xx, yy = ak.unzip(two)
    assert isinstance(one.layout, ak.partition.PartitionedArray)
    assert isinstance(two.layout, ak.partition.PartitionedArray)
    assert isinstance(xx.layout, ak.partition.PartitionedArray)
    assert isinstance(yy.layout, ak.partition.PartitionedArray)
    assert ak.to_list(one) == [
        [{
            "x": 1,
            "y": 1.1
        }, {
            "x": 2,
            "y": 1.1
        }, {
            "x": 3,
            "y": 1.1
        }],
        [],
        [{
            "x": 4,
            "y": 3.3
        }, {
            "x": 5,
            "y": 3.3
        }],
        [{
            "x": 6,
            "y": 4.4
        }],
        [
            {
                "x": 7,
                "y": 5.5
            },
            {
                "x": 8,
                "y": 5.5
            },
            {
                "x": 9,
                "y": 5.5
            },
            {
                "x": 10,
                "y": 5.5
            },
        ],
    ]
    assert ak.to_list(two) == [
        {
            "x": [1, 2, 3],
            "y": 1.1
        },
        {
            "x": [],
            "y": 2.2
        },
        {
            "x": [4, 5],
            "y": 3.3
        },
        {
            "x": [6],
            "y": 4.4
        },
        {
            "x": [7, 8, 9, 10],
            "y": 5.5
        },
    ]
    if not ak._util.py27 and not ak._util.py35:
        assert ak.to_list(xx) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
        assert ak.to_list(yy) == [1.1, 2.2, 3.3, 4.4, 5.5]

    y = ak.repartition(y, None)
    assert isinstance(y.layout, ak.layout.Content)
    assert ak.to_list(y) == [1.1, 2.2, 3.3, 4.4, 5.5]

    one = ak.zip({"x": x, "y": y})
    two = ak.zip({"x": x, "y": y}, depth_limit=1)
    xx, yy = ak.unzip(two)
    assert isinstance(one.layout, ak.layout.Content)
    assert isinstance(two.layout, ak.layout.Content)
    assert isinstance(xx.layout, ak.layout.Content)
    assert isinstance(yy.layout, ak.layout.Content)
    assert ak.to_list(one) == [
        [{
            "x": 1,
            "y": 1.1
        }, {
            "x": 2,
            "y": 1.1
        }, {
            "x": 3,
            "y": 1.1
        }],
        [],
        [{
            "x": 4,
            "y": 3.3
        }, {
            "x": 5,
            "y": 3.3
        }],
        [{
            "x": 6,
            "y": 4.4
        }],
        [
            {
                "x": 7,
                "y": 5.5
            },
            {
                "x": 8,
                "y": 5.5
            },
            {
                "x": 9,
                "y": 5.5
            },
            {
                "x": 10,
                "y": 5.5
            },
        ],
    ]
    assert ak.to_list(two) == [
        {
            "x": [1, 2, 3],
            "y": 1.1
        },
        {
            "x": [],
            "y": 2.2
        },
        {
            "x": [4, 5],
            "y": 3.3
        },
        {
            "x": [6],
            "y": 4.4
        },
        {
            "x": [7, 8, 9, 10],
            "y": 5.5
        },
    ]
    if not ak._util.py27 and not ak._util.py35:
        assert ak.to_list(xx) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
        assert ak.to_list(yy) == [1.1, 2.2, 3.3, 4.4, 5.5]
Example #13
def run(B_type):

    #Load the MC
    file_name = f"p8_ee_Zbb_ecm91_EvtGen_{B_type}2TauNuTAUHADNU"
    file = uproot.open(f"{loc.IN}/{mode}/{file_name}.root")
    tree = file['events']

    #Awkward array of the MC - filter event variables if running on Bu sample
    if (B_type == "Bc"):
        events = tree.arrays(library="ak", how="zip")
    #Keep a manageable subset of the Bu events (1M events are too many to handle in memory)
    else:
        events = tree.arrays(library="ak", how="zip", entry_stop=20000)

    #Reco particles
    rp = events["RP"]

    #Index cut to remove negative indices (particles produced before hadronisation)
    parentindex_cut = rp["MC_parentindex"] >= 0
    rp = rp[parentindex_cut]
    grandparentindex_cut = rp["MC_grandparentindex"] >= 0
    rp = rp[grandparentindex_cut]

    #Match to MC truth partners
    rp["parent_pdg"] = events["MC", "pdg"][rp["MC_parentindex"]]
    rp["grandparent_pdg"] = events["MC", "pdg"][rp["MC_grandparentindex"]]
    rp["pdg"] = events["MC", "pdg"][rp["MC_index"]]

    #Get the production vertex of the track (truth-level)
    for v in ["x", "y", "z"]:
        rp[f"vertex_{v}"] = events["MC", f"vertex_{v}"][rp["MC_index"]]

    #Get the production vertex of the parent of the tracks (for the pions, this will give the tau production vertex i.e. the Bc decay vertex)
    for v in ["x", "y", "z"]:
        rp[f"parent_vertex_{v}"] = events["MC",
                                          f"vertex_{v}"][rp["MC_parentindex"]]

    #Separation of vertex from PV (combined tau and Bc flight)
    rp["PVsep"] = np.sqrt(rp[f"vertex_x"]**2 + rp[f"vertex_y"]**2 +
                          rp[f"vertex_z"]**2)

    #Separation of the Bc vertex from the PV
    rp["PVsep_parent"] = np.sqrt(rp[f"parent_vertex_x"]**2 +
                                 rp[f"parent_vertex_y"]**2 +
                                 rp[f"parent_vertex_z"]**2)

    #Charged tracks to make tau from, will truth match them to pi+/- ID below
    p_cut = rp["p"] > 0.
    pions = rp[p_cut]

    #Keep charged tracks
    charge_cut = abs(pions["charge"]) == 1
    pions = pions[charge_cut]

    #Build the tau -> 3pi
    tau = ak.combinations(pions, 3)
    tau["pi_1"], tau["pi_2"], tau["pi_3"] = ak.unzip(tau)

    PDG_pi_m = lp.pi_plus.mass / 1000.
    tau["mass"] = kinematics_flat.mass([tau["pi_1"], tau["pi_2"], tau["pi_3"]],
                                       [PDG_pi_m, PDG_pi_m, PDG_pi_m])
    tau_m_low_cut_val = 3 * PDG_pi_m
    tau_m_high_cut_val = lp.tau_plus.mass / 1000.
    tau_m_low_cut = tau["mass"] >= tau_m_low_cut_val
    tau_m_high_cut = tau["mass"] <= lp.tau_plus.mass / 1000.
    tau_m_cut = np.logical_and(tau_m_low_cut, tau_m_high_cut)
    tau = tau[tau_m_cut]

    #Truth-matching for signal

    #Pions are all pions
    pi1_cut = abs(tau["pi_1"]["pdg"]) == abs(lp.pi_plus.pdgid)
    pi2_cut = abs(tau["pi_2"]["pdg"]) == abs(lp.pi_plus.pdgid)
    pi3_cut = abs(tau["pi_3"]["pdg"]) == abs(lp.pi_plus.pdgid)
    pi12_cut = np.logical_and(pi1_cut, pi2_cut)
    pi_cut_all = np.logical_and(pi12_cut, pi3_cut)
    tau = tau[pi_cut_all]

    #Pion parents are all tau
    tau_parent_cut_pi1 = abs(tau["pi_1"]["parent_pdg"]) == abs(
        lp.tau_plus.pdgid)
    tau_parent_cut_pi2 = abs(tau["pi_2"]["parent_pdg"]) == abs(
        lp.tau_plus.pdgid)
    tau_parent_cut_pi3 = abs(tau["pi_3"]["parent_pdg"]) == abs(
        lp.tau_plus.pdgid)
    tau_parent_cut_12 = np.logical_and(tau_parent_cut_pi1, tau_parent_cut_pi2)
    tau_parent_cut_all = np.logical_and(tau_parent_cut_12, tau_parent_cut_pi3)
    tau = tau[tau_parent_cut_all]

    #Pion grandparent are all Bu / Bc
    parent_id = {"Bu": lp.B_plus.pdgid, "Bc": lp.B_c_plus.pdgid}
    tau_grandparent_cut_pi1 = abs(tau["pi_1"]["grandparent_pdg"]) == abs(
        parent_id[B_type])
    tau_grandparent_cut_pi2 = abs(tau["pi_2"]["grandparent_pdg"]) == abs(
        parent_id[B_type])
    tau_grandparent_cut_pi3 = abs(tau["pi_3"]["grandparent_pdg"]) == abs(
        parent_id[B_type])
    tau_grandparent_cut_12 = np.logical_and(tau_grandparent_cut_pi1,
                                            tau_grandparent_cut_pi2)
    tau_grandparent_cut_all = np.logical_and(tau_grandparent_cut_12,
                                             tau_grandparent_cut_pi3)
    tau = tau[tau_grandparent_cut_all]

    #Ensure pions come from the same tau
    tau_parent_cut_12 = tau["pi_1", "MC_parentindex"] == tau["pi_2",
                                                             "MC_parentindex"]
    tau_parent_cut_13 = tau["pi_1", "MC_parentindex"] == tau["pi_3",
                                                             "MC_parentindex"]
    tau_parent_cut = np.logical_and(tau_parent_cut_12, tau_parent_cut_13)
    tau = tau[tau_parent_cut]

    #Net charge of the tau
    tau["charge"] = tau["pi_1", "charge"] + tau["pi_2",
                                                "charge"] + tau["pi_3",
                                                                "charge"]

    #Tau energy (visible energy of the signal)
    for comp in ["x", "y", "z"]:
        tau[f"p{comp}"] = tau["pi_1", f"p{comp}"] + tau[
            "pi_2", f"p{comp}"] + tau["pi_1", f"p{comp}"]
    tau["p"] = np.sqrt(tau["px"]**2 + tau["py"]**2 + tau["pz"]**2)
    tau["e"] = np.sqrt(tau["mass"]**2 + tau["p"]**2)

    #Tau flight and Bc flight from PV (using true vertices)
    tau["PVsep"] = tau["pi_1", "PVsep"]
    #x, y, and z coordinates of the tau vertex
    for v in ["x", "y", "z"]:
        tau[f"PVsep_{v}"] = tau["pi_1", f"vertex_{v}"]
    tau["PVsep_parent"] = tau["pi_1", "PVsep_parent"]

    #Thrust axis co-ordinates to persist
    for v in ["x", "y", "z"]:
        tau[f"EVT_thrust_{v}"] = events[f"EVT_thrust_{v}"]

    #Hemisphere energies
    for hem in ["0", "1"]:
        events[f"EVT_thrutshemis{hem}_e"] = events[
            f"EVT_thrutshemis{hem}_echarged"] + events[
                f"EVT_thrutshemis{hem}_eneutral"]

    #Total energy (sum of both hemispheres)
    events[
        "EVT_e"] = events["EVT_thrutshemis0_e"] + events["EVT_thrutshemis1_e"]

    #Min and max hemisphere energies per-event
    events["EVT_thrutshemis_e_min"] = np.minimum(events["EVT_thrutshemis0_e"],
                                                 events["EVT_thrutshemis1_e"])
    events["EVT_thrutshemis_e_max"] = np.maximum(events["EVT_thrutshemis0_e"],
                                                 events["EVT_thrutshemis1_e"])

    #Difference in energy between hemispheres (max - min so always positive)
    events["EVT_thrutshemis_e_diff"] = events[
        "EVT_thrutshemis_e_max"] - events["EVT_thrutshemis_e_min"]

    fig, ax = plt.subplots(figsize=(9, 9))
    plt.hist(events["EVT_thrutshemis_e_min"],
             bins=50,
             range=(0, 60),
             color="crimson",
             histtype='step',
             linewidth=2,
             label="Lower energy hemisphere per-event")
    plt.hist(events["EVT_thrutshemis_e_max"],
             bins=50,
             range=(0, 60),
             color="dodgerblue",
             histtype='step',
             linewidth=2,
             label="Higher energy hemisphere per-event")
    ax.tick_params(axis='both', which='major', labelsize=25)
    plt.xlim(0, 60)
    plt.axvline(lp.Z_0.mass / 2000.,
                color='k',
                linestyle='--',
                label="$m(Z^0)/2$")
    plt.xlabel("Hemisphere energy [GeV]", fontsize=30)
    plt.legend(fontsize=18, loc="upper left")
    ut.create_dir(loc.PLOTS)
    fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_min_max_hemisphere_E.pdf")

    fig, ax = plt.subplots(figsize=(9, 9))
    plt.hist(events["EVT_thrutshemis_e_diff"],
             bins=30,
             range=(0, 50),
             color="k",
             histtype='step',
             linewidth=2)
    ax.tick_params(axis='both', which='major', labelsize=25)
    plt.xlim(0, 50)
    plt.xlabel("Hemisphere energy difference [GeV]", fontsize=30)
    fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_diff_hemisphere_E.pdf")

    #Store hemisphere energy info in tau container for writing out later
    tau["EVT_e"] = events["EVT_e"]
    tau["EVT_thrutshemis_e_min"] = events["EVT_thrutshemis_e_min"]
    tau["EVT_thrutshemis_e_max"] = events["EVT_thrutshemis_e_max"]
    tau["EVT_thrutshemis_e_diff"] = events["EVT_thrutshemis_e_diff"]
    tau["EVT_thrutshemis0_e"] = events["EVT_thrutshemis0_e"]
    tau["EVT_thrutshemis1_e"] = events["EVT_thrutshemis1_e"]

    #Nominal B energy = m(Z) - sum of all energy apart from signal
    tau["nominal_B_e"] = lp.Z_0.mass / 1000. - (tau["EVT_e"] - tau["e"])

    fig, ax = plt.subplots(figsize=(9, 9))
    plt.hist(ak.flatten(tau["nominal_B_e"]),
             range=(0, 80),
             bins=30,
             color="k",
             histtype='step',
             linewidth=2)
    ax.tick_params(axis='both', which='major', labelsize=25)
    plt.title(
        "$m(Z) - \\Sigma$(all visible E apart from true $\\tau \\to 3\\pi$)",
        fontsize=25)
    plt.xlim(0, 80)
    plt.xlabel("Nominal $B$ energy [GeV]", fontsize=30)
    fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_nominal_B_E.pdf")

    #Plot the tau mass for truth-matched signal
    fig, ax = plt.subplots(figsize=(9, 9))
    plt.hist(ak.flatten(tau["mass"]),
             range=(tau_m_low_cut_val, tau_m_high_cut_val),
             bins=30,
             color="k",
             histtype='step',
             linewidth=2)
    ax.tick_params(axis='both', which='major', labelsize=25)
    plt.xlim(tau_m_low_cut_val, tau_m_high_cut_val)
    plt.xlabel("$m(3\\pi)$ [GeV]", fontsize=30)
    fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_3pi_M.pdf")

    #Look at charged and neutral multiplicities in the two hemispheres

    #Events where hemisphere 0 is the minimum energy
    events_hem0_min_cut = events["EVT_thrutshemis0_e"] == events[
        "EVT_thrutshemis_e_min"]
    events_hem0_min = events[events_hem0_min_cut]

    #Events where hemisphere 1 is the minimum energy
    events_hem1_min_cut = events["EVT_thrutshemis1_e"] == events[
        "EVT_thrutshemis_e_min"]
    events_hem1_min = events[events_hem1_min_cut]

    #Get the charged and neutral energy and multiplicity
    for ptype in ["charged", "neutral"]:
        for var in ["n", "e"]:
            events_hem0_min[f"{var}{ptype}_min"] = events_hem0_min[
                f"EVT_thrutshemis0_{var}{ptype}"]
            events_hem0_min[f"{var}{ptype}_max"] = events_hem0_min[
                f"EVT_thrutshemis1_{var}{ptype}"]

            events_hem1_min[f"{var}{ptype}_min"] = events_hem1_min[
                f"EVT_thrutshemis1_{var}{ptype}"]
            events_hem1_min[f"{var}{ptype}_max"] = events_hem1_min[
                f"EVT_thrutshemis0_{var}{ptype}"]

    #Recombine
    events = ak.concatenate([events_hem0_min, events_hem1_min], axis=0)

    #Plot the charged and neutral energies and multiplicities, where the low and high energy hemispheres are shown separately

    #Axis ranges, titles, and bins
    plot_config = {
        "echarged": [0, 50, "Charged energy [GeV]", 50],
        "eneutral": [0, 40, "Neutral energy [GeV]", 50],
        "ncharged": [0, 25, "Charged multiplicity", 25],
        "nneutral": [0, 20, "Neutral multiplicity", 20]
    }

    for p in plot_config:

        #Store the values in the tau container, so we can write it out
        tau[f"{p}_min"] = events[f"{p}_min"]
        tau[f"{p}_max"] = events[f"{p}_max"]

        fig, ax = plt.subplots(figsize=(9, 9))
        plt.hist(events[f"{p}_min"],
                 bins=plot_config[p][3],
                 range=(plot_config[p][0], plot_config[p][1]),
                 color="crimson",
                 histtype='step',
                 linewidth=2,
                 label="Lower energy hemisphere per-event")
        plt.hist(events[f"{p}_max"],
                 bins=plot_config[p][3],
                 range=(plot_config[p][0], plot_config[p][1]),
                 color="dodgerblue",
                 histtype='step',
                 linewidth=2,
                 label="Higher energy hemisphere per-event")
        ax.tick_params(axis='both', which='major', labelsize=25)
        plt.xlim(plot_config[p][0], plot_config[p][1])
        plt.xlabel(plot_config[p][2], fontsize=30)
        ymin, ymax = plt.ylim()
        plt.ylim(0, 1.15 * ymax)
        plt.legend(fontsize=18, loc="upper left")
        fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_min_max_hemisphere_{p}.pdf")

    #Plot the separation of the tau pion vertex from the PV (a measure of the B + tau flight)
    fig, ax = plt.subplots(figsize=(9, 9))
    plt.hist(ak.flatten(tau["pi_1", "PVsep"]),
             range=(0, 10),
             bins=30,
             color="k",
             histtype='step',
             linewidth=2)
    ax.tick_params(axis='both', which='major', labelsize=25)
    plt.xlim(0, 10)
    plt.xlabel("$3\pi$ separation from PV [mm]", fontsize=30)
    fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_3pi_vertex_PV_sep.pdf")

    #Plot the separation of the true Bc decay vertex from the PV - this quantity cannot be reconstructed experimentally
    fig, ax = plt.subplots(figsize=(9, 9))
    plt.hist(ak.flatten(tau["pi_1", "PVsep_parent"]),
             range=(0, 4),
             bins=30,
             color="k",
             histtype='step',
             linewidth=2)
    ax.tick_params(axis='both', which='major', labelsize=25)
    plt.xlim(0, 4)
    B_name = {"Bu": "$B^\\pm$", "Bc": "$B_c^\\pm$"}
    plt.xlabel(f"{B_name[B_type]} separation from PV [mm]", fontsize=30)
    fig.savefig(f"{loc.PLOTS}/{B_type}2TauNu_Bc_vertex_PV_sep.pdf")

    #Persist information into a flat dataframe for use elsewhere e.g. MVA training vs. inclusive Z -> qq/cc/bb background

    df = pd.DataFrame()
    persist_vars = [
        "EVT_e",  #Total event visible energy
        "EVT_thrutshemis_e_min",  #Lowest energy hemisphere in event
        "EVT_thrutshemis_e_max",  #Highest energy hemisphere in event
        "EVT_thrutshemis0_e",  #costheta < 0 hemisphere energy
        "EVT_thrutshemis1_e",  #costheta > 0 hemisphere energy
        "nominal_B_e",  #Nominal signal B energy calculated using reco energy of true tau -> 3pi
        "px",
        "py",
        "pz",
        "e",
        "mass",
        "PVsep",  #Distance of true tau decay vertex from the PV
        "PVsep_x",
        "PVsep_y",
        "PVsep_z",
        "PVsep_parent",  #Distance of true Bc vertex from the PV (Bc flight which is not reconstructible)
        "EVT_thrust_x",
        "EVT_thrust_y",
        "EVT_thrust_z"
    ]
    for var in ["e", "n"]:
        for ptype in ["charged", "neutral"]:
            for m in ["min", "max"]:
                persist_vars.append(f"{var}{ptype}_{m}")

    for var in persist_vars:
        df[var] = ak.flatten(tau[var]).tolist()

    #Save to CSV
    ut.create_dir(loc.CSV)
    df.to_csv(f"{loc.CSV}/{B_type}2TauNu.csv")
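A toy version (hypothetical field) of the persist step above: each jagged tau field is flattened into a flat list before filling the DataFrame column.

import awkward as ak
import pandas as pd

tau = ak.Array([[{"mass": 1.2}], [], [{"mass": 0.9}]])
df = pd.DataFrame({"mass": ak.flatten(tau["mass"]).tolist()})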
Example #14
def best_match(gen_hyp=None, jets=None, leptons=None, met=None):
    if gen_hyp is None:
        raise ValueError("Gen Objects gen_hyp needed for matching")
    if jets is None:
        raise ValueError("Reco jets needed for matching")
    if leptons is None:
        raise ValueError("Reco leptons needed for matching")
    if met is None:
        raise ValueError("Reco met needed for matching")

    if not ak.all(ak.num(gen_hyp) == 1):
        raise ValueError("Not all events for matching are semileptonic")

    jets_ak = ak.with_name(jets[["pt", "eta", "phi", "mass"]],"PtEtaPhiMLorentzVector")
    leps_ak = ak.with_name(leptons[["pt", "eta", "phi", "mass"]],"PtEtaPhiMLorentzVector")

    # init dict of objects
    matched_objects = {}

    # match jet closest to gen objects
    for genobj in ['BHad', 'BLep', 'WJa', 'WJb']:
        genobj_ak = ak.with_name(gen_hyp[genobj][["pt", "eta", "phi", "mass"]],"PtEtaPhiMLorentzVector")
        jets_akc, genobj_akc = ak.unzip(ak.cartesian([jets_ak, genobj_ak], nested=False))
        deltaRs = jets_akc.delta_r(genobj_akc)  # find deltaRs between jets and gen object
        indexOfMin = ak.unflatten(ak.argmin(deltaRs, axis=1), ak.num(genobj_ak))
        passing_inds = deltaRs[indexOfMin] < 0.4

        matched_jets_inds = indexOfMin[passing_inds]
        matched_jets = jets[matched_jets_inds]

        ## add matched perm objects
        matched_objects[genobj] = ak.Array({
            'pt' : matched_jets.pt,
            'eta' : matched_jets.eta,
            'phi' : matched_jets.phi,
            'mass' : matched_jets.mass,
            'jetIdx' : matched_jets_inds, # index of jet that the gen object is matched to in the event
        }, with_name="PtEtaPhiMLorentzVector")
        
    # match lepton closest to gen lepton
    genlep_ak = ak.with_name(gen_hyp['Lepton'][["pt", "eta", "phi", "mass"]], "PtEtaPhiMLorentzVector")
    lep_akc, genlep_akc = ak.unzip(ak.cartesian([leps_ak, genlep_ak], nested=False))
    lepDRs = lep_akc.delta_r(genlep_akc)
    lepIdxOfMin = ak.unflatten(ak.argmin(lepDRs, axis=1), ak.num(genlep_ak))
    passing_inds = lepDRs[lepIdxOfMin] < 0.4
    matched_leps_inds = lepIdxOfMin[passing_inds]
    matched_leps = leptons[matched_leps_inds]
    
    ## add matched perm objects
    matched_objects['Lepton'] = ak.Array({key: matched_leps[key] for key in matched_leps.fields}, with_name="PtEtaPhiMLorentzVector")

    # solve for neutrino
    nu_array = np.zeros((len(ak.num(jets)), 4), dtype='float64')
    # convert all inputs into 2d numpy arrays of dtype=float64 (won't work if they're not float64)
    blep_inputs = np.stack(
        (ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['BLep'].px, 1), -999))).astype('float64'),
         ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['BLep'].py, 1), -999))).astype('float64'),
         ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['BLep'].pz, 1), -999))).astype('float64'),
         ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['BLep'].energy, 1), -999))).astype('float64')),
        axis=-1)
    lep_inputs = np.stack(
        (ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['Lepton'].px, 1), -999))).astype('float64'),
         ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['Lepton'].py, 1), -999))).astype('float64'),
         ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['Lepton'].pz, 1), -999))).astype('float64'),
         ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(matched_objects['Lepton'].energy, 1), -999))).astype('float64')),
        axis=-1)
    met_inputs = np.stack(
        (ak.to_numpy(ak.fill_none(met.px, -999)).astype('float64'),
         ak.to_numpy(ak.fill_none(met.py, -999)).astype('float64')),
        axis=-1)
    nu_array = find_nu(bleps=blep_inputs, leptons=lep_inputs, met=met_inputs, nu_array=nu_array)
    nu_array = find_nu(bleps=blep_inputs, leptons=lep_inputs, met=met_inputs, nu_array=nu_array)

    valid_nu = ~((nu_array[:, 3] > 1e20) | (nu_array[:, 3] == 0)) # events that have a solution and matched blep

    # convert px, py, pz to pt, eta, phi
    nu_px, nu_py, nu_pz = nu_array[:, 0][valid_nu], nu_array[:, 1][valid_nu], nu_array[:, 2][valid_nu]
    nu_mom, nu_pt = np.sqrt(np.square(nu_px)+np.square(nu_py)+np.square(nu_pz)), np.sqrt(np.square(nu_px)+np.square(nu_py))
    nu_phi = np.arctan2(nu_py, nu_px)
    nu_eta = np.arcsinh(nu_pz/nu_pt)
    matched_objects['Nu'] = ak.Array({
        'pt' : ak.unflatten(nu_pt, valid_nu.astype(int)),
        'eta' : ak.unflatten(nu_eta, valid_nu.astype(int)),
        'phi' : ak.unflatten(nu_phi, valid_nu.astype(int)),
        'mass' : ak.zeros_like(ak.unflatten(nu_array[:, 0][valid_nu], valid_nu.astype(int))),
        'chi2' : ak.unflatten(nu_array[:, 3][valid_nu], valid_nu.astype(int)),
    }, with_name="PtEtaPhiMLorentzVector")

    matched_perm = make_perm_table(bhad=matched_objects['BHad'], blep=matched_objects['BLep'], wja=matched_objects['WJa'], wjb=matched_objects['WJb'], lepton=matched_objects['Lepton'], met=met, nu=matched_objects['Nu'])

    return matched_perm
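A toy illustration of the ak.unflatten trick used for the neutrino above: 0/1 counts turn a flat array of solutions into per-event lists that are empty where no valid solution exists.

import awkward as ak
import numpy as np

valid = np.array([True, False, True])
values = np.array([10.0, 20.0])  # one entry per valid event
per_event = ak.unflatten(values, valid.astype(int))  # [[10.0], [], [20.0]]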
Example #15
def get_evt_p4(p4, num=6):
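    # Sum the four-momenta of every ``num``-object combination per event.
    # Note: the tuple unpacking below assumes num == 6.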
    combos = ak.combinations(p4, num)
    part0, part1, part2, part3, part4, part5 = ak.unzip(combos)
    evt_p4 = part0 + part1 + part2 + part3 + part4 + part5
    return evt_p4