Beispiel #1
0
def merge_closeby_particles(g, pid=22, deltar_cut=0.001):
    """Merge pairs of close-by particles of type ``pid`` in the graph ``g``.

    Candidate nodes are those whose ``"typ"`` attribute equals ``pid`` and
    whose key starts with ``"tp"`` or ``"sc"``. ``deltar_pairs`` reports index
    pairs of candidates closer than ``deltar_cut``. For each pair the two
    four-vectors are summed and stored on the first node, the edge weights of
    the second node are added to edges the first node already has to the same
    successor, and the second node is removed.

    The graph is modified in place; nothing is returned.
    """
    candidates = []
    for node in g.nodes:
        if g.nodes[node]["typ"] == pid and node[0] in ("tp", "sc"):
            candidates.append(node)

    etas = [g.nodes[node]["eta"] for node in candidates]
    phis = [g.nodes[node]["phi"] for node in candidates]

    first_idx, second_idx = deltar_pairs(etas, phis, deltar_cut)
    to_merge = [(candidates[i], candidates[j])
                for i, j in zip(first_idx, second_idx)]

    for keep, drop in to_merge:
        # A previous merge may already have removed one of the two nodes.
        if keep not in g.nodes or drop not in g.nodes:
            continue

        total = vector.obj(pt=0, eta=0, phi=0, E=0)
        for member in (keep, drop):
            total += vector.obj(pt=g.nodes[member]["pt"],
                                eta=g.nodes[member]["eta"],
                                phi=g.nodes[member]["phi"],
                                E=g.nodes[member]["e"])

        g.nodes[keep]["pt"] = total.pt
        g.nodes[keep]["eta"] = total.eta
        g.nodes[keep]["phi"] = total.phi
        g.nodes[keep]["e"] = total.energy

        # Fold the weights of the removed node into the surviving node, but
        # only for successors the surviving node is already connected to.
        for successor in g.successors(drop):
            kept_edge = (keep, successor)
            if kept_edge in g.edges:
                dropped_weight = g.edges[(drop, successor)]["weight"]
                g.edges[kept_edge]["weight"] += dropped_weight
        g.remove_nodes_from([drop])
Beispiel #2
0
def test_mul():
    """Scalars scale vectors from either side; vector * vector raises TypeError."""
    scaled_obj = vector.obj(x=10, y=50)
    assert v1 * 10 == scaled_obj
    assert 10 * v1 == scaled_obj

    scaled_arr = vector.array({"x": [10, 20, 30, 40], "y": [50, 60, 70, 80]})
    assert numpy.allclose(a1 * 10, scaled_arr)
    assert numpy.allclose(10 * a1, scaled_arr)

    # Multiplying two vectors is not defined for objects or arrays.
    for lhs, rhs in ((v1, v2), (a1, a2)):
        with pytest.raises(TypeError):
            lhs * rhs
Beispiel #3
0
def items2vectors(events, names):
    """Build one vector per requested collection from the columns of ``events``.

    ``"selectedPhoton"`` and ``"FatJet"`` are the recognised entries of
    ``names``; each present entry contributes one ``vector.obj`` built from
    its ``pt``/``eta``/``phi`` columns and its mass column. The returned list
    keeps that order (photon first, then fat jet) and is empty when neither
    name is requested.
    """
    # (requested name, pt column, eta column, phi column, mass column)
    specs = (
        ("selectedPhoton", 'selectedPhoton_pt', 'selectedPhoton_eta',
         'selectedPhoton_phi', 'selectedPhoton_mass'),
        ("FatJet", 'FatJet_pt', 'FatJet_eta', 'FatJet_phi',
         'FatJet_msoftdrop'),
    )
    return [
        vector.obj(pt=events[pt_col],
                   eta=events[eta_col],
                   phi=events[phi_col],
                   M=events[mass_col])
        for wanted, pt_col, eta_col, phi_col, mass_col in specs
        if wanted in names
    ]
Beispiel #4
0
def pvec(vec4):
  """Return a new vector built from the ``x``, ``y`` and ``z`` components of ``vec4``."""
  return vector.obj(x=vec4.x, y=vec4.y, z=vec4.z)
Beispiel #5
0
def getcosthetastar_cs(events, diphoton, fatjet):
    """Return the projection of the diphoton direction on the beam-derived axis.

    Two beam four-vectors pointing along ``+z`` and ``-z`` and the
    ``diphoton`` four-vector are boosted into the rest frame of
    ``diphoton + fatjet``. The reference axis (``CSaxis``) is the normalised
    difference of the two boosted beam unit vectors, and the result is the
    dot product of that axis with the boosted diphoton unit vector.

    Parameters
    ----------
    events
        Only ``len(events)`` is used, to size the beam vectors.
    diphoton, fatjet
        Four-vectors that support ``+``, ``to_beta3()`` and ``boost()``.

    Returns
    -------
    The result of ``CSaxis.dot(...)`` for the boosted diphoton direction.
    """
    # https://github.com/cms-analysis/flashgg/blob/1453740b1e4adc7184d5d8aa8a981bdb6b2e5f8e/DataFormats/src/DoubleHTag.cc#L41
    beam_energy = 6500  # presumably GeV per beam -- TODO confirm units
    nevts = len(events)

    # Massless beams (t == |z|) travelling along +z and -z.
    p1 = vector.obj(
          x=np.zeros(nevts),
          y=np.zeros(nevts),
          z=np.ones(nevts)*beam_energy,
          t=np.ones(nevts)*beam_energy,
        )

    p2 = vector.obj(
          x=np.zeros(nevts),
          y=np.zeros(nevts),
          z=np.ones(nevts)*beam_energy*(-1),
          t=np.ones(nevts)*beam_energy,
        )

    # Boosting by minus the velocity of the combined system moves everything
    # into that system's rest frame.
    hh = diphoton + fatjet
    boostvec = hh.to_beta3() * -1

    p1_boost = p1.boost(boostvec)
    p2_boost = p2.boost(boostvec)

    CSaxis = (pvec(p1_boost).unit() - pvec(p2_boost).unit()).unit()

    diphoton_boost = diphoton.boost(boostvec)
    diphoton_vec_unit = pvec(diphoton_boost).unit()

    return CSaxis.dot(diphoton_vec_unit)
Beispiel #6
0
def test_array_casting():
    """``numpy.asanyarray`` turns each vector object into the matching zero-dimensional NumPy vector."""
    # (constructor keywords, expected object class, expected NumPy class)
    cases = [
        (dict(x=1, y=1), vector.VectorObject2D, vector.VectorNumpy2D),
        (dict(px=1, py=1), vector.MomentumObject2D, vector.MomentumNumpy2D),
        (dict(x=1, y=1, z=1), vector.VectorObject3D, vector.VectorNumpy3D),
        (dict(px=1, py=1, pz=1), vector.MomentumObject3D,
         vector.MomentumNumpy3D),
        (dict(x=1, y=1, z=1, t=1), vector.VectorObject4D,
         vector.VectorNumpy4D),
        (dict(px=1, py=1, pz=1, E=1), vector.MomentumObject4D,
         vector.MomentumNumpy4D),
    ]
    for coords, object_cls, numpy_cls in cases:
        obj = vector.obj(**coords)
        assert isinstance(obj, object_cls)
        assert isinstance(numpy.asanyarray(obj), numpy_cls)
        assert numpy.asanyarray(obj).shape == ()
Beispiel #7
0
def test_truediv():
    """Vectors can be divided by a scalar; a vector is never accepted as the divisor."""
    expected_arr = vector.array(
        {"x": [0.1, 0.2, 0.3, 0.4], "y": [0.5, 0.6, 0.7, 0.8]}
    )
    assert v1 / 10 == vector.obj(x=0.1, y=0.5)
    assert numpy.allclose(a1 / 10, expected_arr)

    # scalar / vector and vector / vector must both be rejected.
    rejected = ((10, v1), (10, a1), (v1, v2), (a1, a2))
    for numerator, denominator in rejected:
        with pytest.raises(TypeError):
            numerator / denominator
Beispiel #8
0
def test_sub():
    """Subtraction works between vectors (objects and arrays) and rejects scalars."""
    assert v1 - v2 == vector.obj(x=-9, y=-15)

    # (left operand, right operand, expected components)
    array_cases = (
        (a1, a2,
         {"x": [-9, -98, -997, -9996], "y": [-15, -194, -1993, -19992]}),
        (v1, a2,
         {"x": [-9, -99, -999, -9999], "y": [-15, -195, -1995, -19995]}),
        (a2, v1,
         {"x": [9, 99, 999, 9999], "y": [15, 195, 1995, 19995]}),
    )
    for lhs, rhs, expected in array_cases:
        assert numpy.allclose(lhs - rhs, vector.array(expected))

    for lhs, rhs in ((v1, 5), (5, v1)):
        with pytest.raises(TypeError):
            lhs - rhs
Beispiel #9
0
def test_add():
    """Addition works between vectors (objects and arrays) and rejects scalars."""
    assert v1 + v2 == vector.obj(x=11, y=25)

    elementwise = {"x": [11, 102, 1003, 10004], "y": [25, 206, 2007, 20008]}
    assert numpy.allclose(a1 + a2, vector.array(elementwise))

    # Object + array gives the same result regardless of operand order.
    broadcast = {"x": [11, 101, 1001, 10001], "y": [25, 205, 2005, 20005]}
    assert numpy.allclose(v1 + a2, vector.array(broadcast))
    assert numpy.allclose(a2 + v1, vector.array(broadcast))

    for lhs, rhs in ((v1, 5), (5, v1)):
        with pytest.raises(TypeError):
            lhs + rhs
Beispiel #10
0
def Loop(file_list):
    """Collect the di-electron invariant mass over all chunks of ``file_list``.

    Each chunk delivered by ``uproot.iterate`` is reduced to events with at
    least two electrons passing ``PT > 20`` and ``|Eta| < 2.5`` whose first
    two selected electrons have charges summing to zero. The mass of the sum
    of those two electrons (built with ``mass=0``) is accumulated.

    Parameters
    ----------
    file_list
        Files handed to ``uproot.iterate``. The branch list comes from the
        module-level name ``branches``.

    Returns
    -------
    dict
        ``{'diele_mass': array}`` with the masses of all chunks concatenated,
        or an empty dict when no chunk was produced.
    """
    # Per-chunk results; joined once after the loop instead of re-copying the
    # growing array on every chunk.
    mass_chunks = []
    n_collected = 0

    # --Start File Loop
    for arrays, doc in uproot.iterate(file_list, branches,
                                      report=True):  #  for Uproot4

        print("from: {0}, to: {1} -- Entries: {2}".format(
            doc.start, doc.stop, len(arrays)))

        Electron = ak.zip({
            "PT": arrays[b"Electron.PT"],
            "Eta": arrays[b"Electron.Eta"],
            "Phi": arrays[b"Electron.Phi"],
            "Charge": arrays[b"Electron.Charge"],
        })

        # Muon, Photon and MET are kept in step with the event selection but
        # are not used for the returned quantity.
        Muon = ak.zip({
            "PT": arrays[b"MuonLoose.PT"],
            "Eta": arrays[b"MuonLoose.Eta"],
            "Phi": arrays[b"MuonLoose.Phi"],
            "Charge": arrays[b"MuonLoose.Charge"],
        })

        Photon = ak.zip({
            "PT": arrays[b"PhotonLoose.PT"],
            "Eta": arrays[b"PhotonLoose.Eta"],
            "Phi": arrays[b"PhotonLoose.Phi"],
        })

        MET = ak.zip({
            "PT": arrays[b"PuppiMissingET.MET"],
            "Phi": arrays[b"PuppiMissingET.Phi"],
        })

        ## --- Electron Selection (per-electron cut)
        cut = (Electron.PT > 20) & (abs(Electron.Eta) < 2.5)
        Electron = Electron[cut]

        # Keep events with at least two selected electrons
        cut = ak.num(Electron) >= 2
        Electron = Electron[cut]
        Photon = Photon[cut]
        Muon = Muon[cut]
        MET = MET[cut]

        ## --- Event Selection

        # Opposite sign: charges of the first two electrons sum to zero
        os_cut = (Electron[:, 0].Charge + Electron[:, 1].Charge == 0)
        Electron = Electron[os_cut]
        Photon = Photon[os_cut]
        Muon = Muon[os_cut]
        MET = MET[os_cut]

        Ele1vec = vector.obj(pt=Electron[:, 0].PT,
                             eta=Electron[:, 0].Eta,
                             phi=Electron[:, 0].Phi,
                             mass=0)
        Ele2vec = vector.obj(pt=Electron[:, 1].PT,
                             eta=Electron[:, 1].Eta,
                             phi=Electron[:, 1].Phi,
                             mass=0)

        diele = Ele1vec + Ele2vec

        ## --- Convert to numpy array
        diele_mass = ak.to_numpy(diele.mass)

        ## --- Fill Ntuple
        mass_chunks.append(diele_mass)
        n_collected += len(diele_mass)

        print("size of output array: ", n_collected)

    histo = {}
    if len(mass_chunks) == 1:
        # A single chunk is returned as-is, without an extra copy.
        histo['diele_mass'] = mass_chunks[0]
    elif mass_chunks:
        histo['diele_mass'] = np.concatenate(mass_chunks)

    return histo
Beispiel #11
0
    def get_good_kaons(self, photons='one'):
        """
        Build a per-entry table of kaon candidates with their two tracks.

        photons : None, 'one', 'all' -- how to handle photons from the
        calorimeter. None -- do not add them to the data, 'one' -- keep only
        the pair that best matches a pi0, 'all' -- add all pairs.

        Returns the resulting DataFrame.
        """
        dat_tracks = self.get_dat_tracks()
        dat_kaons = self.get_dat_kaons()
        dat_glob = self.get_dat_glob()

        # Keep only entries that have exactly two rows after joining tracks
        # with kaons (counted through the 'tz' column).
        dat_goods = dat_tracks.join(dat_kaons, how='inner')
        goods = dat_goods.groupby('entry').agg(
            num=('tz', 'count')).query('num==2').index
        dat_goods = dat_goods.reset_index().set_index(
            'entry').loc[goods].reset_index().set_index(['entry', 'subentry'])

        # Recode the track charge as 'p' / 'n' and pivot so that every entry
        # becomes one row with '<name>_p' and '<name>_n' columns.
        dat_goods['tcharge'] = np.where(dat_goods['tcharge'] > 0, 'p', 'n')
        dat_goods = pd.pivot_table(dat_goods.reset_index(),
                                   values=[
                                       'trho', 'tz', 'tdedx', 'tptot', 'tth',
                                       'tphi', 'ksminv', 'ksalign', 'ksptot',
                                       'ksdpsi', 'ksz0', 'kslen', 'ksth',
                                       'ksphi'
                                   ],
                                   index=['entry'],
                                   columns=['tcharge'])
        dat_goods.columns = [
            '_'.join(map(lambda x: str(x), col)) for col in dat_goods.columns
        ]
        # The 'ks*' columns are dropped for the '_n' side; the '_p' copies are
        # renamed back to the bare names further below.
        dat_goods.drop([
            'ksalign_n', 'ksminv_n', 'ksptot_n', 'ksdpsi_n', 'ksz0_n',
            'kslen_n', 'ksth_n', 'ksphi_n'
        ],
                       axis=1,
                       inplace=True)

        # drop bad runs
        dat_glob = dat_glob.query('badrun==False')
        dat_goods = dat_goods.join(dat_glob, how='inner')

        # add x1, x2 (computed by get_x, which is defined outside this method)
        dat_goods = dat_goods.rename(
            {
                'ksalign_p': 'ksalign',
                'ksminv_p': 'ksminv',
                'ksptot_p': 'ksptot',
                'ksdpsi_p': 'ksdpsi',
                'ksz0_p': 'ksz0',
                'kslen_p': 'kslen',
                'ksth_p': 'ksth',
                'ksphi_p': 'ksphi'
            },
            axis=1)
        dat_goods['x1'], dat_goods['x2'] = get_x(dat_goods)

        # calc recoil mass: mass of (vec0 - vec), where vec0 has zero momentum
        # and an energy of twice the mean of the 'emeas' column over the
        # selected rows.
        vec = vector.array({
            'pt':
            dat_goods['ksptot'] * np.sin(dat_goods['ksth']),
            'theta':
            dat_goods['ksth'],
            'phi':
            dat_goods['ksphi'],
            'mass':
            dat_goods['ksminv'],
        })
        vec0 = vector.obj(px=0, py=0, pz=0, E=dat_goods['emeas'].mean() * 2)
        dat_goods['recoil'] = (vec0 - vec).mass
        del vec

        # add photons: left merge on 'entry', so entries without photon rows
        # are kept and get subentry 0.
        if photons is not None:
            dat_photons = self.get_dat_photons()
            dat_goods = pd.merge(dat_goods.reset_index(),
                                 dat_photons.reset_index(),
                                 on='entry',
                                 how='left')
            dat_goods['subentry'] = dat_goods['subentry'].fillna(0).astype(int)
            dat_goods = dat_goods.set_index(['entry', 'subentry'])
            if photons == 'one':
                # Sort by |M - 134.97| and aggregate with 'first' per entry.
                # NOTE(review): 134.97 is presumably the pi0 mass in MeV --
                # confirm against the units of column 'M'.
                dat_goods = dat_goods.sort_values(
                    'M', ascending=True,
                    key=lambda x: np.abs(x - 134.97)).groupby('entry').agg(
                        'first')

        return dat_goods
Beispiel #12
0
def test_add():
    """Adding a jagged awkward vector array to awkward, NumPy and object vectors, in both orders."""
    array_type = vector._backends.awkward_.VectorArray2D

    one = vector.Array([
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}],
        [],
        [{"x": 3, "y": 3.3}],
    ])

    def check_sum(other, expected):
        # Same type and same content whichever operand calls ``add``.
        assert isinstance(one.add(other), array_type)
        assert isinstance(other.add(one), array_type)
        assert one.add(other).tolist() == expected
        assert other.add(one).tolist() == expected

    # One vector of ``two`` per sublist of ``one``.
    per_sublist = [
        [{"x": 11, "y": 21.1}, {"x": 12, "y": 22.2}],
        [],
        [{"x": 1003, "y": 2003.3}],
    ]

    two = vector.Array([
        {"x": 10, "y": 20},
        {"x": 100, "y": 200},
        {"x": 1000, "y": 2000},
    ])
    check_sum(two, per_sublist)

    two = vector.array({"x": [10, 100, 1000], "y": [20, 200, 2000]})
    check_sum(two, per_sublist)

    # A single vector object is added to every element.
    two = vector.obj(x=10, y=20)
    check_sum(two, [
        [{"x": 11, "y": 21.1}, {"x": 12, "y": 22.2}],
        [],
        [{"x": 13, "y": 23.3}],
    ])
Beispiel #13
0
def _successors_by_weight(g, gp):
    """Return ``(weight, element)`` pairs for the successors of ``gp``, largest edge weight first."""
    return sorted([(g.edges[gp, e]["weight"], e) for e in g.successors(gp)],
                  key=lambda x: x[0],
                  reverse=True)


def _remap_pid(elem_type, pid):
    """Return the target PID for a particle of type ``pid`` matched to an element of type ``elem_type``."""
    # remap PID in case of HCAL cluster to neutral
    if elem_type == 5 and (pid == 22 or pid == 11):
        pid = 130

    # remap forward region to HFHAD or HFEM
    if elem_type in [8, 9]:
        if pid == 130:
            pid = 1
        elif pid == 22:
            pid = 2

    # Remap HF candidates to neutral hadron or photon in case not matched to HF
    if elem_type in [2, 3, 4, 5]:
        if pid == 1:
            pid = 130
        elif pid == 2:
            pid = 22

    return pid


def prepare_normalized_table(g, genparticle_energy_threshold=0.2):
    """Flatten the element / genparticle / PF-candidate graph into per-element tables.

    Nodes of ``g`` whose key starts with ``"elem"`` are the rows. Each
    genparticle (a predecessor of an element) is assigned to exactly one
    element, and each PF candidate (key starting with ``"pfcand"``) to at most
    one element. For every element the function stores its own features, the
    features of its candidate, and the summed four-vector of its genparticles.

    Parameters
    ----------
    g
        Directed graph with ``nodes``, ``edges``, ``successors`` and
        ``predecessors``; edges carry a ``"weight"`` attribute.
    genparticle_energy_threshold
        Genparticles with ``"e"`` not above this value are ignored when the
        generator-level target of an element is built.

    Returns
    -------
    tuple
        ``(Xelem, ycand, ygen)``: record arrays with one row per element,
        zero-filled where nothing was matched. Field names come from the
        module-level ``elem_branches`` (``Xelem``) and ``target_branches``
        (``ycand``, ``ygen``); all fields are ``float32``.
    """

    def node_energy(node):
        return g.nodes[node]["e"]

    all_genparticles = []
    all_elements = []
    all_pfcandidates = []
    for node in g.nodes:
        if node[0] == "elem":
            all_elements.append(node)
            all_genparticles.extend(g.predecessors(node))
        elif node[0] == "pfcand":
            all_pfcandidates.append(node)
    all_genparticles = list(set(all_genparticles))
    all_elements = sorted(all_elements)

    # assign genparticles in reverse energy order uniquely to best element
    elem_to_gp = {}  # map of element -> genparticles
    unmatched_gp = []
    for gp in sorted(all_genparticles, key=node_energy, reverse=True):
        chosen_elem = None
        for _, elem in _successors_by_weight(g, gp):
            if elem not in elem_to_gp:
                chosen_elem = elem
                break

        if chosen_elem is None:
            unmatched_gp.append(gp)
        else:
            elem_to_gp[chosen_elem] = [gp]

    # assign unmatched genparticles to best element, allowing for overlaps
    for gp in sorted(unmatched_gp, key=node_energy, reverse=True):
        _, elem = _successors_by_weight(g, gp)[0]
        elem_to_gp[elem].append(gp)

    # Find primary element for each PFCandidate; candidates without a free
    # element stay unassigned.
    elem_to_cand = {}
    for cand in sorted(all_pfcandidates, key=node_energy, reverse=True):
        tp = g.nodes[cand]["typ"]
        neighbors = list(g.predecessors(cand))

        # Pions, muons and electrons will be assigned to the best associated track
        if tp in [211, 13, 11]:
            for elem in neighbors:
                tp_neighbor = g.nodes[elem]["typ"]

                # track or gsf
                if tp_neighbor == 1 or tp_neighbor == 6:
                    if elem not in elem_to_cand:
                        elem_to_cand[elem] = cand
                        break

        # other particles will be assigned to the neighbor with the largest
        # edge weight that is still free
        else:
            sorted_neighbors = sorted(neighbors,
                                      key=lambda x: g.edges[
                                          (x, cand)]["weight"],
                                      reverse=True)
            for elem in sorted_neighbors:
                if elem not in elem_to_cand:
                    elem_to_cand[elem] = cand
                    break

    Xelem = np.recarray((len(all_elements), ),
                        dtype=[(name, np.float32) for name in elem_branches])
    Xelem.fill(0.0)
    ygen = np.recarray((len(all_elements), ),
                       dtype=[(name, np.float32) for name in target_branches])
    ygen.fill(0.0)
    ycand = np.recarray((len(all_elements), ),
                        dtype=[(name, np.float32) for name in target_branches])
    ycand.fill(0.0)

    for ielem, elem in enumerate(all_elements):
        elem_type = g.nodes[elem]["typ"]
        genparticles = sorted(elem_to_gp.get(elem, []),
                              key=lambda x: g.edges[(x, elem)]["weight"],
                              reverse=True)
        genparticles = [
            gp for gp in genparticles
            if g.nodes[gp]["e"] > genparticle_energy_threshold
        ]
        candidate = elem_to_cand.get(elem, None)

        for branch in elem_branches:
            Xelem[branch][ielem] = g.nodes[elem][branch]

        if candidate is not None:
            for branch in target_branches:
                ycand[branch][ielem] = g.nodes[candidate][branch]

        if not genparticles:
            # No generator-level target: the ygen row stays zero.
            continue

        # Type and charge are taken from the genparticle with the largest
        # edge weight to this element.
        pid = g.nodes[genparticles[0]]["typ"]
        charge = g.nodes[genparticles[0]]["charge"]

        lv = vector.obj(x=0, y=0, z=0, t=0)
        for gp in genparticles:
            lv += vector.obj(pt=g.nodes[gp]["pt"],
                             eta=g.nodes[gp]["eta"],
                             phi=g.nodes[gp]["phi"],
                             e=g.nodes[gp]["e"])

        pid = _remap_pid(elem_type, pid)

        target = {
            "pt": lv.rho,
            "eta": lv.eta,
            "sin_phi": np.sin(lv.phi),
            "cos_phi": np.cos(lv.phi),
            "e": lv.t,
            "typ": pid,
            "px": lv.x,
            "py": lv.y,
            "pz": lv.z,
            "charge": charge if pid in [211, 11, 13] else 0,
        }

        for branch in target_branches:
            ygen[branch][ielem] = target[branch]

    return Xelem, ycand, ygen
Beispiel #14
0
def _joined_column_names(nano_name, micro_name):
    """Return the names a nano/micro variable pair has in the joined dataframe.

    The join adds the ``_nano`` / ``_micro`` suffixes only when both samples
    use the same column name.
    """
    if nano_name == micro_name:
        return nano_name + "_nano", micro_name + "_micro"
    return nano_name, micro_name


def _dump_renamed(frame, mapping, path):
    """Write the columns named by ``mapping``'s values to ``path``, renamed to ``mapping``'s keys."""
    subset = frame[list(mapping.values())].rename(
        columns={column: new_name for new_name, column in mapping.items()})
    subset.to_parquet(path, engine="fastparquet")


def main(args):
    """Compare NanoAOD and MicroAOD photon variables event by event.

    Reads both samples, optionally restricts them to ``EB`` or ``EE``
    according to ``args.sd``, joins them on ``(event, lumi)``, removes rows
    with missing values or with lead/sublead photons further apart than the
    delta-R threshold, draws one three-panel comparison figure per variable
    listed in ``plots_specs.json`` and dumps the joined table and several
    renamed subsets to parquet files.

    ``args`` must provide ``nano_input_dir``, ``micro_input_dir``, ``sd`` and
    ``output_dir``.
    """

    # Read nano, micro, EB or EE cuts
    nanoaod_arr = ak.from_parquet(args.nano_input_dir)
    print("Read nanoaod: {}".format(nanoaod_arr.type))

    microaod_arr = uproot.concatenate(
        ["{}/*.root:diphotonDumper/trees/ggH_125_13TeV_All_$SYST".format(args.micro_input_dir)]
        )
    print("Read microaod: {}".format(microaod_arr.type))
    # Stupid typo in flashgg
    if "lead_ch_iso_worst__uncorr" in microaod_arr.fields:
        microaod_arr["lead_ch_iso_worst_uncorr"] = microaod_arr["lead_ch_iso_worst__uncorr"]

    if args.sd == "EB":
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.lead_eta) < 1.5]
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.sublead_eta) < 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.lead_eta) < 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.sublead_eta) < 1.5]

    if args.sd == "EE":
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.lead_eta) > 1.5]
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.sublead_eta) > 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.lead_eta) > 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.sublead_eta) > 1.5]

    # Read catalogue of variables to be plotted
    with open("plots_specs.json", "r") as f:
        columns = json.load(f)

    # Create dict where keys are names of variables in nano and values are names of variables in micro
    nano_micro_names = {var["nano_col"]: var["micro_col"] for var in columns}
    nano_micro_names["event"] = "event"
    nano_micro_names["lumi"] = "lumi"

    # Event by event
    nano_dict = {k: nanoaod_arr[k] for k in nano_micro_names.keys()}
    nano_dict["lead_fixedGridRhoAll"] = nanoaod_arr["lead_fixedGridRhoAll"] # needed for XGBoost vs TMVA
    test_nano = ak.Array(nano_dict)

    test_micro = microaod_arr[nano_micro_names.values()]

    pd_nano = ak.to_pandas(test_nano)
    pd_micro = ak.to_pandas(test_micro)

    pd_nano = pd_nano.set_index(["event", "lumi"])
    pd_micro = pd_micro.set_index(["event", "lumi"])

    pd_joined = pd_nano.join(pd_micro, lsuffix="_nano", rsuffix="_micro")

    print("Joined dataframe:\n{}".format(pd_joined))

    # Remove NaN values. "event" and "lumi" are the index of the joined frame,
    # not columns, so they are skipped; every other variable is still checked.
    for nano_name, micro_name in nano_micro_names.items():
        if nano_name in ["event", "lumi"]:
            continue
        nano_name, micro_name = _joined_column_names(nano_name, micro_name)
        pd_joined = pd_joined[pd_joined[nano_name].notna()]
        pd_joined = pd_joined[pd_joined[micro_name].notna()]

    # Cut over delta R
    # Here https://github.com/CoffeaTeam/coffea/blob/3db3fab23064c70d0ca63b185d51c7fa3b7849dc/coffea/nanoevents/methods/vector.py#L74
    # useful info
    deltaR_threshold = 0.1

    four_lead_nano = vector.obj(
        pt=pd_joined["lead_pt"],
        phi=pd_joined["lead_phi_nano"],
        eta=pd_joined["lead_eta_nano"],
        E=pd_joined["lead_energyRaw"]
    )

    four_sublead_nano = vector.obj(
        pt=pd_joined["sublead_pt"],
        phi=pd_joined["sublead_phi_nano"],
        eta=pd_joined["sublead_eta_nano"],
        E=pd_joined["sublead_energyRaw"]
    )

    pd_joined["deltaR_nano"] = four_lead_nano.deltaR(four_sublead_nano)

    four_lead_micro = vector.obj(
        pt=pd_joined["leadPt"],
        phi=pd_joined["lead_phi_micro"],
        eta=pd_joined["lead_eta_micro"],
        E=pd_joined["lead_SCRawE"]
    )

    four_sublead_micro = vector.obj(
        pt=pd_joined["subleadPt"],
        phi=pd_joined["sublead_phi_micro"],
        eta=pd_joined["sublead_eta_micro"],
        E=pd_joined["sublead_SCRawE"]
    )

    # Keep only rows where the nano and micro photons point the same way.
    pd_joined["lead_deltaR"] = four_lead_nano.deltaR(four_lead_micro)
    pd_joined["sublead_deltaR"] = four_sublead_nano.deltaR(four_sublead_micro)
    pd_joined = pd_joined[pd_joined["lead_deltaR"] < deltaR_threshold]
    pd_joined = pd_joined[pd_joined["sublead_deltaR"] < deltaR_threshold]
    print("Final joined dataframe:\n{}".format(pd_joined))

    # Plot
    print("Start plotting")
    for column in columns:
        fig, (up, middle, down) = plt.subplots(
            nrows=3,
            ncols=1,
            gridspec_kw={"height_ratios": (2, 1, 1)}
            )

        nano_name, micro_name = _joined_column_names(
            column["nano_col"], column["micro_col"])

        hist_range = column["range"]

        # Up: both distributions overlaid
        n, bin_edges, _ = up.hist(pd_joined[nano_name], bins=column["bins"], range=hist_range, histtype="step", label="NanoAOD", linewidth=2)
        m, _, _ = up.hist(pd_joined[micro_name], bins=column["bins"], range=hist_range, histtype="step", label="MicroAOD", linewidth=2)

        up.legend(fontsize=18, loc="upper right")
        up.set_xlim(hist_range)
        up.set_xlabel(column["var"])
        up.set_ylabel("Events")
        if "log" in column:
            up.set_yscale("log")

        # Middle: bin-by-bin ratio nano / micro
        ylim = [0, 2]
        middle.set_ylim(ylim)
        centers = (bin_edges[:-1] + bin_edges[1:]) / 2
        middle.plot(centers, n / m, "k.")
        middle.set_xlim(hist_range)
        middle.set_xlabel(column["var"])
        middle.set_ylabel(r"$n/\mu$")
        middle.grid(which="both")

        # Down: per-event relative difference in percent
        perc_range = (-300, 300)
        perc_bins = 500
        down.hist(100 * (pd_joined[nano_name] - pd_joined[micro_name]) / pd_joined[micro_name],
                  bins=perc_bins,
                  range=perc_range,
                  histtype="step",
                  density=True,
                  color="black",
                  linewidth=2)
        down.set_xlabel(r"$(n_{ev} - \mu_{ev})/\mu_{ev}$ [%]")
        down.set_ylabel("Events / {}%".format((perc_range[1] - perc_range[0]) / perc_bins))

        print(column["nano_col"])
        print("nano: {}".format(np.sum(n)))
        print("micro: {}".format(np.sum(m)))
        print("diff = {}".format(abs(np.sum(n) - np.sum(m))))
        print("rel diff = {}%\n".format(100 * abs(np.sum(n) - np.sum(m)) / max(np.sum(n), np.sum(m))))

        fig.tight_layout()

        fig.savefig("{}/{}_{}.png".format(args.output_dir, column["nano_col"], args.sd), bbox_inches='tight')
        fig.savefig("{}/{}_{}.pdf".format(args.output_dir, column["nano_col"], args.sd), bbox_inches='tight')

        plt.close(fig)

    # Dump pandas dataframe to parquet file
    pd_joined.to_parquet("nano_micro_{}.parquet".format(args.sd), engine="fastparquet")
    print("Dumped dataframe to parquet file")

    # Redundant: dump separate dataframes for nano and micro with PhotonID inputs
    nano_vars = {
        "r9": "lead_r9_nano",
        "s4": "lead_s4_nano",
        "sieie": "lead_sieie_nano",
        "etaWidth": "lead_etaWidth",
        "phiWidth": "lead_phiWidth",
        "sieip": "lead_sieip_nano",
        "pfPhoIso03": "lead_pfPhoIso03",
        "pfChargedIsoPFPV": "lead_pfChargedIsoPFPV",
        "pfChargedIsoWorstVtx": "lead_pfChargedIsoWorstVtx",

        "mva_ID": "lead_mvaID_recomputed"
        }

    micro_vars = {
        "r9": "lead_r9_micro",
        "s4": "lead_s4_micro",
        "sieie": "lead_sieie_micro",
        "etaWidth": "lead_eta_width",
        "phiWidth": "lead_phi_width",
        "sieip": "lead_sieip_micro",
        "pfPhoIso03": "lead_pho_iso",
        "pfChargedIsoPFPV": "lead_ch_iso",
        "pfChargedIsoWorstVtx": "lead_ch_iso_worst",

        "mva_ID": "lead_mva"
        }

    nano_isos = {
        "pfPhoIso03": "lead_pfPhoIso03",
        "pfChargedIsoPFPV": "lead_pfChargedIsoPFPV",
        "pfChargedIsoWorstVtx": "lead_pfChargedIsoWorstVtx",
        "pfPhoIso03_uncorr": "lead_uncorr_pfPhoIso03",
        "pfChargedIsoPFPV_uncorr": "lead_uncorr_pfChargedIsoPFPV",
        "pfChargedIsoWorstVtx_uncorr": "lead_uncorr_pfChargedIsoWorstVtx",
        }

    micro_isos = {
        "pfPhoIso03": "lead_pho_iso",
        "pfChargedIsoPFPV": "lead_ch_iso",
        "pfChargedIsoWorstVtx": "lead_ch_iso_worst",
        "pfPhoIso03_uncorr": "lead_pho_iso_uncorr",
        "pfChargedIsoPFPV_uncorr": "lead_ch_iso_uncorr",
        "pfChargedIsoWorstVtx_uncorr": "lead_ch_iso_worst_uncorr",
       }

    _dump_renamed(pd_joined, nano_vars, "nano_{}.parquet".format(args.sd))
    print("Dumped nano dataframe to parquet file")

    _dump_renamed(pd_joined, micro_vars, "micro_{}.parquet".format(args.sd))
    print("Dumped micro dataframe to parquet file")

    _dump_renamed(pd_joined, nano_isos, "nano_{}_isos.parquet".format(args.sd))
    print("Dumped nano dataframe for isos to parquet file")

    _dump_renamed(pd_joined, micro_isos, "micro_{}_isos.parquet".format(args.sd))
    print("Dumped micro dataframe for isos to parquet file")
Beispiel #15
0
# Restrict MET to the events selected by Electron_event_mask (the mask is
# defined earlier in the script, outside this fragment).
MET = MET[Electron_event_mask]
# Photon = Photo[Electron_event_mask]
# Jet = Jet[Electron_event_mask]


# CutFlow: one entry per event that is still present in Electron
cut1 = np.ones(len(Electron))


# cut0 is defined earlier in the script, outside this fragment.
print("Cut0: {0}, Cut1: {1}".format(len(cut0), len(cut1)))


import vector

# Transverse mass built from the first electron of every event and the MET:
# MT = sqrt(2 * pT(e) * MET * (1 - cos(|delta phi|))).
leading_Electron = Electron[:, 0]  # Highest PT
Ele2vec = vector.obj(pt=leading_Electron.PT, phi=leading_Electron.Phi)
MET2vec = vector.obj(pt=MET.MET, phi=MET.Phi)
_abs_dphi = abs(MET2vec.deltaphi(Ele2vec))
_one_minus_cos = 1 - np.cos(_abs_dphi)
MT = np.sqrt(2 * leading_Electron.PT * MET.MET * _one_minus_cos)


def draw(arr, title, start, end, bin):  # Array, Name, x_min, x_max, bin-number
    """Draw a histogram of ``arr`` on a new matplotlib figure.

    ``start`` and ``end`` bound the histogram range, ``title`` is used as the
    histogram label and ``bin`` is the number of points passed to
    ``np.linspace`` to build the bin edges.
    """
    plt.figure(figsize=(8, 5))  # Figure size
    # NOTE(review): linspace returns `bin` edges, i.e. `bin - 1` bins, so the
    # spacing is (end - start) / (bin - 1) and differs from `binwidth` below.
    bins = np.linspace(start, end, bin)  # divide start-end range with 'bin' number
    # NOTE(review): `binwidth` is not used in the visible part of this function.
    binwidth = (end - start) / bin  # width of one bin

    # Draw histogram
    plt.hist(arr, bins=bins, alpha=0.7, label=title)  # label is needed to draw legend

    plt.xticks(fontsize=16)  # xtick size
Beispiel #16
0
# Copyright (c) 2019-2021, Jonas Eschle, Jim Pivarski, Eduardo Rodrigues, and Henry Schreiner.
#
# Distributed under the 3-clause BSD license, see accompanying file LICENSE
# or https://github.com/scikit-hep/vector for details.

import numpy
import pytest

import vector

# Shared 2D fixtures for the tests below: v* are single vector objects and
# a* are four-element vector arrays. v1 equals the first element of a1.
v1 = vector.obj(x=1, y=5)
a1 = vector.array({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})

v2 = vector.obj(x=10, y=20)
a2 = vector.array({"x": [10, 100, 1000, 10000], "y": [20, 200, 2000, 20000]})


def test_eq():
    """``==`` compares objects as a whole and arrays element by element."""
    # object against object
    assert v1 == v1
    assert not (v1 == v2)

    # array against array
    identical = a1 == a1
    disjoint = a1 == a2
    assert identical.all()
    assert not disjoint.any()

    # object against array, in both orders: some elements match, not all
    for mask in (v1 == a1, a1 == v1):
        assert mask.any()
        assert not mask.all()


def test_ne():
    """``!=`` is false for a vector object against itself and true against a different one."""
    assert not (v1 != v1)
    assert v1 != v2
Beispiel #17
0
def test_pos():
    """Unary plus leaves vector objects and vector arrays unchanged."""
    same_obj = vector.obj(x=1, y=5)
    same_arr = vector.array({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    assert +v1 == same_obj
    assert numpy.allclose(+a1, same_arr)
Beispiel #18
0
def test_neg():
    """Unary minus flips the sign of every component."""
    negated_obj = vector.obj(x=-1, y=-5)
    negated_arr = vector.array({"x": [-1, -2, -3, -4], "y": [-5, -6, -7, -8]})
    assert -v1 == negated_obj
    assert numpy.allclose(-a1, negated_arr)