Example #1
 def test_numba_getitem_tuple_slice_boolarray(self):
     a = numpy.arange(36).reshape(4, 3, 3)
     a2 = awkward.fromiter(a)
     @numba.njit
     def test1(x, i):
         return x[1:3, i]
     assert test1(a, numpy.array([True, False, True])).tolist() == [[[9, 10, 11], [15, 16, 17]], [[18, 19, 20], [24, 25, 26]]]
     assert test1(a2, numpy.array([True, False, True])).tolist() == [[[9, 10, 11], [15, 16, 17]], [[18, 19, 20], [24, 25, 26]]]
     @numba.njit
     def test2(x, i, j):
         return x[1:3, i, j]
     assert test2.py_func(a, numpy.array([True, False, True]), numpy.array([True, True, False])).tolist() == [[9, 16], [18, 25]]
     assert test2(a2, numpy.array([True, False, True]), numpy.array([True, True, False])).tolist() == [[9, 16], [18, 25]]
     a = numpy.arange(27).reshape(3, 3, 3)
     a2 = awkward.fromiter(a)
     @numba.njit
     def test3(x, i, j):
         return x[i, j]
     assert test3.py_func(a, numpy.array([True, False, True]), numpy.array([True, True, False])).tolist() == [[0, 1, 2], [21, 22, 23]]
     assert test3(a2, numpy.array([True, False, True]), numpy.array([True, True, False])).tolist() == [[0, 1, 2], [21, 22, 23]]
     @numba.njit
     def test4(x, i, j):
         return x[i, :, j]
     assert test4.py_func(a, numpy.array([True, False, True]), numpy.array([True, True, False])).tolist() == [[0, 3, 6], [19, 22, 25]]
     assert test4(a2, numpy.array([True, False, True]), numpy.array([True, True, False])).tolist() == [[0, 3, 6], [19, 22, 25]]
Example #2
def _append_object(event_list, field):
    new_event_list = []
    for i in range(len(event_list)):
        event_list_i = awkward.fromiter(event_list[i])
        field_i = awkward.fromiter(field[i])
        new_event_list.append(awkward.concatenate([event_list_i, field_i], axis=1).tolist())
    return new_event_list
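
A minimal usage sketch for _append_object above, with made-up toy inputs, assuming awkward 0.x concatenate semantics along axis=1 (each inner list of `field` is appended to the matching inner list of `event_list`):

import awkward  # awkward 0.x

# Hypothetical toy data: two events, each holding jagged per-object lists.
event_list = [[[1, 2], [3]], [[4]]]
field = [[[9], [8]], [[7]]]

merged = _append_object(event_list, field)
# Expected (per awkward 0.x axis=1 concatenation): [[[1, 2, 9], [3, 8]], [[4, 7]]]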
Example #3
 def test_numba_getitem_jagged_intarray(self):
     a = JaggedArray.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
     a2 = JaggedArray.fromcounts([2, 0, 1], a)
     @numba.njit
     def test1(x, i):
         return x[i]
     assert test1(a, awkward.fromiter([[2, 0, 0], [], [1]])).tolist() == [[3.3, 1.1, 1.1], [], [5.5]]
     assert test1(a2, awkward.fromiter([[1, 0], [], [0]])).tolist() == [[[], [1.1, 2.2, 3.3]], [], [[4.4, 5.5]]]
     assert test1(a2, awkward.fromiter([[[2, 0, 0], []], [], [[1]]])).tolist() == [[[3.3, 1.1, 1.1], []], [], [[5.5]]]
Example #4
 def test_numba_getitem_jagged_boolarray(self):
     a = JaggedArray.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
     a2 = JaggedArray.fromcounts([2, 0, 1], a)
     @numba.njit
     def test1(x, i):
         return x[i]
     assert test1(a, awkward.fromiter([[True, False, True], [], [False, True]])).tolist() == [[1.1, 3.3], [], [5.5]]
     assert test1(a2, awkward.fromiter([[True, False], [], [True]])).tolist() == [[[1.1, 2.2, 3.3]], [], [[4.4, 5.5]]]
     assert test1(a2, awkward.fromiter([[[True, False, True], []], [], [[False, True]]])).tolist() == [[[1.1, 3.3], []], [], [[5.5]]]
Example #5
    def test_jagged_pad(self):
        a = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
        assert a.pad(4, clip=True).tolist() == [[1.1, 2.2, 3.3, None], [None, None, None, None], [4.4, 5.5, None, None]]
        assert a.pad(4, numpy.ma.masked, clip=True).regular().tolist() == [[1.1, 2.2, 3.3, None], [None, None, None, None], [4.4, 5.5, None, None]]

        assert a.pad(4).tolist() == [[1.1, 2.2, 3.3, None], [None, None, None, None], [4.4, 5.5, None, None]]
        assert a.pad(4, numpy.ma.masked).regular().tolist() == [[1.1, 2.2, 3.3, None], [None, None, None, None], [4.4, 5.5, None, None]]

        a = awkward.fromiter([[1.1, 2.2, 3.3, 4.4, 5.5], [], [6.6, 7.7, 8.8], [9.9]])
        assert a.pad(3).tolist() == [[1.1, 2.2, 3.3, 4.4, 5.5], [None, None, None], [6.6, 7.7, 8.8], [9.9, None, None]]
        assert a.pad(3, clip=True).tolist() == [[1.1, 2.2, 3.3], [None, None, None], [6.6, 7.7, 8.8], [9.9, None, None]]
Example #6
    def process(self, df):
        output = self.accumulator.identity()
        dataset = df['dataset']

        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
        )
        ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
        npfmu = (ljdautype==3).sum()
        ndsa = (ljdautype==8).sum()
        isegammajet = (npfmu==0)&(ndsa==0)
        ispfmujet = (npfmu>=2)&(ndsa==0)
        isdsajet = ndsa>0
        label = isegammajet.astype(int)*1+ispfmujet.astype(int)*2+isdsajet.astype(int)*3
        leptonjets.add_attributes(label=label)
        nmu = ((ljdautype==3)|(ljdautype==8)).sum()
        leptonjets.add_attributes(ismutype=(nmu>=2), iseltype=(nmu==0))
        ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        leptonjets.add_attributes(qsum=ljdaucharge)
        leptonjets.add_attributes(isneutral=(leptonjets.iseltype | (leptonjets.ismutype&(leptonjets.qsum==0))))
        leptonjets = leptonjets[leptonjets.isneutral]

        ## __ twoleptonjets__
        twoleptonjets = leptonjets.counts>=2
        dileptonjets = leptonjets[twoleptonjets]

        if dileptonjets.size==0: return output
        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        ## channel def ##
        singleMuljEvents = dileptonjets.ismutype.sum()==1
        muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
        channel_2mu2e = (singleMuljEvents&muljInLeading2Events).astype(int)*1

        doubleMuljEvents = dileptonjets.ismutype.sum()==2
        muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
        channel_4mu = (doubleMuljEvents&muljIsLeading2Events).astype(int)*2

        channel_ = channel_2mu2e + channel_4mu
        ###########

        output['invm_s'].fill(dataset=dataset, mass_s=(lj0+lj1).p4.mass[channel_>0].flatten())
        output['invm_m'].fill(dataset=dataset, mass_m=(lj0+lj1).p4.mass[channel_>0].flatten())
        output['invm_l'].fill(dataset=dataset, mass_l=(lj0+lj1).p4.mass[channel_>0].flatten())

        return output
Example #7
def test_event_selection_vectors():
    original_file = uproot.open('tests/vectors_tree_file.root')
    treename = 'tree'
    original_tree = original_file[treename]
    new_filename = tempfile.mkstemp(suffix='.root', dir=os.getcwd())[1]
    try:
        clone_tree(original_tree, new_filename, selection=[True, False, True])
        new_file = uproot.open(new_filename)
        new_tree = new_file[treename]
        assert new_tree['int_vector_branch'].array().tolist() == [[], [13]]
        assert abs(new_tree['float_vector_branch'].array() - awkward.fromiter([[], [15.15]])).max().max() < 1e-5
        assert abs(new_tree['double_vector_branch'].array() - awkward.fromiter([[], [16.16]])).max().max() < 1e-5
    finally:
        if os.path.isfile(new_filename):
            os.remove(new_filename)
Example #8
    def arrow_table(self, chunk_size, event_limit=sys.maxsize):
        def group(iterator, n):
            """
            Batch together chunks of events into a single yield
            :param iterator: Iterator from which events are drawn
            :param n: Number of events to include in each yield
            :return: Yields a list of n or fewer events
            """
            done = False
            while not done:
                results = []
                try:
                    for i in range(n):
                        results.append(next(iterator))
                    yield results
                except StopIteration:
                    done = True
                    yield results

        for events in group(self.event_iterator.iterate(event_limit),
                            chunk_size):
            object_array = awkward.fromiter(events)
            attr_dict = {}
            for attr_name in self.event_iterator.attr_name_list:
                branch_name = attr_name.split('.')[0].strip(' ')
                a_name = attr_name.split('.')[1]

                attr_dict[branch_name + '_' + a_name.strip('()')] = \
                    object_array[branch_name][a_name]

            object_table = awkward.Table(**attr_dict)
            yield awkward.toarrow(object_table)
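
The group() helper above is a generic batching pattern. Below is a standalone sketch on a plain Python iterator; the name batch and the guard that skips an empty final yield are additions for illustration, not part of the original:

def batch(iterator, n):
    """Yield lists of up to n items drawn from iterator (sketch of group() above)."""
    done = False
    while not done:
        results = []
        try:
            for _ in range(n):
                results.append(next(iterator))
            yield results
        except StopIteration:
            done = True
            if results:          # unlike group() above, do not yield an empty trailing batch
                yield results

# list(batch(iter(range(7)), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]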
Example #9
def ak_transpose(array):
    """
    Transposes the first two dimensions of an awkward array.
    Useful for (n_events x n_features) --> (n_features x n_events)
    or  (n_features x n_events) --> (n_events x n_features)
    """
    return ak.fromiter(array[:, i] for i in range(len(array[0])))
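
Hypothetical usage of ak_transpose, assuming a rectangular (n_events x n_features) awkward 0.x array; the values are made up:

import awkward as ak  # awkward 0.x

events = ak.fromiter([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])  # 3 events x 2 features
features = ak_transpose(events)
# features.tolist() -> [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]   # 2 features x 3 events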
Example #10
def load_root(file, ttname, ilo=None, ihi=None, brlist=None):
    """
    use pyroot to save decoded (uncompressed) MGTWaveforms,
    into awkward's hdf5 file object.
    this is to compare against uproot, which reads compressed wfs.
    """
    from ROOT import TFile, TTree, MGTWaveform, MJTMSWaveform

    tf = TFile(file)
    tt = tf.Get(ttname)
    nevt = tt.GetEntries()
    tt.GetEntry(0)
    is_ms = tt.run.GetUseMultisampling()

    # build w/ python primitive types and convert to JaggedArray after the loop.
    # JaggedArray requires one entry per event (have to handle multi-detector).
    br_list = ['fWaveforms', 'fAuxWaveforms', 'fMSWaveforms'
               ] if is_ms else ['fWaveforms']
    pyarrs = {br: [] for br in br_list}
    delim = 0xDEADBEEF

    # loop over tree
    ilo = 0 if ilo is None else ilo
    ihi = nevt if ihi is None else ihi

    for i in range(ilo, ihi):
        tt.GetEntry(i)
        nwf = tt.channelData.GetEntries()

        # concat each hit into a single array
        ewf, ewfa, ewfms = [], [], []
        for j in range(nwf):

            if is_ms:
                wf = tt.event.GetWaveform(j)
                wfa = tt.event.GetAuxWaveform(j)
                wfms = MJTMSWaveform(wf, wfa)
                ewf.extend([wf[k] for k in range(wf.GetLength())])
                ewfa.extend([wfa[k] for k in range(wfa.GetLength())])
                ewfms.extend(wfms[k] for k in range(wfms.GetLength()))
                ewf.append(delim)
                ewfa.append(delim)
                ewfms.append(delim)
            else:
                wf = tt.event.GetWaveform(j)
                ewf.extend([wf[k] for k in range(wf.GetLength())])
                ewf.append(delim)

        if is_ms:
            pyarrs['fWaveforms'].append(ewf)
            pyarrs['fAuxWaveforms'].append(ewfa)
            pyarrs['fMSWaveforms'].append(ewfms)
        else:
            pyarrs['fWaveforms'].append(ewf)

    uarrs = {}
    for wf in pyarrs.keys():
        uarrs[wf] = awkward.fromiter(pyarrs[wf])

    return uarrs
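
The conversion at the end of load_root follows a common pattern: grow plain Python lists per event, then convert once with awkward.fromiter. A ROOT-free sketch of that pattern with made-up waveform values:

import awkward  # awkward 0.x

pyarrs = {'fWaveforms': []}
for samples in ([1, 2, 3], [4, 5], [6, 7, 8, 9]):   # pretend per-event waveforms
    pyarrs['fWaveforms'].append(list(samples))

uarrs = {br: awkward.fromiter(vals) for br, vals in pyarrs.items()}
# uarrs['fWaveforms'] is a JaggedArray with one variable-length entry per event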
Example #11
 def test_numba_getitem_tuple_slice_intarray(self):
     a = numpy.arange(36).reshape(4, 3, 3)
     a2 = awkward.fromiter(a)
     @numba.njit
     def test1(x, i):
         return x[1:3, i]
     assert test1(a, numpy.array([1, 0, 2])).tolist() == [[[12, 13, 14], [9, 10, 11], [15, 16, 17]], [[21, 22, 23], [18, 19, 20], [24, 25, 26]]]
     assert test1(a2, numpy.array([1, 0, 2])).tolist() == [[[12, 13, 14], [9, 10, 11], [15, 16, 17]], [[21, 22, 23], [18, 19, 20], [24, 25, 26]]]
Example #12
    def test_physics_jetcleaning(self):
        jet_m = awkward.fromiter([[60.0, 70.0, 80.0], [], [90.0, 100.0]])

        jet_pt = awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]])
        e_pt = awkward.fromiter([[20.2, 50.5], [50.5], [50.5]])

        jet_eta = awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]])
        e_eta = awkward.fromiter([[-2.2, 0.0], [0.0], [1.1]])

        jet_phi = awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]])
        e_phi = awkward.fromiter([[0.1, 0.78], [0.78], [-0.77]])

        jets = uproot_methods.TLorentzVectorArray.from_ptetaphim(
            jet_pt, jet_eta, jet_phi, jet_m)
        electrons = uproot_methods.TLorentzVectorArray.from_ptetaphim(
            e_pt, e_eta, e_phi, 0.000511)

        combinations = jets.cross(electrons, nested=True)

        def delta_r(one, two):
            return one.delta_r(two)

        assert (~(delta_r(combinations.i0, combinations.i1) < 0.5).any()
                ).tolist() == [[True, False, True], [], [True, False]]

        (jets[~(delta_r(combinations.i0, combinations.i1) < 0.5).any()])
Example #13
 def test_issue367(self):
     t = uproot.open("tests/samples/issue367.root")["tree"]
     assert awkward.fromiter(
         t.array("weights.second"))[0].counts.tolist() == [
             1000, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
             10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1000, 1000,
             1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
             100, 100, 100, 1
         ]
Example #14
def merge_npzs_to_ak(rawdir, outfile=None, nmax=None):
    """
    Loops over all .npz files in rawdir, stacks all events into an ak array,
    and dumps it to a file.
    """
    if outfile is None: outfile = osp.dirname(rawdir) + '/merged.awkd'
    bbefp.logger.info(f'Merging {rawdir} --> {outfile}')
    merged = ak.fromiter(_iter_npzs(rawdir, nmax))
    ak.save(outfile, ak_transpose(merged))
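
A hedged sketch of the .awkd save step used above, assuming awkward 0.x, where a single array written with ak.save can be read back with ak.load; the file name and values are made up:

import awkward as ak  # awkward 0.x

merged = ak.fromiter([[1.1, 2.2], [], [3.3]])
ak.save('merged_example.awkd', merged, mode='w')   # mode='w' overwrites, as in Example #24
restored = ak.load('merged_example.awkd')
# restored.tolist() should give back [[1.1, 2.2], [], [3.3]]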
Example #15
def test_floating_point_vectors():
    original_file = uproot.open('tests/floating_point_vectors_tree_file.root')
    treename = 'tree'
    original_tree = original_file[treename]
    new_filename = tempfile.mkstemp(suffix='.root', dir=os.getcwd())[1]
    try:
        clone_tree(original_tree, new_filename)
        new_file = uproot.open(new_filename)
        new_tree = new_file[treename]
        assert new_tree['float_vector_branch'].array()[0].dtype == np.dtype('float32')
        assert abs(new_tree['float_vector_branch'].array() - awkward.fromiter([[],
                                                                               [-31.31, 32.32, 33.33],
                                                                               [-47.47]])).max().max() < 1e-5
        assert new_tree['double_vector_branch'].array()[0].dtype == np.dtype('float64')
        assert abs(new_tree['double_vector_branch'].array() - awkward.fromiter([[],
                                                                                [-34.34, 35.35, 36.36],
                                                                                [-48.48]])).max().max() < 1e-5
    finally:
        if os.path.isfile(new_filename):
            os.remove(new_filename)
Example #16
    def test_jagged_cross_argnested(self):
        a = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
        b = awkward.fromiter([[100, 200], [300], [400]])
        c = awkward.fromiter([[999], [999], [999, 888]])

        assert a.cross(b).tolist() == [[(1.1, 100), (1.1, 200), (2.2, 100), (2.2, 200), (3.3, 100), (3.3, 200)], [], [(4.4, 400), (5.5, 400)]]
        assert a.argcross(b).tolist() == [[(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)], [], [(0, 0), (1, 0)]]
        assert a.cross(b, nested=True).tolist() == [[[(1.1, 100), (1.1, 200)], [(2.2, 100), (2.2, 200)], [(3.3, 100), (3.3, 200)]], [], [[(4.4, 400)], [(5.5, 400)]]]
        assert a.argcross(b, nested=True).tolist() == [[[(0, 0), (0, 1)], [(1, 0), (1, 1)], [(2, 0), (2, 1)]], [], [[(0, 0)], [(1, 0)]]]

        assert a.cross(b, nested=True).cross(c, nested=True).tolist()[0] == [[[(ai, bi, ci) for ci in c[0]] for bi in b[0]] for ai in a[0]]
        assert a.cross(b, nested=True).cross(c, nested=True).tolist()[1] == [[[(ai, bi, ci) for ci in c[1]] for bi in b[1]] for ai in a[1]]
        assert a.cross(b, nested=True).cross(c, nested=True).tolist()[2] == [[[(ai, bi, ci) for ci in c[2]] for bi in b[2]] for ai in a[2]]

        assert a.cross(b).cross(c).tolist() == [[(1.1, 100, 999), (1.1, 200, 999), (2.2, 100, 999), (2.2, 200, 999), (3.3, 100, 999), (3.3, 200, 999)], [], [(4.4, 400, 999), (4.4, 400, 888), (5.5, 400, 999), (5.5, 400, 888)]]
        assert a.cross(b, nested=True).cross(c).tolist() == [[[(1.1, 100, 999), (1.1, 200, 999)], [(2.2, 100, 999), (2.2, 200, 999)], [(3.3, 100, 999), (3.3, 200, 999)]], [], [[(4.4, 400, 999), (4.4, 400, 888)], [(5.5, 400, 999), (5.5, 400, 888)]]]
        assert a.cross(b).cross(c, nested=True).tolist() == [[[(1.1, 100, 999)], [(1.1, 200, 999)], [(2.2, 100, 999)], [(2.2, 200, 999)], [(3.3, 100, 999)], [(3.3, 200, 999)]], [], [[(4.4, 400, 999), (4.4, 400, 888)], [(5.5, 400, 999), (5.5, 400, 888)]]]

        a = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
        b = awkward.fromiter([[100, 200], [300], [400]])
        c = awkward.fromiter([[999], [999], [999, 888, 777]])

        assert a.cross(b, nested=True).cross(c, nested=True).tolist()[0] == [[[(ai, bi, ci) for ci in c[0]] for bi in b[0]] for ai in a[0]]
        assert a.cross(b, nested=True).cross(c, nested=True).tolist()[1] == [[[(ai, bi, ci) for ci in c[1]] for bi in b[1]] for ai in a[1]]
        assert a.cross(b, nested=True).cross(c, nested=True).tolist()[2] == [[[(ai, bi, ci) for ci in c[2]] for bi in b[2]] for ai in a[2]]

        assert a.cross(b).cross(c).tolist() == [[(1.1, 100, 999), (1.1, 200, 999), (2.2, 100, 999), (2.2, 200, 999), (3.3, 100, 999), (3.3, 200, 999)], [], [(4.4, 400, 999), (4.4, 400, 888), (4.4, 400, 777), (5.5, 400, 999), (5.5, 400, 888), (5.5, 400, 777)]]
        assert a.cross(b, nested=True).cross(c).tolist() == [[[(1.1, 100, 999), (1.1, 200, 999)], [(2.2, 100, 999), (2.2, 200, 999)], [(3.3, 100, 999), (3.3, 200, 999)]], [], [[(4.4, 400, 999), (4.4, 400, 888), (4.4, 400, 777)], [(5.5, 400, 999), (5.5, 400, 888), (5.5, 400, 777)]]]
        assert a.cross(b).cross(c, nested=True).tolist() == [[[(1.1, 100, 999)], [(1.1, 200, 999)], [(2.2, 100, 999)], [(2.2, 200, 999)], [(3.3, 100, 999)], [(3.3, 200, 999)]], [], [[(4.4, 400, 999), (4.4, 400, 888), (4.4, 400, 777)], [(5.5, 400, 999), (5.5, 400, 888), (5.5, 400, 777)]]]
Example #17
 def test_jagged_zip(self):
     a = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
     b = awkward.JaggedArray([1, 5, 5], [4, 5, 7], [999, 10, 20, 30, 999, 40, 50, 999])
     c = numpy.array([100, 200, 300])
     d = 1000
     assert awkward.JaggedArray.zip(one=a, two=b).tolist() == [[{"one": 1.1, "two": 10}, {"one": 2.2, "two": 20}, {"one": 3.3, "two": 30}], [], [{"one": 4.4, "two": 40}, {"one": 5.5, "two": 50}]]
     assert awkward.JaggedArray.zip(one=b, two=a).tolist() == [[{"one": 10, "two": 1.1}, {"one": 20, "two": 2.2}, {"one": 30, "two": 3.3}], [], [{"one": 40, "two": 4.4}, {"one": 50, "two": 5.5}]]
     assert awkward.JaggedArray.zip(one=b, two=c).tolist() == [[{"one": 10, "two": 100}, {"one": 20, "two": 100}, {"one": 30, "two": 100}], [], [{"one": 40, "two": 300}, {"one": 50, "two": 300}]]
     assert awkward.JaggedArray.zip(one=b, two=d).tolist() == [[{"one": 10, "two": 1000}, {"one": 20, "two": 1000}, {"one": 30, "two": 1000}], [], [{"one": 40, "two": 1000}, {"one": 50, "two": 1000}]]
     assert a.zip(b).tolist() == [[(1.1, 10), (2.2, 20), (3.3, 30)], [], [(4.4, 40), (5.5, 50)]]
     assert b.zip(a).tolist() == [[(10, 1.1), (20, 2.2), (30, 3.3)], [], [(40, 4.4), (50, 5.5)]]
     assert b.zip(c).tolist() == [[(10, 100), (20, 100), (30, 100)], [], [(40, 300), (50, 300)]]
     assert b.zip(d).tolist() == [[(10, 1000), (20, 1000), (30, 1000)], [], [(40, 1000), (50, 1000)]]
Example #18
def NestNestObjArrayToJagged(objarr):
    """uproot reads a vector<vector<number>> TBranch
       as an ObjectArray; this function converts it
       to a nested JaggedArray (JaggedJaggedArray).
    """

#     # jaggedArray of lists
#     jaggedList = JaggedArray.fromiter(objarr)
#     # flat to 1 level
#     _jagged = JaggedArray.fromiter(jaggedList.content)

#     return JaggedArray.fromoffsets(jaggedList.offsets, _jagged)
    return awkward.fromiter(objarr)
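
Hypothetical usage with made-up values; a materialized vector<vector<number>> branch behaves like a list of lists of lists:

import awkward  # awkward 0.x

objarr = [[[1, 2], [3]], [], [[4, 5, 6]]]          # stand-in for the ObjectArray contents
jagged2 = NestNestObjArrayToJagged(objarr)
# jagged2.tolist() -> [[[1, 2], [3]], [], [[4, 5, 6]]], now as nested JaggedArrays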
Example #19
def mean_fname(fname, i):
    if useramdisk:
        _cp_xrootd_to_ramdisk(fname)
    means = {key: [] for key in all_branches}
    varis = {key: [] for key in all_branches}
    for arrays in uproot.iterate(fname,
                                 treename,
                                 branches + other_branches,
                                 namedecode="utf-8",
                                 entrysteps=entrysteps):
        for key in arrays:
            # convert vector<vector<T>> (ObjectArray by default) into nested JaggedArray
            if isinstance(arrays[key], awkward.ObjectArray):
                arrays[key] = awkward.fromiter(arrays[key])
        arrays, keep = build_truth(arrays, fname)
        for key in arrays:
            arrays[key] = arrays[key][keep]

        arrays = transform(arrays)
        for key in arrays:
            if key not in all_branches: continue
            # skip jet_daughter since it was renamed
            if 'jet_daughter_' in key: continue
            a = arrays[key]
            while isinstance(a, awkward.JaggedArray):
                a = a.flatten()
            if a.size == 0: continue
            # remove NaNs, but should understand why they happen
            a = a[~np.isnan(a)]
            a = a[~np.isinf(a)]
            m = a.mean()
            v = a.var()
            if np.isnan(m):
                logging.error(f'NaN found: {key}')
                print(fname)
                raise ValueError
            elif np.isinf(m):
                logging.error(f'Inf found: {key}')
                print(fname)
                raise ValueError
            else:
                # protection against empty arrays
                means[key] += [m]
                varis[key] += [v]
    if useramdisk:
        _rm_ramdisk(fname)
    return {'means': means, 'varis': varis}
Example #20
    def test_physics_matching(self):
        gen_pt = awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]])
        reco_pt = awkward.fromiter([[20.2, 10.1, 30.3, 50.5], [50.5], [50.5]])

        gen_eta = awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]])
        reco_eta = awkward.fromiter([[-2.2, -3.3, 2.2, 0.0], [0.0], [1.1]])

        gen_phi = awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]])
        reco_phi = awkward.fromiter([[0.1, -1.4, 1.4, 0.78], [0.78], [-0.77]])

        gen = uproot_methods.TLorentzVectorArray.from_ptetaphim(
            gen_pt, gen_eta, gen_phi, 0.2)
        reco = uproot_methods.TLorentzVectorArray.from_ptetaphim(
            reco_pt, reco_eta, reco_phi, 0.2)

        ("gen", gen)
        ("reco", reco)

        ("gen.cross(reco)", gen.cross(reco))

        pairing = gen.cross(reco, nested=True)
        ("pairing = gen.cross(reco, nested=True)", gen.cross(reco,
                                                             nested=True))

        metric = pairing.i0.delta_r(pairing.i1)
        ("metric = pairing.i0.delta_r(pairing.i1)", metric)

        index_of_minimized = metric.argmin()
        ("index_of_minimized = metric.argmin()", index_of_minimized)
        assert index_of_minimized.tolist() == [[[1], [0], [2]], [], [[0], [0]]]

        ("metric[index_of_minimized]", metric[index_of_minimized])

        passes_cut = (metric[index_of_minimized] < 0.5)
        ("passes_cut = (metric[index_of_minimized] < 0.5)", passes_cut)
        assert passes_cut.tolist() == [[[True], [True], [True]], [],
                                       [[False], [True]]]

        best_pairings_that_pass_cut = pairing[index_of_minimized][passes_cut]
        ("best_pairings_that_pass_cut = pairing[index_of_minimized][passes_cut]",
         best_pairings_that_pass_cut)

        genrecos = best_pairings_that_pass_cut.flatten(axis=1)
        ("genrecos = best_pairings_that_pass_cut.flatten(axis=1)", genrecos)

        ("genrecos.counts", genrecos.counts)
        ("gen.counts", gen.counts)
        assert genrecos.counts.tolist() == [3, 0, 1]
        assert gen.counts.tolist() == [3, 0, 2]

        ("genrecos.i0.pt", genrecos.i0.pt)
        ("genrecos.i1.pt", genrecos.i1.pt)
Example #21
        def make_jets_and_electrons():
            jet_m = awkward.fromiter([[60.0, 70.0, 80.0], [], [90.0, 100.0]])

            jet_pt = awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]])
            e_pt = awkward.fromiter([[20.2, 50.5], [50.5], [50.5]])

            jet_eta = awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]])
            e_eta = awkward.fromiter([[-2.2, 0.0], [0.0], [1.1]])

            jet_phi = awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]])
            e_phi = awkward.fromiter([[0.1, 0.78], [0.78], [-0.77]])

            jets = uproot_methods.TLorentzVectorArray.from_ptetaphim(
                jet_pt, jet_eta, jet_phi, jet_m)
            electrons = uproot_methods.TLorentzVectorArray.from_ptetaphim(
                e_pt, e_eta, e_phi, 0.000511)

            return jets, electrons
Example #22
    def __next__(self):
        if self._batches is None:
            raise Exception(
                'Please set entrysteps using the set_entrysteps() function before use!'
            )

        if self._current_index >= len(self._batches):
            self._current_index = 0
            raise StopIteration

        sub_table = pa.Table.from_batches(
            self._batches[self._current_index:self._current_index + 1])
        self._current_index = self._current_index + 1

        chunk = sub_table.to_pydict()

        for item in chunk:
            chunk[item] = awkward.fromiter(chunk[item])

        return chunk
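
A toy, in-memory sketch of the Arrow-to-awkward step in __next__ above, assuming pyarrow and awkward 0.x; the table contents are made up:

import pyarrow as pa
import awkward  # awkward 0.x

table = pa.table({'hits': [[1, 2], [], [3, 4, 5]]})
sub_table = pa.Table.from_batches(table.to_batches()[0:1])
chunk = sub_table.to_pydict()
for item in chunk:
    chunk[item] = awkward.fromiter(chunk[item])
# chunk['hits'] is now a JaggedArray: [[1, 2], [], [3, 4, 5]]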
Example #23
def test_new_vector_branch():
    original_file = uproot.open('tests/vectors_tree_file.root')
    treename = 'tree'
    original_tree = original_file[treename]
    new_filename = tempfile.mkstemp(suffix='.root', dir=os.getcwd())[1]
    try:
        new_branch_dictionary = {'new_int16_vector_branch': awkward.fromiter([[-1], [-2, 3], []]).astype(np.dtype('int16')),
                                 'new_int32_vector_branch': awkward.fromiter([[-4], [-5, 6], []]).astype(np.dtype('int32')),
                                 'new_float32_vector_branch': awkward.fromiter([[-7.7],
                                                                                [-8.8, 9.9],
                                                                                []]).astype(np.dtype('float32')),
                                 'new_float64_vector_branch': awkward.fromiter([[-10.10],
                                                                                [-11.11, 12.12],
                                                                                []]).astype(np.dtype('float64'))}
        clone_tree(original_tree, new_filename, new_branches=new_branch_dictionary)
        new_file = uproot.open(new_filename)
        new_tree = new_file[treename]
        assert new_tree['int_vector_branch'].array().tolist() == [[], [-1, 2, 3], [13]]
        assert abs(new_tree['float_vector_branch'].array() - awkward.fromiter([[],
                                                                               [-7.7, 8.8, 9.9],
                                                                               [15.15]])).max().max() < 1e-5
        assert abs(new_tree['double_vector_branch'].array() - awkward.fromiter([[],
                                                                                [-10.10, 11.11, 12.12],
                                                                                [16.16]])).max().max() < 1e-5
        assert new_tree['new_int16_vector_branch'].array()[0].dtype == np.dtype('int16')
        assert new_tree['new_int16_vector_branch'].array().tolist() == [[-1], [-2, 3], []]
        assert new_tree['new_int32_vector_branch'].array()[0].dtype == np.dtype('int32')
        assert new_tree['new_int32_vector_branch'].array().tolist() == [[-4], [-5, 6], []]
        assert new_tree['new_float32_vector_branch'].array()[0].dtype == np.dtype('float32')
        assert abs(new_tree['new_float32_vector_branch'].array() - awkward.fromiter([[-7.7],
                                                                                     [-8.8, 9.9],
                                                                                     []])).max().max() < 1e-5
        assert new_tree['new_float64_vector_branch'].array()[0].dtype == np.dtype('float64')
        assert abs(new_tree['new_float64_vector_branch'].array() - awkward.fromiter([[-10.10],
                                                                                     [-11.11, 12.12],
                                                                                     []])).max().max() < 1e-5
    finally:
        if os.path.isfile(new_filename):
            os.remove(new_filename)
Example #24
def main():
    """ Loop over all combinations of mass and width. """
    args = parse_input()
    masses = np.linspace(
        args.mass_min, args.mass_max,
        int(np.ceil((args.mass_max - args.mass_min) / args.mass_step)) + 1)
    widths = np.linspace(
        args.width_min, args.width_max,
        int(np.ceil((args.width_max - args.width_min) / args.width_step)) + 1)

    def generator():
        with tqdm.tqdm(unit='event',
                       total=masses.size * widths.size * args.nevents,
                       desc='Generating') as pbar:
            for mass in masses:
                for width in widths:
                    yield from run(args.nevents, mass, width)
                    pbar.update(args.nevents)

    events = ak.fromiter(generator())
    ak.save('events.awkd', events, mode='w')
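
A worked sketch of the mass/width grid construction above; the numbers are hypothetical:

import numpy as np

mass_min, mass_max, mass_step = 100.0, 101.0, 0.25
n_points = int(np.ceil((mass_max - mass_min) / mass_step)) + 1   # 4 + 1 = 5
masses = np.linspace(mass_min, mass_max, n_points)
# masses -> [100.0, 100.25, 100.5, 100.75, 101.0]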
Example #25
def gen_reco_TLV():
    gen_pt = awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]])
    reco_pt = awkward.fromiter([[20.2, 10.1, 30.3, 50.5], [50.5], [60]])

    gen_eta = awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]])
    reco_eta = awkward.fromiter([[-2.2, -3.3, 2.2, 0.0], [0.0], [1.1]])

    gen_phi = awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]])
    reco_phi = awkward.fromiter([[0.1, -1.4, 1.4, 0.78], [0.78], [-0.77]])

    gen = uproot_methods.TLorentzVectorArray.from_ptetaphim(
        gen_pt, gen_eta, gen_phi, 0.2)
    reco = uproot_methods.TLorentzVectorArray.from_ptetaphim(
        reco_pt, reco_eta, reco_phi, 0.2)

    return (gen, reco)
Example #26
    def event(self, chunk):

        # get the data
        Hit_Chan, Hit_Start_Tick = chunk.tree.arrays(
            ['Hit_Chan', 'Hit_Start_Tick'], outputtype=tuple)

        # apply a mask if required
        if self.mask:
            mask = chunk.tree.array(self.mask)
            Hit_Chan = Hit_Chan[mask]
            Hit_Start_Tick = Hit_Start_Tick[mask]

        clusterNHits = []

        # loop over events
        for chans, starts in zip(Hit_Chan, Hit_Start_Tick):

            # channels with hits in this event
            print("Hit channels : ", chans)

            # find the clusters
            clusters = np.split(chans, np.where(np.diff(chans) != 1)[0] + 1)

            # this will store a variable for each cluster
            n_hits_per_clus = []

            # loop over clusters
            for i, clus in zip(range(len(clusters)), clusters):
                print(i, clus)
                n_hits_per_clus.append(len(clus))

            # add the list of cluster variables to the output list
            clusterNHits.append(n_hits_per_clus)

        # store the new cluster variable in the event
        chunk.tree.new_variable(self.out_var, awkward.fromiter(clusterNHits))

        return True
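
The cluster finding in event() relies on a standard NumPy idiom: split wherever consecutive channel numbers are not adjacent. A standalone sketch with made-up channels:

import numpy as np

chans = np.array([3, 4, 5, 9, 10, 42])                        # hypothetical hit channels
clusters = np.split(chans, np.where(np.diff(chans) != 1)[0] + 1)
# clusters -> [array([3, 4, 5]), array([ 9, 10]), array([42])]
n_hits_per_clus = [len(clus) for clus in clusters]            # [3, 2, 1]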
Example #27
    def write_root_file(self, particle_events, file_handle):
        print("Getting AWkward..")
        a = awkward.fromiter(particle_events)
        print("Writing ROOT file ...")
        file_handle["EVENT_NTUPLE"].extend({
            "pulse_height":
            a.contents["pulse_height"],
            "chan":
            a.contents["chan"],
            "timestamp":
            a.contents["timestamp"],
            "hit_count":
            a.contents["hit_count"]
        })
        #    file_handle["EVENT_NTUPLE"]["pulse_height"].newbasket(a.contents["pulse_height"])
        #    file_handle["EVENT_NTUPLE"]["chan"].newbasket(a.contents["chan"])
        #    file_handle["EVENT_NTUPLE"]["timestamp"].newbasket(a.contents["timestamp"])
        #    file_handle["EVENT_NTUPLE"]["hit_count"].newbasket(a.contents["hit_count"])

        # For more info on root_pandas : https://github.com/scikit-hep/root_pandas
        #pd_particle_events.to_root(output_filename, key='EVENT_NTUPLE')  # write out pandas dataframe to ROOT file, yup, that's it...

        return 0
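
write_root_file expects file_handle["EVENT_NTUPLE"] to already exist. A hedged sketch of creating it, assuming the uproot 3.x writing API (uproot.recreate / uproot.newtree); branch dtypes and values are made up:

import numpy as np
import uproot  # uproot 3.x

file_handle = uproot.recreate("example_events.root")
file_handle["EVENT_NTUPLE"] = uproot.newtree({
    "pulse_height": np.int32,
    "chan": np.int32,
    "timestamp": np.float64,
    "hit_count": np.int32,
})
file_handle["EVENT_NTUPLE"].extend({
    "pulse_height": np.array([100, 250], dtype=np.int32),
    "chan": np.array([0, 1], dtype=np.int32),
    "timestamp": np.array([1.5, 2.5]),
    "hit_count": np.array([1, 1], dtype=np.int32),
})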
Example #28
def get_histograms(list_of_files_, variable_list_, cuts_to_apply_=None):

    hist = OrderedDict()
    counts = OrderedDict()
    for sample in list_of_files_:
        hist[sample] = OrderedDict()
        counts[sample] = OrderedDict()
        for tree_name in list_of_files_[sample]['trees']:
            print('\nReserving Histograms for:', sample, tree_name)
            hist[sample][tree_name] = OrderedDict()
            counts[sample][tree_name] = OrderedDict()
            # Reserve histograms
            hist[sample][tree_name]['MET'] = rt.TH1D(
                'MET_' + sample + '_' + tree_name, 'E_{T}^{miss} [GeV]', 500,
                0, 1000)
            hist[sample][tree_name]['S_Flavor_jet'] = rt.TH1D(
                'S_Flavor_jet_' + sample + '_' + tree_name, 'Flavor S jets',
                20, 0, 20)
            hist[sample][tree_name]['ISR_Flavor_jet'] = rt.TH1D(
                'ISR_Flavor_jet_' + sample + '_' + tree_name,
                'Flavor ISR jets', 20, 0, 20)
            hist[sample][tree_name]['S_Flavor_lep'] = rt.TH1D(
                'S_Flavor_lep_' + sample + '_' + tree_name, 'Flavor S leps',
                20, 0, 20)
            hist[sample][tree_name]['ISR_Flavor_lep'] = rt.TH1D(
                'ISR_Flavor_lep_' + sample + '_' + tree_name,
                'Flavor ISR leps', 20, 0, 20)

            hist[sample][tree_name]['Lep_to_Charge'] = rt.TH2D(
                'Lep_to_Charge_' + sample + '_' + tree_name,
                'lep Flavor to Charge', 20, 0, 20, 5, -2, 2)
            hist[sample][tree_name]['Lep_to_Lep'] = rt.TH2D(
                'Lep_to_Lep_' + sample + '_' + tree_name,
                '2leps to 2 opp leps', 2, 0, 2, 2, 0, 2)

            hist[sample][tree_name]['RISR'] = rt.TH1D(
                'risr_' + sample + '_' + tree_name, 'RISR', 500, 0, 2)
            hist[sample][tree_name]['PTISR'] = rt.TH1D(
                'ptisr_' + sample + '_' + tree_name, 'p_{T} ISR [GeV]', 500, 0,
                1000)
            hist[sample][tree_name]['PTCM'] = rt.TH1D(
                'ptcm_' + sample + '_' + tree_name, 'p_{T} CM [GeV]', 500, 0,
                1000)

            hist[sample][tree_name]['RISR_PTISR'] = rt.TH2D(
                'RISR_PTISR_' + sample + '_' + tree_name, 'RISR_PTISR', 500, 0,
                2, 500, 0, 1000)

            hist[sample][tree_name]['RISR_PTCM'] = rt.TH2D(
                'RISR_PTCM_' + sample + '_' + tree_name, 'RISR_PTCM', 500, 0,
                2, 500, 0, 1000)

            hist[sample][tree_name]['PTCM_div_PTISR'] = rt.TH1D(
                'PTCM_div_PTISR_' + sample + '_' + tree_name, 'PTCM_div_PTISR',
                500, 0, 1)
            hist[sample][tree_name]['dphi_PTCM_div_PTISR'] = rt.TH2D(
                'dphi_PTCM_div_PTISR_' + sample + '_' + tree_name,
                'dphi_PTCM_div_PTISR', 500, 0, np.pi, 500, 0, 1)
            hist[sample][tree_name]['dphi_PTCM'] = rt.TH2D(
                'dphi_PTCM_' + sample + '_' + tree_name, 'dphi_PTCM', 500, 0,
                np.pi, 500, 0, 1000)

            hist[sample][tree_name]['PTISR_PTCM'] = rt.TH2D(
                'PTISR_PTCM_' + sample + '_' + tree_name, 'PTISR_PTCM', 500, 0,
                1000, 500, 0, 1000)

            hist[sample][tree_name]['S_ISR_N_jet'] = rt.TH2D(
                'S_ISR_N_jet_' + sample + '_' + tree_name, 'N jet, S-ISR', 15,
                0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_lep'] = rt.TH2D(
                'S_ISR_N_lep_' + sample + '_' + tree_name, 'N lep, S-ISR', 15,
                0, 15, 15, 0, 15)

            hist[sample][tree_name]['S_ISR_N_loose_jet'] = rt.TH2D(
                'S_ISR_N_loose_jet_' + sample + '_' + tree_name,
                'N loose S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_medium_jet'] = rt.TH2D(
                'S_ISR_N_medium_jet_' + sample + '_' + tree_name,
                'N medium S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_tight_jet'] = rt.TH2D(
                'S_ISR_N_tight_jet_' + sample + '_' + tree_name,
                'N tight S-ISR', 15, 0, 15, 15, 0, 15)

            hist[sample][tree_name]['RISR_N_jet'] = rt.TH2D(
                'RISR_N_jet_' + sample + '_' + tree_name, 'RISR N jet', 500, 0,
                2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_lep'] = rt.TH2D(
                'RISR_N_lep_' + sample + '_' + tree_name, 'RISR N lep', 500, 0,
                2, 20, 0, 20)

            hist[sample][tree_name]['RISR_N_S_jet'] = rt.TH2D(
                'RISR_N_S_jet_' + sample + '_' + tree_name, 'RISR N S jet',
                500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_S_lep'] = rt.TH2D(
                'RISR_N_S_lep_' + sample + '_' + tree_name, 'RISR N S lep',
                500, 0, 2, 20, 0, 20)

            hist[sample][tree_name]['RISR_N_ISR_jet'] = rt.TH2D(
                'RISR_N_ISR_jet_' + sample + '_' + tree_name, 'RISR N ISR jet',
                500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_ISR_lep'] = rt.TH2D(
                'RISR_N_ISR_lep_' + sample + '_' + tree_name, 'RISR N ISR lep',
                500, 0, 2, 20, 0, 20)

            hist[sample][tree_name]['PTISR_N_jet'] = rt.TH2D(
                'PTISR_N_jet_' + sample + '_' + tree_name, 'PTISR N jet', 500,
                0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_lep'] = rt.TH2D(
                'PTISR_N_lep_' + sample + '_' + tree_name, 'PTISR N lep', 500,
                0, 1000, 20, 0, 20)

            hist[sample][tree_name]['PTISR_N_S_jet'] = rt.TH2D(
                'PTISR_N_S_jet_' + sample + '_' + tree_name, 'PTISR N S jet',
                500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_S_lep'] = rt.TH2D(
                'PTISR_N_S_lep_' + sample + '_' + tree_name, 'PTISR N S lep',
                500, 0, 1000, 20, 0, 20)

            hist[sample][tree_name]['PTISR_N_ISR_jet'] = rt.TH2D(
                'PTISR_N_ISR_jet_' + sample + '_' + tree_name,
                'PTISR N ISR jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_ISR_lep'] = rt.TH2D(
                'PTISR_N_ISR_lep_' + sample + '_' + tree_name,
                'PTISR N ISR lep', 500, 0, 1000, 20, 0, 20)

            hist[sample][tree_name]['PTCM_N_jet'] = rt.TH2D(
                'PTCM_N_jet_' + sample + '_' + tree_name, 'PTCM N jet', 500, 0,
                1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_lep'] = rt.TH2D(
                'PTCM_N_lep_' + sample + '_' + tree_name, 'PTCM N lep', 500, 0,
                1000, 20, 0, 20)

            hist[sample][tree_name]['PTCM_N_S_jet'] = rt.TH2D(
                'PTCM_N_S_jet_' + sample + '_' + tree_name, 'PTCM N S jet',
                500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_S_lep'] = rt.TH2D(
                'PTCM_N_S_lep_' + sample + '_' + tree_name, 'PTCM N S lep',
                500, 0, 1000, 20, 0, 20)

            hist[sample][tree_name]['PTCM_N_ISR_jet'] = rt.TH2D(
                'PTCM_N_ISR_jet_' + sample + '_' + tree_name, 'PTCM N ISR jet',
                500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_ISR_lep'] = rt.TH2D(
                'PTCM_N_ISR_lep_' + sample + '_' + tree_name, 'PTCM N ISR lep',
                500, 0, 1000, 20, 0, 20)

        i_entries = 0
        for itree, in_tree in enumerate(list_of_files_[sample]['trees']):
            for events in ur.tree.iterate(list_of_files_[sample]['files'],
                                          in_tree,
                                          branches=variable_list_,
                                          entrysteps=10000):
                print('\nGetting Histograms for:', sample, tree_name)
                print('tree: ', itree + 1)
                i_entries += 10000
                print(i_entries)

                print(events)
                pt_jet = events[b'PT_jet']
                flavor_jet = events[b'Flavor_jet']
                isr_index_jet = events[b'index_jet_ISR']
                s_index_jet = events[b'index_jet_S']
                bjet_tag = events[b'Btag_jet']

                pt_lep = events[b'PT_lep']
                ch_lep = events[b'Charge_lep']
                id_lep = events[b'ID_lep']
                pdgid_lep = events[b'PDGID_lep']
                isr_index_lep = events[b'index_lep_ISR']
                s_index_lep = events[b'index_lep_S']

                met = events[b'MET']
                risr = aw.fromiter(events[b'RISR'])
                ptisr = events[b'PTISR']
                ptcm = events[b'PTCM']
                dphi = events[b'dphiCMI']
                weight = events[b'weight']

                len_jet = pt_jet.stops - pt_jet.starts
                max_n_jets = np.amax(len_jet)

                # pt_jet = ([np.pad(jets, (0, max_n_jets - len(jets)), 'constant', constant_values=np.nan) for jets in pt_jet])
                # flavor_jet = ([np.pad(jets, (0, max_n_jets - len(jets)), 'constant', constant_values=np.nan) for jets in flavor_jet])
                # bjet_tag = ([np.pad(jets, (0, max_n_jets - len(jets)), 'constant', constant_values=np.nan) for jets in bjet_tag])

                len_lep = pt_lep.stops - pt_lep.starts
                max_n_leps = np.amax(len_lep)

                # pt_lep = ([np.pad(leps, (0, max_n_leps - len(leps)), 'constant', constant_values=np.nan) for leps in pt_lep])
                # ch_lep = ([np.pad(leps, (0, max_n_leps - len(leps)), 'constant', constant_values=0) for leps in ch_lep])
                # pdgid_lep = ([np.pad(leps, (0, max_n_leps - len(leps)), 'constant', constant_values=np.nan) for leps in pdgid_lep])
                only_2_leps = ([
                    True if lep == 2 else False for lep in len_lep
                ])
                only_2_opp_leps = ([
                    True if lep == 2 and len(charge[charge > 0]) > 0
                    and len(charge[charge < 0]) > 0 else False
                    # zip over the lepton counts (len_lep), not the boolean flags
                    for lep, charge in zip(len_lep, ch_lep)
                ])

                isr_index_jet = np.array(isr_index_jet)
                s_index_jet = np.array(s_index_jet)
                isr_index_lep = np.array(isr_index_lep)
                s_index_lep = np.array(s_index_lep)

                risr = risr[:, 1]
                isr_index_jet = isr_index_jet[:, 1]
                s_index_jet = s_index_jet[:, 1]
                isr_index_lep = isr_index_lep[:, 1]
                s_index_lep = s_index_lep[:, 1]
                ptcm = ptcm.content[:, 1]
                dphi = dphi.content[:, 1]

                # risr_lepV_jetI = risr[:,0]
                # risr_lepV_jetA = risr[:,1]
                # risr_lepA_jetA = risr[:,2]

                print('\ncreating masks and weights')
                print('-> bjet masks')
                loose_mask = bjet_tag > 0.5426
                medium_mask = bjet_tag > 0.8484
                tight_mask = bjet_tag > 0.9535

                has_2_loose = ([
                    True if len(mask[mask]) >= 2 else False
                    for mask in loose_mask
                ])
                has_2_medium = ([
                    True if len(mask[mask]) >= 2 else False
                    for mask in medium_mask
                ])
                has_2_tight = ([
                    True if len(mask[mask]) >= 2 else False
                    for mask in tight_mask
                ])

                print('-> S bjet masks')
                loose_s_mask = ([
                    mask[index] for mask, index in zip(loose_mask, s_index_jet)
                ])
                medium_s_mask = ([
                    mask[index]
                    for mask, index in zip(medium_mask, s_index_jet)
                ])
                tight_s_mask = ([
                    mask[index] for mask, index in zip(tight_mask, s_index_jet)
                ])

                print('-> ISR bjet masks')
                loose_isr_mask = ([
                    mask[index]
                    for mask, index in zip(loose_mask, isr_index_jet)
                ])
                medium_isr_mask = ([
                    mask[index]
                    for mask, index in zip(medium_mask, isr_index_jet)
                ])
                tight_isr_mask = ([
                    mask[index]
                    for mask, index in zip(tight_mask, isr_index_jet)
                ])

                print('-> event bjet masks')
                is_loose = ([np.any(event) for event in loose_mask])
                is_medium = ([np.any(event) for event in medium_mask])
                is_tight = ([np.any(event) for event in tight_mask])

                print('-> jet weights')
                jet_weight = ([
                    np.array([np.float64(event)] * len(jets[~np.isnan(jets)]))
                    for jets, event in zip(pt_jet, weight)
                ])
                # jet_weight = ([np.pad(w, (0, max_n_jets - len(w)), 'constant', constant_values=np.nan) for w in jet_weight])

                s_jet_weight = ([
                    jets[index] for jets, index in zip(jet_weight, s_index_jet)
                ])
                isr_jet_weight = ([
                    jets[index]
                    for jets, index in zip(jet_weight, isr_index_jet)
                ])

                pt_s_jet = ([
                    jets[index] for jets, index in zip(pt_jet, s_index_jet)
                ])
                pt_isr_jet = ([
                    jets[index] for jets, index in zip(pt_jet, isr_index_jet)
                ])

                flavor_s_jet = ([
                    jets[index] for jets, index in zip(flavor_jet, s_index_jet)
                ])
                flavor_isr_jet = ([
                    jets[index]
                    for jets, index in zip(flavor_jet, isr_index_jet)
                ])

                print('-> lep weights')
                lep_weight = ([
                    np.array([np.float64(event)] * len(leps[~np.isnan(leps)]))
                    for leps, event in zip(pt_lep, weight)
                ])
                # lep_weight = ([np.pad(w, (0, max_n_leps - len(w)), 'constant', constant_values=np.nan) for w in lep_weight])

                s_lep_weight = ([
                    leps[index] for leps, index in zip(lep_weight, s_index_lep)
                ])
                isr_lep_weight = ([
                    leps[index]
                    for leps, index in zip(lep_weight, isr_index_lep)
                ])

                pt_s_lep = ([
                    leps[index] for leps, index in zip(pt_lep, s_index_lep)
                ])
                pt_isr_lep = ([
                    leps[index] for leps, index in zip(pt_lep, isr_index_lep)
                ])

                pdgid_s_lep = ([
                    leps[index] for leps, index in zip(pdgid_lep, s_index_lep)
                ])
                pdgid_isr_lep = ([
                    leps[index]
                    for leps, index in zip(pdgid_lep, isr_index_lep)
                ])

                print('\napplying masks')
                print('-> jet pt')
                loose_pt_jet = ([
                    jet[mask] for jet, mask in zip(pt_jet, loose_mask)
                ])
                medium_pt_jet = ([
                    jet[mask] for jet, mask in zip(pt_jet, medium_mask)
                ])
                tight_pt_jet = ([
                    jet[mask] for jet, mask in zip(pt_jet, tight_mask)
                ])

                print('-> N S jets')
                n_s_jet = ([len(jets[~np.isnan(jets)]) for jets in pt_s_jet])
                n_s_loose_jet = ([
                    len(jets[mask])
                    for jets, mask in zip(pt_s_jet, loose_s_mask)
                ])
                n_s_medium_jet = ([
                    len(jets[mask])
                    for jets, mask in zip(pt_s_jet, medium_s_mask)
                ])
                n_s_tight_jet = ([
                    len(jets[mask])
                    for jets, mask in zip(pt_s_jet, tight_s_mask)
                ])

                print('-> N ISR jets')
                n_isr_jet = ([
                    len(jets[~np.isnan(jets)]) for jets in pt_isr_jet
                ])
                n_isr_loose_jet = ([
                    len(jets[mask])
                    for jets, mask in zip(pt_isr_jet, loose_isr_mask)
                ])
                n_isr_medium_jet = ([
                    len(jets[mask])
                    for jets, mask in zip(pt_isr_jet, medium_isr_mask)
                ])
                n_isr_tight_jet = ([
                    len(jets[mask])
                    for jets, mask in zip(pt_isr_jet, tight_isr_mask)
                ])

                print('-> N S leps')
                n_s_lep = ([len(leps[~np.isnan(leps)]) for leps in pt_s_lep])

                print('-> N ISR leps')
                n_isr_lep = ([
                    len(leps[~np.isnan(leps)]) for leps in pt_isr_lep
                ])

                print('-> Event variables')
                ptcm_div_ptisr = np.divide(ptcm, ptisr)

                print('-> jet weights')
                #                loose_weight = weight[is_loose]
                #                medium_weight = weight[is_medium]
                #                tight_weight = weight[is_tight]
                #
                #                loose_jet_weight = ([w[mask] for w, mask in zip(jet_weight, loose_mask)])
                #                medium_jet_weight = ([w[mask] for w, mask in zip(jet_weight, medium_mask)])
                #                tight_jet_weight = ([w[mask] for w, mask in zip(jet_weight, tight_mask)])
                #
                #                loose_s_jet_weight = ([w[mask] for w, mask in zip(s_jet_weight, loose_s_mask)])
                #                medium_s_jet_weight = ([w[mask] for w, mask in zip(s_jet_weight, medium_s_mask)])
                #                tight_s_jet_weight = ([w[mask] for w, mask in zip(s_jet_weight, tight_s_mask)])
                #
                #                loose_isr_jet_weight = (w[mask] for w, mask in zip(isr_jet_weight, loose_isr_mask)])
                #                medium_isr_jet_weight = (w[mask] for w, mask in zip(isr_jet_weight, medium_isr_mask)])
                #                tight_isr_jet_weight = (w[mask] for w, mask in zip(isr_jet_weight, tight_isr_mask)])

                print('-> Overall selection mask')
                evt_selection_mask = ([
                    True if np.all([lep_mask, b_mask]) else False
                    for lep_mask, b_mask in zip(only_2_leps, is_medium)
                ])

                risr = risr[evt_selection_mask]
                ptisr = ptisr[evt_selection_mask]
                ptcm = ptcm[evt_selection_mask]
                met = met[evt_selection_mask]

                lep_weight = lep_weight[evt_selection_mask]
                pdgid_lep = pdgid_lep[evt_selection_mask]
                ch_lep = ch_lep[evt_selection_mask]
                flavor_jet = flavor_jet[evt_selection_mask]
                flavor_s_jet = flavor_s_jet[evt_selection_mask]
                flavor_isr_jet = flavor_isr_jet[evt_selection_mask]
                pdgid_s_lep = pdgid_s_lep[evt_selection_mask]
                pdgid_isr_lep = pdgid_isr_lep[evt_selection_mask]

                dphi = dphi[evt_selection_mask]
                ptcm_div_ptisr = ptcm_div_ptisr[evt_selection_mask]

                n_s_jet = n_s_jet[evt_selection_mask]
                n_s_loose_jet = n_s_loose_jet[evt_selection_mask]
                n_s_medium_jet = n_s_medium_jet[evt_selection_mask]
                n_s_tight_jet = n_s_tight_jet[evt_selection_mask]

                n_s_lep = n_s_lep[evt_selection_mask]

                n_isr_jet = n_isr_jet[evt_selection_mask]
                n_isr_loose_jet = n_isr_loose_jet[evt_selection_mask]
                n_isr_medium_jet = n_isr_medium_jet[evt_selection_mask]
                n_isr_tight_jet = n_isr_tight_jet[evt_selection_mask]

                n_isr_lep = n_isr_lep[evt_selection_mask]

                len_jet = len_jet[evt_selection_mask]
                len_lep = len_lep[evt_selection_mask]
                only_lep_weight = weight
                weight = weight[evt_selection_mask]
                s_jet_weight = s_jet_weight[evt_selection_mask]
                isr_jet_weight = isr_jet_weight[evt_selection_mask]
                s_lep_weight = s_lep_weight[evt_selection_mask]
                isr_lep_weight = isr_lep_weight[evt_selection_mask]

                print('done applying masks')
                print('\nfilling histograms')

                if not np.any(evt_selection_mask):
                    print('finished filling')
                    continue


#                rnp.fill_hist(hist[sample][tree_name]['MET'], met, weight)
#                rnp.fill_hist(hist[sample][tree_name]['S_Flavor_jet'], flavor_s_jet, s_jet_weight)
#                rnp.fill_hist(hist[sample][tree_name]['ISR_Flavor_jet'], flavor_isr_jet, isr_jet_weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['S_Flavor_lep'], pdgid_s_lep, s_lep_weight)
#                rnp.fill_hist(hist[sample][tree_name]['ISR_Flavor_lep'], pdgid_isr_lep, isr_lep_weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['Lep_to_Charge'], np.swapaxes([pdgid_lep, ch_lep],0,1), lep_weight)
#                rnp.fill_hist(hist[sample][tree_name]['Lep_to_Lep'], np.swapaxes([only_2_leps, only_2_opp_leps],0,1), only_lep_weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['RISR'], risr, weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTISR'], ptisr, weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTCM'], ptcm, weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['RISR_PTCM'], np.swapaxes([risr,ptcm],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['RISR_PTISR'], np.swapaxes([risr,ptisr],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_PTCM'], np.swapaxes([ptisr,ptcm],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['dphi_PTCM'], np.swapaxes([dphi,ptcm],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['dphi_PTCM_div_PTISR'], np.swapaxes([div_dphi,ptcm_div_ptisr],0,1), div_weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_div_PTISR'], ptcm_div_ptisr, div_weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_jet'], np.swapaxes([n_s_jet,n_isr_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_lep'], np.swapaxes([n_s_lep,n_isr_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_loose_jet'], np.swapaxes([n_s_loose_jet,n_isr_loose_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_medium_jet'], np.swapaxes([n_s_medium_jet,n_isr_medium_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_tight_jet'], np.swapaxes([n_s_tight_jet,n_isr_tight_jet],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['RISR_N_jet'], np.swapaxes([risr,len_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['RISR_N_lep'], np.swapaxes([risr,len_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['RISR_N_S_jet'], np.swapaxes([risr,n_s_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['RISR_N_S_lep'], np.swapaxes([risr,n_s_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['RISR_N_ISR_jet'], np.swapaxes([risr,n_isr_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['RISR_N_ISR_lep'], np.swapaxes([risr,n_isr_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_N_jet'], np.swapaxes([ptisr,len_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_N_lep'], np.swapaxes([ptisr,len_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_N_S_jet'], np.swapaxes([ptisr,n_s_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_N_S_lep'], np.swapaxes([ptisr,n_s_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_N_ISR_jet'], np.swapaxes([ptisr,n_isr_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTISR_N_ISR_lep'], np.swapaxes([ptisr,n_isr_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_N_jet'], np.swapaxes([ptcm,len_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_N_lep'], np.swapaxes([ptcm,len_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_N_S_jet'], np.swapaxes([ptcm,n_s_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_N_S_lep'], np.swapaxes([ptcm,n_s_lep],0,1), weight)
#
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_N_ISR_jet'], np.swapaxes([ptcm,n_isr_jet],0,1), weight)
#                rnp.fill_hist(hist[sample][tree_name]['PTCM_N_ISR_lep'], np.swapaxes([ptcm,n_isr_lep],0,1), weight)

                print('finished filling')
    return hist
Exemple #29
0
  def run(self):
    print('[Bu2KJpsi2KMuMuAnalyzer::run] INFO: Running the analyzer...')
    self.print_timestamp()
    for ifile, filename in enumerate(self._input_files):
      print('[Bu2KJpsi2KMuMuAnalyzer::run] INFO: FILE: {}/{}. Getting branches from file...'.format(ifile, len(self._input_files)))
      tree = uproot.open(filename)['Events']
      self._bu_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._bu_branchnames).items()}
      self._event_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._event_branchnames).items()}
      self._muon_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._muon_branchnames).items()}
      self._track_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._track_branchnames).items()}
      self._gen_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._gen_branchnames).items()}

      print('[Bu2KJpsi2KMuMuAnalyzer::run] INFO: FILE: {}/{}. Analyzing...'.format(ifile, len(self._input_files)))

      # Muon information
      self._muon_branches["Muon_isTriggeringBool"] = (self._muon_branches["Muon_isTriggering"] == 1)
      fill_hist(self._mu_histograms["nMuon"], self._muon_branches["Muon_pt"].count())
      fill_hist(self._mu_histograms["nMuon_isTrig"], self._muon_branches["Muon_pt"][self._muon_branches["Muon_isTriggeringBool"]].count())
      fill_hist(self._mu_histograms["Muon_pt"], self._muon_branches["Muon_pt"].flatten())
      fill_hist(self._mu_histograms["Muon_pt_isTrig"], self._muon_branches["Muon_pt"][self._muon_branches["Muon_isTriggeringBool"]].flatten())

      # Tag/probe determination
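      # A candidate is a "tag" if one of its own muons fired the trigger; it is a "probe" if at
      # least one triggering muon in the event does not belong to the candidate (tag_count >= 1).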
      isTrig_mu1 = self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l1Idx"]] # shape=BToKMuMu
      isTrig_mu2 = self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l2Idx"]] # shape=BToKMuMu
      bu_trig_count = isTrig_mu1 + isTrig_mu2 # shape=BToKMuMu
      total_trig_count = self._muon_branches["Muon_isTriggering"].sum() # shape=Event simple array
      total_trig_count_bushape = bu_trig_count.ones_like() * total_trig_count
      tag_count = total_trig_count_bushape - bu_trig_count
      self._bu_branches["BToKMuMu_isTag"] = (isTrig_mu1 == 1) | (isTrig_mu2 == 1)
      self._bu_branches["BToKMuMu_isProbe"] = (tag_count >= 1)

      if ifile == 0:
        print("Muon debug info:")
        print(self._event_branches["nMuon"])
        print(self._muon_branches["Muon_pt"].count()[:6])
        print(self._muon_branches["Muon_pt"][:6])
        #print(self._muon_branches["Muon_isTriggering"])
        print(self._muon_branches["Muon_isTriggeringBool"][:6])
        print (self._muon_branches["Muon_pt"][self._muon_branches["Muon_isTriggeringBool"]][:6])

        print("BToKMuMu_l1Idx = ")
        print(self._bu_branches["BToKMuMu_l1Idx"][:6])
        print("BToKMuMu_l2Idx = ")
        print(self._bu_branches["BToKMuMu_l2Idx"][:6])
        print("Total_trig_count = ")
        print(self._muon_branches["Muon_isTriggering"].sum()[:6])
        print("isTrig_mu1 = ")
        print(self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l1Idx"]][:6])
        print("isTrig_mu2 = ")
        print(self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l2Idx"]][:6])

        print("bu_trig_count = ")
        print(bu_trig_count[:6])
        print("total_trig_count_bushape = ")
        print(total_trig_count_bushape[:6])
        print("tag_count = ")
        print(tag_count[:6])
        print("isTag:")
        print(self._bu_branches["BToKMuMu_isTag"][:6])
        print("isProbe:")
        print(self._bu_branches["BToKMuMu_isProbe"][:6])

      # MC truth matching
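      # Chase gen indices: reco lepton/kaon -> gen particle -> mother -> grandmother.
      # where() guards each step so unmatched objects (index -1) stay at -1 instead of
      # silently picking up an entry from the end of the GenPart collection.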
      self._bu_branches["BToKMuMu_l1_genIdx"] = self._muon_branches["Muon_genPartIdx"][self._bu_branches["BToKMuMu_l1Idx"]] 
      self._bu_branches["BToKMuMu_l2_genIdx"] = self._muon_branches["Muon_genPartIdx"][self._bu_branches["BToKMuMu_l2Idx"]] 
      self._bu_branches['BToKMuMu_k_genIdx']  = self._track_branches['ProbeTracks_genPartIdx'][self._bu_branches['BToKMuMu_kIdx']]

      self._bu_branches['BToKMuMu_l1_genMotherIdx'] = where(self._bu_branches["BToKMuMu_l1_genIdx"] >= 0, 
                                                                self._gen_branches["GenPart_genPartIdxMother"][self._bu_branches["BToKMuMu_l1_genIdx"]], 
                                                                -1)
      self._bu_branches['BToKMuMu_l2_genMotherIdx'] = where(self._bu_branches["BToKMuMu_l2_genIdx"] >= 0, 
                                                                self._gen_branches["GenPart_genPartIdxMother"][self._bu_branches["BToKMuMu_l2_genIdx"]], 
                                                                -1)
      self._bu_branches['BToKMuMu_k_genMotherIdx'] = where(self._bu_branches["BToKMuMu_k_genIdx"] >= 0, 
                                                                self._gen_branches["GenPart_genPartIdxMother"][self._bu_branches["BToKMuMu_k_genIdx"]], 
                                                                -1)

      self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'] = where(self._bu_branches['BToKMuMu_l1_genMotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_genPartIdxMother"][self._bu_branches['BToKMuMu_l1_genMotherIdx']], 
                                                                -1)
      self._bu_branches['BToKMuMu_l2_genGrandmotherIdx'] = where(self._bu_branches['BToKMuMu_l2_genMotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_genPartIdxMother"][self._bu_branches['BToKMuMu_l2_genMotherIdx']], 
                                                                -1)

      self._bu_branches['BToKMuMu_l1_genMotherPdgId'] = where(self._bu_branches['BToKMuMu_l1_genMotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_pdgId"][self._bu_branches['BToKMuMu_l1_genMotherIdx']],
                                                                -1)
      self._bu_branches['BToKMuMu_l2_genMotherPdgId'] = where(self._bu_branches['BToKMuMu_l2_genMotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_pdgId"][self._bu_branches['BToKMuMu_l2_genMotherIdx']],
                                                                -1)
      self._bu_branches['BToKMuMu_k_genMotherPdgId'] = where(self._bu_branches['BToKMuMu_k_genMotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_pdgId"][self._bu_branches['BToKMuMu_k_genMotherIdx']],
                                                                -1)

      self._bu_branches['BToKMuMu_l1_genGrandmotherPdgId'] = where(self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_pdgId"][self._bu_branches['BToKMuMu_l1_genGrandmotherIdx']],
                                                                -1)
      self._bu_branches['BToKMuMu_l2_genGrandmotherPdgId'] = where(self._bu_branches['BToKMuMu_l2_genGrandmotherIdx'] >= 0, 
                                                                self._gen_branches["GenPart_pdgId"][self._bu_branches['BToKMuMu_l2_genGrandmotherIdx']],
                                                                -1)

      # Both muons from a J/psi (443) whose mother is a B+ (521), the kaon directly from a B+,
      # and all three chains pointing back to the same gen B: the kaon's mother index must match
      # the leptons' grandmother index.
      self._bu_branches['BToKMuMu_mcmatch'] = (self._bu_branches['BToKMuMu_l1_genMotherPdgId'] == 443) \
                                              & (self._bu_branches['BToKMuMu_l2_genMotherPdgId'] == 443) \
                                              & (self._bu_branches['BToKMuMu_l1_genGrandmotherPdgId'] == 521) \
                                              & (self._bu_branches['BToKMuMu_l2_genGrandmotherPdgId'] == 521) \
                                              & (self._bu_branches['BToKMuMu_k_genMotherPdgId'] == 521) \
                                              & (self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'] == self._bu_branches['BToKMuMu_l2_genGrandmotherIdx']) \
                                              & (self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'] == self._bu_branches['BToKMuMu_k_genMotherIdx'])

      self._bu_branches["BToKMuMu_genPartIdx"] = where(self._bu_branches['BToKMuMu_mcmatch'], self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'], -1)

      self._butruth_branches = {}
      self._butruth_branches["TruthBToKMuMu_RecoIdx"] = self._gen_branches["GenPart_pdg"]



      # Add trigger decision to Bu candidates
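      # Broadcast the event-level trigger bit to one entry per B candidate by repeating it
      # nBToKMuMu times per event.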
      self._bu_branches["BToKMuMu_{}".format(self._trigger)] = np.repeat(self._event_branches[self._trigger], self._event_branches["nBToKMuMu"])

      # Print out length of arrays
      #for branch, array in self._bu_branches.items():
      #  print("{}\t{}".format(len(array.flatten()), branch))

      # flatten the jagged arrays to a normal numpy array, turn the whole dictionary to pandas dataframe
      self._bu_branches = pd.DataFrame.from_dict({branch: array.flatten() for branch, array in self._bu_branches.items()})

      # Minimum lepton pT
      self._bu_branches["BToKMuMu_fit_l_minpt"] = np.minimum(self._bu_branches["BToKMuMu_fit_l1_pt"], self._bu_branches["BToKMuMu_fit_l2_pt"])

      # general selection
      trigger_selection = self._bu_branches['BToKMuMu_{}'.format(self._trigger)]
      tag_selection = self._bu_branches["BToKMuMu_isTag"] & trigger_selection
      probe_selection = self._bu_branches["BToKMuMu_isProbe"] & trigger_selection

      sv_selection = (self._bu_branches['BToKMuMu_fit_pt'] > 3.0) \
                      & (np.abs(self._bu_branches['BToKMuMu_l_xy'] / self._bu_branches['BToKMuMu_l_xy_unc']) > 3.0 ) \
                      & (self._bu_branches['BToKMuMu_svprob'] > 0.1) \
                      & (self._bu_branches['BToKMuMu_fit_cos2D'] > 0.9)

      l1_selection = (self._bu_branches['BToKMuMu_fit_l1_pt'] > 1.5) \
                      & (np.abs(self._bu_branches['BToKMuMu_fit_l1_eta']) < 2.4)
      l2_selection = (self._bu_branches['BToKMuMu_fit_l2_pt'] > 1.5) \
                      & (np.abs(self._bu_branches['BToKMuMu_fit_l2_eta']) < 2.4)
      k_selection = (self._bu_branches['BToKMuMu_fit_k_pt'] > 0.5) \
                      & (np.abs(self._bu_branches['BToKMuMu_fit_k_eta']) < 2.5)

      jpsi_selection = (JPSI_1S_MASS - 0.2 < self._bu_branches['BToKMuMu_mll_fullfit']) & (self._bu_branches['BToKMuMu_mll_fullfit'] < JPSI_1S_MASS + 0.2)


      bu_selection = sv_selection & l1_selection & l2_selection & k_selection & jpsi_selection

      #print("N trigger_selection = {}".format(trigger_selection.sum()))
      #print("N tag_selection = {}".format(tag_selection.sum()))
      #print("N probe_selection = {}".format(probe_selection.sum()))
      #print("N sv_selection = {}".format(sv_selection.sum()))
      #print("N l1_selection = {}".format(l1_selection.sum()))
      #print("N l2_selection = {}".format(l2_selection.sum()))
      #print("N k_selection = {}".format(k_selection.sum()))
      #print("N jpsi_selection = {}".format(jpsi_selection.sum()))
      #print("N bu_selection = {}".format(bu_selection.sum()))

      '''

      if self._isMC:
        pass
        mc_matched_selection = (self._branches['BsToKKMuMu_l1_genPartIdx'] >= 0) \
                                & (self._branches['BsToKKMuMu_l2_genPartIdx'] >= 0) \
                                & (self._branches['BsToKKMuMu_k_genPartIdx'] >= 0)
        # B->K J/psi(ee)
        #mc_parent_selection = (abs(self._branches['BsToKKMuMu_l1_genMotherPdgId']) == 443) & (abs(self._branches['BsToKKMuMu_k_genMotherPdgId']) == 521)
        #mc_chain_selection = (self._branches['BsToKKMuMu_l1_genMotherPdgId'] == self._branches['BsToKKMuMu_l2_genMotherPdgId']) & (self._branches['BsToKKMuMu_k_genMotherPdgId'] == self._branches['BsToKKMuMu_l1Mother_genMotherPdgId']) & (self._branches['BsToKKMuMu_k_genMotherPdgId'] == self._branches['BsToKKMuMu_l2Mother_genMotherPdgId'])

        # B->K*(K pi) J/psi(ee)
        mc_parent_selection = (abs(self._branches['BsToKKMuMu_l1_genMotherPdgId']) == 443) & (abs(self._branches['BsToKKMuMu_k_genMotherPdgId']) == 313)
        mc_chain_selection = (self._branches['BsToKKMuMu_l1_genMotherPdgId'] == self._branches['BsToKKMuMu_l2_genMotherPdgId'])
        mc_selection = mc_matched_selection & mc_parent_selection & mc_chain_selection

      #additional_selection = b_sb_selection
      if self._isMC:
        selection = l1_selection & l2_selection & k_selection & mc_selection

      else:
        selection = l1_selection & l2_selection & k_selection
      '''

      for tag_type in ["inclusive", "triggered", "tag", "probe"]:
        this_selection = copy.deepcopy(bu_selection)
        if tag_type == "triggered":
          this_selection &= trigger_selection
        elif tag_type == "tag":
          this_selection &= tag_selection
        elif tag_type == "probe":
          this_selection &= probe_selection

        #print("tag_type {}".format(tag_type))
        #print("\tthis_selection.count = {}".format(this_selection.sum()))

        selected_branches = self._bu_branches[this_selection]

        fill_hist(self._histograms[tag_type]['BToKMuMu_chi2'], selected_branches['BToKMuMu_chi2'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_eta'], selected_branches['BToKMuMu_eta'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_cos2D'], selected_branches['BToKMuMu_fit_cos2D'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_eta'], selected_branches['BToKMuMu_fit_eta'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_mass'], selected_branches['BToKMuMu_fit_mass'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_phi'], selected_branches['BToKMuMu_fit_phi'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_pt'], selected_branches['BToKMuMu_fit_pt'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_l_xy'], selected_branches['BToKMuMu_l_xy'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_l_xy_sig'], selected_branches['BToKMuMu_l_xy'].values / selected_branches['BToKMuMu_l_xy_unc'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l1_eta'], selected_branches['BToKMuMu_fit_l1_eta'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l1_phi'], selected_branches['BToKMuMu_fit_l1_phi'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l1_pt'], selected_branches['BToKMuMu_fit_l1_pt'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l2_eta'], selected_branches['BToKMuMu_fit_l2_eta'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l2_phi'], selected_branches['BToKMuMu_fit_l2_phi'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l2_pt'], selected_branches['BToKMuMu_fit_l2_pt'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_mass'], selected_branches['BToKMuMu_mass'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_mll_fullfit'], selected_branches['BToKMuMu_mll_fullfit'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_mll_llfit'], selected_branches['BToKMuMu_mll_llfit'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_mll_raw'], selected_branches['BToKMuMu_mll_raw'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_phi'], selected_branches['BToKMuMu_phi'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_pt'], selected_branches['BToKMuMu_pt'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_svprob'], selected_branches['BToKMuMu_svprob'].values)
        fill_hist(self._histograms[tag_type]['BToKMuMu_charge'], selected_branches['BToKMuMu_charge'].values)

        fill_hist(self._histograms[tag_type]['BToKMuMu_fit_l_minpt'], selected_branches["BToKMuMu_fit_l_minpt"].values)
      # End loop tag_Type
      
      # Debug absence of low-pT probes
      if ifile == 0:
        select_lowpt = (self._bu_branches["BToKMuMu_fit_pt"]  < 10.)
        select_lowpt_probe = (self._bu_branches["BToKMuMu_fit_pt"]  < 10.) & self._bu_branches["BToKMuMu_isProbe"]
        print("pT of Bus with pT<10 and isProbe")
        print(self._bu_branches["BToKMuMu_fit_pt"][select_lowpt_probe])
      
      # Cutflow
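      # Each counter accumulates, across input files, the number of candidates surviving the
      # successive cuts; the inclusive, tag and probe branches share the SV / mu-K / Jpsi steps.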
      cutflow_selection = np.ones_like(self._bu_branches["BToKMuMu_chi2"], dtype=int)
      self._cutflow_counts["Inclusive"] += cutflow_selection.sum()

      self._cutflow_counts[self._trigger] += trigger_selection.sum()

      # Inclusive branch
      cutflow_selection_inclusive = copy.deepcopy(cutflow_selection)
      cutflow_selection_inclusive &= sv_selection
      self._cutflow_counts["Inclusive SV"] += cutflow_selection_inclusive.sum()

      cutflow_selection_inclusive &= l1_selection & l2_selection & k_selection
      self._cutflow_counts["Inclusive mu-K"] += cutflow_selection_inclusive.sum()

      cutflow_selection_inclusive &= jpsi_selection
      self._cutflow_counts["Inclusive Jpsi"] += cutflow_selection_inclusive.sum()

      # Tag branch
      cutflow_selection_tag =  cutflow_selection & tag_selection
      self._cutflow_counts["Tag"] += cutflow_selection_tag.sum()

      cutflow_selection_tag = cutflow_selection_tag & sv_selection
      self._cutflow_counts["Tag SV"] += cutflow_selection_tag.sum()

      cutflow_selection_tag = cutflow_selection_tag & l1_selection & l2_selection & k_selection
      self._cutflow_counts["Tag mu-K"] += cutflow_selection_tag.sum()

      cutflow_selection_tag = cutflow_selection_tag & jpsi_selection
      self._cutflow_counts["Tag Jpsi"] += cutflow_selection_tag.sum()

      # Probe branch
      cutflow_selection_probe =  cutflow_selection & probe_selection
      self._cutflow_counts["Probe"] += cutflow_selection_probe.sum()

      cutflow_selection_probe = cutflow_selection_probe & sv_selection
      self._cutflow_counts["Probe SV"] += cutflow_selection_probe.sum()

      cutflow_selection_probe = cutflow_selection_probe & l1_selection & l2_selection & k_selection
      self._cutflow_counts["Probe mu-K"] += cutflow_selection_probe.sum()

      cutflow_selection_probe = cutflow_selection_probe & jpsi_selection
      self._cutflow_counts["Probe Jpsi"] += cutflow_selection_probe.sum()
def main(args):
    
    directed = False
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'training_data', 'single_photon')
    full_dataset = HitGraphDatasetG(path, directed=directed)
    fulllen = len(full_dataset)
    tv_frac = 0.10
    tv_num = math.ceil(fulllen*tv_frac)
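    # Split boundaries via cumulative sums: (1 - 2*tv_frac) of the events, then two
    # tv_frac-sized chunks; the test subset below uses the indices [splits[0], splits[1]).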
    splits = np.cumsum([fulllen-2*tv_num,tv_num,tv_num])
    
    test_dataset = torch.utils.data.Subset(full_dataset,np.arange(start=splits[0],stop=splits[1]))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    test_samples = len(test_dataset)

    d = full_dataset
    num_features = d.num_features
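    # Infer the number of target classes: max label + 1 for 1-D integer labels,
    # otherwise the width of the one-hot label matrix.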
    num_classes = d[0].y.max().item() + 1 if d[0].y.dim() == 1 else d[0].y.size(1)
    
    model = EdgeNet(input_dim=num_features,hidden_dim=hidden_dim,n_iters=n_iters).to(device)
    model_fname = args.model
    print('Model: \n%s\nParameters: %i' %
          (model, sum(p.numel()
                      for p in model.parameters())))
    print('Testing with %s samples'%test_samples)
    
    model.load_state_dict(torch.load(model_fname))

    test_loss, test_acc, test_eff, test_fp, test_fn, test_pur = test(model, test_loader, test_samples)
    print('Testing: Loss: {:.4f}, Eff.: {:.4f}, FalsePos: {:.4f}, FalseNeg: {:.4f}, Purity: {:.4f}'.format(test_loss, test_eff,
                                                                                                           test_fp, test_fn, test_pur))


    # plotting:
    figs = []
    out = []
    y = []
    x = []
    edge_index = []
    #simmatched = []
    for i,data in enumerate(test_loader):
        x.append(data.x.cpu().detach().numpy())
        y.append(data.y.cpu().detach().numpy())
        edge_index.append(data.edge_index.cpu().detach().numpy())
        #simmatched.append(data.sim_matched.cpu().detach().numpy())
        data = data.to(device)
        out.append(model(data).cpu().detach().numpy())

    print("Processing awkward arrays...")    
    out = awkward.fromiter(out)
    x = awkward.fromiter(x)
    y = awkward.fromiter(y)
    #simmatched = awkward.fromiter(simmatched) 
    edge_index = awkward.fromiter(edge_index)
    
    predicted_edge = (out > 0.5)
    truth_edge = (y > 0.5)
    node_energy = x[:,:,4]
    node_layer = x[:,:,2]

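    # Energy containment: take the node indices at both ends of every edge classified as
    # connected (score > 0.5), deduplicate them per event, and sum the energies of those
    # nodes; repeat with the truth edge labels to form the denominator.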
    predicted_connected_node_indices = awkward.JaggedArray.concatenate([edge_index[:,0][predicted_edge], edge_index[:,1][predicted_edge]], axis=1)
    predicted_connected_node_indices = awkward.fromiter(map(np.unique, predicted_connected_node_indices))
    predicted_energy_sum = node_energy[predicted_connected_node_indices].sum()
    truth_connected_node_indices = awkward.JaggedArray.concatenate([edge_index[:,0][truth_edge],edge_index[:,1][truth_edge]], axis=1)
    truth_connected_node_indices = awkward.fromiter(map(np.unique, truth_connected_node_indices))
    truth_energy_sum = node_energy[truth_connected_node_indices].sum()
    
    print ("Plotting...")
    nonzeromask = (truth_energy_sum !=0.0)
    energy_captured_ratio = predicted_energy_sum[nonzeromask]/truth_energy_sum[nonzeromask]
    
    fig,axes = plt.subplots(figsize=(12, 7))
    _, bins,_ = axes.hist(energy_captured_ratio, bins=100)
    axes.set_title("Ratio of energy sum for predicted hits/truth (preprocessed) hits")
    axes.set_ylabel("events (pos+neg)")
    axes.set_xlabel("Ratio")
    cut = energy_captured_ratio[(energy_captured_ratio>0.975) & (energy_captured_ratio < 1.025)] #NB: restricted fit
    (mu, sigma) = stats.norm.fit(cut)
    c_paras = stats.crystalball.fit(cut)
    lnspc = np.linspace(bins[0], bins[-1], len(bins))
    pdf_g = stats.norm.pdf(lnspc, mu, sigma)
    pdf_c = stats.crystalball.pdf(lnspc, *c_paras)
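    # Rescale the unit-normalised pdfs so their summed values match the total number of
    # entries, allowing them to be overlaid on the count histogram.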
    pdf_g = pdf_g / pdf_g.sum() * len(energy_captured_ratio)
    pdf_c = pdf_c / pdf_c.sum() * len(energy_captured_ratio)
    axes.plot(lnspc, pdf_g, label="Norm, restricted fit")
    axes.plot(lnspc, pdf_c, label="Crystalball, restricted fit")
    axes.legend(loc='upper left')
    figs.append(fig)

    idx = 0
    print("diagnostics:", x[idx].regular().shape, edge_index[idx].regular().shape, y[idx].shape )

    #uncomment for visualisation - warning: slow
    
    #idxs = [0]
    #for idx in tqdm.tqdm(idxs):
    #    fig = draw_sample(x[idx].regular(), edge_index[idx].regular()[0], edge_index[idx].regular()[1], y[idx], out[idx])
    #    figs.append(fig)
    
    import matplotlib.backends.backend_pdf
    pdf = matplotlib.backends.backend_pdf.PdfPages("test_plots.pdf")
    for fig in figs: 
        pdf.savefig(fig)
    pdf.close()