def process(self, events):
    """Fill the di-electron mass histogram for one chunk of events.

    Events are kept when at least two electrons pass the kinematic/ID cuts
    (pt > 20, |eta| < 2.5, cutBased > 1).  All pairs of the *selected*
    electrons are formed, pairs with zero net charge are kept, and the
    highest-pt such pair per event contributes its invariant mass.

    Parameters
    ----------
    events
        Event array exposing an ``Electron`` collection and
        ``metadata["dataset"]``.

    Returns
    -------
    The accumulator obtained from ``self.accumulator.identity()`` after
    filling.
    """
    # Initialize accumulator
    out = self.accumulator.identity()

    # Event selection: at least two electrons passing the object cuts
    Electron = events.Electron
    Electron_mask = (
        (Electron.pt > 20)
        & (np.abs(Electron.eta) < 2.5)
        & (Electron.cutBased > 1)
    )
    selected_electrons = Electron[Electron_mask]
    Ele_channel_mask = ak.num(selected_electrons) > 1

    # Pair only the electrons that passed the cuts; pairing every electron
    # of the surviving events would let failing electrons into the pairs.
    Ele = selected_electrons[Ele_channel_mask]

    # All possible pairs of electrons in each event
    ele_pairs = ak.combinations(Ele, 2, axis=1)

    # Lorentz-vector sum of each pair
    ele_left, ele_right = ak.unzip(ele_pairs)
    diele = ele_left + ele_right

    # Opposite-charge pairs, then the highest-pt one per event.
    # keepdims=True keeps one (possibly missing) entry per event.
    diffsign_diele = diele[diele.charge == 0]
    leading_diffsign_diele = diffsign_diele[
        ak.argmax(diffsign_diele.pt, axis=1, keepdims=True)
    ]

    # Events without an opposite-charge pair give a missing value; drop it
    # explicitly so the histogram receives a plain (unmasked) NumPy array.
    mass = ak.flatten(leading_diffsign_diele.mass)
    Mee = ak.to_numpy(mass[~ak.is_none(mass)])

    out.fill(dataset=events.metadata["dataset"], mass=Mee)
    return out
def nearest(self, other, metric=lambda a, b: a.delta_r(b), return_metric=False):
    """Return the object in ``other`` closest to each object in this array.

    Only works for the first axis (i.e. top-level ListArrays).

    ``metric`` is evaluated on every (self, other) pairing of the nested
    cartesian product; the pairing with the smallest value along the last
    axis is selected.  When ``return_metric`` is true, the metric value of
    the selected pairing is returned as well.
    """
    pairs = awkward1.cartesian([self, other], nested=True)
    lhs, rhs = awkward1.unzip(pairs)
    distances = metric(lhs, rhs)
    best = awkward1.argmin(distances, axis=-1)
    closest = rhs[best]
    if not return_metric:
        return closest
    return closest, distances[best]
def nearest(self, other, metric=lambda a, b: a.delta_r(b), return_metric=False):
    """Return the nearest object in ``other`` for each object in this array.

    Closeness is measured by ``metric``, which defaults to `delta_r`.
    With ``return_metric=True`` a ``(nearest, metric_value)`` pair is
    returned instead of the nearest object alone.
    """
    # Nested cartesian product: every element of self is paired with every
    # candidate from other.
    this_side, candidates = awkward1.unzip(
        awkward1.cartesian([self, other], nested=True)
    )
    metric_values = metric(this_side, candidates)
    # Index of the smallest metric value along the innermost axis.
    argbest = awkward1.argmin(metric_values, axis=-1)
    nearest_candidates = candidates[argbest]
    if return_metric:
        return nearest_candidates, metric_values[argbest]
    return nearest_candidates
def nearest(
    self,
    other,
    axis=1,
    metric=lambda a, b: a.delta_r(b),
    return_metric=False,
    threshold=None,
):
    """Return nearest object to this one

    Picks the item of ``other`` that minimizes ``metric(self, other)``.
    Apart from ``axis``, the two arrays should be broadcast-compatible on
    every axis; ``axis`` itself is used to build a cartesian product.
    With ``axis=None`` the arrays are broadcast against each other
    directly. The result has the shape of ``self``.

    Parameters
    ----------
        other : awkward1.Array
            Another array with same shape in all but ``axis``
        axis : int, optional
            The axis to form the cartesian product (default 1). If None,
            the metric is directly evaluated on the input arrays (i.e. they
            should broadcast)
        metric : callable
            A function of two arguments, returning a scalar. The default
            metric is `delta_r`.
        return_metric : bool, optional
            If true, return both the closest object and its metric
            (default false)
        threshold : Number, optional
            If set, any objects with ``metric > threshold`` will be masked
            from the result
    """
    if axis is not None:
        lhs, rhs = awkward1.unzip(
            awkward1.cartesian([self, other], axis=axis, nested=True)
        )
    else:
        lhs, rhs = self, other
        # A positive axis is derived from the layout depth because
        # ak.firsts with axis=-1 raises NotImplementedError.
        axis = other.layout.purelist_depth - 2

    reduce_axis = axis + 1
    distances = metric(lhs, rhs)

    # singletons() stands in for argmin(..., keepdims=True)
    # (awkward-1.0 #434).
    best = awkward1.singletons(awkward1.argmin(distances, axis=reduce_axis))
    nearest_obj = awkward1.firsts(rhs[best], axis=reduce_axis)
    nearest_dist = awkward1.firsts(distances[best], axis=reduce_axis)

    if threshold is not None:
        nearest_obj = nearest_obj.mask[nearest_dist <= threshold]

    return (nearest_obj, nearest_dist) if return_metric else nearest_obj
# Build a truth label for every 3-jet combination and the per-combination
# features that are written out for training.
test = matchedJets.genJetIdx
combs = ak.combinations(finalJets, 3, replacement=False)

# A jet of the combination counts as matched when its genJetIdx equals one of
# the first three entries of `test` for that event.
# NOTE(review): test[:, 0..2] assumes every event has at least three matched
# jets -- confirm against the upstream selection.
t1 = (
    (combs['0'].genJetIdx == test[:, 0])
    | (combs['0'].genJetIdx == test[:, 1])
    | (combs['0'].genJetIdx == test[:, 2])
)
t2 = (
    (combs['1'].genJetIdx == test[:, 0])
    | (combs['1'].genJetIdx == test[:, 1])
    | (combs['1'].genJetIdx == test[:, 2])
)
t3 = (
    (combs['2'].genJetIdx == test[:, 0])
    | (combs['2'].genJetIdx == test[:, 1])
    | (combs['2'].genJetIdx == test[:, 2])
)
# The combination is "true" only when all three of its jets are matched.
t = t1 & t2 & t3
trutharray = ak.flatten(t)
print("matching a validation array for every combo of 3 jets")

# Zipping into CSV for training
jetcombos = ak.flatten(combs)
j1, j2, j3 = ak.unzip(jetcombos)

dR1_2 = j1.delta_r(j2)
dR1_3 = j1.delta_r(j3)
dR2_3 = j2.delta_r(j3)

# Each tag is read from its own jet (all three previously read from j1).
j1b_tag = j1.btagCSVV2
j2b_tag = j2.btagCSVV2
j3b_tag = j3.btagCSVV2

j1area = j1.area
j2area = j2.area
j3area = j3.area

j12deta = j1.eta - j2.eta
j23deta = j2.eta - j3.eta
j13deta = j1.eta - j3.eta

# NOTE(review): plain phi differences are not wrapped into [-pi, pi];
# confirm whether the training expects wrapped values.
j12dphi = j1.phi - j2.phi
j23dphi = j2.phi - j3.phi
j13dphi = j1.phi - j3.phi
# Read the event-level and track branches from the first tree.
events = t1.arrays([
    "HT",
    "CrossSection",
    "Tracks.fCoordinates.fX",
    "Tracks.fCoordinates.fY",
    "Tracks.fCoordinates.fZ",
    "Tracks_fromPV0",
    "Tracks_matchedToPFCandidate",
])  # , entry_start=99000)

# Read the AK15 jet four-momentum branches from the second tree.
jetsAK15 = t2.arrays([
    "jetsAK15_px",
    "jetsAK15_py",
    "jetsAK15_pz",
    "jetsAK15_E",
])

# Apply the same HT requirement to both arrays.
# NOTE(review): this assumes t1 and t2 are aligned entry by entry -- confirm.
ht_mask = events.HT > 1200
cut_events = events[ht_mask]
cut_jetsAK15 = jetsAK15[ht_mask]

# The unpacking order follows the branch order requested above.
(HT, CrossSection, tracks_x, tracks_y, tracks_z, tracks_num_fromPV0,
 tracks_matched_to_candidate) = ak.unzip(cut_events)

# Transverse magnitude uses x and y only; it is the same quantity the eta
# formula divides by.  (It previously included z as well.)
tracks_pt = np.sqrt(tracks_x**2 + tracks_y**2)
tracks_eta = np.arcsinh(tracks_z / tracks_pt)
# NOTE(review): 0.13957 looks like the charged-pion mass in GeV used as the
# mass hypothesis for every track -- confirm.
tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2)

# |eta| < 2.5: abs() must wrap eta, not the boolean comparison result.
tracks_cut = (
    (tracks_pt > 1)
    & (np.abs(tracks_eta) < 2.5)
    & (tracks_num_fromPV0 >= 2)
    & tracks_matched_to_candidate
)

# NOTE(review): `tracks` is not defined in this excerpt; presumably it is
# built elsewhere -- verify.
s = eventShapesUtilities.sphericityTensor_uproot4(tracks)
def test_zip():
    """Check zip/unzip results for every mix of plain and partitioned inputs.

    The same zip / zip(depth_limit=1) / unzip assertions are run five
    times: both inputs plain, ``x`` partitioned, both partitioned, only
    ``y`` partitioned, and both plain again.  The outputs are expected to
    be partitioned whenever at least one input is.
    """
    x_list = [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
    y_list = [1.1, 2.2, 3.3, 4.4, 5.5]

    # Full-depth zip: y is broadcast into every element of x.
    expected_one = [
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 1.1}, {"x": 3, "y": 1.1}],
        [],
        [{"x": 4, "y": 3.3}, {"x": 5, "y": 3.3}],
        [{"x": 6, "y": 4.4}],
        [
            {"x": 7, "y": 5.5},
            {"x": 8, "y": 5.5},
            {"x": 9, "y": 5.5},
            {"x": 10, "y": 5.5},
        ],
    ]
    # depth_limit=1: one record per entry, x stays a list.
    expected_two = [
        {"x": [1, 2, 3], "y": 1.1},
        {"x": [], "y": 2.2},
        {"x": [4, 5], "y": 3.3},
        {"x": [6], "y": 4.4},
        {"x": [7, 8, 9, 10], "y": 5.5},
    ]

    content = awkward1.layout.Content
    partitioned = awkward1.partition.PartitionedArray

    def check_zip(x, y, layout_type):
        # Zip at both depths, unzip, and compare layouts and contents.
        one = awkward1.zip({"x": x, "y": y})
        two = awkward1.zip({"x": x, "y": y}, depth_limit=1)
        xx, yy = awkward1.unzip(two)
        assert isinstance(one.layout, layout_type)
        assert isinstance(two.layout, layout_type)
        assert isinstance(xx.layout, layout_type)
        assert isinstance(yy.layout, layout_type)
        assert awkward1.to_list(one) == expected_one
        assert awkward1.to_list(two) == expected_two
        if not awkward1._util.py27 and not awkward1._util.py35:
            assert awkward1.to_list(xx) == x_list
            assert awkward1.to_list(yy) == y_list

    x = awkward1.Array(x_list)
    y = awkward1.Array(y_list)
    check_zip(x, y, content)

    x = awkward1.repartition(x, 3)
    assert isinstance(x.layout, partitioned)
    assert awkward1.to_list(x) == x_list
    check_zip(x, y, partitioned)

    y = awkward1.repartition(y, 2)
    # This checks y, the array that was just repartitioned (it used to
    # re-check x).
    assert isinstance(y.layout, partitioned)
    assert awkward1.to_list(y) == y_list
    check_zip(x, y, partitioned)

    x = awkward1.repartition(x, None)
    assert isinstance(x.layout, content)
    assert awkward1.to_list(x) == x_list
    # y is still partitioned, so the results are too.
    check_zip(x, y, partitioned)

    y = awkward1.repartition(y, None)
    assert isinstance(y.layout, content)
    assert awkward1.to_list(y) == y_list
    check_zip(x, y, content)
def process(self, events):
    """Run the di-electron selection on one chunk and fill the histograms.

    Steps, in order: double-electron trigger, at least two electrons
    passing ``Electron_selection``, opposite-charge pairing, leading-pt
    pair, Z mass window (60-120).  A cut-flow histogram, the primary-vertex
    histograms and the kinematics of the leading opposite-charge pair
    inside the Z window are filled.  When the chunk has a ``genWeight``
    field, pileup, ID and reconstruction scale factors are applied as
    weights.

    Parameters
    ----------
    events
        Event array exposing ``HLT``, ``Electron``, ``PV`` and, when
        ``genWeight`` is present, ``Pileup``.

    Returns
    -------
    The filled accumulator.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    dataset = setname  # events.metadata['dataset']

    # The scale-factor branches below run only when genWeight is present.
    isData = 'genWeight' not in events.fields

    selection = processor.PackedSelection()

    # Cut flow
    cut0 = np.zeros(len(events))

    # --- Selection

    # << flat dim helper function >>
    def flat_dim(arr):
        # Flatten one level and drop missing entries.
        sub_arr = ak.flatten(arr)
        mask = ~ak.is_none(sub_arr)
        return ak.to_numpy(sub_arr[mask])

    def fired_any(evts, paths):
        # OR of all listed HLT paths that exist in this chunk.
        # dtype=bool: the np.bool alias was removed in NumPy 1.24.
        fired = np.zeros(len(evts), dtype=bool)
        for path in paths:
            if path not in evts.HLT.fields:
                continue
            fired = fired | evts.HLT[path]
        return fired

    # double lepton trigger
    double_ele_triggers_arr = fired_any(
        events, self._doubleelectron_triggers[self._year]
    )

    # single lepton trigger (computed, but not used in the selection below)
    single_ele_triggers_arr = fired_any(
        events, self._singleelectron_triggers[self._year]
    )

    Initial_events = events
    print("#### Initial events: ", Initial_events)
    # events = events[single_ele_triggers_arr | double_ele_triggers_arr]
    events = events[double_ele_triggers_arr]

    ##----------- Cut flow1: Passing Triggers
    cut1 = np.ones(len(events))
    print("#### cut1: ", len(cut1))

    # Particle Identification
    Electron = events.Electron

    def Electron_selection(ele):
        return (ele.pt > 25) & (np.abs(ele.eta) < 2.5) & (ele.cutBased > 2)

    # Electron channel
    Electron_mask = Electron_selection(Electron)
    Ele_channel_mask = ak.num(Electron[Electron_mask]) > 1
    Ele_channel_events = events[Ele_channel_mask]

    ##----------- Cut flow2: Electron channel
    cut2 = np.ones(len(Ele_channel_events)) * 2
    print("#### cut2: ", len(cut2))

    # --- Calculate Scale factor weight
    if not isData:
        # PU weight with lookup table <-- On developing -->
        # get_pu_weight = self._corrections['get_pu_weight'][self._year]
        # pu = get_pu_weight(events.Pileup.nTrueInt)

        get_ele_reco_sf = self._corrections['get_ele_reco_sf'][self._year]
        get_ele_loose_id_sf = self._corrections['get_ele_loose_id_sf'][self._year]

        get_ele_trig_leg1_SF = self._corrections['get_ele_trig_leg1_SF'][self._year]
        get_ele_trig_leg1_data_Eff = self._corrections['get_ele_trig_leg1_data_Eff'][self._year]
        get_ele_trig_leg1_mc_Eff = self._corrections['get_ele_trig_leg1_mc_Eff'][self._year]

        get_ele_trig_leg2_SF = self._corrections['get_ele_trig_leg2_SF'][self._year]
        get_ele_trig_leg2_data_Eff = self._corrections['get_ele_trig_leg2_data_Eff'][self._year]
        get_ele_trig_leg2_mc_Eff = self._corrections['get_ele_trig_leg2_mc_Eff'][self._year]

        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(Ele_channel_events.Pileup.nTrueInt, "int64")
        pu = self._puweight_arr[pu_weight_idx]

    nPV = Ele_channel_events.PV.npvsGood

    # Electron array
    Ele = Ele_channel_events.Electron
    Electron_mask = Electron_selection(Ele)
    Ele_sel = Ele[Electron_mask]

    # Electron pair
    ele_pairs = ak.combinations(Ele_sel, 2, axis=1)
    ele_left, ele_right = ak.unzip(ele_pairs)
    diele = ele_left + ele_right

    # OS
    os_mask = diele.charge == 0
    os_diele = diele[os_mask]
    os_ele_left = ele_left[os_mask]
    os_ele_right = ele_right[os_mask]

    os_event_mask = ak.num(os_diele) > 0
    # Not used further in this method.
    Ele_os_channel_events = Ele_channel_events[os_event_mask]
    # selection.add('ossf',os_event_mask)

    # Helper function: High PT argmax
    def make_leading_pair(target, base):
        # keepdims=True keeps one (possibly missing) entry per event.
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    # -- Only Leading pair --
    leading_diele = make_leading_pair(diele, diele)
    leading_ele = make_leading_pair(ele_left, diele)
    subleading_ele = make_leading_pair(ele_right, diele)

    # -- Scale Factor for each electron
    def Trigger_Weight(eta1, pt1, eta2, pt2):
        # Ratio of the per-event data and MC trigger expressions built from
        # the leg1/leg2 lookups.  Those lookups are only defined when
        # genWeight is present, so this must only be called in that case.
        per_ev_MC = (
            get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg2_mc_Eff(eta2, pt2)
            + get_ele_trig_leg1_mc_Eff(eta2, pt2) * get_ele_trig_leg2_mc_Eff(eta1, pt1)
            - get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg1_mc_Eff(eta2, pt2)
        )
        per_ev_data = (
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) * get_ele_trig_leg2_SF(eta2, pt2)
            + get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) * get_ele_trig_leg2_SF(eta1, pt1)
            - get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2)
        )
        return per_ev_data / per_ev_MC

    if not isData:
        ele_loose_id_sf = get_ele_loose_id_sf(
            ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
            ak.flatten(leading_ele.pt),
        ) * get_ele_loose_id_sf(
            ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
            ak.flatten(subleading_ele.pt),
        )
        # print("Ele ID SC---->",ele_loose_id_sf)

        ele_reco_sf = get_ele_reco_sf(
            ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
            ak.flatten(leading_ele.pt),
        ) * get_ele_reco_sf(
            ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
            ak.flatten(subleading_ele.pt),
        )
        # print("Ele RECO SC---->",ele_reco_sf)

        eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
        eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
        pt1 = ak.flatten(leading_ele.pt)
        pt2 = ak.flatten(subleading_ele.pt)
        # Computed and printed only; it is not added to `weights` below.
        ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)
        print("#### Test print trigger weight ####")
        print(ele_trig_weight)

    # --OS and Leading pair --
    leading_os_diele = make_leading_pair(os_diele, os_diele)
    leading_os_ele = make_leading_pair(os_ele_left, os_diele)
    subleading_os_ele = make_leading_pair(os_ele_right, os_diele)

    ##----------- Cut flow3: OSSF
    cut3 = np.ones(len(flat_dim(leading_os_diele))) * 3
    print("#### cut3: ", len(cut3))

    # Helper function: Zmass window
    def makeZmass_window_mask(dielecs, start=60, end=120):
        mask = (dielecs.mass >= start) & (dielecs.mass <= end)
        return mask

    # -- OS and Leading pair --
    Zmass_mask_os = makeZmass_window_mask(leading_os_diele)
    leading_os_Zwindow_ele = leading_os_ele[Zmass_mask_os]
    subleading_os_Zwindow_ele = subleading_os_ele[Zmass_mask_os]
    leading_os_Zwindow_diele = leading_os_diele[Zmass_mask_os]

    # for masking (these three are not used further in this method)
    Zmass_event_mask = makeZmass_window_mask(leading_diele)
    Zmass_os_event_mask = ak.flatten(os_event_mask * Zmass_event_mask)
    Ele_Zmass_os_events = Ele_channel_events[Zmass_os_event_mask]

    ##----------- Cut flow4: Zmass
    cut4 = np.ones(len(flat_dim(leading_os_Zwindow_diele))) * 4
    print("#### cut4: ", len(cut4))

    ## << Selection method -- Need validation >>
    ele1PT = flat_dim(leading_os_Zwindow_ele.pt)
    ele1Eta = flat_dim(leading_os_Zwindow_ele.eta)
    ele1Phi = flat_dim(leading_os_Zwindow_ele.phi)

    ele2PT = flat_dim(subleading_os_Zwindow_ele.pt)
    ele2Eta = flat_dim(subleading_os_Zwindow_ele.eta)
    ele2Phi = flat_dim(subleading_os_Zwindow_ele.phi)

    Mee = flat_dim(leading_os_Zwindow_diele.mass)
    charge = flat_dim(leading_os_Zwindow_diele.charge)

    # --- Apply weight and hist
    weights = processor.Weights(len(cut2))

    # --- skim cut-weight
    def skim_weight(arr):
        # Drop missing entries, then entries equal to zero.
        # NOTE(review): a genuinely zero weight would also be dropped here,
        # which would desynchronize it from the value arrays -- confirm.
        mask1 = ~ak.is_none(arr)
        subarr = arr[mask1]
        mask2 = subarr != 0
        return ak.to_numpy(subarr[mask2])

    cuts = ak.flatten(Zmass_mask_os)

    if not isData:
        weights.add('pileup', pu)
        weights.add('ele_id', ele_loose_id_sf)
        weights.add('ele_reco', ele_reco_sf)
        # weights.add('ele_trigger',ele_trig_weight)

    # Initial events
    out["sumw"][dataset] += len(Initial_events)

    # Cut flow loop
    for cut in [cut0, cut1, cut2, cut3, cut4]:
        out["cutflow"].fill(dataset=dataset, cutflow=cut)

    # Primary vertex
    out['nPV'].fill(dataset=dataset, nPV=nPV, weight=weights.weight())
    out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV)

    # Physics variables passing Zwindow.  Every histogram receives the same
    # weight, so it is computed once.
    selected_weight = skim_weight(weights.weight() * cuts)
    physics_fills = (
        ("mass", Mee),
        ("ele1pt", ele1PT),
        ("ele1eta", ele1Eta),
        ("ele1phi", ele1Phi),
        ("ele2pt", ele2PT),
        ("ele2eta", ele2Eta),
        ("ele2phi", ele2Phi),
    )
    for axis_name, values in physics_fills:
        out[axis_name].fill(
            dataset=dataset, weight=selected_weight, **{axis_name: values}
        )

    return out
def parse_to_parquet(
    base_output_filename: Union[Path, str],
    store_only_necessary_columns: bool,
    input_filename: Union[Path, str],
    events_per_chunk: int,
    parser: str = "pandas",
    max_chunks: int = -1,
    compression: str = "zstd",
    compression_level: Optional[int] = None,
) -> None:
    """ Parse the JETSCAPE ASCII and convert it to parquet, (potentially) storing only the minimum necessary columns.

    Args:
        base_output_filename: Basic output filename. Should include the entire path.
        store_only_necessary_columns: If True, store only the necessary columns, rather than all of them.
        input_filename: Filename of the input JETSCAPE ASCII file.
        events_per_chunk: Number of events to be read per chunk. If positive, the chunks are
            written into a directory named after the base output filename, one file per chunk,
            with the chunk index appended to the file stem.
        parser: Name of the parser. Default: "pandas".
        max_chunks: Maximum number of chunks to read. Default: -1.
        compression: Compression algorithm for parquet. Default: "zstd". Options include: ["snappy", "gzip", "zstd"].
            "gzip" is slightly better for storage, but slower. See the compression tests and parquet docs for more.
        compression_level: Compression level for parquet. Default: `None`, which lets parquet choose the best value.
    Returns:
        None. The parsed events are stored in parquet files.
    """
    # Validation
    base_output_filename = Path(base_output_filename)
    # Setup the base output filename
    if events_per_chunk > 0:
        # Chunked output goes into a directory carrying the output file's name.
        base_output_filename = base_output_filename / base_output_filename.name
    base_output_filename.parent.mkdir(parents=True, exist_ok=True)

    for i, arrays in enumerate(
        read(filename=input_filename, events_per_chunk=events_per_chunk, parser=parser)
    ):
        # Reduce to the minimum required data.
        if store_only_necessary_columns:
            arrays = full_events_to_only_necessary_columns_E_px_py_pz(arrays)

        # We limit the depth of the zip to ensure that we can write the parquet successfully.
        # (parquet can't handle lists of structs at the moment). Later, we'll recreate this
        # structure fully zipped together.
        # The re-zipped array has to be assigned back, otherwise the unmodified
        # arrays would be written below.
        arrays = ak.zip(dict(zip(ak.fields(arrays), ak.unzip(arrays))), depth_limit=1)

        # Parquet with zlib seems to do about the same as ascii tar.gz when we drop unneeded columns.
        # And it should load much faster!
        if events_per_chunk > 0:
            suffix = base_output_filename.suffix
            output_filename = (
                base_output_filename.parent / f"{base_output_filename.stem}_{i:02}"
            ).with_suffix(suffix)
        else:
            output_filename = base_output_filename
        ak.to_parquet(
            arrays,
            output_filename,
            compression=compression,
            compression_level=compression_level,
            # We run into a recursion limit or crash if there's a cut and we don't explode records. Probably a bug...
            # But it works fine if we explode records, so fine for now.
            explode_records=True,
        )

        # Break now so we don't have to read the next chunk.
        if (i + 1) == max_chunks:
            break
# Open ROOT file from Pythia Delphes production with FCCSW
file = uproot.open(f"{path}/output/rootfiles/FCCDelphesOutput_100events.root")
tree = file['events']  # Get TTree from file

# Get the charged tracks
tr = tree.arrays(filter_name="pfcharged.core*")

# Derived kinematics from the px/py/pz components
tr["p"] = np.sqrt(
    tr["pfcharged.core.p4.px"]**2
    + tr["pfcharged.core.p4.py"]**2
    + tr["pfcharged.core.p4.pz"]**2
)
tr["pt"] = np.sqrt(tr["pfcharged.core.p4.px"]**2 + tr["pfcharged.core.p4.py"]**2)
tr['eta'] = np.log(
    (tr['p'] + tr['pfcharged.core.p4.pz']) / (tr['p'] - tr['pfcharged.core.p4.pz'])
) / 2
tr['phi'] = np.arctan2(tr['pfcharged.core.p4.py'], tr['pfcharged.core.p4.px'])

# Pions
pi_cut = abs(tr["pfcharged.core.pdgId"]) == 211
pi = tr[pi_cut]

# Number of pions in each event: count the True entries of the mask.
# (ak.num(pi_cut) would count every charged track, pion or not.)
pi_sum = ak.sum(pi_cut, axis=1)

# Keep events with 2 or more pions
pi = pi[pi_sum >= 2]

# Make pion pairs per event
pi_pairs = ak.combinations(pi, 2)

# pt of first pion pair from first event
# print(pi_pairs[0]["pt"][0])

pi1, pi2 = ak.unzip(pi_pairs)

m_pipi = calc_invariant_mass(pi1, pi2)

plt.hist(ak.flatten(m_pipi), bins=400)
plt.show()
def process(self, events):
    """Select opposite-charge di-electron pairs in the Z window and fill histograms.

    Steps, in order: single- or double-electron trigger, at least two
    electrons passing ``Electron_selection``, opposite-charge pairing,
    leading-pt pair, Z mass window (60-120).  The kinematics of the
    surviving pair are filled without weights.

    Parameters
    ----------
    events
        Event array exposing ``HLT`` and ``Electron`` and, when
        ``genWeight`` is present, ``Pileup``.

    Returns
    -------
    The filled accumulator.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    dataset = setname  # events.metadata['dataset']

    isData = 'genWeight' not in events.fields

    selection = processor.PackedSelection()

    # --- Calculate Scale factor weight
    if not isData:
        # PU weight (looked up here, but not applied to any histogram below)
        get_pu_weight = self._corrections['get_pu_weight'][self._year]
        pu = get_pu_weight(events.Pileup.nTrueInt)

    # --- Selection

    # flat dim helper function
    def flat_dim(arr):
        # Flatten one level and drop missing entries.
        sub_arr = ak.flatten(arr)
        mask = ~ak.is_none(sub_arr)
        return ak.to_numpy(sub_arr[mask])

    def fired_any(evts, paths):
        # OR of all listed HLT paths that exist in this chunk.
        # dtype=bool: the np.bool alias was removed in NumPy 1.24.
        fired = np.zeros(len(evts), dtype=bool)
        for path in paths:
            if path not in evts.HLT.fields:
                continue
            fired = fired | evts.HLT[path]
        return fired

    # double lepton trigger
    double_ele_triggers_arr = fired_any(
        events, self._doubleelectron_triggers[self._year]
    )

    # single lepton trigger
    single_ele_triggers_arr = fired_any(
        events, self._singleelectron_triggers[self._year]
    )

    Initial_events = events
    n_initial = len(Initial_events)
    print("{0} number of events are detected".format(n_initial))

    events = events[single_ele_triggers_arr | double_ele_triggers_arr]
    n_triggered = len(events)
    # Guard the efficiency against an empty chunk (division by zero).
    trigger_eff = n_triggered / n_initial * 100 if n_initial else 0.0
    print(
        "Total {0} number of events are remain after triggger | Eff: {1}".
        format(n_triggered, trigger_eff))

    # Particle Identification
    Electron = events.Electron

    def Electron_selection(ele):
        return (ele.pt > 20) & (np.abs(ele.eta) < 2.5) & (ele.cutBased > 1)

    # Electron channel
    Electron_mask = Electron_selection(Electron)
    Ele_channel_mask = ak.num(Electron[Electron_mask]) > 1
    Ele_channel_events = events[Ele_channel_mask]
    N_Ele_Channel_events = len(Ele_channel_events)
    print("#1 Ele channel evts: {0} --> {1}".format(
        len(events), N_Ele_Channel_events))

    # Electron array
    Ele = Ele_channel_events.Electron
    Electron_mask = Electron_selection(Ele)
    Ele_sel = Ele[Electron_mask]

    # Electron pair
    ele_pairs = ak.combinations(Ele_sel, 2, axis=1)
    ele_left, ele_right = ak.unzip(ele_pairs)
    diele = ele_left + ele_right

    # OS
    os_mask = diele.charge == 0
    os_diele = diele[os_mask]
    os_ele_left = ele_left[os_mask]
    os_ele_right = ele_right[os_mask]

    # Helper function: High PT argmax
    def make_leading_pair(target, base):
        # keepdims=True keeps one (possibly missing) entry per event.
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    # --OS and Leading pair --
    leading_os_diele = make_leading_pair(os_diele, os_diele)
    leading_os_ele = make_leading_pair(os_ele_left, os_diele)
    subleading_os_ele = make_leading_pair(os_ele_right, os_diele)

    # Helper function: Zmass window
    def makeZmass_window_mask(dielecs, start=60, end=120):
        mask = (dielecs.mass >= start) & (dielecs.mass <= end)
        return mask

    # -- OS and Leading pair --
    Zmass_mask_os = makeZmass_window_mask(leading_os_diele)
    leading_os_Zwindow_ele = leading_os_ele[Zmass_mask_os]
    subleading_os_Zwindow_ele = subleading_os_ele[Zmass_mask_os]
    leading_os_Zwindow_diele = leading_os_diele[Zmass_mask_os]

    os_ele1PT = flat_dim(leading_os_Zwindow_ele.pt)
    os_ele1Eta = flat_dim(leading_os_Zwindow_ele.eta)
    os_ele1Phi = flat_dim(leading_os_Zwindow_ele.phi)

    os_ele2PT = flat_dim(subleading_os_Zwindow_ele.pt)
    os_ele2Eta = flat_dim(subleading_os_Zwindow_ele.eta)
    os_ele2Phi = flat_dim(subleading_os_Zwindow_ele.phi)

    os_Mee = flat_dim(leading_os_Zwindow_diele.mass)
    os_charge = flat_dim(leading_os_Zwindow_diele.charge)

    # --- Apply weight --- on progress
    # if not isData:
    #     weights = processor.Weights(len(events))
    #     weights.add('pileup',pu)

    # The event count after the trigger requirement is what gets accumulated.
    out["sumw"][dataset] += len(events)

    unweighted_fills = (
        ("os_mass", os_Mee),
        ("os_ele1pt", os_ele1PT),
        ("os_ele1eta", os_ele1Eta),
        ("os_ele1phi", os_ele1Phi),
        ("os_ele2pt", os_ele2PT),
        ("os_ele2eta", os_ele2Eta),
        ("os_ele2phi", os_ele2Phi),
    )
    for axis_name, values in unweighted_fills:
        out[axis_name].fill(dataset=dataset, **{axis_name: values})

    return out
import numpy as np
import awkward1 as ak
import uproot4

# Count, per event, the tracks passing the quality cuts in events with
# HT > 1200.

# f = uproot4.open("~/storage/data/uproot4-big/issue-131little.root")
t = uproot4.open("/Users/chrispap/QCD/*.root:TreeMaker2/PreSelection")
# t = f["TreeMaker2/PreSelection"]

events = t.arrays([
    "HT",
    "CrossSection",
    "Tracks.fCoordinates.fX",
    "Tracks.fCoordinates.fY",
    "Tracks.fCoordinates.fZ",
    "Tracks_fromPV0",
    "Tracks_matchedToPFCandidate",
], entry_start=99000)

cut_events = events[events.HT > 1200]

# The unpacking order follows the branch order requested above.
HT, CrossSection, x, y, z, num_fromPV0, matched_to_candidate = ak.unzip(cut_events)

# Transverse magnitude uses x and y only; it is the same quantity the eta
# formula divides by.  (It previously included z as well.)
pt = np.sqrt(x**2 + y**2)
eta = np.arcsinh(z / pt)

# |eta| < 2.5: abs() must wrap eta, not the boolean comparison result.
track_cut = (pt > 1) & (np.abs(eta) < 2.5) & (num_fromPV0 >= 2) & matched_to_candidate

# Number of selected tracks per event.
multiplicity = ak.sum(track_cut, axis=1)