def test_string():
    """values_astype casts numeric record fields to float32 but leaves string fields untouched."""
    original = awkward1.Array([{"x": 1.1, "y": "hello"}])
    converted = awkward1.values_astype(original, numpy.float32)
    # 1.1 is not exactly representable in float32, hence the long literal.
    assert converted.tolist() == [{"x": 1.100000023841858, "y": "hello"}]
def test_UnmaskedArray():
    """Exercise values_astype on a NumpyArray wrapped in an UnmaskedArray.

    Round-trips float64 -> float32, float64 -> int8, and int8 -> float64,
    checking both the values and the rendered type strings each time.
    """
    values = [0.25, 0.5, 3.5, 4.5, 5.5]

    def check_types(content, unmasked, typename):
        # The bare layout renders as the dtype; the UnmaskedArray adds '?'.
        assert str(awkward1.type(content)) == typename
        assert str(awkward1.type(awkward1.Array(content))) == "5 * " + typename
        assert str(awkward1.type(unmasked)) == "?" + typename
        assert str(awkward1.type(awkward1.Array(unmasked))) == "5 * ?" + typename

    content_float64 = awkward1.layout.NumpyArray(
        numpy.array(values, dtype=numpy.float64))
    array_float64 = awkward1.layout.UnmaskedArray(content_float64)
    assert awkward1.to_list(array_float64) == values
    check_types(content_float64, array_float64, "float64")

    # Sanity check: NumPy itself permits these casts (unsafely where narrowing).
    assert numpy.can_cast(numpy.float32, numpy.float64)
    assert numpy.can_cast(numpy.float64, numpy.float32, 'unsafe')
    assert numpy.can_cast(numpy.float64, numpy.int8, 'unsafe')

    content_float32 = awkward1.values_astype(content_float64, 'float32', highlevel=False)
    array_float32 = awkward1.layout.UnmaskedArray(content_float32)
    assert awkward1.to_list(array_float32) == values
    check_types(content_float32, array_float32, "float32")

    # Narrowing to int8 truncates the fractional parts.
    content_int8 = awkward1.values_astype(content_float64, 'int8', highlevel=False)
    array_int8 = awkward1.layout.UnmaskedArray(content_int8)
    assert awkward1.to_list(array_int8) == [0, 0, 3, 4, 5]
    check_types(content_int8, array_int8, "int8")

    # Widening back to float64 keeps the truncated integer values.
    content_from_int8 = awkward1.values_astype(content_int8, 'float64', highlevel=False)
    array_from_int8 = awkward1.layout.UnmaskedArray(content_from_int8)
    assert awkward1.to_list(array_from_int8) == [0, 0, 3, 4, 5]
    check_types(content_from_int8, array_from_int8, "float64")
def test_RegularArray_and_ListArray():
    """values_astype changes only the leaf dtype; nested list structure is preserved."""
    leaf = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    inner_offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 10, 10]))
    inner_lists = awkward1.layout.ListOffsetArray64(inner_offsets, leaf)
    pairs = awkward1.layout.RegularArray(inner_lists, 2)
    window_starts = awkward1.layout.Index64(numpy.array([0, 1]))
    window_stops = awkward1.layout.Index64(numpy.array([2, 3]))
    windows = awkward1.layout.ListArray64(window_starts, window_stops, pairs)

    # Types before any conversion.
    assert str(awkward1.type(leaf)) == "float64"
    assert str(awkward1.type(pairs)) == "2 * var * float64"
    assert str(awkward1.type(windows)) == "var * 2 * var * float64"

    # After casting, only the innermost dtype differs.
    pairs_int8 = awkward1.values_astype(pairs, 'int8', highlevel=False)
    assert str(awkward1.type(pairs_int8)) == "2 * var * int8"
    windows_bool = awkward1.values_astype(windows, 'bool', highlevel=False)
    assert str(awkward1.type(windows_bool)) == "var * 2 * var * bool"
def test_ufunc_afterward():
    """A ufunc applied after values_astype operates on the float32 values."""
    records = awkward1.Array([{"x": 1.1}, {"x": 3.3}])
    as_float32 = awkward1.values_astype(records, numpy.float32)
    shifted = as_float32 + 1
    # Results carry float32 rounding of the original literals.
    assert awkward1.to_list(shifted) == [{"x": 2.0999999046325684}, {"x": 4.300000190734863}]
def astype(self, dtype, copy=False):
    """Cast the element values to ``dtype``, operating elementwise.

    Returns an awkward array rather than a plain ndarray (use ``ak.to_numpy``
    for that). Casting to the awkward extension dtype itself is a no-op
    (or a data copy when ``copy`` is true); casting to object dtype falls
    back to plain Python objects via ``tolist``.
    NOTE(review): pandas' ExtensionArray.astype defaults to copy=True;
    this one defaults to False -- confirm that is intentional.
    """
    if isinstance(dtype, AwkardType):
        # Already the awkward extension dtype: nothing to convert.
        return type(self)(self.data.copy()) if copy else self
    if dtype in (object, "O", "object"):
        # Object dtype: hand back plain Python objects.
        return self.tolist()
    return ak.values_astype(self.data, dtype)
def test_ufunc_afterward():
    """Field projection after values_astype yields float32 values usable in ufuncs.

    NOTE(review): this redefines test_ufunc_afterward from earlier in the
    file, shadowing it at import time -- consider renaming one of them.
    """
    converted = awkward1.values_astype(
        awkward1.Array([{"x": 1.1}, {"x": 3.3}]), numpy.float32)
    incremented = converted["x"] + 1
    assert incremented.tolist() == [2.0999999046325684, 4.300000190734863]
def process(self, events):
    """Coffea processor step: select Z->ee candidates and fill histograms.

    Selects events passing the double-electron triggers, builds opposite-sign
    electron pairs inside a Z-mass window, applies scale-factor weights for MC
    (isData is inferred from the absence of 'genWeight'), and fills the
    cutflow / nPV / kinematic histograms in the accumulator.

    Args:
        events: NanoAOD-style awkward event record (reads HLT, Electron, PV,
            Pileup fields -- assumed NanoAOD schema; confirm against inputs).

    Returns:
        The filled accumulator ``out``.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    dataset = setname  #events.metadata['dataset']
    # MC carries a generator weight; real data does not.
    isData = 'genWeight' not in events.fields
    # NOTE(review): `selection` is created but never used below.
    selection = processor.PackedSelection()
    # Cut flow
    cut0 = np.zeros(len(events))
    # --- Selection
    # << flat dim helper function >>
    # Flatten one list level, drop None entries, return a plain numpy array.
    def flat_dim(arr):
        sub_arr = ak.flatten(arr)
        mask = ~ak.is_none(sub_arr)
        return ak.to_numpy(sub_arr[mask])
    # << drop na helper function >>
    # NOTE(review): drop_na and drop_na_np are defined but never used here.
    def drop_na(arr):
        mask = ~ak.is_none(arr)
        return arr[mask]
    # << drop na helper function >>
    def drop_na_np(arr):
        mask = ~np.isnan(arr)
        return arr[mask]
    # double lepton trigger
    is_double_ele_trigger=True
    if not is_double_ele_trigger:
        # Trigger requirement disabled: accept every event.
        double_ele_triggers_arr=np.ones(len(events), dtype=np.bool)
    else:
        # OR together all configured double-electron HLT paths present in this file.
        # NOTE(review): np.bool was removed in NumPy >= 1.24; this requires an older NumPy.
        double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
        for path in self._doubleelectron_triggers[self._year]:
            if path not in events.HLT.fields: continue
            double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]
    # single lepton trigger
    is_single_ele_trigger=True
    if not is_single_ele_trigger:
        single_ele_triggers_arr=np.ones(len(events), dtype=np.bool)
    else:
        single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.fields: continue
            single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]
    Initial_events = events
    print("#### Initial events: ",Initial_events)
    #events = events[single_ele_triggers_arr | double_ele_triggers_arr]
    events = events[double_ele_triggers_arr]
    ##----------- Cut flow1: Passing Triggers
    cut1 = np.ones(len(events))
    print("#### cut1: ",len(cut1))
    # Particle Identification
    Electron = events.Electron
    # Kinematic + ID requirements for analysis electrons
    # (cutBased > 2: medium working point or tighter -- TODO confirm).
    def Electron_selection(ele):
        return(ele.pt > 25) & (np.abs(ele.eta) < 2.5) & (ele.cutBased > 2)
    # Electron channel: at least two selected electrons.
    Electron_mask = Electron_selection(Electron)
    Ele_channel_mask = ak.num(Electron[Electron_mask]) > 1
    Ele_channel_events = events[Ele_channel_mask]
    ##----------- Cut flow2: Electron channel
    cut2 = np.ones(len(Ele_channel_events)) * 2
    print("#### cut2: ",len(cut2))
    # --- Calculate Scale factor weight
    if not isData:
        # PU weight with lookup table <-- On developing -->
        #get_pu_weight = self._corrections['get_pu_weight'][self._year]
        #pu = get_pu_weight(events.Pileup.nTrueInt)
        # Look up the per-year correction functions once.
        get_ele_reco_sf = self._corrections['get_ele_reco_sf'][self._year]
        get_ele_loose_id_sf = self._corrections['get_ele_loose_id_sf'][self._year]
        get_ele_trig_leg1_SF = self._corrections['get_ele_trig_leg1_SF'][self._year]
        get_ele_trig_leg1_data_Eff = self._corrections['get_ele_trig_leg1_data_Eff'][self._year]
        get_ele_trig_leg1_mc_Eff = self._corrections['get_ele_trig_leg1_mc_Eff'][self._year]
        get_ele_trig_leg2_SF = self._corrections['get_ele_trig_leg2_SF'][self._year]
        get_ele_trig_leg2_data_Eff = self._corrections['get_ele_trig_leg2_data_Eff'][self._year]
        get_ele_trig_leg2_mc_Eff = self._corrections['get_ele_trig_leg2_mc_Eff'][self._year]
        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(Ele_channel_events.Pileup.nTrueInt,"int64")
        pu = self._puweight_arr[pu_weight_idx]
        nPV = Ele_channel_events.PV.npvsGood
    else:
        nPV = Ele_channel_events.PV.npvsGood
    # Electron array
    Ele = Ele_channel_events.Electron
    Electron_mask = Electron_selection(Ele)
    Ele_sel = Ele[Electron_mask]
    # Electron pair: all unique 2-combinations per event.
    ele_pairs = ak.combinations(Ele_sel,2,axis=1)
    ele_left, ele_right = ak.unzip(ele_pairs)
    diele = ele_left + ele_right
    # OS -- opposite-sign pairs have net charge zero.
    os_mask = diele.charge == 0
    os_diele = diele[os_mask]
    os_ele_left = ele_left[os_mask]
    os_ele_right = ele_right[os_mask]
    os_event_mask = ak.num(os_diele) > 0
    # NOTE(review): Ele_os_channel_events is never used afterwards.
    Ele_os_channel_events = Ele_channel_events[os_event_mask]
    #selection.add('ossf',os_event_mask)
    # Helper function: High PT argmax
    # Pick, per event, the entry of `target` at the highest-pt index of `base`.
    def make_leading_pair(target,base):
        return target[ak.argmax(base.pt,axis=1,keepdims=True)]
    # -- Only Leading pair --
    leading_diele = make_leading_pair(diele,diele)
    leading_ele = make_leading_pair(ele_left,diele)
    subleading_ele= make_leading_pair(ele_right,diele)
    # -- Scale Factor for each electron
    # Per-event data/MC trigger efficiency ratio for the two trigger legs,
    # accounting for either electron firing either leg.
    def Trigger_Weight(eta1,pt1,eta2,pt2):
        per_ev_MC =\
        get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg2_mc_Eff(eta2,pt2) +\
        get_ele_trig_leg1_mc_Eff(eta2,pt2) * get_ele_trig_leg2_mc_Eff(eta1,pt1) -\
        get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg1_mc_Eff(eta2,pt2)
        per_ev_data =\
        get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg2_data_Eff(eta2,pt2) * get_ele_trig_leg2_SF(eta2,pt2) +\
        get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) * get_ele_trig_leg2_data_Eff(eta1,pt1) * get_ele_trig_leg2_SF(eta1,pt1) -\
        get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2)
        return per_ev_data/per_ev_MC
    if not isData:
        # ID and RECO scale factors: product of leading and subleading electron,
        # evaluated at supercluster eta (deltaEtaSC + eta).
        ele_loose_id_sf = get_ele_loose_id_sf(ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),ak.flatten(leading_ele.pt))* get_ele_loose_id_sf(ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),ak.flatten(subleading_ele.pt))
        #print("Ele ID SC---->",ele_loose_id_sf)
        ele_reco_sf = get_ele_reco_sf(ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),ak.flatten(leading_ele.pt))* get_ele_reco_sf(ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),ak.flatten(subleading_ele.pt))
        #print("Ele RECO SC---->",ele_reco_sf)
        eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
        eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
        pt1 = ak.flatten(leading_ele.pt)
        pt2 = ak.flatten(subleading_ele.pt)
        ele_trig_weight = Trigger_Weight(eta1,pt1,eta2,pt2)
        print("#### Test print trigger weight ####")
        print(ele_trig_weight)
    # --OS and Leading pair --
    leading_os_diele = make_leading_pair(os_diele,os_diele)
    leading_os_ele = make_leading_pair(os_ele_left,os_diele)
    subleading_os_ele= make_leading_pair(os_ele_right,os_diele)
    ##----------- Cut flow3: OSSF
    cut3 = np.ones(len(flat_dim(leading_os_diele))) * 3
    print("#### cut3: ",len(cut3))
    # Helper function: Zmass window
    # Mask for dielectron masses inside [start, end] GeV (default 60-120).
    def makeZmass_window_mask(dielecs,start=60,end=120):
        mask = (dielecs.mass >= start) & (dielecs.mass <= end)
        return mask
    # -- OS and Leading pair --
    Zmass_mask_os = makeZmass_window_mask(leading_os_diele)
    leading_os_Zwindow_ele = leading_os_ele[Zmass_mask_os]
    subleading_os_Zwindow_ele = subleading_os_ele[Zmass_mask_os]
    leading_os_Zwindow_diele = leading_os_diele[Zmass_mask_os]
    # for masking
    Zmass_event_mask = makeZmass_window_mask(leading_diele)
    Zmass_os_event_mask= ak.flatten(os_event_mask * Zmass_event_mask)
    # NOTE(review): Ele_Zmass_os_events is never used afterwards.
    Ele_Zmass_os_events = Ele_channel_events[Zmass_os_event_mask]
    ##----------- Cut flow4: Zmass
    cut4 = np.ones(len(flat_dim(leading_os_Zwindow_diele))) * 4
    print("#### cut4: ",len(cut4))
    ## << Selection method -- Need validation >>
    #print("a--->",len(Ele_channel_events))
    #print("b--->",len(Ele_os_channel_events))
    #print("b2--->",len(cut3))
    #print("c--->",len(Ele_Zmass_os_events))
    #print("c2--->",len(cut4))
    # Flattened kinematic observables of the Z-window candidate pair.
    ele1PT = flat_dim(leading_os_Zwindow_ele.pt)
    ele1Eta = flat_dim(leading_os_Zwindow_ele.eta)
    ele1Phi = flat_dim(leading_os_Zwindow_ele.phi)
    ele2PT = flat_dim(subleading_os_Zwindow_ele.pt)
    ele2Eta = flat_dim(subleading_os_Zwindow_ele.eta)
    ele2Phi = flat_dim(subleading_os_Zwindow_ele.phi)
    Mee = flat_dim(leading_os_Zwindow_diele.mass)
    charge = flat_dim(leading_os_Zwindow_diele.charge)
    # --- Apply weight and hist
    weights = processor.Weights(len(cut2))
    # --- skim cut-weight
    # Drop None and zero entries from a weight array, returning numpy.
    def skim_weight(arr):
        mask1 = ~ak.is_none(arr)
        subarr = arr[mask1]
        mask2 = subarr !=0
        return ak.to_numpy(subarr[mask2])
    cuts = ak.flatten(Zmass_mask_os)
    if not isData:
        weights.add('pileup',pu)
        weights.add('ele_id',ele_loose_id_sf)
        weights.add('ele_reco',ele_reco_sf)
        #weights.add('ele_trigger',ele_trig_weight)
    # Initial events
    out["sumw"][dataset] += len(Initial_events)
    # Cut flow loop
    for cut in [cut0,cut1,cut2,cut3,cut4]:
        out["cutflow"].fill(
            dataset = dataset,
            cutflow=cut
        )
    # Primary vertex
    out['nPV'].fill(
        dataset=dataset,
        nPV = nPV,
        weight = weights.weight()
    )
    out['nPV_nw'].fill(
        dataset=dataset,
        nPV_nw = nPV
    )
    # Physics varibles passing Zwindow
    out["mass"].fill(
        dataset=dataset,
        mass=Mee,
        weight = skim_weight(weights.weight() * cuts)
    )
    out["ele1pt"].fill(
        dataset=dataset,
        ele1pt=ele1PT,
        weight = skim_weight(weights.weight() * cuts)
    )
    out["ele1eta"].fill(
        dataset=dataset,
        ele1eta=ele1Eta,
        weight = skim_weight(weights.weight() * cuts)
    )
    out["ele1phi"].fill(
        dataset=dataset,
        ele1phi=ele1Phi,
        weight = skim_weight(weights.weight() * cuts)
    )
    out["ele2pt"].fill(
        dataset=dataset,
        ele2pt=ele2PT,
        weight = skim_weight(weights.weight() * cuts)
    )
    out["ele2eta"].fill(
        dataset=dataset,
        ele2eta=ele2Eta,
        weight = skim_weight(weights.weight() * cuts)
    )
    out["ele2phi"].fill(
        dataset=dataset,
        ele2phi=ele2Phi,
        weight = skim_weight(weights.weight() * cuts)
    )
    return out
def read(filename: Union[Path, str], events_per_chunk: int, parser: str = "pandas") -> Optional[Iterator[ak.Array]]:
    """ Read a JETSCAPE ascii output file in chunks.

    This is the main user function. We read in chunks to keep the memory usage manageable.

    Note:
        We store the data in the smallest possible types that can still encompass their range.

    Args:
        filename: Filename of the ascii file.
        events_per_chunk: Number of events to provide in each chunk.
        parser: Name of the parser to use. Default: `pandas`, which uses `pandas.read_csv`. It uses
            compiled c, and seems to be the fastest available option. Other options: ["python", "numpy"].
    Returns:
        Generator of an array of events_per_chunk events.
    """
    # NOTE(review): this is a generator function, so the annotated
    # Optional[Iterator[...]] can never actually be None -- consider Iterator[ak.Array].
    # Validation
    filename = Path(filename)

    # Setup
    # Dispatch table for the supported parser backends.
    # Raises KeyError for an unknown parser name.
    parsing_function_map = {
        "pandas": _parse_with_pandas,
        "python": _parse_with_python,
        "numpy": _parse_with_numpy,
    }
    parsing_function = parsing_function_map[parser]

    # Read the file, creating chunks of events.
    for chunk_generator, event_split_index, event_header_info in read_events_in_chunks(
        filename=filename, events_per_chunk=events_per_chunk):
        # Give a notification just in case the parsing is slow...
        logger.debug("New chunk")

        # Parse the file and create the awkward event structure.
        # np.split at the event boundaries yields one sub-array per event.
        array_with_events = ak.Array(
            np.split(parsing_function(chunk_generator), event_split_index))

        # Cross check that everything is in order and was parsed correctly.
        # N events produce N-1 split indices and N headers.
        if events_per_chunk > 0:
            assert len(event_split_index) == events_per_chunk - 1
            assert len(event_header_info) == events_per_chunk

        #print(len(event_split_index))
        #print(f"hadrons: {hadrons}")
        #print(f"array_with_events: {array_with_events}")
        #print(ak.type(array_with_events))
        #print(f"Event header info: {event_header_info}")
        #import IPython; IPython.embed()

        # Convert to the desired structure for our awkward array.
        # Columns are selected by position; column 0 is presumably a particle
        # index and is intentionally dropped -- TODO confirm against the writer.
        array = ak.zip(
            {
                # TODO: Does the conversion add any real computation time?
                "particle_ID": ak.values_astype(array_with_events[:, :, 1], np.int32),
                # Status is only a couple of numbers, but it's not always 0. It identifies recoils (1?) and holes (-1?)
                "status": ak.values_astype(array_with_events[:, :, 2], np.int8),
                "E": ak.values_astype(array_with_events[:, :, 3], np.float32),
                "px": ak.values_astype(array_with_events[:, :, 4], np.float32),
                "py": ak.values_astype(array_with_events[:, :, 5], np.float32),
                "pz": ak.values_astype(array_with_events[:, :, 6], np.float32),
                # Skip these because we're going to be working with four vectors anyway, so it shouldn't be a
                # big deal to recalculate them, especially compare to the added storage space.
                # NOTE(review): despite the comment above, eta and phi ARE stored here --
                # either the comment is stale or these two entries were meant to be removed.
                "eta": ak.values_astype(array_with_events[:, :, 7], np.float32),
                "phi": ak.values_astype(array_with_events[:, :, 8], np.float32),
            },
        )
        yield array