Example #1
def test_string():
    assert awkward1.values_astype(awkward1.Array([{
        "x": 1.1,
        "y": "hello"
    }]), numpy.float32).tolist() == [{
        'x': 1.100000023841858,
        'y': 'hello'
    }]
Example #2
def test_UnmaskedArray():
    content_float64 = awkward1.layout.NumpyArray(
        numpy.array([0.25, 0.5, 3.5, 4.5, 5.5], dtype=numpy.float64))
    array_float64 = awkward1.layout.UnmaskedArray(content_float64)
    assert awkward1.to_list(array_float64) == [0.25, 0.5, 3.5, 4.5, 5.5]
    assert str(awkward1.type(content_float64)) == "float64"
    assert str(awkward1.type(awkward1.Array(content_float64))) == "5 * float64"
    assert str(awkward1.type(array_float64)) == "?float64"
    assert str(awkward1.type(awkward1.Array(array_float64))) == "5 * ?float64"

    assert numpy.can_cast(numpy.float32, numpy.float64)
    assert numpy.can_cast(numpy.float64, numpy.float32, 'unsafe')
    assert numpy.can_cast(numpy.float64, numpy.int8, 'unsafe')

    content_float32 = awkward1.values_astype(content_float64,
                                             'float32',
                                             highlevel=False)
    array_float32 = awkward1.layout.UnmaskedArray(content_float32)
    assert awkward1.to_list(array_float32) == [0.25, 0.5, 3.5, 4.5, 5.5]
    assert str(awkward1.type(content_float32)) == "float32"
    assert str(awkward1.type(awkward1.Array(content_float32))) == "5 * float32"
    assert str(awkward1.type(array_float32)) == "?float32"
    assert str(awkward1.type(awkward1.Array(array_float32))) == "5 * ?float32"

    content_int8 = awkward1.values_astype(content_float64,
                                          'int8',
                                          highlevel=False)
    array_int8 = awkward1.layout.UnmaskedArray(content_int8)
    assert awkward1.to_list(array_int8) == [0, 0, 3, 4, 5]
    assert str(awkward1.type(content_int8)) == "int8"
    assert str(awkward1.type(awkward1.Array(content_int8))) == "5 * int8"
    assert str(awkward1.type(array_int8)) == "?int8"
    assert str(awkward1.type(awkward1.Array(array_int8))) == "5 * ?int8"

    content_from_int8 = awkward1.values_astype(content_int8,
                                               'float64',
                                               highlevel=False)
    array_from_int8 = awkward1.layout.UnmaskedArray(content_from_int8)
    assert awkward1.to_list(array_from_int8) == [0, 0, 3, 4, 5]
    assert str(awkward1.type(content_from_int8)) == "float64"
    assert str(awkward1.type(
        awkward1.Array(content_from_int8))) == "5 * float64"
    assert str(awkward1.type(array_from_int8)) == "?float64"
    assert str(awkward1.type(
        awkward1.Array(array_from_int8))) == "5 * ?float64"
Example #3
def test_RegularArray_and_ListArray():
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 10, 10]))
    listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content)
    regulararray = awkward1.layout.RegularArray(listoffsetarray, 2)
    starts = awkward1.layout.Index64(numpy.array([0, 1]))
    stops = awkward1.layout.Index64(numpy.array([2, 3]))
    listarray = awkward1.layout.ListArray64(starts, stops, regulararray)

    assert str(awkward1.type(content)) == "float64"
    assert str(awkward1.type(regulararray)) == "2 * var * float64"
    assert str(awkward1.type(listarray)) == "var * 2 * var * float64"

    regulararray_int8 = awkward1.values_astype(regulararray, 'int8', highlevel=False)
    assert str(awkward1.type(regulararray_int8)) == "2 * var * int8"

    listarray_bool = awkward1.values_astype(listarray, 'bool', highlevel=False)
    assert str(awkward1.type(listarray_bool)) == "var * 2 * var * bool"
Example #4
def test_ufunc_afterward():
    assert awkward1.to_list(
        awkward1.values_astype(awkward1.Array([{
            "x": 1.1
        }, {
            "x": 3.3
        }]), numpy.float32) + 1) == [{
            "x": 2.0999999046325684
        }, {
            "x": 4.300000190734863
        }]
Example #5
def astype(self, dtype, copy=False):
    # operates elementwise
    # if we really wanted a normal array out, would use ak.to_numpy
    if isinstance(dtype, AwkwardType):
        if copy:
            return type(self)(self.data.copy())
        else:
            return self
    if dtype in [object, "O", "object"]:
        return self.tolist()
    return ak.values_astype(self.data, dtype)
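For reference, the comments above point at the key distinction: ak.values_astype converts only the leaf values and preserves the (possibly ragged) awkward structure, whereas ak.to_numpy materializes a plain NumPy array. A minimal sketch of that difference, with illustrative sample data (not taken from the snippet above):

import awkward1 as ak
import numpy as np

ragged = ak.Array([[1.1, 2.2], [], [3.3]])

# values_astype: still a ragged awkward Array, now with float32 leaves
as_float32 = ak.values_astype(ragged, np.float32)

# to_numpy: a regular numpy.ndarray (flattened first, because the data is ragged)
flat = ak.to_numpy(ak.flatten(ragged))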
Example #6
def test_ufunc_afterward():
    assert (awkward1.values_astype(awkward1.Array([{"x": 1.1}, {"x": 3.3}]), numpy.float32)["x"] + 1).tolist() == [2.0999999046325684, 4.300000190734863]
Example #7
	def process(self, events):

		# Initialize accumulator
		out = self.accumulator.identity()
		dataset = setname
		#events.metadata['dataset']
		

		isData = 'genWeight' not in events.fields
		

		selection = processor.PackedSelection()

		# Cut flow
		cut0 = np.zeros(len(events))
		

		# --- Selection

		# << flat dim helper function >>
		def flat_dim(arr):

			sub_arr = ak.flatten(arr)
			mask = ~ak.is_none(sub_arr)

			return ak.to_numpy(sub_arr[mask])
		# << drop na helper function >>
		def drop_na(arr):

			mask = ~ak.is_none(arr)

			return arr[mask]
		# << drop na (numpy) helper function >>
		def drop_na_np(arr):

			mask = ~np.isnan(arr)

			return arr[mask]


		# double lepton trigger
		is_double_ele_trigger=True
		if not is_double_ele_trigger:
			double_ele_triggers_arr = np.ones(len(events), dtype=bool)
		else:
			double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
			for path in self._doubleelectron_triggers[self._year]:
				if path not in events.HLT.fields: continue
				double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]


		# single lepton trigger
		is_single_ele_trigger=True
		if not is_single_ele_trigger:
			single_ele_triggers_arr = np.ones(len(events), dtype=bool)
		else:
			single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
			for path in self._singleelectron_triggers[self._year]:
				if path not in events.HLT.fields: continue
				single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]


		
		Initial_events = events
		print("#### Initial events: ",Initial_events)
		#events = events[single_ele_triggers_arr | double_ele_triggers_arr]
		events = events[double_ele_triggers_arr]
		
		##----------- Cut flow1: Passing Triggers
		cut1 = np.ones(len(events))
		print("#### cut1: ",len(cut1))
		# Particle Identification
		Electron = events.Electron

		def Electron_selection(ele):
			return (ele.pt > 25) & (np.abs(ele.eta) < 2.5) & (ele.cutBased > 2)
		

		# Electron channel
		Electron_mask = Electron_selection(Electron)
		Ele_channel_mask = ak.num(Electron[Electron_mask]) > 1
		Ele_channel_events = events[Ele_channel_mask]


		##-----------  Cut flow2: Electron channel
		cut2 = np.ones(len(Ele_channel_events)) * 2
		print("#### cut2: ",len(cut2))
		
		# --- Calculate Scale factor weight
		
		if not isData:
			# PU weight with lookup table <-- under development -->
			#get_pu_weight = self._corrections['get_pu_weight'][self._year]
			#pu = get_pu_weight(events.Pileup.nTrueInt)
	
			get_ele_reco_sf = self._corrections['get_ele_reco_sf'][self._year]
			get_ele_loose_id_sf = self._corrections['get_ele_loose_id_sf'][self._year]


			get_ele_trig_leg1_SF		= self._corrections['get_ele_trig_leg1_SF'][self._year]
			get_ele_trig_leg1_data_Eff	= self._corrections['get_ele_trig_leg1_data_Eff'][self._year]
			get_ele_trig_leg1_mc_Eff	= self._corrections['get_ele_trig_leg1_mc_Eff'][self._year]
			get_ele_trig_leg2_SF		= self._corrections['get_ele_trig_leg2_SF'][self._year]
			get_ele_trig_leg2_data_Eff  = self._corrections['get_ele_trig_leg2_data_Eff'][self._year]
			get_ele_trig_leg2_mc_Eff	= self._corrections['get_ele_trig_leg2_mc_Eff'][self._year]





			# PU weight with a custom-made npy lookup table and integer indexing
			pu_weight_idx = ak.values_astype(Ele_channel_events.Pileup.nTrueInt,"int64")
			pu = self._puweight_arr[pu_weight_idx]
			nPV = Ele_channel_events.PV.npvsGood
		
		else:
			nPV = Ele_channel_events.PV.npvsGood


		# Electron array
		Ele = Ele_channel_events.Electron
		Electron_mask = Electron_selection(Ele)	
		Ele_sel = Ele[Electron_mask]	



		# Electron pair
		ele_pairs = ak.combinations(Ele_sel,2,axis=1)
		ele_left, ele_right = ak.unzip(ele_pairs)
		diele = ele_left + ele_right

		# OS
		os_mask		 = diele.charge == 0 
		os_diele	 = diele[os_mask]
		os_ele_left  = ele_left[os_mask]
		os_ele_right = ele_right[os_mask]
		os_event_mask = ak.num(os_diele) > 0
		Ele_os_channel_events = Ele_channel_events[os_event_mask]
		#selection.add('ossf',os_event_mask)


		# Helper function: High PT argmax
		def make_leading_pair(target,base):

			return target[ak.argmax(base.pt,axis=1,keepdims=True)]


		# -- Only Leading pair --
		leading_diele = make_leading_pair(diele,diele)
		leading_ele   = make_leading_pair(ele_left,diele)
		subleading_ele= make_leading_pair(ele_right,diele)

		# -- Scale Factor for each electron

		def Trigger_Weight(eta1,pt1,eta2,pt2):
			per_ev_MC =\
			get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg2_mc_Eff(eta2,pt2) +\
			get_ele_trig_leg1_mc_Eff(eta2,pt2) * get_ele_trig_leg2_mc_Eff(eta1,pt1) -\
			get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg1_mc_Eff(eta2,pt2)

			per_ev_data =\
			get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg2_data_Eff(eta2,pt2) * get_ele_trig_leg2_SF(eta2,pt2) +\
			get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) * get_ele_trig_leg2_data_Eff(eta1,pt1) * get_ele_trig_leg2_SF(eta1,pt1) -\
			get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2)

			return per_ev_data/per_ev_MC
			

		if not isData:
			ele_loose_id_sf = get_ele_loose_id_sf(ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),ak.flatten(leading_ele.pt))* get_ele_loose_id_sf(ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),ak.flatten(subleading_ele.pt))
			#print("Ele ID SC---->",ele_loose_id_sf)
			
			ele_reco_sf = get_ele_reco_sf(ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),ak.flatten(leading_ele.pt))* get_ele_reco_sf(ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),ak.flatten(subleading_ele.pt))
			#print("Ele RECO SC---->",ele_reco_sf)
		
		
			eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
			eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
			pt1  = ak.flatten(leading_ele.pt)	
			pt2  = ak.flatten(subleading_ele.pt)

			ele_trig_weight = Trigger_Weight(eta1,pt1,eta2,pt2)
			print("#### Test print trigger weight ####")
			print(ele_trig_weight)

		# --OS and Leading pair --
		leading_os_diele = make_leading_pair(os_diele,os_diele)
		leading_os_ele   = make_leading_pair(os_ele_left,os_diele)
		subleading_os_ele= make_leading_pair(os_ele_right,os_diele)

		##-----------  Cut flow3: OSSF
		cut3 = np.ones(len(flat_dim(leading_os_diele))) * 3
		print("#### cut3: ",len(cut3))

		# Helper function: Zmass window
		def makeZmass_window_mask(dielecs,start=60,end=120):
			mask = (dielecs.mass >= start) & (dielecs.mass <= end)	
			return mask

		# -- OS and Leading pair --
		Zmass_mask_os = makeZmass_window_mask(leading_os_diele)
		leading_os_Zwindow_ele = leading_os_ele[Zmass_mask_os]
		subleading_os_Zwindow_ele = subleading_os_ele[Zmass_mask_os]
		leading_os_Zwindow_diele = leading_os_diele[Zmass_mask_os]
		

		# for masking
		Zmass_event_mask = makeZmass_window_mask(leading_diele)
		Zmass_os_event_mask= ak.flatten(os_event_mask * Zmass_event_mask)
		

		Ele_Zmass_os_events = Ele_channel_events[Zmass_os_event_mask]

		##-----------  Cut flow4: Zmass
		cut4 = np.ones(len(flat_dim(leading_os_Zwindow_diele))) * 4
		print("#### cut4: ",len(cut4))


		
		## << Selection method -- Need validation >>
		#print("a--->",len(Ele_channel_events))
		#print("b--->",len(Ele_os_channel_events))
		#print("b2--->",len(cut3))
		#print("c--->",len(Ele_Zmass_os_events))
		#print("c2--->",len(cut4))


		ele1PT  = flat_dim(leading_os_Zwindow_ele.pt)
		ele1Eta = flat_dim(leading_os_Zwindow_ele.eta)
		ele1Phi = flat_dim(leading_os_Zwindow_ele.phi)
		ele2PT  = flat_dim(subleading_os_Zwindow_ele.pt)
		ele2Eta = flat_dim(subleading_os_Zwindow_ele.eta)
		ele2Phi = flat_dim(subleading_os_Zwindow_ele.phi)
		Mee	 = flat_dim(leading_os_Zwindow_diele.mass)
		charge  = flat_dim(leading_os_Zwindow_diele.charge)
		
		# --- Apply weight and hist  
		weights = processor.Weights(len(cut2))


		# --- skim cut-weight 
		def skim_weight(arr):
			mask1 = ~ak.is_none(arr)
			subarr = arr[mask1]
			mask2 = subarr !=0
			return ak.to_numpy(subarr[mask2])

		cuts = ak.flatten(Zmass_mask_os)
		if not isData:
			weights.add('pileup',pu)		
			weights.add('ele_id',ele_loose_id_sf)		
			weights.add('ele_reco',ele_reco_sf)		
			#weights.add('ele_trigger',ele_trig_weight)		

		# Initial events
		out["sumw"][dataset] += len(Initial_events)


		# Cut flow loop
		for cut in [cut0,cut1,cut2,cut3,cut4]:
			out["cutflow"].fill(
				dataset = dataset,
				cutflow=cut
			)

		

		# Primary vertex
		out['nPV'].fill(
			dataset=dataset,
			nPV = nPV,
			weight = weights.weight()
		)
		out['nPV_nw'].fill(
			dataset=dataset,
			nPV_nw = nPV
		)

		# Physics variables passing the Z window
		out["mass"].fill(
			dataset=dataset,
			mass=Mee,
			weight = skim_weight(weights.weight() * cuts)
		)
		out["ele1pt"].fill(
			dataset=dataset,
			ele1pt=ele1PT,
			weight = skim_weight(weights.weight() * cuts)
		)
		out["ele1eta"].fill(
			dataset=dataset,
			ele1eta=ele1Eta,
			weight = skim_weight(weights.weight() * cuts)
		)
		out["ele1phi"].fill(
			dataset=dataset,
			ele1phi=ele1Phi,
			weight = skim_weight(weights.weight() * cuts)
		)
		out["ele2pt"].fill(
			dataset=dataset,
			ele2pt=ele2PT,
			weight = skim_weight(weights.weight() * cuts)
		)
		out["ele2eta"].fill(
			dataset=dataset,
			ele2eta=ele2Eta,
			weight = skim_weight(weights.weight() * cuts)
		)
		out["ele2phi"].fill(
			dataset=dataset,
			ele2phi=ele2Phi,
			weight = skim_weight(weights.weight() * cuts)
		)
		return out
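The pileup block above relies on ak.values_astype to turn the float-valued Pileup.nTrueInt field into an integer index into a per-event weight lookup array. A minimal, self-contained sketch of that pattern, where puweight_arr and the sample values are illustrative placeholders rather than the processor's actual corrections:

import awkward1 as ak
import numpy as np

puweight_arr = np.linspace(1.2, 0.8, 100)     # hypothetical weight per true-interaction count
n_true_int = ak.Array([12.0, 33.0, 7.0])      # float values, as stored in NanoAOD

idx = ak.values_astype(n_true_int, "int64")   # cast to one integer index per event
pu = puweight_arr[ak.to_numpy(idx)]           # one pileup weight per event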
Example #8
def read(filename: Union[Path, str],
         events_per_chunk: int,
         parser: str = "pandas") -> Optional[Iterator[ak.Array]]:
    """ Read a JETSCAPE ascii output file in chunks.

    This is the main user function. We read in chunks to keep the memory usage manageable.

    Note:
        We store the data in the smallest possible types that can still encompass their range.

    Args:
        filename: Filename of the ascii file.
        events_per_chunk: Number of events to provide in each chunk.
        parser: Name of the parser to use. Default: `pandas`, which uses `pandas.read_csv`. It uses
            compiled C and seems to be the fastest available option. Other options: ["python", "numpy"].
    Returns:
        Generator of an array of events_per_chunk events.
    """
    # Validation
    filename = Path(filename)

    # Setup
    parsing_function_map = {
        "pandas": _parse_with_pandas,
        "python": _parse_with_python,
        "numpy": _parse_with_numpy,
    }
    parsing_function = parsing_function_map[parser]

    # Read the file, creating chunks of events.
    for chunk_generator, event_split_index, event_header_info in read_events_in_chunks(
            filename=filename, events_per_chunk=events_per_chunk):
        # Give a notification just in case the parsing is slow...
        logger.debug("New chunk")

        # Parse the file and create the awkward event structure.
        array_with_events = ak.Array(
            np.split(parsing_function(chunk_generator), event_split_index))

        # Cross check that everything is in order and was parsed correctly.
        if events_per_chunk > 0:
            assert len(event_split_index) == events_per_chunk - 1
            assert len(event_header_info) == events_per_chunk

        #print(len(event_split_index))
        #print(f"hadrons: {hadrons}")
        #print(f"array_with_events: {array_with_events}")
        #print(ak.type(array_with_events))
        #print(f"Event header info: {event_header_info}")
        #import IPython; IPython.embed()

        # Convert to the desired structure for our awkward array.
        array = ak.zip(
            {
                # TODO: Does the conversion add any real computation time?
                "particle_ID":
                ak.values_astype(array_with_events[:, :, 1], np.int32),
                # Status is only a couple of numbers, but it's not always 0. It identifies recoils (1?) and holes (-1?)
                "status":
                ak.values_astype(array_with_events[:, :, 2], np.int8),
                "E":
                ak.values_astype(array_with_events[:, :, 3], np.float32),
                "px":
                ak.values_astype(array_with_events[:, :, 4], np.float32),
                "py":
                ak.values_astype(array_with_events[:, :, 5], np.float32),
                "pz":
                ak.values_astype(array_with_events[:, :, 6], np.float32),
                # Skip these because we're going to be working with four-vectors anyway, so it shouldn't be a
                # big deal to recalculate them, especially compared to the added storage space.
                "eta":
                ak.values_astype(array_with_events[:, :, 7], np.float32),
                "phi":
                ak.values_astype(array_with_events[:, :, 8], np.float32),
            }, )

        yield array
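A hypothetical usage sketch for the reader above (the file name and chunk size are illustrative): iterate over the generator and handle one ak.Array of events at a time.

for events in read(filename="jetscape_output.dat", events_per_chunk=1000):
    # each chunk is an ak.Array with fields such as particle_ID, status, E, px, py, pz
    print(len(events), ak.type(events))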