def calcGeometricOffset(rCone, E, f_id, mu, mucut):
    # nEta, etabins and etaC are assumed to be defined at module level
    E = ak.to_numpy(ak.flatten(E)).reshape(len(E), nEta)[mu > mucut]
    f_id = ak.to_numpy(ak.flatten(f_id)).reshape(len(f_id), nEta)[mu > mucut]
    if len(f_id) != len(E):
        print("Error: E and f_id have different lengths after the mu cut")
    area = 2 * np.pi * (etabins[1:] - etabins[:-1])
    return E * f_id * np.pi * rCone * rCone / 255. / np.cosh(etaC) / area
def __call__(self, runs, lumis):
    """Check if run and lumi are valid

    Parameters
    ----------
    runs : numpy.ndarray
        Vectorized list of run numbers
    lumis : numpy.ndarray
        Vectorized list of lumiSection numbers

    Returns
    -------
    mask_out : numpy.ndarray
        An array of dtype `bool` where valid (run, lumi) tuples
        will have their corresponding entry set ``True``.
    """
    # fill numba typed dict
    _masks = Dict.empty(key_type=types.uint32, value_type=types.uint32[:])
    for k, v in self._masks.items():
        _masks[k] = v

    if isinstance(runs, ak.highlevel.Array):
        runs = ak.to_numpy(runs)
    if isinstance(lumis, ak.highlevel.Array):
        lumis = ak.to_numpy(lumis)
    mask_out = np.zeros(dtype="bool", shape=runs.shape)
    LumiMask._apply_run_lumi_mask_kernel(_masks, runs, lumis, mask_out)
    return mask_out
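# A minimal usage sketch for the __call__ above, assuming it belongs to
# coffea.lumi_tools.LumiMask built from a certification ("golden") JSON file;
# the file name and the run/lumi numbers below are hypothetical.
import numpy as np
from coffea.lumi_tools import LumiMask

lumimask = LumiMask("Cert_goldenJSON.txt")
runs = np.array([315264, 315265], dtype=np.uint32)
lumis = np.array([10, 90], dtype=np.uint32)
good = lumimask(runs, lumis)  # boolean mask, True where the (run, lumi) pair is certified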
def normalize(val, cut):
    if cut is None:
        ar = ak.to_numpy(ak.fill_none(val, np.nan))
        return ar
    else:
        ar = ak.to_numpy(ak.fill_none(val[cut], np.nan))
        return ar
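# A minimal sketch of how normalize() behaves, assuming val is an awkward array
# with possible missing values and cut is a boolean selection of the same
# length; the example values are hypothetical.
import awkward as ak
import numpy as np

val = ak.Array([1.0, None, 3.0, 4.0])
cut = ak.Array([True, True, False, True])
print(normalize(val, None))  # [ 1. nan  3.  4.]  -> None replaced by NaN
print(normalize(val, cut))   # [ 1. nan  4.]      -> cut applied first, then filled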
def getimage(pt, eta, phi):
    hs = []
    rnge = ((-2.4, 2.4), (-np.pi, np.pi))
    for i in range(len(eta)):
        h = np.histogram2d(ak.to_numpy(eta[i]), ak.to_numpy(phi[i]),
                           weights=ak.to_numpy(pt[i]), bins=64, range=rnge)
        hs.append(h[0])
    return np.stack(hs)
def process_event(weights, evt_vars):  # , bjets):
    out_hists = {}
    # weight the MET histogram by the per-event weights passed to the function
    out_hists["MET"] = Hist1D(ak.to_numpy(evt_vars.MET_pt),
                              bins=bin_met,
                              label="MET",
                              weights=ak.to_numpy(weights))
    # out_hists["njets"] = Hist1D(ak.to_numpy(evt_vars.nJet), bins=bin_njet,
    #                             label="njet", weights=ak.to_numpy(weights))
    # out_hists["nbjets"] = Hist1D(ak.num(bjets), bins=bin_bjet)
    out_hists["weight"] = ak.sum(weights)
    return out_hists
def make_plots(qArray, qArraySimTrackMatched, quantity, layerType):
    if len(qArray) == 0 or len(qArraySimTrackMatched) == 0:
        print("{} has no entries. Skipping".format(layerType))
        return

    minValue = min(qArray[qArray > -999])
    maxValue = max(qArray)
    histMinLimit = 1.1 * minValue if minValue < 0 else 0.9 * minValue
    histMaxLimit = 1.1 * maxValue if maxValue > 0 else 0.9 * maxValue
    binning = np.linspace(histMinLimit, histMaxLimit, 1000)

    allHist = Hist1D(ak.to_numpy(qArray[qArray > -999]),
                     bins=binning,
                     label="{}".format(quantity))
    simtrackMatchedHist = Hist1D(ak.to_numpy(
        qArraySimTrackMatched[qArraySimTrackMatched > -999]),
        bins=binning,
        label="Sim track matched {}".format(quantity))

    fig, ax = plt.subplots()
    ax.set_yscale("log")
    allHist.plot(alpha=0.8, color="C0", label="all", histtype="stepfilled")
    simtrackMatchedHist.plot(alpha=0.8, color="C3", label="sim track matched",
                             histtype="stepfilled")

    if layerType == "":
        if "TripletPt" in quantity:
            title = quantity.replace("TripletPt", "Triplet radius")
        else:
            title = quantity
        plt.title("{}".format(title))
    else:
        plt.title("{} type {}".format(quantity, layerType))
    plt.suptitle("Sample = {} Tag = {}".format(sys.argv[3], sys.argv[4]))

    # extra job for the composite dudes
    quantity = quantity.replace("(", " ")
    quantity = quantity.replace(")", "")
    quantity = quantity.replace("/", "by")
    quantity = quantity.replace("-", "minus")
    quantity = quantity.replace(" ", "_")
    if quantity[0] == "_":
        quantity = quantity[1:]

    if len(sys.argv) > 2:
        if layerType != "":
            plt.savefig("{}/{}_{}.pdf".format(sys.argv[2], quantity, layerType))
        else:
            plt.savefig("{}/{}.pdf".format(sys.argv[2], quantity))
    else:
        if layerType != "":
            plt.savefig("{}_{}.pdf".format(quantity, layerType))
        else:
            plt.savefig("{}.pdf".format(quantity))
    plt.close()
def test_datasetprovider_awkward_exporter_single_tensor_same_shape():
    """
    The core assumption for simplest cases is that one system will be
    featurized as a single tensor, and that all the tensors will be of the
    same shape across systems.

    If that's the case:

    - DatasetProvider.to_numpy() will work and will return a X, y tuple of arrays
    - DatasetProvider.to_dict_of_arrays() will work and will return a dict of
      arrays with X, y keys.

    Note that `.to_numpy()` won't work if the core assumptions are broken.
    For those cases, `.to_dict_of_arrays()` is recommended instead.
    """
    from kinoml.core.ligands import RDKitLigand
    from kinoml.features.ligand import MorganFingerprintFeaturizer
    from kinoml.features.core import Concatenated, TupleOfArrays
    import awkward as ak

    conditions = AssayConditions()
    systems = [
        System([RDKitLigand.from_smiles(smi)]) for smi in ("CCCCC", "CCCCCCCC")
    ]
    measurements = [
        BaseMeasurement(50, conditions=conditions, system=systems[0]),
        BaseMeasurement(30, conditions=conditions, system=systems[1]),
    ]
    dataset = DatasetProvider(measurements=measurements)

    featurizer1 = MorganFingerprintFeaturizer(radius=2, nbits=512)
    featurizer2 = MorganFingerprintFeaturizer(radius=2, nbits=1024)
    concatenate = Concatenated([featurizer1, featurizer2], axis=1)
    aggregated = TupleOfArrays([concatenate])
    aggregated.featurize(systems)
    for system in systems:
        assert system.featurizations["last"][0].shape[0] == (1024 + 512)

    # With a single tensor per system, we build a unified X tensor.
    # First dimension in X and y must match.
    X, y = dataset.to_numpy()
    # This extra dimension here V comes from the TupleOfArrays aggregation
    assert X.shape[:3] == (2, 1, (1024 + 512))
    assert X.shape[0] == y.shape[0]

    # With dict_of_arrays and single tensor per system,
    # the behavior is essentially the same
    (Xa,), ya = dataset.to_awkward()
    assert ak.to_numpy(Xa).shape == (2, (1024 + 512))
    assert ak.to_numpy(ya).shape == (2,)
def test_numpyarray():
    dtypes = ("i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f4", "f8", "?")
    for dtype1 in dtypes:
        for dtype2 in dtypes:
            for dtype3 in dtypes:
                for dtype4 in dtypes:
                    one = np.array([0, 1, 2], dtype=dtype1)
                    two = np.array([3, 0], dtype=dtype2)
                    three = np.array([], dtype=dtype3)
                    four = np.array([4, 5, 0, 6, 7], dtype=dtype4)
                    combined = np.concatenate([one, two, three, four])

                    ak_combined = ak.layout.NumpyArray(one).mergemany([
                        ak.layout.NumpyArray(two),
                        ak.layout.NumpyArray(three),
                        ak.layout.NumpyArray(four),
                    ])
                    assert ak.to_list(ak_combined) == combined.tolist()
                    assert ak.to_numpy(ak_combined).dtype == combined.dtype

                    ak_combined = ak.layout.NumpyArray(one).mergemany([
                        ak.layout.NumpyArray(two),
                        ak.layout.EmptyArray(),
                        ak.layout.NumpyArray(four),
                    ])
                    assert ak.to_list(ak_combined) == combined.tolist()
                    assert (
                        ak.to_numpy(ak_combined).dtype
                        == np.concatenate([one, two, four]).dtype
                    )
def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    rf = uproot.open(path)

    gh = hashlib.sha256()

    for tree_name in sorted(rf.keys()):
        gh.update(tree_name.encode("utf8"))

        try:
            tree = rf[tree_name]
            if not isinstance(tree, uproot.TTree):
                continue
        except NotImplementedError:
            continue
        keys = list(sorted(tree.keys()))

        branches = tree.arrays(library="ak")

        if not ordering_invariant:
            h = hashlib.sha256()
            for name in keys:
                h.update(name.encode("utf8"))
                arr = branches[name]
                arr = ak.flatten(arr, axis=None)
                arr = np.array(arr)
                h.update(arr.tobytes())
            gh.update(h.digest())
        else:
            items = np.array([])
            for row in zip(*[branches[b] for b in keys]):
                h = hashlib.md5()
                for obj in row:
                    if isinstance(obj, ak.highlevel.Array):
                        if obj.ndim == 1:
                            h.update(ak.to_numpy(obj).tobytes())
                        else:
                            arr = ak.to_numpy(ak.flatten(obj, axis=None))
                            h.update(arr.tobytes())
                    else:
                        h.update(np.array([obj]).tobytes())
                items = np.append(items, h.digest())
            items.sort()
            h = hashlib.sha256()
            h.update("".join(keys).encode("utf8"))
            h.update(items.tobytes())
            gh.update(h.digest())

    return gh.hexdigest()
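# A minimal usage sketch for hash_root_file(), assuming two ROOT files that
# should contain the same events but were possibly written in a different
# order; the file names are hypothetical.
from pathlib import Path

h_ref = hash_root_file(Path("reference.root"))
h_new = hash_root_file(Path("candidate.root"), ordering_invariant=True)
print(h_ref == h_new)  # True if the trees agree up to the ordering of entries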
def make_plots(qArray, qArraySimTrackMatched, quantity, layerType):
    minValue = min(qArray[qArray > -999])
    maxValue = max(qArray)
    histMinLimit = minValue * 1.1 if minValue < 0 else minValue * 0.9
    histMaxLimit = maxValue * 0.9 if maxValue < 0 else maxValue * 1.1

    if (abs(histMaxLimit - histMinLimit) > 10 and histMinLimit > 0) or "/" in quantity:
        binning = np.logspace(np.log10(histMinLimit), np.log10(histMaxLimit), 1000)
    else:
        binning = np.linspace(histMinLimit, histMaxLimit, 1000)

    allHist = Hist1D(ak.to_numpy(qArray[qArray > -999]),
                     bins=binning,
                     label="{}".format(quantity))
    simtrackMatchedHist = Hist1D(ak.to_numpy(
        qArraySimTrackMatched[qArraySimTrackMatched > -999]),
        bins=binning,
        label="Sim track matched {}".format(quantity))

    fig = plt.figure()
    plt.yscale("log")
    if (abs(histMaxLimit - histMinLimit) > 10 and histMinLimit > 0) or "/" in quantity:
        plt.xscale("log")

    allHist.plot(alpha=0.8, color="C0", label="all")
    simtrackMatchedHist.plot(alpha=0.8, color="C3", label="sim track matched")

    if layerType == "":
        plt.title("{}".format(quantity))
    else:
        plt.title("{} type {}".format(quantity, layerType))
    plt.suptitle("Sample = {} Tag = {}".format(sys.argv[3], sys.argv[4]))

    # extra job for the composite dudes
    quantity = quantity.replace("(", " ")
    quantity = quantity.replace(")", "")
    quantity = quantity.replace("/", "by")
    quantity = quantity.replace("-", "minus")
    quantity = quantity.replace(" ", "_")

    if len(sys.argv) > 2:
        if layerType != "":
            plt.savefig("{}/{}_{}.pdf".format(sys.argv[2], quantity, layerType))
        else:
            plt.savefig("{}/{}.pdf".format(sys.argv[2], quantity))
    else:
        if layerType != "":
            plt.savefig("{}_{}.pdf".format(quantity, layerType))
        else:
            plt.savefig("{}.pdf".format(quantity))
    plt.close()
def __getitem__(self, index):
    X = []
    fields = self.data.fields
    for f in fields[:-1]:
        tensors = self.data[index, f]
        try:
            tensors = torch.from_numpy(ak.to_numpy(tensors))
        except ValueError:
            # This can be slow with a lot of tensors (index > 1000?)
            tensors = [torch.from_numpy(ak.to_numpy(t)) for t in tensors]
        X.append(tensors)
    y = torch.tensor(self.data[index, fields[-1]])
    return X, y
def center_points(angles_to_center, all_angles, cyclic_range=(-np.pi, np.pi)):
    # convert the angles to center into x and y components
    # (x uses cos, y uses sin, so arctan2 recovers the circular mean)
    angles_to_center = ak.to_numpy(angles_to_center)
    center_x = np.sum(np.cos(angles_to_center))
    center_y = np.sum(np.sin(angles_to_center))
    center_angle = np.arctan2(center_y, center_x)
    # now find the new range
    new_range = (center_angle - np.pi, center_angle + np.pi)
    # shift the points into the range
    all_angles = ak.to_numpy(all_angles)
    all_angles[all_angles < new_range[0]] += 2 * np.pi
    all_angles[all_angles > new_range[1]] -= 2 * np.pi
    return new_range, all_angles
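# A hedged usage sketch for center_points(): angles clustered around the +/-pi
# wrap point are re-expressed in a range centred on their circular mean, so
# they become contiguous. The input values are hypothetical.
import numpy as np
import awkward as ak

angles = ak.Array([np.pi - 0.1, -np.pi + 0.1])  # points straddling the wrap
new_range, shifted = center_points(angles, angles)
# new_range is centred near +/-pi and `shifted` holds both points on one side
# of the wrap, so ordinary (non-cyclic) statistics can be applied to them.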
def set_batch(self, idx):
    """
    Loads a batch of data of a specific data type and then stores it for later
    retrieval. Pads ragged track and PFO arrays to make them rectilinear and
    reshapes the arrays into the correct shape for training. The clip option in
    ak.pad_none will truncate/extend each array so that they are all of a
    specific length - here we limit nested arrays to 20 items.
    :param idx: The index of the batch to be processed
    """
    batch, sig_bkg_labels_np_array = self.next_batch()

    track_np_arrays = self.pad_and_reshape_nested_arrays(batch, "TauTracks", max_items=20)
    conv_track_np_arrays = self.pad_and_reshape_nested_arrays(batch, "ConvTrack", max_items=20)
    shot_pfo_np_arrays = self.pad_and_reshape_nested_arrays(batch, "ShotPFO", max_items=20)
    neutral_pfo_np_arrays = self.pad_and_reshape_nested_arrays(batch, "NeutralPFO", max_items=20)
    jet_np_arrays = self.reshape_arrays(batch, "TauJets")

    # Compute labels
    labels_np_array = np.zeros((len(sig_bkg_labels_np_array), 4))
    if sig_bkg_labels_np_array[0] == 0:
        labels_np_array[:, 0] = 1
    else:
        truth_decay_mode_np_array = ak.to_numpy(
            batch[self._variables_dict["DecayMode"]]).astype(np.int64)
        labels_np_array = labeler(truth_decay_mode_np_array, labels_np_array)

    # Apply pT re-weighting
    weight_np_array = np.ones(len(labels_np_array))
    if self.class_label == 0:
        weight_np_array = pt_reweight(
            ak.to_numpy(batch[self._variables_dict["Weight"]]).astype("float32"))

    result = Result(track_np_arrays, neutral_pfo_np_arrays, shot_pfo_np_arrays,
                    conv_track_np_arrays, jet_np_arrays, labels_np_array,
                    weight_np_array)

    # logger.log(f"Tracks Max = {np.amax(result.tracks)}")
    # logger.log(f"NeutralPFOs Max = {np.amax(result.neutral_PFOs)}")
    # logger.log(f"ShotPFOs Max = {np.amax(result.shot_PFOs)}")
    # logger.log(f"ConvTracks Max = {np.amax(result.conv_tracks)}")
    # logger.log(f"Jets Max = {np.amax(result.jets)}")

    return result
def test_numpy():
    # This all is fine.
    assert np.array_equal(
        ak.to_numpy(ak.from_iter([[1 + 1j, 2 + 2j], [3 + 3j, 4 + 4j]])),
        np.array([[1 + 1j, 2 + 2j], [3 + 3j, 4 + 4j]]),
    )
    assert (
        str(ak.to_numpy(ak.from_iter([[1 + 1j, 2 + 2j], [3 + 3j, 4 + 4j]])).dtype)
        == "complex128"
    )
    assert ak.Array(np.array([[1 + 1j, 2 + 2j], [3 + 3j, 4 + 4j]])).tolist() == [
        [(1 + 1j), (2 + 2j)],
        [(3 + 3j), (4 + 4j)],
    ]
def getImage(pts, etas, phis, bins):
    xlim = [-2.5, 2.5]
    ylim = [-3.141593, 3.141593]
    hs = []
    for i in range(len(etas)):
        h = np.histogram2d(ak.to_numpy(etas[i]),
                           ak.to_numpy(phis[i]),
                           weights=ak.to_numpy(pts[i]),
                           bins=bins,
                           range=[xlim, ylim])
        hs.append(h[0])
    return np.stack(hs)
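# A minimal sketch of building calorimeter-style "images" with getImage(),
# assuming ragged per-event pt/eta/phi awkward arrays; the values and the
# 32x32 binning are hypothetical.
import awkward as ak
import numpy as np

pts = ak.Array([[10.0, 20.0], [5.0, 7.0, 9.0]])
etas = ak.Array([[0.1, -1.2], [2.0, 0.0, -0.5]])
phis = ak.Array([[0.3, 1.0], [-2.0, 3.0, 0.1]])
images = getImage(pts, etas, phis, bins=32)
print(images.shape)  # (2, 32, 32): one pt-weighted eta-phi histogram per event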
def get_all_vars(varsIn, varSet, normMean, normStd):
    dSets = []
    dataSet = pd.DataFrame()
    for var in varSet:
        inputArr = varsIn[var][0]
        if variables[var][4] == 2:
            inputArr = np.repeat(ak.to_numpy(inputArr),
                                 ak.to_numpy(varsIn["njetsAK8"][0]))
        if variables[var][5] == 1:
            inputArr = ak.flatten(inputArr)
        elif variables[var][5] == 2:
            inputArr = ak.flatten(inputArr)
        dataSet[var] = inputArr
    dataSet = normalize(dataSet, normMean, normStd)
    return dataSet
def test():
    empty1 = ak.Array(ak.layout.EmptyArray(), check_valid=True)
    empty2 = ak.Array(
        ak.layout.ListOffsetArray64(
            ak.layout.Index64(np.array([0, 0, 0, 0], dtype=np.int64)),
            ak.layout.EmptyArray(),
        ),
        check_valid=True,
    )
    array = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]], check_valid=True)

    assert ak.to_numpy(empty1).dtype.type is np.float64
    assert ak.to_list(array[empty1]) == []
    assert ak.to_list(array[empty1,]) == []
    assert ak.to_list(array[empty2]) == [[], [], []]
    assert ak.to_list(array[empty2,]) == [[], [], []]
def pad_and_reshape_nested_arrays(self, batch, variable_type, max_items=10):
    """
    Function that acts on nested data to read relevant variables, pad, reshape
    and convert data from uproot into rectilinear numpy arrays
    :param batch: A dict of awkward arrays from uproot
    :param variable_type: Variable type to be selected e.g. Tracks, Neutral PFO, Jets etc...
    :param max_items: Maximum number of tracks/PFOs etc... to be associated to event
    :return: a rectilinear numpy array of shape:
             (num events in batch, number of variables belonging to variable type, max_items)
    """
    variables = self._variables_dict[variable_type]
    np_arrays = np.zeros((ak.num(batch[variables[0]], axis=0), len(variables), max_items))
    dummy_val = 0
    thresh = 45
    for i in range(0, len(variables)):
        ak_arr = batch[variables[i]]
        ak_arr = ak.pad_none(ak_arr, max_items, clip=True, axis=1)
        arr = ak.to_numpy(abs(ak_arr)).filled(dummy_val)
        np_arrays[:, i] = arr
    np_arrays = apply_scaling(np_arrays, thresh=thresh, dummy_val=dummy_val)
    np_arrays = np.nan_to_num(np_arrays, posinf=0, neginf=0, copy=False).astype("float64")
    return np_arrays
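# A standalone sketch of the padding pattern used above: ak.pad_none with
# clip=True makes every sublist exactly max_items long, ak.to_numpy then yields
# a masked array whose missing slots .filled() replaces with a dummy value.
# The toy values are hypothetical.
import awkward as ak
import numpy as np

tracks = ak.Array([[1.0, 2.0, 3.0], [4.0]])
padded = ak.pad_none(tracks, 4, clip=True, axis=1)  # 2 x 4, padded with None
rect = ak.to_numpy(padded).filled(0.0)               # plain (2, 4) float array
print(rect)
# [[1. 2. 3. 0.]
#  [4. 0. 0. 0.]]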
def process(self, events):
    output = self.accumulator.identity()
    dataset = events.metadata['dataset']
    integratedLuminosity = 137.19 * 1000  # fb^{-1} to pb^{-1}

    ht = events.HT
    weights = integratedLuminosity * events.CrossSection[ht > 1200] / len(events)

    GenParticles = events.GenParticles
    finalParticles = (
        (GenParticles.Status == 1)
        & (GenParticles.pt > 1)
        & (abs(GenParticles.eta) < 2.5)
        & (GenParticles.Charge != 0)
    )
    nTracksGEN = ak.sum(finalParticles[ht > 1200], axis=1)

    tracks = events.Tracks
    tracks_pt = np.sqrt(tracks.x**2 + tracks.y**2)
    tracks_eta = np.arcsinh(tracks.z / tracks_pt)
    track_cut = (
        (tracks_pt > 1.)
        & (abs(tracks_eta) < 2.5)
        & (tracks.fromPV0 >= 2)
        & tracks.matchedToPFCandidate
    )
    nTracksRECO = ak.to_numpy(ak.sum(track_cut[ht > 1200], axis=1))

    output["sumw"][dataset] += len(events)
    output["nTracksHist"].fill(
        dataset="CMSSW GEN",
        nTracks=nTracksGEN,
        weight=weights
    )
    output["nTracksHist"].fill(
        dataset="CMSSW RECO",
        nTracks=nTracksRECO,
        weight=weights
    )

    return output
def test_date_time():
    numpy_array = np.array(
        ["2020-07-27T10:41:11", "2019-01-01", "2020-01-01"], "datetime64[s]"
    )

    array = ak.Array(numpy_array)
    assert str(array.type) == "3 * datetime64"
    assert array.tolist() == [
        np.datetime64("2020-07-27T10:41:11"),
        np.datetime64("2019-01-01T00:00:00"),
        np.datetime64("2020-01-01T00:00:00"),
    ]
    for i in range(len(array)):
        assert ak.to_numpy(array)[i] == numpy_array[i]

    date_time = np.datetime64("2020-07-27T10:41:11.200000011", "us")
    array1 = ak.Array(np.array(["2020-07-27T10:41:11.200000011"], "datetime64[us]"))
    assert np.datetime64(array1[0], "us") == date_time

    assert ak.to_list(ak.from_iter(array1)) == [
        np.datetime64("2020-07-27T10:41:11.200000")
    ]

    assert ak.max(array) == numpy_array[0]
    assert ak.min(array) == numpy_array[1]
def test():
    a = ak.to_numpy(ak.Array({"A": [1, 2, 3], "B": [4, None, 5]}))
    assert a["A"].data.tolist() == [1, 2, 3]
    assert a["A"].mask.tolist() == [False, False, False]
    assert a["B"].data[0] == 4
    assert a["B"].data[2] == 5
    assert a["B"].mask.tolist() == [False, True, False]
def __compute_cell_posD(self, arr: ak.Array) -> np.ndarray:
    # Convert to numpy array
    arr = ak.to_numpy(arr, allow_missing=False)
    # The unique entries will later correspond
    # to the lower border of the binning
    uniques = np.unique(arr)
    # Calculate the distance between neighbouring unique values
    dist = uniques[1:] - uniques[:-1]
    # Remove the distances under a threshold
    idx_to_del = []
    for i, d in enumerate(dist):
        if abs(d) < conf.mapper.threshold:
            idx_to_del.append(i)
    dist = np.delete(dist, idx_to_del)
    uniques = np.delete(uniques, [i + 1 for i in idx_to_del])

    # Count the frequency of distances and sort by frequency
    # np.unique: get unique values and the counts
    # np.sort: sort the array by the last axis / the counts
    # [..., ::-1] to start with the highest element
    tmparr = np.sort(np.unique(dist, return_counts=True))[..., ::-1]
    # [0] only gets the values, not the counts
    # Convert to a list to make removing elements less painful
    hval = list(tmparr[0])
    del tmparr

    # First, all values in hval that are close to each other are collapsed into one
    compare_idx, running_idx = 0, 1
    while compare_idx < len(hval) - 1:
        assert compare_idx != running_idx
        if np.isclose(hval[compare_idx], hval[running_idx], rtol=conf.mapper.threshold):
            del hval[running_idx]
        else:
            running_idx = running_idx + 1
        if running_idx == len(hval):
            compare_idx = compare_idx + 1
            running_idx = compare_idx + 1

    # Next we iterate over the distances:
    # we check if a distance is close to one of the most frequent
    # values and, if so, the distance is replaced by that value
    for idx, d in enumerate(dist):
        for val in hval:
            if np.isclose(d, val, rtol=conf.mapper.threshold):
                dist[idx] = val
                continue

    # Accumulate the (regularised) distances, starting from the smallest unique value
    running_sum = min(uniques)
    cellpositions = [running_sum]
    for val in dist:
        running_sum += val
        cellpositions.append(running_sum)
    logger.debug(f"dist {dist} \n \n cellpositions \n {cellpositions}")
    cellpositions = np.array(cellpositions)
    assert all(np.diff(cellpositions) > 0)
    return cellpositions
def __getpixel(self, val: ak.Array, var: str) -> int:
    val = ak.to_numpy(val)
    border = self.binbordersD[var]
    # np.digitize returns the index of the cell (defined by its lower borders)
    # that the value falls into
    pos = np.digitize(val, border)
    assert 0 <= pos
    assert pos < len(self.cell_posD[var])
    return pos
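# A small standalone sketch of the np.digitize lookup used in __getpixel,
# with hypothetical cell borders: the returned index corresponds to the cell
# whose lower border is the last one not exceeding the value.
import numpy as np

border = np.array([0.0, 1.0, 2.5, 4.0])  # lower borders of the cells
print(np.digitize(0.7, border))          # 1 -> cell starting at 0.0
print(np.digitize(3.0, border))          # 3 -> cell starting at 2.5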
def get_root_rest_energies(roots, energies, pxs, pys, pzs):
    """
    Find the energies (of anything really, but presumably jets)
    in the rest frame of particles identified as root particles.

    Parameters
    ----------
    roots : array of bool
        mask identifying the root particles
    energies : array like of floats
        the energies of the particles
    pxs : array like of floats
        the momentum in the x direction of the particles
    pys : array like of floats
        the momentum in the y direction of the particles
    pzs : array like of floats
        the momentum in the z direction of the particles

    Returns
    -------
    energies : array like of floats
        the energies of the particles in the rest frame of the root
    """
    # if we are to use the roots as indices they must have this form
    energies = ak.to_numpy(energies)
    masses2 = energies**2 - pxs**2 - pys**2 - pzs**2
    pxs = pxs - ak.flatten(pxs[roots])
    pys = pys - ak.flatten(pys[roots])
    pzs = pzs - ak.flatten(pzs[roots])
    energies = np.sqrt(masses2 + pxs**2 + pys**2 + pzs**2)
    return energies
def lepton_pairing(
        Electron: List[ak_array],
        Muon: List[ak_array]) -> (List[float], List[float], List[float]):
    """
    Electron and Muon contain the values of ["PT", "Eta", "Phi", "Charge"]
    respectively.
    """
    return_Electron = ak.Array([])
    return_Muon = ak.Array([])
    delta_etas = []
    delta_phis = []

    Electron = ak.to_list(Electron)
    Muon = ak.to_list(Muon)

    # each particle has the same number of events but may have
    # a different number of particles in each event
    n_events = len(Electron[0])
    for idx in range(n_events):  # for each event
        for electron_jdx in range(len(Electron[0][idx])):
            for muon_jdx in range(len(Muon[0][idx])):
                print("Electron[-1][idx][electron_jdx]: ", Electron[-1][idx][electron_jdx])
                print("Muon[-1][idx][muon_jdx]: ", Muon[-1][idx][muon_jdx])
                if Electron[-1][idx][electron_jdx] == -Muon[-1][idx][muon_jdx]:
                    # if the charge sign of the electron is opposite to that of the
                    # muon, the pairing is made and the relevant values are added to
                    # the final return value
                    e_placeholder = []
                    for value in Electron:
                        e_placeholder.append(value[idx][electron_jdx])
                    return_Electron = ak.concatenate(
                        (return_Electron, ak.Array([e_placeholder])), axis=0)
                    # print(ak.to_numpy(return_Electron).shape)
                    # print("return_Electron: ", return_Electron)

                    m_placeholder = []
                    for value in Muon:
                        m_placeholder.append(value[idx][muon_jdx])
                    return_Muon = ak.concatenate(
                        (return_Muon, ak.Array([m_placeholder])), axis=0)
                    # print(ak.to_numpy(return_Muon).shape)
                    # print("return_Muon: ", return_Muon)

                    # add delta eta and phi values
                    delta_etas.append(delta_eta(Electron[1][idx][electron_jdx],
                                                Muon[1][idx][muon_jdx]))
                    delta_phis.append(delta_phi(Electron[-2][idx][electron_jdx],
                                                Muon[-2][idx][muon_jdx]))

                    # assign the respective charges as 10 (arbitrary number) to signify
                    # that they have already been chosen and don't get picked up again
                    # print(type(Electron[-1][idx][electron_jdx]))
                    Electron[-1][idx][electron_jdx] = 10.0
                    Muon[-1][idx][muon_jdx] = 10.0

    delta_package = np.array([delta_etas, delta_phis])
    return (ak.to_numpy(return_Electron), ak.to_numpy(return_Muon), delta_package)
def process_1tau_1lep(taus_all, muons_all, electrons_all, genWeight_all):
    out_hists = {}

    ntau = ak.num(taus_all)
    nmuon = ak.num(muons_all)
    nelectron = ak.num(electrons_all)

    mask_tau_e = (ntau == 1) & (nelectron == 1) & (nmuon == 0)
    mask_tau_mu = (ntau == 1) & (nmuon == 1) & (nelectron == 0)

    taus_taue = taus_all[mask_tau_e]
    electrons_taue = electrons_all[mask_tau_e]
    genWeight_taue = genWeight_all[mask_tau_e]

    taus_taumu = taus_all[mask_tau_mu]
    muons_taumu = muons_all[mask_tau_mu]
    genWeight_taumu = genWeight_all[mask_tau_mu]

    if len(taus_taue) > 0:
        tau0 = taus_taue[:, 0]
        ele0 = electrons_taue[:, 0]
        mtaue = np.sqrt(
            2 * tau0.pt * ele0.pt *
            (np.cosh(tau0.eta - ele0.eta) - np.cos(tau0.phi - ele0.phi)))
        # mtaue = (taus_taue[:,0] + electrons_taue[:,0]).mass
        out_hists["mtaue"] = Hist1D(ak.to_numpy(mtaue),
                                    bins=bin_mZ,
                                    weights=ak.to_numpy(genWeight_taue),
                                    label="mtaue")
        dR_tau_e = deltaR_devfunc(taus_taue, electrons_taue)
        out_hists["dR_tau_e"] = Hist1D(ak.to_numpy(dR_tau_e),
                                       bins=bin_dR,
                                       weights=ak.to_numpy(genWeight_taue),
                                       label="dR_taue")

    if len(taus_taumu) > 0:
        tau0 = taus_taumu[:, 0]
        mu0 = muons_taumu[:, 0]
        mtaumu = np.sqrt(
            2 * tau0.pt * mu0.pt *
            (np.cosh(tau0.eta - mu0.eta) - np.cos(tau0.phi - mu0.phi)))
        # mtaumu = (taus_taumu[:,0] + muons_taumu[:,0]).mass
        out_hists["mtaumu"] = Hist1D(ak.to_numpy(mtaumu),
                                     bins=bin_mZ,
                                     weights=ak.to_numpy(genWeight_taumu),
                                     label="mtaumu")
        dR_tau_mu = deltaR_devfunc(taus_taumu, muons_taumu)
        out_hists["dR_tau_mu"] = Hist1D(ak.to_numpy(dR_tau_mu),
                                        bins=bin_dR,
                                        weights=ak.to_numpy(genWeight_taumu),
                                        label="dR_taumu")

    return out_hists
def _preprocess_fitinf(self, fitinf):
    # preprocess the fitinf a bit - yay!
    # pad the ragged fitinf to a fixed number of columns and fill missing slots with NaN
    n_columns = max(km3io.definitions.fitparameters.values()) + 1
    fitinf_array = np.ma.filled(
        ak.to_numpy(ak.pad_none(fitinf, target=n_columns, axis=-1)),
        fill_value=np.nan,
    ).astype("float32")
    return fitinf_array
def apply_roccor(df, rochester, is_mc):
    if is_mc:
        hasgen = ~np.isnan(ak.fill_none(df.Muon.matched_gen.pt, np.nan))
        mc_rand = np.random.rand(*ak.to_numpy(ak.flatten(df.Muon.pt)).shape)
        mc_rand = ak.unflatten(mc_rand, ak.num(df.Muon.pt, axis=1))

        corrections = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        errors = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))

        mc_kspread = rochester.kSpreadMC(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )
        mc_ksmear = rochester.kSmearMC(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )
        errspread = rochester.kSpreadMCerror(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )
        errsmear = rochester.kSmearMCerror(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )

        hasgen_flat = np.array(ak.flatten(hasgen))
        corrections[hasgen_flat] = np.array(ak.flatten(mc_kspread))
        corrections[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
        errors[hasgen_flat] = np.array(ak.flatten(errspread))
        errors[~hasgen_flat] = np.array(ak.flatten(errsmear))

        corrections = ak.unflatten(corrections, ak.num(df.Muon.pt, axis=1))
        errors = ak.unflatten(errors, ak.num(df.Muon.pt, axis=1))
    else:
        corrections = rochester.kScaleDT(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)
        errors = rochester.kScaleDTerror(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)

    df["Muon", "pt_roch"] = df.Muon.pt * corrections
    df["Muon", "pt_roch_up"] = df.Muon.pt_roch + df.Muon.pt * errors
    df["Muon", "pt_roch_down"] = df.Muon.pt_roch - df.Muon.pt * errors
def get_scale_factor(self, jets, passing_cut):
    '''Starting from a jet collection and a string pointing to the flag
    defining if the jet is b-tagged or not, computes the per-jet weight
    to be used. Supports only a single WP for the moment'''
    # First of all flatten everything to make it easier to handle
    pt = ak.to_numpy(ak.flatten(jets.pt))
    eta = ak.to_numpy(ak.flatten(jets.eta))
    flav = ak.to_numpy(ak.flatten(jets.hadronFlavour))
    pass_wp = ak.to_numpy(ak.flatten(jets[passing_cut]))

    # Get the MC efficiency
    eff = self.efficiency_(pt, eta, flav)
    # For each systematic/central value compute the proper SF.
    # Cache the SF values as sometimes they are repeated; there
    # might also be systematic combinations that are never accessed,
    # but pruning them at the beginning can be hard.
    # Use the schema to define combinations; lcb is a tuple with the sf keys
    # for light, charm, bottom for each systematic
    flavour_sf_cache = {}
    scale_factors = {}  # our final product
    for key, lcb in self.schema_.items():
        # populate cache if needed
        for i in range(3):
            flavour_sf_cache[lcb[i]] = flavour_sf_cache.get(
                # for some reason there is an additional dimension, pass_wp has no effect
                lcb[i], self.sf_[lcb[i]](eta, pt, pass_wp))
        scale_factors[key] = eff * self.match_flav_(
            flavour_sf_cache[lcb[0]],
            flavour_sf_cache[lcb[1]],
            flavour_sf_cache[lcb[2]],
            flav)

    # use SF and eff to compute p(data) and p(MC)
    p_data = {
        key: np.where(pass_wp, val, 1 - val)
        for key, val in scale_factors.items()
    }
    p_mc = np.where(pass_wp, eff, 1 - eff)

    # return the unflattened version of the ratio
    return {
        key: ak.unflatten(i / p_mc, ak.num(jets.pt))
        for key, i in p_data.items()
    }
def make_composite_distributions():
    global tree
    matchedMask = tree["t5_isFake"].array() == 0
    layers = np.array(list(map(process_layers,
                               ak.flatten(tree["t5_layer_binary"].array()))))
    layerTypes = np.array(list(map(process_layerType, layers)))
    # layerTypes = np.array(list(map(process_numbers, layers)))
    unique_layerTypes = np.unique(layerTypes, axis=0)
    unique_layerTypes = np.append(unique_layerTypes, "")
    print(unique_layerTypes)

    for layerType in unique_layerTypes:
        print("layerType = {}".format(layerType))
        innerRadius = ak.to_numpy(ak.flatten(tree["t5_innerRadius"].array()))
        innerRadiusMin = ak.to_numpy(ak.flatten(tree["t5_innerRadiusMin"].array()))
        innerRadiusMax = ak.to_numpy(ak.flatten(tree["t5_innerRadiusMax"].array()))
        outerRadius = ak.to_numpy(ak.flatten(tree["t5_outerRadius"].array()))
        outerRadiusMin = ak.to_numpy(ak.flatten(tree["t5_outerRadiusMin"].array()))
        outerRadiusMax = ak.to_numpy(ak.flatten(tree["t5_outerRadiusMax"].array()))

        qArray = (outerRadiusMin - innerRadiusMax) / innerRadiusMax
        qArray[innerRadius > outerRadius] = (
            innerRadiusMin[innerRadius > outerRadius]
            - outerRadiusMax[innerRadius > outerRadius]
        ) / innerRadiusMin[innerRadius > outerRadius]

        qArrayInv = (1.0 / innerRadiusMax - 1.0 / outerRadiusMin) / (1.0 / innerRadiusMax)
        qArrayInv[innerRadius > outerRadius] = (
            1.0 / outerRadiusMax[innerRadius > outerRadius]
            - 1.0 / innerRadiusMin[innerRadius > outerRadius]
        ) / (1.0 / innerRadiusMin[innerRadius > outerRadius])

        if layerType == "":
            qArraySimTrackMatched = qArray[ak.to_numpy(ak.flatten(matchedMask))]
            qArrayInvSimTrackMatched = qArrayInv[ak.to_numpy(ak.flatten(matchedMask))]
        else:
            qArray = qArray[layerTypes == layerType]
            qArrayInv = qArrayInv[layerTypes == layerType]
            qArraySimTrackMatched = qArray[ak.to_numpy(
                ak.flatten(matchedMask)[layerTypes == layerType])]
            qArrayInvSimTrackMatched = qArrayInv[ak.to_numpy(
                ak.flatten(matchedMask)[layerTypes == layerType])]

        print("deltaR integral = ", len(qArray),
              "deltaR sim track matched integral = ", len(qArraySimTrackMatched))
        print("deltaR integral above 0 = ", sum(qArray >= 0),
              "deltaR sim track matched integral above 0 = ",
              sum(qArraySimTrackMatched >= 0))
        print("deltaInvR integral = ", len(qArrayInv),
              "deltaInvR sim track matched integral = ", len(qArrayInvSimTrackMatched))
        print("deltaInvR integral above 0 = ", sum(qArrayInv >= 0),
              "deltaInvR sim track matched integral above 0 = ",
              sum(qArrayInvSimTrackMatched >= 0))

        make_plots(abs(qArray[qArray > 0]),
                   abs(qArraySimTrackMatched[qArraySimTrackMatched > 0]),
                   "deltaR/innerRadius", layerType)
        make_plots(abs(qArrayInv[qArrayInv > 0]),
                   abs(qArrayInvSimTrackMatched[qArrayInvSimTrackMatched > 0]),
                   "delta(1/R)/1/innerRadius", layerType)