Example #1
def calcGeometricOffset(rCone, E, f_id, mu, mucut):
    E = ak.to_numpy(ak.flatten(E)).reshape(len(E), nEta)[mu > mucut]
    f_id = ak.to_numpy(ak.flatten(f_id)).reshape(len(f_id), nEta)[mu > mucut]
    if len(f_id) != len(E):
        print("Error: f_id and E have mismatched lengths")
    area = 2 * np.pi * (etabins[1:] - etabins[:-1])
    return E * f_id * np.pi * rCone * rCone / 255. / np.cosh(etaC) / area
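The flatten-reshape-mask idiom above turns a regular-jagged awkward array into a 2-D NumPy array and then selects rows with a per-event cut. A minimal, self-contained sketch of that idiom with toy data (names and values invented for illustration):

import awkward as ak
import numpy as np

nEta = 3
E = ak.Array([[1., 2., 3.], [4., 5., 6.]])  # two events, nEta values each
mu = np.array([10., 30.])                   # one per-event scalar
E2d = ak.to_numpy(ak.flatten(E)).reshape(len(E), nEta)
print(E2d[mu > 20.])                        # rows for events passing the cut -> [[4. 5. 6.]]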
Example #2
    def __call__(self, runs, lumis):
        """Check if run and lumi are valid

        Parameters
        ----------
            runs : numpy.ndarray
                Vectorized list of run numbers
            lumis : numpy.ndarray
                Vectorized list of lumiSection numbers

        Returns
        -------
            mask_out : numpy.ndarray
                An array of dtype `bool` where valid (run, lumi) tuples
                will have their corresponding entry set ``True``.
        """
        # fill numba typed dict
        _masks = Dict.empty(key_type=types.uint32, value_type=types.uint32[:])
        for k, v in self._masks.items():
            _masks[k] = v

        if isinstance(runs, ak.highlevel.Array):
            runs = ak.to_numpy(runs)
        if isinstance(lumis, ak.highlevel.Array):
            lumis = ak.to_numpy(lumis)
        mask_out = np.zeros(dtype="bool", shape=runs.shape)
        LumiMask._apply_run_lumi_mask_kernel(_masks, runs, lumis, mask_out)
        return mask_out
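A hedged usage sketch, assuming this is (or mirrors) coffea's lumi_tools.LumiMask and that a certification JSON is available locally; the file name below is a placeholder:

import numpy as np
from coffea.lumi_tools import LumiMask  # assumes coffea is installed

mask = LumiMask("golden.json")  # certified (run, lumi) ranges; placeholder path
runs = np.array([274199, 274200], dtype=np.uint32)
lumis = np.array([35, 1], dtype=np.uint32)
valid = mask(runs, lumis)  # boolean array, True for certified (run, lumi) pairs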
Example #3
def normalize(val, cut):
    if cut is None:
        return ak.to_numpy(ak.fill_none(val, np.nan))
    else:
        return ak.to_numpy(ak.fill_none(val[cut], np.nan))
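A quick check of the behavior, assuming awkward and numpy are imported as usual: None entries become NaN, and an optional boolean cut is applied before conversion.

import awkward as ak
import numpy as np

val = ak.Array([1.1, None, 3.3])
cut = np.array([True, True, False])
print(normalize(val, None))  # [1.1 nan 3.3]
print(normalize(val, cut))   # [1.1 nan]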
Example #4
def getimage(pt, eta, phi):
    hs = []
    rnge = ((-2.4, 2.4), (-np.pi, np.pi))

    for i in range(len(eta)):
        h = np.histogram2d(ak.to_numpy(eta[i]),
                           ak.to_numpy(phi[i]),
                           weights=ak.to_numpy(pt[i]),
                           bins=64,
                           range=rnge)
        hs.append(h[0])
    return np.stack(hs)
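A usage sketch with toy jagged inputs; each event yields one 64x64 pT-weighted eta-phi image:

import awkward as ak

eta = ak.Array([[0.1, -1.2, 2.0], [0.5, 0.6]])
phi = ak.Array([[0.0, 1.5, -2.0], [3.0, -3.0]])
pt = ak.Array([[10., 20., 5.], [7., 9.]])
images = getimage(pt, eta, phi)
print(images.shape)  # (2, 64, 64)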
Example #5
def process_event(weights, evt_vars):  #, bjets):
    out_hists = {}
    out_hists["MET"] = Hist1D(ak.to_numpy(evt_vars.MET_pt),
                              bins=bin_met,
                              label="MET",
                              weights=ak.to_numpy(genWeight))
    #out_hists["njets"] = Hist1D(ak.to_numpy(evt_vars.nJet), bins = bin_njet, label="njet", weights = ak.to_numpy(genWeight))
    #out_hists["nbjets"] = Hist1D(ak.num(bjets), bins = bin_bjet)
    out_hists["weight"] = ak.sum(weights)
    return out_hists
Example #6
def make_plots(qArray, qArraySimTrackMatched, quantity, layerType):
    if len(qArray) == 0 or len(qArraySimTrackMatched) == 0:
        print("{} has no entries. Skipping".format(layerType))
        return
    minValue = min(qArray[qArray > -999])
    maxValue = max(qArray)
    histMinLimit = 1.1 * minValue if minValue < 0 else 0.9 * minValue
    histMaxLimit = 1.1 * maxValue if maxValue > 0 else 0.9 * maxValue
    binning = np.linspace(histMinLimit, histMaxLimit, 1000)

    allHist = Hist1D(ak.to_numpy(qArray[qArray > -999]),
                     bins=binning,
                     label="{}".format(quantity))
    simtrackMatchedHist = Hist1D(ak.to_numpy(
        qArraySimTrackMatched[qArraySimTrackMatched > -999]),
                                 bins=binning,
                                 label="Sim track matched {}".format(quantity))
    fig, ax = plt.subplots()
    ax.set_yscale("log")
    allHist.plot(alpha=0.8, color="C0", label="all", histtype="stepfilled")
    simtrackMatchedHist.plot(alpha=0.8,
                             color="C3",
                             label="sim track matched",
                             histtype="stepfilled")
    if layerType == "":
        if "TripletPt" in quantity:
            title = quantity.replace("TripletPt", "Triplet radius")
        else:
            title = quantity
        plt.title("{}".format(title))
    else:
        plt.title("{} type {}".format(quantity, layerType))

    plt.suptitle("Sample = {} Tag = {}".format(sys.argv[3], sys.argv[4]))
    # extra job for the composite dudes
    quantity = quantity.replace("(", " ")
    quantity = quantity.replace(")", "")
    quantity = quantity.replace("/", "by")
    quantity = quantity.replace("-", "minus")
    quantity = quantity.replace(" ", "_")
    if quantity[0] == "_":
        quantity = quantity[1:]
    if len(sys.argv) > 2:
        if layerType != "":
            plt.savefig("{}/{}_{}.pdf".format(sys.argv[2], quantity,
                                              layerType))

        else:
            plt.savefig("{}/{}.pdf".format(sys.argv[2], quantity))
    else:
        if layerType != "":
            plt.savefig("{}_{}.pdf".format(quantity, layerType))
        else:
            plt.savefig("{}.pdf".format(quantity))
    plt.close()
Example #7
def test_datasetprovider_awkward_exporter_single_tensor_same_shape():
    """
    The core assumption for simplest cases is that one
    system will be featurized as a single tensor, and that
    all the tensors will be of the same shape across systems.

    If that's the case:

    - DatasetProvider.to_numpy() will work and will return
      a X, y tuple of arrays
    - DatasetProvider.to_dict_of_arrays() will work and will
      return a dict of arrays with X, y keys.

    Note that `.to_numpy()` won't work if the core assumptions
    are broken. For those cases, `.to_dict_of_arrays()` is
    recommended instead.
    """
    from kinoml.core.ligands import RDKitLigand
    from kinoml.features.ligand import MorganFingerprintFeaturizer
    from kinoml.features.core import Concatenated, TupleOfArrays
    import awkward as ak

    conditions = AssayConditions()
    systems = [
        System([RDKitLigand.from_smiles(smi)]) for smi in ("CCCCC", "CCCCCCCC")
    ]
    measurements = [
        BaseMeasurement(50, conditions=conditions, system=systems[0]),
        BaseMeasurement(30, conditions=conditions, system=systems[1]),
    ]

    dataset = DatasetProvider(measurements=measurements)

    featurizer1 = MorganFingerprintFeaturizer(radius=2, nbits=512)
    featurizer2 = MorganFingerprintFeaturizer(radius=2, nbits=1024)
    concatenate = Concatenated([featurizer1, featurizer2], axis=1)
    aggregated = TupleOfArrays([concatenate])
    aggregated.featurize(systems)
    for system in systems:
        assert system.featurizations["last"][0].shape[0] == (1024 + 512)

    # With a single tensor per system, we build a unified X tensor
    # First dimension in X and y must match
    X, y = dataset.to_numpy()
    # This extra dimension here V
    # comes from the TupleOfArrays aggregation
    assert X.shape[:3] == (2, 1, (1024 + 512))
    assert X.shape[0] == y.shape[0]

    # With dict_of_arrays and single tensor per system,
    # the behavior is essentially the same
    (Xa, ), ya = dataset.to_awkward()

    assert ak.to_numpy(Xa).shape == (2, (1024 + 512))
    assert ak.to_numpy(ya).shape == (2, )
Example #8
def test_numpyarray():
    for dtype1 in ("i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f4", "f8",
                   "?"):
        for dtype2 in ("i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f4",
                       "f8", "?"):
            for dtype3 in (
                    "i1",
                    "i2",
                    "i4",
                    "i8",
                    "u1",
                    "u2",
                    "u4",
                    "u8",
                    "f4",
                    "f8",
                    "?",
            ):
                for dtype4 in (
                        "i1",
                        "i2",
                        "i4",
                        "i8",
                        "u1",
                        "u2",
                        "u4",
                        "u8",
                        "f4",
                        "f8",
                        "?",
                ):
                    one = np.array([0, 1, 2], dtype=dtype1)
                    two = np.array([3, 0], dtype=dtype2)
                    three = np.array([], dtype=dtype3)
                    four = np.array([4, 5, 0, 6, 7], dtype=dtype4)
                    combined = np.concatenate([one, two, three, four])

                    ak_combined = ak.layout.NumpyArray(one).mergemany([
                        ak.layout.NumpyArray(two),
                        ak.layout.NumpyArray(three),
                        ak.layout.NumpyArray(four),
                    ])

                    assert ak.to_list(ak_combined) == combined.tolist()
                    assert ak.to_numpy(ak_combined).dtype == combined.dtype

                    ak_combined = ak.layout.NumpyArray(one).mergemany([
                        ak.layout.NumpyArray(two),
                        ak.layout.EmptyArray(),
                        ak.layout.NumpyArray(four),
                    ])

                    assert ak.to_list(ak_combined) == combined.tolist()
                    assert (ak.to_numpy(ak_combined).dtype == np.concatenate(
                        [one, two, four]).dtype)
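At the high level, the same merge-and-promote behavior is what ak.concatenate exercises; a short sketch:

import awkward as ak
import numpy as np

a = ak.Array(np.array([0, 1, 2], dtype="i4"))
b = ak.Array(np.array([3.5, 4.5], dtype="f8"))
merged = ak.concatenate([a, b])
print(ak.to_numpy(merged).dtype)  # float64: the int32 values are promoted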
Example #9
def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    rf = uproot.open(path)

    gh = hashlib.sha256()

    for tree_name in sorted(rf.keys()):
        gh.update(tree_name.encode("utf8"))

        try:
            tree = rf[tree_name]
            if not isinstance(tree, uproot.TTree):
                continue
        except NotImplementedError:
            continue
        keys = list(sorted(tree.keys()))

        branches = tree.arrays(library="ak")

        if not ordering_invariant:

            h = hashlib.sha256()
            for name in keys:
                h.update(name.encode("utf8"))
                arr = branches[name]
                arr = ak.flatten(arr, axis=None)
                arr = np.array(arr)
                h.update(arr.tobytes())
            gh.update(h.digest())

        else:
            items = np.array([])

            for row in zip(*[branches[b] for b in keys]):
                h = hashlib.md5()
                for obj in row:
                    if isinstance(obj, ak.highlevel.Array):
                        if obj.ndim == 1:
                            h.update(ak.to_numpy(obj).tobytes())
                        else:
                            arr = ak.to_numpy(ak.flatten(obj, axis=None))
                            h.update(arr.tobytes())
                    else:
                        h.update(np.array([obj]).tobytes())
                items = np.append(items, h.digest())

            items.sort()

            h = hashlib.sha256()
            h.update("".join(keys).encode("utf8"))
            h.update(items.tobytes())

            gh.update(h.digest())
    return gh.hexdigest()
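A usage sketch (the file name is a placeholder):

from pathlib import Path

# Hashes every TTree in the file; with ordering_invariant=True the digest
# does not depend on the order of entries within each tree.
digest = hash_root_file(Path("events.root"))
print(digest)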
Example #10
def make_plots(qArray, qArraySimTrackMatched, quantity, layerType):
    minValue = min(qArray[qArray > -999])
    maxValue = max(qArray)
    histMinLimit = minValue * 1.1 if minValue < 0 else minValue * 0.9
    histMaxLimit = maxValue * 0.9 if maxValue < 0 else maxValue * 1.1
    if (abs(histMaxLimit - histMinLimit) > 10 and histMinLimit > 0) or "/" in quantity:
        binning = np.logspace(np.log10(histMinLimit), np.log10(histMaxLimit),
                              1000)
    else:
        binning = np.linspace(histMinLimit, histMaxLimit, 1000)

    allHist = Hist1D(ak.to_numpy(qArray[qArray > -999]),
                     bins=binning,
                     label="{}".format(quantity))
    simtrackMatchedHist = Hist1D(ak.to_numpy(
        qArraySimTrackMatched[qArraySimTrackMatched > -999]),
                                 bins=binning,
                                 label="Sim track matched {}".format(quantity))

    fig = plt.figure()
    plt.yscale("log")
    if (abs(histMaxLimit - histMinLimit) > 10 and histMinLimit > 0) or "/" in quantity:
        plt.xscale("log")

    allHist.plot(alpha=0.8, color="C0", label="all")
    simtrackMatchedHist.plot(alpha=0.8, color="C3", label="sim track matched")
    if layerType == "":
        plt.title("{}".format(quantity))
    else:
        plt.title("{} type {}".format(quantity, layerType))

    plt.suptitle("Sample = {} Tag = {}".format(sys.argv[3], sys.argv[4]))
    #extra job for the composite dudes
    quantity = quantity.replace("(", " ")
    quantity = quantity.replace(")", "")
    quantity = quantity.replace("/", "by")
    quantity = quantity.replace("-", "minus")
    quantity = quantity.replace(" ", "_")
    if len(sys.argv) > 2:
        if layerType != "":
            plt.savefig("{}/{}_{}.pdf".format(sys.argv[2], quantity,
                                              layerType))
        else:
            plt.savefig("{}/{}.pdf".format(sys.argv[2], quantity))
    else:
        if layerType != "":
            plt.savefig("{}_{}.pdf".format(quantity, layerType))
        else:
            plt.savefig("{}.pdf".format(quantity))
    plt.close()
Example #11
    def __getitem__(self, index):
        X = []
        fields = self.data.fields
        for f in fields[:-1]:
            tensors = self.data[index, f]
            try:
                tensors = torch.from_numpy(ak.to_numpy(tensors))
            except ValueError:
                # This can be slow with a lot of tensors (index > 1000?)
                tensors = [torch.from_numpy(ak.to_numpy(t)) for t in tensors]
            X.append(tensors)
        y = torch.tensor(self.data[index, fields[-1]])
        return X, y
Example #12
def center_points(angles_to_center, all_angles, cyclic_range=(-np.pi, np.pi)):
    # convert the angles to center into x and y
    angles_to_center = ak.to_numpy(angles_to_center)
    center_x = np.sum(np.cos(angles_to_center))  # cosine gives the x component
    center_y = np.sum(np.sin(angles_to_center))
    center_angle = np.arctan2(center_y, center_x)
    # now find the new range
    new_range = (center_angle - np.pi, center_angle + np.pi)
    # shift the points into the range
    all_angles = ak.to_numpy(all_angles)
    all_angles[all_angles < new_range[0]] += 2 * np.pi
    all_angles[all_angles > new_range[1]] -= 2 * np.pi
    return new_range, all_angles
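A usage sketch showing why this helps near the +/-pi wrap point (values invented for illustration):

import awkward as ak

to_center = ak.Array([3.1, -3.1])        # angles straddling the wrap
all_angles = ak.Array([3.1, -3.1, 0.0])
new_range, shifted = center_points(to_center, all_angles)
print(shifted)  # the -3.1 entry is shifted to ~3.18, next to 3.1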
Example #13
    def set_batch(self, idx):
        """
        Loads a batch of data of a specific data type and then stores it for later retrieval.
        Pads ragged track and PFO arrays to make them rectilinear
        and reshapes arrays into correct shape for training. The clip option in ak.pad_none will truncate/extend each
        array so that they are all of a specific length- here we limit nested arrays to 20 items
        :param idx: The index of the batch to be processed
        """
        batch, sig_bkg_labels_np_array = self.next_batch()

        track_np_arrays = self.pad_and_reshape_nested_arrays(batch,
                                                             "TauTracks",
                                                             max_items=20)
        conv_track_np_arrays = self.pad_and_reshape_nested_arrays(batch,
                                                                  "ConvTrack",
                                                                  max_items=20)
        shot_pfo_np_arrays = self.pad_and_reshape_nested_arrays(batch,
                                                                "ShotPFO",
                                                                max_items=20)
        neutral_pfo_np_arrays = self.pad_and_reshape_nested_arrays(
            batch, "NeutralPFO", max_items=20)
        jet_np_arrays = self.reshape_arrays(batch, "TauJets")

        # Compute labels
        labels_np_array = np.zeros((len(sig_bkg_labels_np_array), 4))
        if sig_bkg_labels_np_array[0] == 0:
            labels_np_array[:, 0] = 1
        else:
            truth_decay_mode_np_array = ak.to_numpy(
                batch[self._variables_dict["DecayMode"]]).astype(np.int64)
            labels_np_array = labeler(truth_decay_mode_np_array,
                                      labels_np_array)

        # Apply pT re-weighting
        weight_np_array = np.ones(len(labels_np_array))
        if self.class_label == 0:
            weight_np_array = pt_reweight(
                ak.to_numpy(
                    batch[self._variables_dict["Weight"]]).astype("float32"))

        result = Result(track_np_arrays, neutral_pfo_np_arrays,
                        shot_pfo_np_arrays, conv_track_np_arrays,
                        jet_np_arrays, labels_np_array, weight_np_array)

        # logger.log(f"Tracks Max = {np.amax(result.tracks)}")
        # logger.log(f"NeutralPFOs Max = {np.amax(result.neutral_PFOs)}")
        # logger.log(f"ShotPFOs Max = {np.amax(result.shot_PFOs)}")
        # logger.log(f"ConvTracks Max = {np.amax(result.conv_tracks)}")
        # logger.log(f"Jets Max = {np.amax(result.jets)}")

        return result
Example #14
def test_numpy():
    # This all is fine.
    assert np.array_equal(
        ak.to_numpy(ak.from_iter([[1 + 1j, 2 + 2j], [3 + 3j, 4 + 4j]])),
        np.array([[1 + 1j, 2 + 2j], [3 + 3j, 4 + 4j]]),
    )
    assert (str(
        ak.to_numpy(ak.from_iter([[1 + 1j, 2 + 2j],
                                  [3 + 3j, 4 + 4j]])).dtype) == "complex128")
    assert ak.Array(np.array([[1 + 1j, 2 + 2j], [3 + 3j,
                                                 4 + 4j]])).tolist() == [
                                                     [(1 + 1j), (2 + 2j)],
                                                     [(3 + 3j), (4 + 4j)],
                                                 ]
Example #15
def getImage(pts, etas, phis, bins):
    xlim = [-2.5, 2.5]
    ylim = [-3.141593, 3.141593]

    hs = []
    for i in range(len(etas)):
        h = np.histogram2d(ak.to_numpy(etas[i]),
                           ak.to_numpy(phis[i]),
                           weights=ak.to_numpy(pts[i]),
                           bins=bins,
                           range=[xlim, ylim])
        hs.append(h[0])

    return np.stack(hs)
Example #16
def get_all_vars(varsIn, varSet, normMean, normStd):
    dSets = []
    dataSet = pd.DataFrame()
    for var in varSet:
        inputArr = varsIn[var][0]
        if variables[var][4] == 2:
            inputArr = np.repeat(ak.to_numpy(inputArr),
                                 ak.to_numpy(varsIn["njetsAK8"][0]))
        if variables[var][5] in (1, 2):
            inputArr = ak.flatten(inputArr)
        dataSet[var] = inputArr
    dataSet = normalize(dataSet, normMean, normStd)
    return dataSet
Example #17
def test():
    empty1 = ak.Array(ak.layout.EmptyArray(), check_valid=True)
    empty2 = ak.Array(
        ak.layout.ListOffsetArray64(
            ak.layout.Index64(np.array([0, 0, 0, 0], dtype=np.int64)),
            ak.layout.EmptyArray(),
        ),
        check_valid=True,
    )
    array = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]], check_valid=True)

    assert ak.to_numpy(empty1).dtype.type is np.float64

    assert ak.to_list(array[empty1]) == []
    assert (
        ak.to_list(
            array[
                empty1,
            ]
        )
        == []
    )
    assert ak.to_list(array[empty2]) == [[], [], []]
    assert ak.to_list(
        array[
            empty2,
        ]
    ) == [[], [], []]
Example #18
    def pad_and_reshape_nested_arrays(self,
                                      batch,
                                      variable_type,
                                      max_items=10):
        """
        Function that acts on nested data to read relevant variables, pad, reshape and convert data from uproot into
        rectilinear numpy arrays
        :param batch: A dict of awkward arrays from uproot
        :param variable_type: Variable type to be selected e.g. Tracks, Neutral PFO, Jets etc...
        :param max_items: Maximum number of tracks/PFOs etc... to be associated to event
        :return: a rectilinear numpy array of shape:
                (num events in batch, number of variables belonging to variable type, max_items)
        """
        variables = self._variables_dict[variable_type]
        np_arrays = np.zeros((ak.num(batch[variables[0]],
                                     axis=0), len(variables), max_items))
        dummy_val = 0
        thresh = 45
        for i in range(0, len(variables)):
            ak_arr = batch[variables[i]]
            ak_arr = ak.pad_none(ak_arr, max_items, clip=True, axis=1)
            arr = ak.to_numpy(abs(ak_arr)).filled(dummy_val)
            np_arrays[:, i] = arr
        np_arrays = apply_scaling(np_arrays,
                                  thresh=thresh,
                                  dummy_val=dummy_val)
        np_arrays = np.nan_to_num(np_arrays, posinf=0, neginf=0,
                                  copy=False).astype("float64")
        return np_arrays
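The core pad-truncate-fill pattern in isolation, as a minimal sketch:

import awkward as ak

jagged = ak.Array([[1., 2., 3., 4.], [5.]])
padded = ak.pad_none(jagged, 3, clip=True, axis=1)  # pad or truncate to length 3
rect = ak.to_numpy(padded).filled(0.0)              # masked (None) entries -> 0.0
print(rect)  # [[1. 2. 3.] [5. 0. 0.]]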
Example #19
    def process(self, events):
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']

        integratedLuminosity = 137.19*1000 # fb^{-1} to pb^{-1}

        ht = events.HT
        weights = integratedLuminosity*events.CrossSection[ht > 1200]/len(events)
        GenParticles = events.GenParticles
        finalParticles = (GenParticles.Status == 1) & (GenParticles.pt > 1) & (abs(GenParticles.eta) < 2.5) & (GenParticles.Charge != 0)
        nTracksGEN = ak.sum(finalParticles[ht > 1200], axis=1)

        tracks = events.Tracks
        tracks_pt = np.sqrt(tracks.x**2 + tracks.y**2)
        tracks_eta = np.arcsinh(tracks.z / tracks_pt)
        track_cut = (tracks_pt > 1.) & (abs(tracks_eta) < 2.5) & (tracks.fromPV0 >= 2) & tracks.matchedToPFCandidate
        nTracksRECO = ak.to_numpy(ak.sum(track_cut[ht > 1200], axis=1))

        output["sumw"][dataset] += len(events)
        output["nTracksHist"].fill(
            dataset="CMSSW GEN",
            nTracks=nTracksGEN,
            weight=weights
        )
        output["nTracksHist"].fill(
            dataset="CMSSW RECO",
            nTracks=nTracksRECO,
            weight=weights
        )

        return output
Example #20
def test_date_time():

    numpy_array = np.array(
        ["2020-07-27T10:41:11", "2019-01-01", "2020-01-01"], "datetime64[s]"
    )

    array = ak.Array(numpy_array)
    assert str(array.type) == "3 * datetime64"
    assert array.tolist() == [
        np.datetime64("2020-07-27T10:41:11"),
        np.datetime64("2019-01-01T00:00:00"),
        np.datetime64("2020-01-01T00:00:00"),
    ]
    for i in range(len(array)):
        assert ak.to_numpy(array)[i] == numpy_array[i]

    date_time = np.datetime64("2020-07-27T10:41:11.200000011", "us")
    array1 = ak.Array(np.array(["2020-07-27T10:41:11.200000011"], "datetime64[us]"))
    assert np.datetime64(array1[0], "us") == date_time

    assert ak.to_list(ak.from_iter(array1)) == [
        np.datetime64("2020-07-27T10:41:11.200000")
    ]

    assert ak.max(array) == numpy_array[0]
    assert ak.min(array) == numpy_array[1]
Example #21
def test():
    a = ak.to_numpy(ak.Array({"A": [1, 2, 3], "B": [4, None, 5]}))
    assert a["A"].data.tolist() == [1, 2, 3]
    assert a["A"].mask.tolist() == [False, False, False]
    assert a["B"].data[0] == 4
    assert a["B"].data[2] == 5
    assert a["B"].mask.tolist() == [False, True, False]
Example #22
    def __compute_cell_posD(self, arr: ak.Array) -> np.ndarray:
        # Convert to numpy array
        arr = ak.to_numpy(arr, allow_missing=False)

        # the unique entries will later correspond
        # to the lower borders of the binning
        uniques = np.unique(arr)
        # calculate the distance
        dist = uniques[1:] - uniques[:-1]

        # remove the distances under a threshold
        idx_to_del = []
        for i, d in enumerate(dist):
            if abs(d) < conf.mapper.threshold:
                idx_to_del.append(i)
        dist = np.delete(dist, idx_to_del)
        uniques = np.delete(uniques, [i + 1 for i in idx_to_del])

        # Count the frequency of distances and sort by frequency
        # np.unique: get unique values and the counts
        # np.sort: sort the array along the last axis / the counts
        # [..., ::-1] to start with the highest element
        tmparr = np.sort(np.unique(dist, return_counts=True))[..., ::-1]
        # [0] only get the values, not the counts
        # Convert to a list to make removing elements less painful
        hval = list(tmparr[0])
        del tmparr

        # first, all values in hval that are close to each other are collapsed into one
        compare_idx, running_idx = 0, 1
        while compare_idx < len(hval) - 1:
            assert compare_idx != running_idx
            if np.isclose(hval[compare_idx],
                          hval[running_idx],
                          rtol=conf.mapper.threshold):
                del hval[running_idx]
            else:
                running_idx = running_idx + 1
                if running_idx == len(hval):
                    compare_idx = compare_idx + 1
                    running_idx = compare_idx + 1

        # Next we iterate over the distances.
        # We check if each distance is close to one of the most frequent
        # values and if so the distance is replaced.
        for idx, d in enumerate(dist):
            for val in hval:
                if np.isclose(d, val, rtol=conf.mapper.threshold):
                    dist[idx] = val
                    break  # `continue` here would keep scanning and could overwrite the match

        total = min(uniques)  # avoid shadowing the built-in sum()
        cellpositions = [total]
        for val in dist:
            total += val
            cellpositions.append(total)
        logger.debug(f"dist {dist} \n \n cellpositions \n {cellpositions}")
        cellpositions = np.array(cellpositions)
        assert all(np.diff(cellpositions) > 0)
        return cellpositions
Example #23
    def __getpixel(self, val: ak.Array, var: str) -> int:
        val = ak.to_numpy(val)
        border = self.binbordersD[var]
        pos = np.digitize(val, border)
        assert 0 <= pos
        assert pos < len(self.cell_posD[var])
        return pos
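The lookup relies on np.digitize, which returns the index of the first border above each value; a tiny sketch:

import numpy as np

borders = np.array([0.0, 1.0, 2.0])  # lower bin borders
vals = np.array([0.5, 1.5])
print(np.digitize(vals, borders))    # [1 2]: borders[pos-1] <= val < borders[pos]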
Example #24
def get_root_rest_energies(roots, energies, pxs, pys, pzs):
    """
    Find the energies (of anything really, but presumably jets) in the rest frame
    of particles identified as root particles
    
    Parameters
    ----------
    roots : array of bool
        mask identifying the root particles
    energies : array like of floats
        the energies of the particles
    pxs : array like of floats
        the momentum in the x direction of the particles
    pys : array like of floats
        the momentum in the y direction of the particles
    pzs : array like of floats
        the momentum in the z direction of the particles

    Returns
    -------
    energies : array like of floats
        the energies of the particles in the rest frame of the root
    """
    # if we are to use the roots as indices they must have this form
    energies = ak.to_numpy(energies)
    masses2 = energies**2 - pxs**2 - pys**2 - pzs**2
    pxs = pxs - ak.flatten(pxs[roots])
    pys = pys - ak.flatten(pys[roots])
    pzs = pzs - ak.flatten(pzs[roots])
    energies = np.sqrt(masses2 + pxs**2 + pys**2 + pzs**2)
    return energies
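The key step is that ak.flatten(pxs[roots]) yields one value per event, which awkward then broadcasts across each event's particles; a sketch with toy values:

import awkward as ak

pxs = ak.Array([[1., 2., 3.], [4., 5.]])
roots = ak.Array([[True, False, False], [False, True]])  # exactly one root per event
print(pxs - ak.flatten(pxs[roots]))  # [[0, 1, 2], [-1, 0]]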
Example #25
def lepton_pairing(
    Electron: List[ak.Array],
    Muon: List[ak.Array]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Electron and Muon contains values of ["PT", "Eta", "Phi", "Charge"] respectively
    """
    return_Electron = ak.Array([])
    return_Muon = ak.Array([])
    delta_etas = []
    delta_phis = []

    Electron = ak.to_list(Electron)
    Muon = ak.to_list(Muon)
    n_events = len(Electron[0]) # each particle has same n of events but may have 
    # different n of particles in each event
    for idx in range(n_events): # for each event
        for electron_jdx in range(len(Electron[0][idx])):
            for muon_jdx in range(len(Muon[0][idx])):
                print("Electron[-1][idx][electron_jdx]: ", Electron[-1][idx][electron_jdx])
                print("Muon[-1][idx][muon_jdx]: ", Muon[-1][idx][muon_jdx])
                if (Electron[-1][idx][electron_jdx] == -Muon[-1][idx][muon_jdx] ):
                    # if charge sign of electron is opposite of muon, then pairing is made and
                    # add the relevant values to the final return value
                    e_placeholder = []
                    for value in Electron:
                        e_placeholder.append(value[idx][electron_jdx])
                    return_Electron = ak.concatenate((return_Electron, ak.Array([e_placeholder])), axis =0)
                    # print(ak.to_numpy(return_Electron).shape)
                    # print("return_Electron: ", return_Electron)
                    m_placeholder = []
                    for value in Muon:
                        m_placeholder.append(value[idx][muon_jdx])
                    return_Muon = ak.concatenate((return_Muon, ak.Array([m_placeholder])), axis =0)
                    # print(ak.to_numpy(return_Muon).shape)
                    # print("return_Muon: ", return_Muon)
                    # add delta eta and phi values
                    delta_etas.append(delta_eta(Electron[1][idx][electron_jdx], Muon[1][idx][muon_jdx]))
                    delta_phis.append(delta_phi(Electron[-2][idx][electron_jdx], Muon[-2][idx][muon_jdx]))

                    # assign the respective charges as 10 (arbitrary number) to signify that they
                    # have been chosen already, and don't get detected next time
                    # print(type(Electron[-1][idx][electron_jdx]))
                    Electron[-1][idx][electron_jdx] = 10.0
                    Muon[-1][idx][muon_jdx] = 10.0

    delta_package = np.array([delta_etas, delta_phis])
    return (ak.to_numpy(return_Electron), ak.to_numpy(return_Muon), delta_package)
Example #26
def process_1tau_1lep(taus_all, muons_all, electrons_all, genWeight_all):

    out_hists = {}

    ntau = ak.num(taus_all)
    nmuon = ak.num(muons_all)
    nelectron = ak.num(electrons_all)

    mask_tau_e = (ntau == 1) & (nelectron == 1) & (nmuon == 0)
    mask_tau_mu = (ntau == 1) & (nmuon == 1) & (nelectron == 0)

    taus_taue = taus_all[mask_tau_e]
    electrons_taue = electrons_all[mask_tau_e]
    genWeight_taue = genWeight_all[mask_tau_e]

    taus_taumu = taus_all[mask_tau_mu]
    muons_taumu = muons_all[mask_tau_mu]
    genWeight_taumu = genWeight_all[mask_tau_mu]

    if len(taus_taue) > 0:

        tau0 = taus_taue[:, 0]
        ele0 = electrons_taue[:, 0]
        mtaue = np.sqrt(
            2 * tau0.pt * ele0.pt *
            (np.cosh(tau0.eta - ele0.eta) - np.cos(tau0.phi - ele0.phi)))
        #mtaue = (taus_taue[:,0] + electrons_taue[:,0]).mass
        out_hists["mtaue"] = Hist1D(ak.to_numpy(mtaue),
                                    bins=bin_mZ,
                                    weights=ak.to_numpy(genWeight_taue),
                                    label="mtaue")

        dR_tau_e = deltaR_devfunc(taus_taue, electrons_taue)
        out_hists["dR_tau_e"] = Hist1D(ak.to_numpy(dR_tau_e),
                                       bins=bin_dR,
                                       weights=ak.to_numpy(genWeight_taue),
                                       label="dR_taue")
    if len(taus_taumu) > 0:

        tau0 = taus_taumu[:, 0]
        mu0 = muons_taumu[:, 0]
        mtaumu = np.sqrt(
            2 * tau0.pt * mu0.pt *
            (np.cosh(tau0.eta - mu0.eta) - np.cos(tau0.phi - mu0.phi)))
        #mtaumu = (taus_taumu[:,0] + muons_taumu[:,0]).mass
        out_hists["mtaumu"] = Hist1D(ak.to_numpy(mtaumu),
                                     bins=bin_mZ,
                                     weights=ak.to_numpy(genWeight_taumu),
                                     label="mtaumu")

        dR_tau_mu = deltaR_devfunc(taus_taumu, muons_taumu)
        out_hists["dR_tau_mu"] = Hist1D(ak.to_numpy(dR_tau_mu),
                                        bins=bin_dR,
                                        weights=ak.to_numpy(genWeight_taumu),
                                        label="dR_taumu")
    return out_hists
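Both branches use the massless two-body invariant mass, m^2 = 2*pt1*pt2*(cosh(eta1 - eta2) - cos(phi1 - phi2)); a standalone check with invented kinematics:

import numpy as np

def m_vis(pt1, eta1, phi1, pt2, eta2, phi2):
    # invariant mass of two (approximately) massless particles
    return np.sqrt(2 * pt1 * pt2 * (np.cosh(eta1 - eta2) - np.cos(phi1 - phi2)))

print(m_vis(45., 0.2, 1.0, 40., -0.3, -2.0))  # ~87, a Z-like scale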
Example #27
    def _preprocess_fitinf(self, fitinf):
        # preprocess the fitinf a bit - yay!
        n_columns = max(km3io.definitions.fitparameters.values()) + 1
        fitinf_array = np.ma.filled(
            ak.to_numpy(ak.pad_none(fitinf, target=n_columns, axis=-1)),
            fill_value=np.nan,
        ).astype("float32")
        return fitinf_array
Example #28
def apply_roccor(df, rochester, is_mc):
    if is_mc:
        hasgen = ~np.isnan(ak.fill_none(df.Muon.matched_gen.pt, np.nan))
        mc_rand = np.random.rand(*ak.to_numpy(ak.flatten(df.Muon.pt)).shape)
        mc_rand = ak.unflatten(mc_rand, ak.num(df.Muon.pt, axis=1))

        corrections = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        errors = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        mc_kspread = rochester.kSpreadMC(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )

        mc_ksmear = rochester.kSmearMC(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )

        errspread = rochester.kSpreadMCerror(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )
        errsmear = rochester.kSmearMCerror(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )
        hasgen_flat = np.array(ak.flatten(hasgen))
        corrections[hasgen_flat] = np.array(ak.flatten(mc_kspread))
        corrections[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
        errors[hasgen_flat] = np.array(ak.flatten(errspread))
        errors[~hasgen_flat] = np.array(ak.flatten(errsmear))

        corrections = ak.unflatten(corrections, ak.num(df.Muon.pt, axis=1))
        errors = ak.unflatten(errors, ak.num(df.Muon.pt, axis=1))

    else:
        corrections = rochester.kScaleDT(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)
        errors = rochester.kScaleDTerror(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)

    df["Muon", "pt_roch"] = df.Muon.pt * corrections
    df["Muon", "pt_roch_up"] = df.Muon.pt_roch + df.Muon.pt * errors
    df["Muon", "pt_roch_down"] = df.Muon.pt_roch - df.Muon.pt * errors
Example #29
    def get_scale_factor(self, jets, passing_cut):
        '''Starting from a jet collection and a string pointing to
        the flag that defines whether the jet is b-tagged, compute the
        per-jet weight to be used. Only a single WP is supported for
        the moment.'''
        # First of all flatten everything to make it easier to handle
        pt = ak.to_numpy(ak.flatten(jets.pt))
        eta = ak.to_numpy(ak.flatten(jets.eta))
        flav = ak.to_numpy(ak.flatten(jets.hadronFlavour))
        pass_wp = ak.to_numpy(ak.flatten(jets[passing_cut]))

        # Get the MC efficiency
        eff = self.efficiency_(pt, eta, flav)
        # for each systematic/central value compute the proper SF
        # cache the SF values as sometimes they are repeated, there
        # might also be systematic combinations that are never accessed
        # but pruning them at the beginning can be hard
        # use schema to define combinations, lcb is a tuple with the sf keys
        # for light, charm, bottom for each systematic
        flavour_sf_cache = {}
        scale_factors = {}  # our final product
        for key, lcb in self.schema_.items():
            # populate cache if needed
            for i in range(3):
                flavour_sf_cache[lcb[i]] = flavour_sf_cache.get(
                    # for some reason there is an additional dimension, pass_wp has no effect
                    lcb[i],
                    self.sf_[lcb[i]](eta, pt, pass_wp))
            scale_factors[key] = eff * self.match_flav_(
                flavour_sf_cache[lcb[0]], flavour_sf_cache[lcb[1]],
                flavour_sf_cache[lcb[2]], flav)

        # use SF and eff to compute p(data) and p(MC)
        p_data = {
            key: np.where(pass_wp, val, 1 - val)
            for key, val in scale_factors.items()
        }
        p_mc = np.where(pass_wp, eff, 1 - eff)

        # return the unflattened version of the ratio
        return {
            key: ak.unflatten(i / p_mc, ak.num(jets.pt))
            for key, i in p_data.items()
        }
Example #30
def make_composite_distributions():
    global tree
    matchedMask = tree["t5_isFake"].array() == 0
    layers = np.array(list(map(process_layers, ak.flatten(tree["t5_layer_binary"].array()))))
    layerTypes = np.array(list(map(process_layerType, layers)))
#    layerTypes = np.array(list(map(process_numbers, layers)))
    unique_layerTypes = np.unique(layerTypes, axis=0)
    unique_layerTypes = np.append(unique_layerTypes,"")
    print(unique_layerTypes)

    for layerType in unique_layerTypes:
        print("layerType = {}".format(layerType))
        innerRadius = ak.to_numpy(ak.flatten(tree["t5_innerRadius"].array()))
        innerRadiusMin = ak.to_numpy(ak.flatten(tree["t5_innerRadiusMin"].array()))
        innerRadiusMax = ak.to_numpy(ak.flatten(tree["t5_innerRadiusMax"].array()))
        outerRadius = ak.to_numpy(ak.flatten(tree["t5_outerRadius"].array()))
        outerRadiusMin = ak.to_numpy(ak.flatten(tree["t5_outerRadiusMin"].array()))
        outerRadiusMax = ak.to_numpy(ak.flatten(tree["t5_outerRadiusMax"].array()))

        qArray = (outerRadiusMin - innerRadiusMax) / innerRadiusMax
        qArray[innerRadius > outerRadius] = (innerRadiusMin[innerRadius > outerRadius] - outerRadiusMax[innerRadius > outerRadius])/ innerRadiusMin[innerRadius > outerRadius]

        qArrayInv = (1.0/innerRadiusMax - 1.0/outerRadiusMin) / (1.0/innerRadiusMax)
        qArrayInv[innerRadius > outerRadius] = (1.0/ outerRadiusMax[innerRadius > outerRadius] - 1.0/innerRadiusMin[innerRadius > outerRadius])/(1.0/innerRadiusMin[innerRadius > outerRadius])

        if layerType == "":
            qArraySimTrackMatched = qArray[ak.to_numpy(ak.flatten(matchedMask))]
            qArrayInvSimTrackMatched = qArrayInv[ak.to_numpy(ak.flatten(matchedMask))]
        else:
            qArray = qArray[layerTypes == layerType]
            qArrayInv = qArrayInv[layerTypes == layerType]
            qArraySimTrackMatched = qArray[ak.to_numpy(ak.flatten(matchedMask)[layerTypes == layerType])]
            qArrayInvSimTrackMatched = qArrayInv[ak.to_numpy(ak.flatten(matchedMask)[layerTypes == layerType])]

        print("deltaR integral = ", len(qArray), "deltaR sim track matched integral = ", len(qArraySimTrackMatched))
        print("deltaR integral above 0 = ", sum(qArray >= 0), "deltaR sim track matched integral above 0 = ", sum(qArraySimTrackMatched >= 0))

        print("deltaInvR integral = ", len(qArrayInv), "deltaInvR sim track matched integral = ", len(qArrayInvSimTrackMatched))
        print("deltaInvR integral above 0 = ", sum(qArray >= 0), "deltaInvR sim track matched integral above 0 = ", sum(qArrayInvSimTrackMatched >= 0))

        make_plots(abs(qArray[qArray > 0]), abs(qArraySimTrackMatched[qArraySimTrackMatched > 0]), "deltaR/innerRadius",layerType)

        make_plots(abs(qArrayInv[qArrayInv > 0]), abs(qArrayInvSimTrackMatched[qArrayInvSimTrackMatched > 0]), "delta(1/R)/1/innerRadius", layerType)