Exemple #1
0
def _remove_outliers_from_hist(
        hist: Hist, outliers_start_index: int,
        outliers_removal_axis: OutliersRemovalAxis) -> None:
    """Zero out every bin that lies at or beyond the outliers start index.

    Args:
        hist: Histogram to check for outliers.
        outliers_start_index: Bin index along the removal axis at which outliers begin.
            Nothing is removed unless this is greater than 0.
        outliers_removal_axis: Axis along which outliers removal will be performed. Usually
            the particle level axis.
    Returns:
        None. The histogram is modified in place.
    """
    # Guard clause: without a positive start index there is nothing to cut.
    if not outliers_start_index > 0:
        logger.info(f"Hist {hist.GetName()} did not have any outliers to cut")
        return

    # ``GetBinXYZ`` fills these three integers in place from a global bin number.
    x, y, z = ctypes.c_int(0), ctypes.c_int(0), ctypes.c_int(0)
    # Look-up from the requested axis to the integer which receives that axis' bin number.
    # Since the integers are updated in place, the map only needs to be built once.
    axis_to_bin: Dict[OutliersRemovalAxis, ctypes.c_int] = {
        projectors.TH1AxisType.x_axis: x,
        projectors.TH1AxisType.y_axis: y,
        projectors.TH1AxisType.z_axis: z,
    }

    # The same loop is used for TH1, TH2, and TH3 because it walks the global bin
    # numbers and only acts on bins at or past the start index.
    for index in range(hist.GetNcells()):
        # Translate the global bin into per-axis bin numbers.
        hist.GetBinXYZ(index, x, y, z)

        # Flag suspicious bins, but don't treat them as an error.
        if hist.GetBinContent(index) < hist.GetBinError(index):
            logger.warning(
                f"Bin content < error. Name: {hist.GetName()}, Bin content: {hist.GetBinContent(index)}, Bin error: {hist.GetBinError(index)}, index: {index}, ({x.value}, {y.value})"
            )

        # Bins at or beyond the start index along the selected axis are outliers.
        bin_along_axis = axis_to_bin[outliers_removal_axis].value
        if bin_along_axis >= outliers_start_index:
            hist.SetBinContent(index, 0)
            hist.SetBinError(index, 0)
Exemple #2
0
def get_array_from_hist2D(hist: Hist,
                          set_zero_to_NaN: bool = True,
                          return_bin_edges: bool = False
                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Extract x, y, and bin values from a 2D ROOT histogram.

    Converts the histogram into a numpy array, and suitably processes it for a surface plot
    by removing 0s (which can cause problems when taking logs), and returning a set of (x, y) mesh
    values utilizing either the bin edges or bin centers.

    Note:
        This is a different format than the 1D version!

    Args:
        hist (ROOT.TH2): Histogram to be converted.
        set_zero_to_NaN: If true, set 0 in the array to NaN. Useful with matplotlib so that it will
            ignore the values when plotting. See comments in this function for more details. Default: True.
        return_bin_edges: Return x and y using bin edges instead of bin centers. The edges are taken
            directly from the axes, so variable-width binning is handled correctly. Default: False.
    Returns:
        Contains (x values, y values, numpy array of hist data) where (x, y) are values on a
            grid (from np.meshgrid) using the selected bin values. The hist data is indexed
            as ``hist_array[x_index][y_index]``.
    """
    x_axis = hist.GetXaxis()
    y_axis = hist.GetYaxis()
    n_bins_x = x_axis.GetNbins()
    n_bins_y = y_axis.GetNbins()

    # Collect the contents of every bin which is neither underflow nor overflow. Global bin 0
    # is skipped up front, as in the original implementation.
    # The float dtype is explicit so that the NaN assignment below is always valid.
    hist_array = np.array([
        hist.GetBinContent(global_bin) for global_bin in range(1, hist.GetNcells())
        if not hist.IsBinUnderflow(global_bin) and not hist.IsBinOverflow(global_bin)
    ], dtype=float)
    # The values were collected linearly, so shape them into 2D.
    # NOTE: The shape can be somewhat confusing: it is specified as the number of y bins (rows)
    #       followed by the number of x bins (columns).
    hist_array = hist_array.reshape((n_bins_y, n_bins_x))
    # Transpose so that ``hist_array[1][0]`` gives the 2nd x bin (x_index = 1) and the 1st y bin
    # (y_index = 0). This is the same convention as used by root_numpy.
    hist_array = hist_array.T
    # Set all 0s to nan to get similar behavior to ROOT. In ROOT, it will basically ignore 0s. This is
    # especially important for log plots. Matplotlib doesn't handle 0s as well, since it attempts to
    # plot them and then will throw exceptions when the log is taken.
    # By setting to nan, matplotlib basically ignores them similar to ROOT
    # NOTE: This requires a few special functions later which ignore nan when calculating min and max.
    if set_zero_to_NaN:
        hist_array[hist_array == 0] = np.nan

    if return_bin_edges:
        # Use the bin edges themselves rather than regenerating them with ``np.arange`` and the
        # width of the first bin. Regenerating them is wrong for variable-width binning, and the
        # fixed epsilon needed to include the upper edge can add a spurious edge when the bin
        # width is itself tiny (for example, ~ns time axes).
        # NOTE(review): assumes ``get_bin_edges_from_axis`` returns all ``nbins + 1`` edges in
        #               ascending order - confirm against the helper.
        x_range = np.asarray(get_bin_edges_from_axis(x_axis), dtype=float)
        y_range = np.asarray(get_bin_edges_from_axis(y_axis), dtype=float)
    else:
        # We want an array of bin centers. ROOT bins are 1-indexed (0 is the underflow bin).
        x_range = np.array([x_axis.GetBinCenter(i) for i in range(1, n_bins_x + 1)])
        y_range = np.array([y_axis.GetBinCenter(i) for i in range(1, n_bins_y + 1)])

    X, Y = np.meshgrid(x_range, y_range)

    return (X, Y, hist_array)