예제 #1
0
def get_array_from_hist2D(hist: Hist,
                          set_zero_to_NaN: bool = True,
                          return_bin_edges: bool = False
                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Extract x, y, and bin values from a 2D ROOT histogram.

    Converts the histogram into a numpy array, and suitably processes it for a surface plot
    by removing 0s (which can cause problems when taking logs), and returning a set of (x, y) mesh
    values utilizing either the bin edges or bin centers.

    Note:
        This is a different format than the 1D version!

    Note:
        The returned histogram array is indexed as ``hist_array[x_index][y_index]``, so it has the
        shape ``(n_x_bins, n_y_bins)``. The mesh arrays come from ``np.meshgrid`` with its default
        indexing, so they have the shape ``(len(y values), len(x values))``.

    Args:
        hist (ROOT.TH2): Histogram to be converted.
        set_zero_to_NaN: If true, set 0 in the array to NaN. Useful with matplotlib so that it will
            ignore the values when plotting. See comments in this function for more details. Default: True.
        return_bin_edges: Return x and y using bin edges instead of bin centers. Default: False.
    Returns:
        Contains (x values, y values, numpy array of hist data) where (x, y) are values on a
            grid (from np.meshgrid) using the selected bin values.
    """
    x_axis = hist.GetXaxis()
    y_axis = hist.GetYaxis()

    # Process the hist into a suitable state
    # NOTE: The shape can be somewhat confusing (i.e. one would naively expect to specify x first).
    # This says that there are ``GetYaxis().GetNbins()`` rows and ``GetXaxis().GetNbins()`` columns,
    # which matches the order in which the linear (global) bin index is walked below.
    shape = (y_axis.GetNbins(), x_axis.GetNbins())
    # Walk the global bin numbers, skipping every underflow and overflow cell.
    # The dtype is fixed to float64 so that assigning NaN below is always valid.
    hist_array = np.array([
        hist.GetBinContent(global_bin) for global_bin in range(1, hist.GetNcells())
        if not hist.IsBinUnderflow(global_bin) and not hist.IsBinOverflow(global_bin)
    ], dtype=np.float64)
    # The hist_array was linear, so we need to shape it into our expected 2D values.
    hist_array = hist_array.reshape(shape)
    # Transpose the array to better match expectations.
    # In particular, by transposing the array, it means that ``hist_array[1][0]`` gives the 2nd x
    # value (x_index = 1) and the 1st y value (y_index = 0). This is also the same convention as
    # used by root_numpy.
    hist_array = hist_array.T
    # Set all 0s to nan to get similar behavior to ROOT. In ROOT, it will basically ignore 0s. This is
    # especially important for log plots. Matplotlib doesn't handle 0s as well, since it attempts to
    # plot them and then will throw exceptions when the log is taken.
    # By setting to nan, matplotlib basically ignores them similar to ROOT
    # NOTE: This requires a few special functions later which ignore nan when calculating min and max.
    if set_zero_to_NaN:
        hist_array[hist_array == 0] = np.nan

    if return_bin_edges:
        # Use the bin edges of each axis directly rather than regenerating them from the axis
        # limits and the width of the first bin. Regenerating them is wrong for variable width
        # binning, and stepping with a floating point width can produce the wrong number of edges
        # (particularly for very narrow bins, such as times on the ns scale).
        # NOTE(review): this relies on ``get_bin_edges_from_axis`` returning all ``n_bins + 1``
        #               edges of the axis in increasing order - confirm against that helper.
        x_range = np.asarray(get_bin_edges_from_axis(x_axis), dtype=np.float64)
        y_range = np.asarray(get_bin_edges_from_axis(y_axis), dtype=np.float64)
    else:
        # We want an array of bin centers (excluding the underflow and overflow bins).
        x_range = np.array([
            x_axis.GetBinCenter(i) for i in range(1, x_axis.GetNbins() + 1)
        ])
        y_range = np.array([
            y_axis.GetBinCenter(i) for i in range(1, y_axis.GetNbins() + 1)
        ])

    X, Y = np.meshgrid(x_range, y_range)

    return (X, Y, hist_array)
예제 #2
0
    def _from_th1(
        hist: Hist
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
        """ Extract the bin edges, bin contents, squared errors, and stats from a TH1.

        Note:
            Underflow and overflow bins are excluded!

        Note:
            Sumw2 is enabled on the passed histogram if it was not already calculated, so the
            input histogram may be modified by this call.

        Args:
            hist (ROOT.TH1): Input histogram.
        Returns:
            tuple: (bin_edges, y, errors_squared, metadata) where bin_edges are the x axis bin edges,
                y is the bin values, errors_squared are the squared bin errors, and metadata
                contains the histogram stats (it is left empty for a TProfile).
        Raises:
            ValueError: If the sumw2 value of the first bin doesn't match its squared bin error.
        """
        # The sumw2 values are read below, so ensure that they have been calculated.
        if hist.GetSumw2N() == 0:
            hist.Sumw2(True)

        x_axis = hist.GetXaxis()
        # Overflow is not included in the edges.
        bin_edges = get_bin_edges_from_axis(x_axis)
        # Bin numbers which cover the axis without the under/overflow bins.
        in_range_bins = range(1, x_axis.GetNbins() + 1)
        # NOTE: The y value and bin error are stored with the hist, not the axis.
        y = np.array([hist.GetBinContent(bin_number) for bin_number in in_range_bins])
        # Drop the first and last entries, which correspond to the under/overflow bins.
        errors = np.array(hist.GetSumw2())[1:-1]
        metadata: Dict[str, Any] = {}

        # A TProfile is identified through its ``BuildOptions`` attribute.
        # For a profile, the Sumw2() values are not the anticipated errors, so the bin errors are
        # retrieved manually and then squared (since squared errors are expected).
        if hasattr(hist, "BuildOptions"):
            profile_errors = np.array([hist.GetBinError(bin_number) for bin_number in in_range_bins])
            return (bin_edges, y, profile_errors**2, metadata)

        # Sanity check. If they don't match, something odd has almost certainly occurred.
        first_bin_matches = np.isclose(errors[0], hist.GetBinError(1)**2)
        if not first_bin_matches:
            raise ValueError(
                "Sumw2 errors don't seem to represent bin errors!")

        # Retrieve the stats and store them in the metadata.
        # They are useful for calculating histogram properties (mean, variance, etc).
        # The stats are written into this buffer by ROOT. The four values are (each a single float):
        # [1]: total_sum_w: Sum of weights (equal to np.sum(y) if unscaled)
        # [2]: total_sum_w2: Sum of weights squared (equal to np.sum(errors_squared) if unscaled)
        # [3]: total_sum_wx: Sum of w*x
        # [4]: total_sum_wx2: Sum of w*x*x
        stats = np.zeros(4, dtype=np.float64)
        hist.GetStats(np.ctypeslib.as_ctypes(stats))
        metadata.update(_create_stats_dict_from_values(*stats))

        return (bin_edges, y, errors, metadata)