def calculate_residual_2D(efficiency_data: Hist, efficiency_function: Callable[..., float],
                          efficiency_period: Any, centrality_bin: int) -> Tuple[np.ndarray, List[float], List[float]]:
    """ Calculate residual for 2D tracking efficiency.

    There is a separate 1D and 2D function for convenience. If there are no entries for a particular
    bin, the value is set to NaN so that it can be ignored later when plotting.

    Args:
        efficiency_data: 2D efficiency data.
        efficiency_function: Efficiency function.
        efficiency_period: Efficiency period.
        centrality_bin: Centrality bin.
    Returns:
        Calculated residual, pt values where it was evaluated, eta values where it was evaluated.
    """
    pts = [efficiency_data.GetXaxis().GetBinCenter(x) for x in range(1, efficiency_data.GetXaxis().GetNbins() + 1)]
    etas = [efficiency_data.GetYaxis().GetBinCenter(y) for y in range(1, efficiency_data.GetYaxis().GetNbins() + 1)]
    residual = np.zeros(shape = (efficiency_data.GetXaxis().GetNbins(),
                                 efficiency_data.GetYaxis().GetNbins()))
    # Loop over all of the bins in the data histogram.
    chi_2 = []
    for pt_index, pt in enumerate(pts):
        for eta_index, eta in enumerate(etas):
            x = pt_index + 1
            y = eta_index + 1
            # Calculate the efficiency. It's calculated again here to ensure that it's evaluated at exactly
            # the same location as in the data histogram.
            efficiency_at_value = efficiency_function(pt, eta, centrality_bin, efficiency_period, "task_name")

            # Determine the histogram value, setting it to NaN if there's no entries.
            if np.abs(efficiency_data.GetBinContent(x, y)) < epsilon:
                value = np.nan
            else:
                value = (efficiency_data.GetBinContent(x, y) - efficiency_at_value) / efficiency_at_value * 100.
                # The points around the edges aren't very reliable for calculating chi squared.
                if pt > 1 and np.abs(eta) < 0.8:
                    chi_2.append(np.power(efficiency_data.GetBinContent(x, y) - efficiency_at_value, 2) / np.power(efficiency_data.GetBinError(x, y), 2))

            residual[pt_index, eta_index] = value

    # Check max values
    logger.debug(f"min efficiency_data: {efficiency_data.GetMinimum()}, "
                 f"max efficiency_data: {efficiency_data.GetMaximum()}")
    logger.debug(f"min residual: {np.nanmin(residual)}, max residual: {np.nanmax(residual)}")
    logger.debug(f"standard mean: {np.nanmean(residual)}")
    logger.debug(f"restricted mean: {np.nanmean(residual[:,np.abs(etas) < 0.8])}")
    logger.debug(f"len(pts): {len(pts)}, len(etas): {len(etas)}")

    # Check chi squared
    chi_squared = np.sum(chi_2)
    # 23 is the number of parameters (10 + 13) at any given point
    ndf = len(chi_2) - 23
    logger.warning("NOTE: The restricted chi squared value calculated here may not be super reliable.")
    logger.info(f"Chi squared: {chi_squared}")
    logger.info(f"NDF: {ndf}")
    logger.info(f"chi2/ndf: {chi_squared / ndf}")

    return residual, pts, etas
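Because empty bins are stored as NaN, the summary statistics and plots downstream can ignore them without extra bookkeeping. The following is a minimal, self-contained sketch (toy arrays only, not the analysis histograms above) illustrating that convention.

import numpy as np
import matplotlib.pyplot as plt

# Toy residual map: 10 pt bins x 18 eta bins, with a few empty bins marked as NaN.
pts = np.linspace(0.5, 9.5, 10)
etas = np.linspace(-0.85, 0.85, 18)
rng = np.random.default_rng(42)
residual = rng.normal(0.0, 2.0, size=(len(pts), len(etas)))
residual[0, :3] = np.nan  # pretend these bins had no entries

# nan-aware statistics skip the empty bins, mirroring the logging above.
print(f"mean: {np.nanmean(residual):.2f}%")
print(f"restricted mean (|eta| < 0.8): {np.nanmean(residual[:, np.abs(etas) < 0.8]):.2f}%")

# pcolormesh leaves NaN cells blank, so empty bins simply don't show up.
fig, ax = plt.subplots()
mesh = ax.pcolormesh(pts, etas, residual.T, shading="auto")
ax.set_xlabel(r"$p_{\mathrm{T}}$ (GeV/$c$)")
ax.set_ylabel(r"$\eta$")
fig.colorbar(mesh, label="Residual (%)")
fig.savefig("residual_toy.pdf")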
Example #2
def _apply_labels_ROOT(self, hist: Hist) -> None:
    if self.title is not None:
        hist.SetTitle(labels.use_label_with_root(self.title))
    if self.x_label is not None:
        hist.GetXaxis().SetTitle(labels.use_label_with_root(self.x_label))
    if self.y_label is not None:
        hist.GetYaxis().SetTitle(labels.use_label_with_root(self.y_label))
Example #3
def _check_normalization(
        hist: Hist, response_normalization: ResponseNormalization) -> bool:
    """ Check each bin to ensure that the normalization was successful.

    Args:
        hist: Response matrix to check. This must be a 2D histogram.
        response_normalization: Normalization convention for the response matrix.
    Returns:
        True if the normalization is fine.
    Raises:
        ValueError: If the normalization fails for a particular bin.
    """
    for index in range(1, hist.GetXaxis().GetNbins() + 1):
        # Access bins
        bins_content, _ = _access_set_of_values_associated_with_a_bin(
            hist=hist,
            bin_of_interest=index,
            response_normalization=response_normalization,
        )
        # Get norm
        norm = np.sum(bins_content)

        # The comparison limit is selected somewhat arbitrarily; it just needs to be sufficiently small.
        comparison_limit = 1e-9
        if not np.isclose(norm, 0, atol=comparison_limit) and not np.isclose(
                norm, 1, atol=comparison_limit):
            raise ValueError(
                f"Normalization not successful for bin {index}. Norm: {norm}")

    return True
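For reference, a plain-numpy toy illustration (not ROOT) of the check performed above: every set of bins associated with a given bin of interest must sum to either 0 (an empty bin) or 1 (a normalized bin) within a tight tolerance.

import numpy as np

response = np.array([
    [0.7, 0.3],   # normalized -> sums to 1
    [0.0, 0.0],   # empty -> sums to 0, which is also acceptable
    [0.5, 0.2],   # broken -> sums to 0.7, would trigger the ValueError above
])
for index, bins_content in enumerate(response, start=1):
    norm = np.sum(bins_content)
    ok = np.isclose(norm, 0, atol=1e-9) or np.isclose(norm, 1, atol=1e-9)
    print(f"bin {index}: norm = {norm:.2f}, ok = {ok}")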
Example #4
def post_projection_processing_for_2d_correlation(
        hist: Hist,
        normalization_factor: float,
        title_label: str,
        jet_pt: analysis_objects.JetPtBin,
        track_pt: analysis_objects.TrackPtBin,
        rebin_factors: Optional[Tuple[int, int]] = None) -> None:
    """ Basic post processing tasks for a new 2D correlation observable.

    Args:
        hist: Histogram to be post processed.
        normalization_factor: Factor by which the hist should be scaled.
        title_label: Histogram title label.
        jet_pt: Jet pt bin.
        track_pt: Track pt bin.
        rebin_factors: (x rebin factor, y rebin factor). Both values must be specified (set a factor
            to 1 if you don't want to rebin that axis). Default: None.
    Returns:
        None. The histogram is modified in place.
    """
    # If we specify a rebin factor, then rebin.
    if rebin_factors is not None:
        hist.Rebin2D(*rebin_factors)

    # Scale
    hist.Scale(1.0 / normalization_factor)

    # Set title, axis labels
    jet_pt_bins_title = labels.jet_pt_range_string(jet_pt)
    track_pt_bins_title = labels.track_pt_range_string(track_pt)
    hist.SetTitle(
        rf"{title_label}\:\mathrm{{with}}\:{jet_pt_bins_title} \mathrm{{,}} {track_pt_bins_title}"
    )
    hist.GetXaxis().SetTitle(r"$\Delta\varphi$")
    hist.GetYaxis().SetTitle(r"$\Delta\eta$")
Example #5
def measure_mixed_event_normalization(
        mixed_event: Hist,
        eta_limits: Tuple[float, float],
        delta_phi_rebin_factor: int = 1) -> float:
    """ Determine normalization of the mixed event.

    The normalization is determined by using the moving average of half of the histogram.

    We need to project over a range of constant eta to be able to use the extracted max in the 2D
    mixed event. Joel uses [-0.4, 0.4], but the distribution really seems to drop in the 0.4 bin, so
    [-0.3, 0.3] is used here instead. This value also depends on the max track eta: for 0.9 it should
    be 0.4 (0.9 - 0.5), while for 0.8 it should be 0.3 (0.8 - 0.5).

    Note:
        This assumes that delta phi is on the x axis and delta eta is on the y axis.

    Args:
        mixed_event: Mixed event histogram.
        eta_limits: Min and max eta range limits.
        delta_phi_rebin_factor: Factor by which we will rebin the mixed event, and therefore by which we
            must scale the mixed event normalization.
    Returns:
        Mixed event normalization value.
    """
    # Project to 1D delta phi so it can be used with the signal finder
    peak_finding_hist, peak_finding_hist_array = _peak_finding_objects_from_mixed_event(
        mixed_event=mixed_event, eta_limits=eta_limits)

    # Use a moving average with a window of half the size of the delta phi axis (i.e. looking 5 bins
    # ahead if there are 10 bins in the axis).
    moving_avg = utils.moving_average(peak_finding_hist_array,
                                      n=mixed_event.GetXaxis().GetNbins() // 2)
    max_moving_avg: float = np.max(moving_avg)

    # Finally determine the mixed event normalization.
    mixed_event_normalization = max_moving_avg
    # Watch out for a value of zero, which could cause problems.
    if mixed_event_normalization == 0:
        logger.warning(
            f"Could not normalize the mixed event hist \"{mixed_event.GetName()}\" due to no data at (0,0)!"
        )
        mixed_event_normalization = 1

    # Account for a rebin factor. For example, if we rebin by 2, then we need to scale up
    # the normalization factor by 2.
    mixed_event_normalization *= delta_phi_rebin_factor

    return mixed_event_normalization
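The normalization hinges on utils.moving_average with a window of half the delta phi axis. A minimal sketch of what such a helper might look like is shown below, assuming a simple flat-window average; the actual implementation in utils may differ.

import numpy as np

def moving_average(arr: np.ndarray, n: int) -> np.ndarray:
    """ Average of each window of n consecutive entries (assumed flat window). """
    cumulative = np.cumsum(np.insert(arr, 0, 0.0))
    return (cumulative[n:] - cumulative[:-n]) / n

# With a window of half the axis, the maximum of the moving average picks out the flatter
# half of the delta phi distribution, which is what the normalization relies on.
values = np.array([1.0, 1.2, 3.0, 3.1, 2.9, 1.1, 1.0, 0.9, 1.0, 1.1])
print(moving_average(values, n=len(values) // 2))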
Example #6
def post_creation_processing_for_1d_correlations(
        hist: Hist, normalization_factor: float, rebin_factor: int,
        title_label: str, axis_label: str, jet_pt: analysis_objects.JetPtBin,
        track_pt: analysis_objects.TrackPtBin) -> None:
    """ Basic post processing tasks for a new 1D correlation observable. """
    # Rebin to decrease the fluctuations in the correlations
    # We don't scale by the rebin factor here because we will scale by bin width later.
    # Since we will handle it later, it doesn't make sense to try to preserve normalization here.
    hist.Rebin(rebin_factor)

    # Scale
    hist.Scale(1.0 / normalization_factor)

    # Set title, labels
    jet_pt_bins_title = labels.jet_pt_range_string(jet_pt)
    track_pt_bins_title = labels.track_pt_range_string(track_pt)
    # This won't look so good in ROOT, but that's just because their latex rendering is absolutely atrocious...
    hist.SetTitle(
        rf"{title_label} with {jet_pt_bins_title}, {track_pt_bins_title}")
    hist.GetXaxis().SetTitle(axis_label)
    hist.GetYaxis().SetTitle(fr"$\mathrm{{dN}}/\mathrm{{d}}{axis_label}$")
Example #7
def _plot_response_matrix_with_ROOT(
        name: str, x_label: str, y_label: str, output_name: str, hist: Hist,
        plot_errors_hist: bool,
        output_info: analysis_objects.PlottingOutputWrapper) -> None:
    """ Underlying function to actually plot a response matrix with ROOT.

    Args:
        name: Name of the histogram.
        x_label: X axis label.
        y_label: Y axis label.
        output_name: Output name of the histogram.
        hist: The response matrix related 2D hist.
        plot_errors_hist: True if the hist is the response matrix errors hist.
        output_info: Output information.
    Returns:
        None
    """
    # Setup
    canvas = ROOT.TCanvas("canvas", "canvas")
    canvas.SetLogz(True)

    # Plot the histogram
    hist.SetTitle(name)
    hist.GetXaxis().SetTitle(labels.use_label_with_root(x_label))
    hist.GetYaxis().SetTitle(labels.use_label_with_root(y_label))
    hist.Draw("colz")

    # Set the final axis ranges.
    # Z axis
    min_val = ctypes.c_double(0)
    max_val = ctypes.c_double(0)
    hist.GetMinimumAndMaximum(min_val, max_val)
    # * 1.1 to put it slightly above the max value
    # min_val doesn't work here, because there are some entries at 0
    hist.GetZaxis().SetRangeUser(10e-7, max_val.value * 1.1)

    # Save
    output_name += "_ROOT"
    plot_base.save_plot(output_info, canvas, output_name)
Example #8
def _calculate_bin_width_scale_factor(hist: Hist,
                                      additional_scale_factor: float = 1.0
                                      ) -> float:
    """ Calculate the bin width scale factor of a histogram.

    Args:
        hist: Hist to use for calculating the scale factor.
        additional_scale_factor: An additional scale factor to include in the calculation.
    Returns:
        The bin width scale factor for the hist.
    """
    # The first bin should always exist!
    bin_width_scale_factor: float = hist.GetXaxis().GetBinWidth(1)
    # Because of a ROOT quirk, even a TH1* hist has a Y and Z axis, with 1 bin
    # each. This bin has bin width 1, so it doesn't change anything if we multiply
    # by that bin width. So we just do it for all histograms.
    # This has the benefit that we don't need explicit dependence on an imported
    # ROOT package.
    bin_width_scale_factor *= hist.GetYaxis().GetBinWidth(1)
    bin_width_scale_factor *= hist.GetZaxis().GetBinWidth(1)

    final_scale_factor = additional_scale_factor / bin_width_scale_factor

    return final_scale_factor
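A hedged usage sketch of how the returned factor would typically be applied, converting bin counts to counts per unit of the axis variable. The histogram here is a toy stand-in built with ROOT, not one of the analysis histograms.

import ROOT

hist = ROOT.TH1F("toy", "toy", 10, 0.0, 5.0)  # bin width 0.5
hist.FillRandom("gaus", 1000)

scale_factor = _calculate_bin_width_scale_factor(hist)
print(f"scale factor: {scale_factor}")  # 1.0 / 0.5 = 2.0 for this binning
hist.Scale(scale_factor)  # counts -> counts per unit x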
Example #9
def normalize_response_matrix(
        hist: Hist, response_normalization: ResponseNormalization) -> None:
    """ Normalize response matrix.

    In the case of normalizing each detector pt bin (usually on the x axis), we take all associated truth level
    bins (usually the y axis), and normalize that array of truth bins to 1. In the case of normalizing the truth
    level bins, the case is reversed.

    Args:
        hist: The response matrix
        response_normalization: Response normalization convention, which dictates which axis to normalize.
    Returns:
        None. The response matrix is modified in place.
    """
    if response_normalization == ResponseNormalization.none:
        # Nothing to be done, so just return.
        return

    # Determine the relevant parameters for normalizing the response
    # Each entry is of the form (projection_function, max_bins)
    parameters_map = {
        ResponseNormalization.normalize_each_detector_bin: (
            ROOT.TH2.ProjectionY,
            hist.GetXaxis().GetNbins() + 1,
        ),
        ResponseNormalization.normalize_each_truth_bin: (
            ROOT.TH2.ProjectionX,
            hist.GetYaxis().GetNbins() + 1,
        ),
    }
    projection_function, max_bins = parameters_map[response_normalization]

    # We decided to ignore the overflow bins.
    for index in range(1, max_bins):
        # Access bins
        bins_content, _ = _access_set_of_values_associated_with_a_bin(
            hist=hist,
            bin_of_interest=index,
            response_normalization=response_normalization,
        )

        norm = np.sum(bins_content)
        # NOTE: The upper bound on integrals is inclusive!
        proj = projection_function(hist,
                                   f"{hist.GetName()}_projection_{index}",
                                   index, index)

        # Sanity checks
        # NOTE: The upper bound on integrals is inclusive!
        # NOTE: Integral() == Integral(1, proj.GetXaxis().GetNbins())
        if not np.isclose(norm, proj.Integral(1, proj.GetXaxis().GetNbins())):
            raise ValueError(
                f"Mismatch between sum and integral! norm: {norm},"
                f" integral: {proj.Integral(1, proj.GetXaxis().GetNbins())}")
        if not np.isclose(proj.Integral(),
                          proj.Integral(1,
                                        proj.GetXaxis().GetNbins())):
            raise ValueError(
                f"Integral mismatch! Full: {proj.Integral()} 1-nBins: {proj.Integral(1, proj.GetXaxis().GetNbins())}"
            )

        # Avoid scaling by 0
        if not norm > 0.0:
            continue

        # normalization by sum
        _scale_set_of_bins(
            hist=hist,
            bin_of_interest=index,
            response_normalization=response_normalization,
            scale_factor=norm,
        )

    # Final sanity check by checking that the normalization is correct in each bin.
    res = _check_normalization(hist=hist,
                               response_normalization=response_normalization)

    if not res:
        raise ValueError("Normalization check failed.")
Example #10
def calculate_systematic_2D(
        nominal: Hist, variation: Hist,
        signal_dominated: analysis_objects.AnalysisBin,
        background_dominated: analysis_objects.AnalysisBin) -> float:
    """ Calculate a systematic in 2D.

    Args:
        nominal: Nominal hist.
        variation: Systematically varied hist.
        signal_dominated: Signal dominated region.
        background_dominated: Background dominated region.
    Returns:
        The systematic calculated over the specified ranges.
    """
    # Calculate the nominal integral in the signal region
    nominal_signal = nominal.Integral(
        1,
        nominal.GetXaxis().GetNbins(),
        nominal.GetYaxis().FindBin(-1.0 * signal_dominated.max + epsilon),
        nominal.GetYaxis().FindBin(signal_dominated.max - epsilon),
    )
    # And in the background dominated region
    nominal_background = nominal.Integral(
        1,
        nominal.GetXaxis().GetNbins(),
        nominal.GetYaxis().FindBin(-1.0 * background_dominated.max + epsilon),
        nominal.GetYaxis().FindBin(-1.0 * background_dominated.min - epsilon),
    ) + nominal.Integral(
        1,
        nominal.GetXaxis().GetNbins(),
        nominal.GetYaxis().FindBin(background_dominated.min + epsilon),
        nominal.GetYaxis().FindBin(background_dominated.max - epsilon),
    )

    # Calculate the variation integral in the signal region
    variation_signal = variation.Integral(
        1,
        variation.GetXaxis().GetNbins(),
        variation.GetYaxis().FindBin(-1.0 * signal_dominated.max + epsilon),
        variation.GetYaxis().FindBin(signal_dominated.max - epsilon),
    )
    # And in the background dominated region
    variation_background = variation.Integral(
        1,
        variation.GetXaxis().GetNbins(),
        variation.GetYaxis().FindBin(-1.0 * background_dominated.max +
                                     epsilon),
        variation.GetYaxis().FindBin(-1.0 * background_dominated.min -
                                     epsilon),
    ) + variation.Integral(
        1,
        variation.GetXaxis().GetNbins(),
        variation.GetYaxis().FindBin(background_dominated.min + epsilon),
        variation.GetYaxis().FindBin(background_dominated.max - epsilon),
    )

    # The factor is the double ratio: (signal / background) for the nominal over (signal / background)
    # for the variation.
    # NOTE: There is no need to apply the correlation scale factor because it cancels in the ratio.
    factor = (nominal_signal / nominal_background) / (variation_signal /
                                                      variation_background)

    # Help out mypy because it doesn't understand ROOT
    return cast(float, factor)
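Toy arithmetic (numbers invented for illustration) spelling out the double ratio computed above.

# factor = (signal / background)_nominal / (signal / background)_variation
nominal_signal, nominal_background = 120.0, 40.0
variation_signal, variation_background = 118.0, 41.0

factor = (nominal_signal / nominal_background) / (variation_signal / variation_background)
print(f"systematic factor: {factor:.4f}")  # ~1.042, i.e. a ~4% effect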
Example #11
def measure_mixed_event_normalization(
    mixed_event: Hist,
    eta_limits: Tuple[float, float],
    delta_phi_rebin_factor: int = 1
) -> Tuple[float, float, histogram.Histogram1D]:
    """ Determine normalization of the mixed event.

    The normalization is determined by using the moving average of half of the histogram.

    We need to project over a range of constant eta to be able to use the extracted max in the 2D
    mixed event. Joel uses [-0.4, 0.4], but the distribution really seems to drop in the 0.4 bin, so
    [-0.3, 0.3] is used here instead. This value also depends on the max track eta: for 0.9 it should
    be 0.4 (0.9 - 0.5), while for 0.8 it should be 0.3 (0.8 - 0.5).

    Note:
        This assumes that delta phi is on the x axis and delta eta is on the y axis.

    Args:
        mixed_event: Mixed event histogram.
        eta_limits: Min and max eta range limits.
        delta_phi_rebin_factor: Factor by which we will rebin the mixed event, and therefore by which we
            must scale the mixed event normalization.
    Returns:
        Mixed event normalization value, max systematic, histogram used for determining the normalization
    """
    # Project to 1D delta phi so it can be used with the signal finder
    peak_finding_hist, peak_finding_hist_array = _peak_finding_objects_from_mixed_event(
        mixed_event=mixed_event, eta_limits=eta_limits)

    # Use a moving average with a window of half the size of the delta phi axis (i.e. looking 5 bins
    # ahead if there are 10 bins in the axis).
    moving_avg = utils.moving_average(peak_finding_hist_array,
                                      n=mixed_event.GetXaxis().GetNbins() // 2)
    max_moving_avg: float = np.max(moving_avg)

    # Finally determine the mixed event normalization.
    mixed_event_normalization = max_moving_avg
    # Watch out for a value of zero, which could cause problems.
    if mixed_event_normalization == 0:
        logger.warning(
            f"Could not normalize the mixed event hist \"{mixed_event.GetName()}\" due to no data at (0,0)!"
        )
        mixed_event_normalization = 1

    # Account for a rebin factor. For example, if we rebin by 2, then we need to scale up
    # the normalization factor by 2.
    mixed_event_normalization *= delta_phi_rebin_factor

    # Measure the systematic: compare the moving average to the 1D and 2D fits and take the max difference.
    fit1D = fitting.fit_1d_mixed_event_normalization(
        peak_finding_hist, [1. / 2. * np.pi, 3. / 2. * np.pi])
    max_linear_fit_1D = fit1D.GetParameter(0)
    fit2D = fitting.fit_2d_mixed_event_normalization(
        mixed_event, [1. / 2. * np.pi, 3. / 2. * np.pi], eta_limits)
    max_linear_fit_2D = fit2D.GetParameter(0)

    # For the systematic, we'll just evaluate the moving average vs the fits.
    fit_systematic_1D = np.abs(max_moving_avg - max_linear_fit_1D)
    fit_systematic_2D = np.abs(max_moving_avg - max_linear_fit_2D)
    max_systematic = np.max([fit_systematic_1D, fit_systematic_2D])

    logger.debug(f"mixed_event_normalization_uncertainty: {max_systematic}")
    logger.debug("mixed_event_normalization_uncertainty fractional: "
                 f" {max_systematic / mixed_event_normalization}")

    return mixed_event_normalization, max_systematic, histogram.Histogram1D.from_existing_hist(
        peak_finding_hist)