Esempio n. 1
0
def test_numpyAPI_var1d():
    """Compare ``pygram11.histogram`` with ``numpy.histogram`` on explicit bin edges.

    The comparison is made twice on the same random sample: first without
    weights, then with per-entry weights.  Bin contents must agree to five
    decimal places.
    """
    n_entries = 5000
    sample = np.random.randn(n_entries)
    edges = [-1.2, -1, -0.2, 0.7, 1.5, 2.1]
    weights = np.random.uniform(0.5, 1.9, n_entries)

    # An empty kwargs dict is the unweighted case; the second is weighted.
    for extra in ({}, {"weights": weights}):
        got, _ = pygram11.histogram(sample, bins=edges, **extra)
        expected, _ = np.histogram(sample, bins=edges, **extra)
        npt.assert_almost_equal(got, expected, decimal=5)
Esempio n. 2
0
def test_numpyAPI_fix1d():
    """Compare fixed-width ``pygram11.histogram`` against ``numpy.histogram``.

    pygram11 is given a bin count and a range, while numpy is given the
    equivalent explicit edges built with ``numpy.linspace``.  The check is
    run without and then with weights; contents must agree to five decimal
    places.
    """
    n_entries = 5000
    nbins = 25
    lo, hi = -3, 3
    sample = np.random.randn(n_entries)
    weights = np.random.uniform(0.8, 1, n_entries)
    # nbins bins over [lo, hi] correspond to nbins + 1 evenly spaced edges.
    edges = np.linspace(lo, hi, nbins + 1)

    # An empty kwargs dict is the unweighted case; the second is weighted.
    for extra in ({}, {"weights": weights}):
        got, _ = pygram11.histogram(sample, bins=nbins, range=(lo, hi), **extra)
        expected, _ = np.histogram(sample, bins=edges, **extra)
        npt.assert_almost_equal(got, expected, decimal=5)
Esempio n. 3
0
def dist_comparison_plot(
    var: str,
    region: str,
    binning: tuple[int, float, float],
    df: pd.DataFrame,
    y: np.ndarray,
    w: np.ndarray,
    meta_table,
    outdir: Path,
) -> None:
    """Plot the tW and ttbar shapes of one variable together with their ratio.

    Entries with ``y == 1`` are treated as tW and entries with ``y == 0`` as
    ttbar.  Both are histogrammed (``density=True``, ``flow=True``) on a
    uniform binning, drawn as step histograms on the upper pad, and their
    bin-by-bin ratio is drawn on the lower pad.  The figure is written to
    ``outdir / f"r{region}_SC_{var}.pdf"`` and then closed.

    Parameters
    ----------
    var : str
        Column of ``df`` to histogram.
    region : str
        Region tag used in the output file name.
    binning : tuple(int, float, float)
        Number of bins, lower edge and upper edge.
    df : pandas.DataFrame
        Frame holding the ``var`` column.
    y : numpy.ndarray
        Labels; 1 selects tW entries and 0 selects ttbar entries.
    w : numpy.ndarray
        Per-entry weights, indexed with the same masks as ``df[var]``.
    meta_table
        Passed through to ``meta_axis_label`` to build the x-axis label.
    outdir : pathlib.Path
        Directory in which the PDF is saved.
    """
    xmin, xmax = binning[1], binning[2]
    sig_mask = y == 1
    bkg_mask = y == 0
    sig_weights = w[sig_mask]
    bkg_weights = w[bkg_mask]
    sig_values = df[var][sig_mask].to_numpy()
    bkg_values = df[var][bkg_mask].to_numpy()
    edges = np.linspace(xmin, xmax, binning[0] + 1)
    mids = bin_centers(edges)
    bin_width = edges[1] - edges[0]

    fig, (ax, axr) = plt.subplots(
        2,
        1,
        sharex=True,
        gridspec_kw={"height_ratios": [3.25, 1], "hspace": 0.025},
    )

    # Only the bin contents are drawn; the returned uncertainties are unused.
    sig_hist, _ = histogram(
        sig_values, weights=sig_weights, bins=edges, density=True, flow=True
    )
    bkg_hist, _ = histogram(
        bkg_values, weights=bkg_weights, bins=edges, density=True, flow=True
    )
    ratio = sig_hist / bkg_hist

    # Redraw the precomputed contents: one entry per bin centre, weighted by
    # that bin's content.
    ax.hist(mids, weights=sig_hist, bins=edges, label=r"$tW$", histtype="step")
    ax.hist(mids, weights=bkg_hist, bins=edges, label=r"$t\bar{t}$", histtype="step")
    axr.hist(mids, bins=edges, weights=ratio, histtype="step", color="black")

    ax.set_ylabel("Arb. Units", ha="right", y=1.0)
    xlabel, _ = meta_axis_label(var, bin_width, meta_table)
    axr.set_xlabel(xlabel, ha="right", x=1.0)
    axr.set_xlim([xmin, xmax])
    axr.set_ylabel(r"$tW/t\bar{t}$")

    # Add headroom above the histograms on the main pad.
    y_low, y_high = ax.get_ylim()
    ax.set_ylim([y_low, y_high * 1.35])
    ax.legend()

    axr.axhline(y=1, ls="--", color="gray")
    # Widen the ratio pad only when some bin exceeds 2.
    axr.set_ylim([0, 3] if np.amax(ratio) > 2 else [0, 2])

    tdub.art.draw_atlas_label(ax, thesis=True)
    fig.savefig(outdir / f"r{region}_SC_{var}.pdf")
    plt.close(fig)
Esempio n. 4
0
 def test_flow_omp_var():
     """Check ``flow=True`` with ``omp=True`` on variable-width bins, unweighted.

     The reference is built from ``numpy.histogram`` by adding the number of
     entries below the first edge to the first bin and the number of entries
     above the last edge to the last bin.  Counts must match exactly.
     """
     x = np.random.randn(100000)
     bins = [-2, -1.7, -0.5, 0.2, 2.2]
     pygram_h, __ = pygram11.histogram(x, bins=bins, omp=True, flow=True)
     numpy_h, __ = np.histogram(x, bins=bins)
     # Count under/overflow entries with a vectorized NumPy call instead of
     # iterating the boolean mask element by element with builtin sum().
     numpy_h[0] += np.count_nonzero(x < bins[0])
     numpy_h[-1] += np.count_nonzero(x > bins[-1])
     assert np.all(pygram_h == numpy_h)
Esempio n. 5
0
 def test_flow_omp():
     """Check ``flow=True`` with ``omp=True`` on fixed-width bins, unweighted.

     The reference is built from ``numpy.histogram`` by adding the number of
     entries below the range to the first bin and the number of entries above
     the range to the last bin.  Counts must match exactly.
     """
     x = np.random.randn(100000)
     nbins = 50
     rg = (-3, 3)
     pygram_h, __ = pygram11.histogram(x,
                                       bins=nbins,
                                       range=rg,
                                       omp=True,
                                       flow=True)
     numpy_h, __ = np.histogram(x, bins=nbins, range=rg)
     # Count under/overflow entries with a vectorized NumPy call instead of
     # iterating the boolean mask element by element with builtin sum().
     numpy_h[0] += np.count_nonzero(x < rg[0])
     numpy_h[-1] += np.count_nonzero(x > rg[1])
     assert np.all(pygram_h == numpy_h)
Esempio n. 6
0
 def test_flow_weights_omp_var():
     """Check ``flow=True`` with ``omp=True`` on variable-width bins, weighted.

     The reference is built from ``numpy.histogram`` by adding the summed
     weights of entries below the first edge to the first bin and the summed
     weights of entries above the last edge to the last bin.  The comparison
     uses ``numpy.allclose`` because the contents are floating point.
     """
     x = np.random.randn(100000)
     w = np.random.uniform(0.5, 0.8, x.shape[0])
     bins = [-2, -1.7, -0.5, 0.2, 2.2]
     pygram_h, __ = pygram11.histogram(x,
                                       bins=bins,
                                       weights=w,
                                       omp=True,
                                       flow=True)
     numpy_h, __ = np.histogram(x, bins=bins, weights=w)
     # Sum the under/overflow weights with np.sum rather than builtin sum(),
     # which would loop over the NumPy array in Python.
     numpy_h[0] += np.sum(w[x < bins[0]])
     numpy_h[-1] += np.sum(w[x > bins[-1]])
     assert np.allclose(pygram_h, numpy_h)
Esempio n. 7
0
 def test_flow_weights_omp():
     """Check ``flow=True`` with ``omp=True`` on fixed-width bins, weighted.

     The reference is built from ``numpy.histogram`` by adding the summed
     weights of entries below the range to the first bin and the summed
     weights of entries above the range to the last bin.  The comparison uses
     ``numpy.allclose`` because the contents are floating point.
     """
     x = np.random.randn(100000)
     w = np.random.uniform(0.5, 0.8, x.shape[0])
     nbins = 50
     rg = (-3, 3)
     pygram_h, __ = pygram11.histogram(x,
                                       bins=nbins,
                                       range=rg,
                                       weights=w,
                                       omp=True,
                                       flow=True)
     numpy_h, __ = np.histogram(x, bins=nbins, range=rg, weights=w)
     # Sum the under/overflow weights with np.sum rather than builtin sum(),
     # which would loop over the NumPy array in Python.
     numpy_h[0] += np.sum(w[x < rg[0]])
     numpy_h[-1] += np.sum(w[x > rg[1]])
     assert np.allclose(pygram_h, numpy_h)
Esempio n. 8
0
def draw_stack(
    *,
    data_df: pd.DataFrame,
    mc_dfs: List[pd.DataFrame],
    distribution: str,
    weight_name: str = "weight_nominal",
    bins: Union[int, Sequence[numbers.Real]] = 10,
    range: Optional[Tuple[float, float]] = None,
    colors: Optional[Iterable[Any]] = None,
    labels: Optional[Iterable[str]] = None,
    lumi: float = 139.0,
    legend_ncol: int = 2,
    y_scalefac: float = 1.35,
) -> Tuple[plt.Figure, plt.Axes, plt.Axes]:
    """Given dataframes draw the stacked histograms for a distribution.

    The data are histogrammed unweighted with sqrt(N) uncertainties; each
    Monte Carlo frame is histogrammed with its ``weight_name`` column
    multiplied by ``lumi``.  The upper pad shows the stacked MC with the
    data overlaid, the lower pad shows the data/MC ratio with its
    propagated uncertainty.

    Parameters
    ----------
    data_df : pandas.DataFrame
       the dataframe for data
    mc_dfs : list(pandas.DataFrame)
       the list of MC dataframes
    distribution : str
       the variable to histogram
    weight_name : str
       the name of the weight column
    bins : int or sequence of scalars
       the number of bins or sequence representing bin edges
    range : tuple(float, float), optional
       the range to histogram the distribution (used for integral
       bins, ignored if ``bins`` is a sequence).
    colors : list(Any), optional
       the colors for the Monte Carlo histograms, ``None`` defaults to
       the normal colors associated with our standard samples
    labels : list(str), optional
       the list of labels for the legend. ``None`` defaults to the
       normal labels associated with our standard samples
    lumi : float
       the luminosity for the data (to scale the MC)
    legend_ncol : int
       number of columns for the legend
    y_scalefac : float
       factor to scale the default maximum y-axis range by

    Returns
    -------
    :py:obj:`matplotlib.figure.Figure`
       the figure associated with the axes
    :py:obj:`matplotlib.axes.Axes`
       the main axis object which has the plot
    :py:obj:`matplotlib.axes.Axes`
       the axis object which has the ratio plot

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from tdub.raw_art import draw_stack
    >>> mc_dfs = get_mc_dataframes()   # user defined function returns a list of dataframes
    >>> data_df = get_data_dataframe() # user defined function returns a single dataframe
    >>> colors = list(reversed(["#1f77b4", "#d62728", "#2ca02c", "#ff7f0e", "#9467bd"]))
    >>> labels = list(reversed(["$tW$", "$t\\bar{t}$", "Diboson", "$Z+$jets", "MCNP"]))
    >>> fig, ax, axr = draw_stack(
    ...     data_df=data_df,
    ...     mc_dfs=mc_dfs,
    ...     labels=labels,
    ...     colors=colors,
    ...     distribution="mass_lep1jet2",
    ...     bins=25,
    ...     range=(0, 250.0),
    ... )
    >>> fig.savefig("mass_lep1jet2.pdf")
    >>> plt.close(fig)
    """
    data_count, __ = pygram11.histogram(data_df[distribution].to_numpy(),
                                        bins=bins,
                                        range=range,
                                        flow=True)
    data_err = np.sqrt(data_count)
    mc_dists = [df[distribution].to_numpy() for df in mc_dfs]
    mc_ws = [df[weight_name].to_numpy() * lumi for df in mc_dfs]
    mc_hists = [
        pygram11.histogram(mcd, weights=mcw, bins=bins, range=range, flow=True)
        for mcd, mcw in zip(mc_dists, mc_ws)
    ]
    mc_counts = [mcc[0] for mcc in mc_hists]
    mc_errs = [mcc[1] for mcc in mc_hists]
    mc_total = np.sum(mc_counts, axis=0)
    ratio = data_count / mc_total
    mc_total_err = np.sqrt(np.sum([mce**2 for mce in mc_errs], axis=0))
    # Propagate uncertainties for r = D / M with sigma_D^2 = D:
    #   sigma_r^2 = D / M^2 + (D * sigma_M / M^2)^2
    # The error bar is the square root of that variance.
    ratio_var = data_count / (mc_total**2) + np.power(
        data_count * mc_total_err / (mc_total**2), 2)
    ratio_err = np.sqrt(ratio_var)

    # Materialize the optional iterables: ``labels`` is traversed more than
    # once below, so a one-shot iterator would otherwise be exhausted.
    if colors is None:
        colors = ["#1f77b4", "#d62728", "#2ca02c", "#ff7f0e", "#9467bd"]
        colors.reverse()
    else:
        colors = list(colors)
    if labels is None:
        labels = ["$tW$", "$t\\bar{t}$", "Diboson", "$Z+$jets", "MCNP"]
        labels.reverse()
    else:
        labels = list(labels)

    edges, centers = edges_and_centers(bins, range=range)
    fig, (ax, axr) = plt.subplots(
        2,
        1,
        sharex=True,
        figsize=(6, 5.25),
        gridspec_kw=dict(height_ratios=[3.25, 1], hspace=0.025),
    )

    # Redraw the precomputed MC contents: one entry per bin centre, weighted
    # by that bin's content, for each sample.
    ax.hist(
        [centers for _ in labels],
        weights=mc_counts,
        bins=edges,
        histtype="stepfilled",
        label=labels,
        color=colors,
        stacked=True,
    )
    ax.errorbar(centers,
                data_count,
                yerr=data_err,
                fmt="ko",
                label="Data",
                zorder=999)

    ax.set_ylim([0, ax.get_ylim()[1] * y_scalefac])

    ax.legend(loc="upper right")
    # Move the last legend entry to the front before redrawing the legend.
    handles, legend_labels = ax.get_legend_handles_labels()
    handles.insert(0, handles.pop())
    legend_labels.insert(0, legend_labels.pop())
    ax.legend(handles, legend_labels, loc="upper right", ncol=legend_ncol)

    axr.errorbar(centers, ratio, yerr=ratio_err, fmt="ko", zorder=999)
    # Reference line at ratio == 1 across the full histogram range.
    axr.plot([edges[0], edges[-1]], [1, 1],
             color="gray",
             linestyle="solid",
             marker=None)
    axr.set_ylim([0.8, 1.2])
    axr.set_yticks([0.9, 1.0, 1.1])
    axr.autoscale(enable=True, axis="x", tight=True)

    return fig, ax, axr
Esempio n. 9
0
def plot_from_region_frames(
    frames: Dict[str, pd.DataFrame],
    variable: str,
    binning: Tuple[int, float, float],
    region_label: str,
    logy: bool = False,
    legend_kw: Dict[str, Any] = None,
) -> Tuple[plt.Figure, plt.Axes, plt.Axes]:
    """Create a histogram plot from dataframes and a desired variable.

    Every sample in ``ALL_SAMPLES`` is histogrammed with its
    ``"weight_nominal"`` column, and the resulting counts and errors are
    handed to ``canvas_from_counts`` to build the figure.

    Parameters
    ----------
    frames : dict(str, pd.DataFrame)
        the dataframes for all samples
    variable : str
        the variable we want to histogram
    binning : tuple(int, float, float)
        the bin definition (number of bins, start, stop)
    region_label : str
        the region label, drawn as an extra line of the ATLAS label
    logy : bool
        if true set the yscale to log
    legend_kw : dict(str, Any), optional
        keyword arguments passed to :py:meth:`matplotlib.axes.Axes.legend`.
        The dictionary is copied, not modified; ``ncol`` is always set to 2.

    Returns
    -------
    :py:obj:`matplotlib.figure.Figure`
        the figure associated with the axes
    :py:obj:`matplotlib.axes.Axes`
        the main axis object which has the plot
    :py:obj:`matplotlib.axes.Axes`
        the axis object which has the ratio plot

    Notes
    -----
    If ``variable`` is not a column of ``frames["Data"]`` a warning is
    logged and ``(None, None, None)`` is returned.

    """
    if variable not in frames["Data"].columns.to_list():
        # Lazy %-style arguments: the message is only formatted if emitted.
        log.warning("%s not in dataframe; skipping", variable)
        return None, None, None
    nbins, start, stop = binning
    bin_edges = np.linspace(start, stop, nbins + 1)
    counts = {}
    errors = {}
    for samp in ALL_SAMPLES:
        x = frames[samp][variable].to_numpy()
        w = frames[samp]["weight_nominal"].to_numpy()
        count, err = pg.histogram(x,
                                  bins=nbins,
                                  range=(start, stop),
                                  weights=w,
                                  flow=True)
        counts[samp] = count
        errors[samp] = err
    fig, ax, axr = canvas_from_counts(counts, errors, bin_edges)

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    draw_atlas_label(
        ax,
        extra_lines=[r"$\sqrt{s} = 13$ TeV, $L = 139$ fb$^{-1}$", region_label])
    tune_axes(ax, axr, variable, binning, logy=logy)

    # Work on a copy so the caller's dictionary is never mutated.
    legend_opts = {} if legend_kw is None else dict(legend_kw)
    legend_opts["ncol"] = 2
    ax.legend(loc="upper right")
    # Move the last legend entry to the front before redrawing the legend.
    handles, labels = ax.get_legend_handles_labels()
    handles.insert(0, handles.pop())
    labels.insert(0, labels.pop())
    ax.legend(handles, labels, loc="upper right", **legend_opts)

    fig.subplots_adjust(left=0.125, bottom=0.095, right=0.965, top=0.95)
    return fig, ax, axr