def test_numpyAPI_var1d():
    """Compare variable-width 1D histograms against ``numpy.histogram``.

    The comparison is made twice on the same random sample: once
    without weights and once with per-entry weights.
    """
    n_entries = 5000
    data = np.random.randn(n_entries)
    edges = [-1.2, -1, -0.2, 0.7, 1.5, 2.1]
    weights = np.random.uniform(0.5, 1.9, n_entries)

    # First pass is unweighted, second pass forwards the weights to both
    # implementations.
    for extra_kwargs in ({}, {"weights": weights}):
        result, __ = pygram11.histogram(data, bins=edges, **extra_kwargs)
        expected, __ = np.histogram(data, bins=edges, **extra_kwargs)
        npt.assert_almost_equal(result, expected, 5)
def test_numpyAPI_fix1d():
    """Compare fixed-width 1D histograms against ``numpy.histogram``.

    pygram11 is given a bin count and a range, while NumPy is given the
    equivalent explicit edges.  The comparison is made without and with
    per-entry weights.
    """
    x = np.random.randn(5000)
    # Single definition of the binning; the NumPy reference edges are
    # derived from it so both calls always describe the same bins.
    nbins = 25
    rg = (-3, 3)
    edges = np.linspace(rg[0], rg[1], nbins + 1)
    w = np.random.uniform(0.8, 1, 5000)

    pygram_h, __ = pygram11.histogram(x, bins=nbins, range=rg)
    numpy_h, __ = np.histogram(x, bins=edges)
    npt.assert_almost_equal(pygram_h, numpy_h, 5)

    pygram_h, __ = pygram11.histogram(x, bins=nbins, range=rg, weights=w)
    numpy_h, __ = np.histogram(x, bins=edges, weights=w)
    npt.assert_almost_equal(pygram_h, numpy_h, 5)
def dist_comparison_plot(
    var: str,
    region: str,
    binning: tuple[int, float, float],
    df: pd.DataFrame,
    y: np.ndarray,
    w: np.ndarray,
    meta_table,
    outdir: Path,
) -> None:
    """Draw normalized tW and ttbar shapes of a variable plus their ratio.

    Entries with ``y == 1`` are drawn as tW and entries with ``y == 0``
    as ttbar.  The upper panel shows both density histograms, the lower
    panel shows the bin-by-bin ratio tW / ttbar.  The figure is saved to
    ``outdir / "r{region}_SC_{var}.pdf"`` and then closed.

    Parameters
    ----------
    var : str
        Column of ``df`` to histogram.
    region : str
        Region identifier used in the output file name.
    binning : tuple(int, float, float)
        Number of bins, lower edge and upper edge.
    df : pandas.DataFrame
        Dataframe holding the ``var`` column.
    y : numpy.ndarray
        Per-entry labels (1 selects tW, 0 selects ttbar).
    w : numpy.ndarray
        Per-entry weights.
    meta_table
        Forwarded to ``meta_axis_label`` to build the axis labels.
    outdir : pathlib.Path
        Directory the PDF is written to.
    """
    lower, upper = binning[1], binning[2]
    edges = np.linspace(lower, upper, binning[0] + 1)
    mids = bin_centers(edges)
    width = edges[1] - edges[0]

    # Split values and weights by class label.
    sig_mask = y == 1
    bkg_mask = y == 0
    sig_vals = df[var][sig_mask].to_numpy()
    bkg_vals = df[var][bkg_mask].to_numpy()
    sig_w = w[sig_mask]
    bkg_w = w[bkg_mask]

    fig, (ax, axr) = plt.subplots(
        2,
        1,
        sharex=True,
        gridspec_kw=dict(height_ratios=[3.25, 1], hspace=0.025),
    )

    hist_opts = dict(bins=edges, density=True, flow=True)
    sig_n, _sig_e = histogram(sig_vals, weights=sig_w, **hist_opts)
    bkg_n, _bkg_e = histogram(bkg_vals, weights=bkg_w, **hist_opts)
    shape_ratio = sig_n / bkg_n

    # The histograms are already computed; redraw them by filling each
    # bin center with the bin content as its weight.
    ax.hist(mids, weights=sig_n, bins=edges, label=r"$tW$", histtype="step")
    ax.hist(mids, weights=bkg_n, bins=edges, label=r"$t\bar{t}$", histtype="step")
    axr.hist(mids, bins=edges, weights=shape_ratio, histtype="step", color="black")

    ax.set_ylabel("Arb. Units", ha="right", y=1.0)
    x_label, _y_label = meta_axis_label(var, width, meta_table)
    axr.set_xlabel(x_label, ha="right", x=1.0)
    axr.set_xlim([lower, upper])
    axr.set_ylabel(r"$tW/t\bar{t}$")

    # Leave headroom above the histograms for the legend and label.
    y_low, y_high = ax.get_ylim()
    ax.set_ylim([y_low, y_high * 1.35])
    ax.legend()

    axr.axhline(y=1, ls="--", color="gray")
    axr.set_ylim([0, 3] if np.amax(shape_ratio) > 2 else [0, 2])

    tdub.art.draw_atlas_label(ax, thesis=True)
    fig.savefig(outdir / f"r{region}_SC_{var}.pdf")
    plt.close(fig)
def test_flow_omp_var():
    """Check unweighted variable-width histogramming with ``omp`` and ``flow``.

    The NumPy reference is built by histogramming inside the edges and
    then adding the number of entries below the first edge to the first
    bin and the number above the last edge to the last bin.
    """
    x = np.random.randn(100000)
    bins = [-2, -1.7, -0.5, 0.2, 2.2]
    pygram_h, __ = pygram11.histogram(x, bins=bins, omp=True, flow=True)
    numpy_h, __ = np.histogram(x, bins=bins)
    # Count under/overflow entries in native code rather than iterating
    # the boolean mask element by element with the builtin ``sum``.
    numpy_h[0] += np.count_nonzero(x < bins[0])
    numpy_h[-1] += np.count_nonzero(x > bins[-1])
    assert np.all(pygram_h == numpy_h)
def test_flow_omp():
    """Check unweighted fixed-width histogramming with ``omp`` and ``flow``.

    The NumPy reference is built by histogramming inside the range and
    then adding the number of entries below the range to the first bin
    and the number above the range to the last bin.
    """
    x = np.random.randn(100000)
    nbins = 50
    rg = (-3, 3)
    pygram_h, __ = pygram11.histogram(x, bins=nbins, range=rg, omp=True, flow=True)
    numpy_h, __ = np.histogram(x, bins=nbins, range=rg)
    # Count under/overflow entries in native code rather than iterating
    # the boolean mask element by element with the builtin ``sum``.
    numpy_h[0] += np.count_nonzero(x < rg[0])
    numpy_h[-1] += np.count_nonzero(x > rg[1])
    assert np.all(pygram_h == numpy_h)
def test_flow_weights_omp_var():
    """Check weighted variable-width histogramming with ``omp`` and ``flow``.

    The NumPy reference is built by histogramming inside the edges and
    then adding the summed weight of entries below the first edge to the
    first bin and of entries above the last edge to the last bin.
    """
    x = np.random.randn(100000)
    w = np.random.uniform(0.5, 0.8, x.shape[0])
    bins = [-2, -1.7, -0.5, 0.2, 2.2]
    pygram_h, __ = pygram11.histogram(x, bins=bins, weights=w, omp=True, flow=True)
    numpy_h, __ = np.histogram(x, bins=bins, weights=w)
    # Vectorized weight sums; the builtin ``sum`` would loop over the
    # selected weights one Python float at a time.
    numpy_h[0] += w[x < bins[0]].sum()
    numpy_h[-1] += w[x > bins[-1]].sum()
    assert np.allclose(pygram_h, numpy_h)
def test_flow_weights_omp():
    """Check weighted fixed-width histogramming with ``omp`` and ``flow``.

    The NumPy reference is built by histogramming inside the range and
    then adding the summed weight of entries below the range to the
    first bin and of entries above the range to the last bin.
    """
    x = np.random.randn(100000)
    w = np.random.uniform(0.5, 0.8, x.shape[0])
    nbins = 50
    rg = (-3, 3)
    pygram_h, __ = pygram11.histogram(
        x, bins=nbins, range=rg, weights=w, omp=True, flow=True
    )
    numpy_h, __ = np.histogram(x, bins=nbins, range=rg, weights=w)
    # Vectorized weight sums; the builtin ``sum`` would loop over the
    # selected weights one Python float at a time.
    numpy_h[0] += w[x < rg[0]].sum()
    numpy_h[-1] += w[x > rg[1]].sum()
    assert np.allclose(pygram_h, numpy_h)
def draw_stack(
    *,
    data_df: pd.DataFrame,
    mc_dfs: List[pd.DataFrame],
    distribution: str,
    weight_name: str = "weight_nominal",
    bins: Union[int, Sequence[numbers.Real]] = 10,
    range: Optional[Tuple[float, float]] = None,
    colors: Optional[Iterable[Any]] = None,
    labels: Optional[Iterable[str]] = None,
    lumi: float = 139.0,
    legend_ncol: int = 2,
    y_scalefac: float = 1.35,
) -> Tuple[plt.Figure, plt.Axes, plt.Axes]:
    """Given dataframes draw the stacked histograms for a distribution.

    Parameters
    ----------
    data_df : pandas.DataFrame
       the dataframe for data
    mc_dfs : list(pandas.DataFrame)
       the list of MC dataframes
    distribution: str
       the variable to histogram
    weight_name : str
       the name of the weight column
    bins : int or sequence of scalars
       the number of bins or sequence representing bin edges
    range : tuple(float, float), optional
       the range to histogram the distribution (used for integral
       bins, ignored if ``bins`` is a sequence).
    colors : list(Any), optional
       the colors for the Monte Carlo histograms, ``None`` defaults
       to the normal colors associated with our standard samples
    labels : list(str), optional
       the list of labels for the legend. ``None`` defaults to the
       normal labels associated with our standard samples
    lumi : float
       the luminosity for the data (to scale the MC)
    legend_ncol : int
       number of columns for the legend
    y_scalefac : float
       factor to scale the default maximum y-axis range by

    Returns
    -------
    :py:obj:`matplotlib.figure.Figure`
       the figure associated with the axes
    :py:obj:`matplotlib.axes.Axes`
       the main axis object which has the plot
    :py:obj:`matplotlib.axes.Axes`
       the axis object which has the ratio plot

    Examples
    --------

    >>> import matplotlib.pyplot as plt
    >>> from tdub.raw_art import draw_stack
    >>> mc_dfs = get_mc_dataframes() # user defined function returns a list of dataframes
    >>> data_df = get_data_dataframe() # user defined function returns a single dataframe
    >>> colors = list(reversed(["#1f77b4", "#d62728", "#2ca02c", "#ff7f0e", "#9467bd"]))
    >>> labels = list(reversed(["$tW$", "$t\\bar{t}$", "Diboson", "$Z+$jets", "MCNP"]))
    >>> fig, ax, axr = draw_stack(
    ...     data_df=data_df,
    ...     mc_dfs=mc_dfs,
    ...     labels=labels,
    ...     colors=colors,
    ...     distribution="mass_lep1jet2",
    ...     bins=25,
    ...     range=(0, 250.0),
    ... )
    >>> fig.savefig("mass_lep1jet2.pdf")
    >>> plt.close(fig)

    """
    data_count, __ = pygram11.histogram(
        data_df[distribution].to_numpy(), bins=bins, range=range, flow=True
    )
    data_err = np.sqrt(data_count)

    # One weighted histogram per MC sample; weights are scaled by lumi.
    mc_dists = [df[distribution].to_numpy() for df in mc_dfs]
    mc_ws = [df[weight_name].to_numpy() * lumi for df in mc_dfs]
    mc_hists = [
        pygram11.histogram(mcd, weights=mcw, bins=bins, range=range, flow=True)
        for mcd, mcw in zip(mc_dists, mc_ws)
    ]
    mc_counts = [mcc[0] for mcc in mc_hists]
    mc_errs = [mcc[1] for mcc in mc_hists]
    mc_total = np.sum(mc_counts, axis=0)
    mc_total_err = np.sqrt(np.sum([mce**2 for mce in mc_errs], axis=0))

    ratio = data_count / mc_total
    # Uncertainty on data/MC: with var(data) = data_count (data_err is
    # sqrt(data_count)), the two terms below are the propagated variance.
    # The square root turns that variance into the error-bar size that
    # ``errorbar`` expects for ``yerr``.
    ratio_err = np.sqrt(
        data_count / (mc_total**2)
        + np.power(data_count * mc_total_err / (mc_total**2), 2)
    )

    if colors is None:
        colors = ["#1f77b4", "#d62728", "#2ca02c", "#ff7f0e", "#9467bd"]
        colors.reverse()
    if labels is None:
        labels = ["$tW$", "$t\\bar{t}$", "Diboson", "$Z+$jets", "MCNP"]
        labels.reverse()

    edges, centers = edges_and_centers(bins, range=range)

    fig, (ax, axr) = plt.subplots(
        2,
        1,
        sharex=True,
        figsize=(6, 5.25),
        gridspec_kw=dict(height_ratios=[3.25, 1], hspace=0.025),
    )

    # The counts are already binned: draw them by filling every bin
    # center once with the bin content as its weight.
    ax.hist(
        [centers for _ in labels],
        weights=mc_counts,
        bins=edges,
        histtype="stepfilled",
        label=labels,
        color=colors,
        stacked=True,
    )
    ax.errorbar(centers, data_count, yerr=data_err, fmt="ko", label="Data", zorder=999)
    ax.set_ylim([0, ax.get_ylim()[1] * y_scalefac])

    # Move the last legend entry to the front before drawing the legend.
    # Separate local names keep the ``labels`` argument intact.
    legend_handles, legend_labels = ax.get_legend_handles_labels()
    legend_handles.insert(0, legend_handles.pop())
    legend_labels.insert(0, legend_labels.pop())
    ax.legend(legend_handles, legend_labels, loc="upper right", ncol=legend_ncol)

    axr.errorbar(centers, ratio, yerr=ratio_err, fmt="ko", zorder=999)
    axr.plot([edges[0], edges[-1]], [1, 1], color="gray", linestyle="solid", marker=None)
    axr.set_ylim([0.8, 1.2])
    axr.set_yticks([0.9, 1.0, 1.1])

    axr.autoscale(enable=True, axis="x", tight=True)

    return fig, ax, axr
def plot_from_region_frames(
    frames: Dict[str, pd.DataFrame],
    variable: str,
    binning: Tuple[int, float, float],
    region_label: str,
    logy: bool = False,
    legend_kw: Dict[str, Any] = None,
) -> Tuple[plt.Figure, plt.Axes, plt.Axes]:
    """Create a histogram plot from dataframes and a desired variable.

    Parameters
    ----------
    frames : dict(str, pd.DataFrame)
       the dataframes for all samples
    variable : str
       the variable we want to histogram
    binning : tuple(int, float, float)
       the bin definition (number of bins, start, stop)
    region_label : str
       the region label, drawn as an extra line below the ATLAS label
    logy : bool
       forwarded to ``tune_axes`` as its ``logy`` argument
    legend_kw : dict(str, Any)
       keyword arguments passed to the final ``ax.legend`` call; the
       ``ncol`` entry is always set to 2. The dictionary passed in is
       not modified.

    Returns
    -------
    tuple
       the figure, the main axis and the ratio axis; if ``variable``
       is not a column of ``frames["Data"]`` a warning is logged and
       ``(None, None, None)`` is returned instead.

    """
    if variable not in frames["Data"].columns.to_list():
        log.warning("%s not in dataframe; skipping", variable)
        return None, None, None

    nbins, start, stop = binning
    bin_edges = np.linspace(start, stop, nbins + 1)

    counts = {}
    errors = {}
    for samp in ALL_SAMPLES:
        x = frames[samp][variable].to_numpy()
        w = frames[samp]["weight_nominal"].to_numpy()
        count, err = pg.histogram(x, bins=nbins, range=(start, stop), weights=w, flow=True)
        counts[samp] = count
        errors[samp] = err

    fig, ax, axr = canvas_from_counts(counts, errors, bin_edges)

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    draw_atlas_label(
        ax,
        extra_lines=[r"$\sqrt{s} = 13$ TeV, $L = 139$ fb$^{-1}$", region_label],
    )
    tune_axes(ax, axr, variable, binning, logy=logy)

    # Copy before forcing ``ncol`` so the caller's dictionary is untouched.
    legend_opts = {} if legend_kw is None else dict(legend_kw)
    legend_opts["ncol"] = 2

    # Move the last legend entry to the front before drawing the legend.
    handles, labels = ax.get_legend_handles_labels()
    handles.insert(0, handles.pop())
    labels.insert(0, labels.pop())
    ax.legend(handles, labels, loc="upper right", **legend_opts)

    fig.subplots_adjust(left=0.125, bottom=0.095, right=0.965, top=0.95)
    return fig, ax, axr