def test_unbinned_data():
    """Compare a binned Gauss pdf with its unbinned counterpart on unbinned points.

    Both pdfs come from ``create_gauss_binned``. They are evaluated on a dense
    grid and on the bin centers and have to agree within an absolute tolerance
    derived from the maximum of the unbinned pdf. Of three extra probe points
    the outer two (-7 and 12) must evaluate to exactly zero and the middle one
    (3) to a positive value. Two comparison plots are saved.
    """
    n_events = 751
    # NOTE(review): the second argument (70) is presumably the number of bins.
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(n_events, 70)

    bin_centers = obs_binned.binning.centers[0]
    grid = znp.linspace(-5, 10, 200)

    # Dense grid: loose tolerance of 10% of the pdf maximum.
    pdf_binned_grid = gauss_binned.pdf(grid)
    pdf_true_grid = gauss.pdf(grid)
    tolerance = np.max(pdf_true_grid) / 10
    np.testing.assert_allclose(pdf_true_grid, pdf_binned_grid, atol=tolerance)

    # Bin centers: ten times tighter than on the dense grid.
    pdf_binned_centers = gauss_binned.pdf(bin_centers)
    pdf_true_centers = gauss.pdf(bin_centers)
    np.testing.assert_allclose(
        pdf_binned_centers, pdf_true_centers, atol=tolerance / 10
    )

    probe_points = znp.array([-7.0, 3.0, 12])
    probe_values = gauss_binned.pdf(probe_points)
    assert probe_values[0] == 0
    assert probe_values[1] > 0
    assert probe_values[2] == 0

    comparisons = (
        (
            "Binned Gauss evaluated on unbinned edges",
            bin_centers,
            pdf_true_centers,
            pdf_binned_centers,
        ),
        (
            "Binned Gauss evaluated on unbinned data",
            grid,
            pdf_true_grid,
            pdf_binned_grid,
        ),
    )
    for title, points, unbinned_values, binned_values in comparisons:
        plt.figure()
        plt.title(title)
        plt.plot(points, unbinned_values, label="unbinned pdf")
        plt.plot(points, binned_values, "--", label="binned pdf")
        plt.legend()
        pytest.zfit_savefig()
def test_spline_from_binned_from_unbinned():
    """Check that a ``SplinePDF`` built on a binned Gauss follows the original Gauss.

    A sample drawn from the binned pdf is plotted against the binned counts.
    Then the extended pdf of the spline interpolation is compared with the
    extended pdf of the original unbinned Gauss on a grid, using an absolute
    tolerance of 50.
    """
    n_events = 1004
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(n_events)
    grid = znp.linspace(-5, 10, n_events // 5)

    binned_sample = gauss_binned.sample(n_events, limits=obs_binned)

    plt.figure()
    plt.title("Comparison of binned gaussian and sample")
    mplhep.histplot(binned_sample.to_hist(), label="sampled binned")
    centers = obs_binned.binning.centers[0]
    expected_counts = gauss_binned.counts(obs_binned)
    plt.plot(centers, expected_counts, label="counts binned")
    plt.legend()
    pytest.zfit_savefig()

    spline_gauss = SplinePDF(gauss_binned, obs=obs)
    interpolated = spline_gauss.ext_pdf(grid)
    original = gauss.ext_pdf(grid)

    plt.figure()
    plt.title("Comparison of unbinned gauss to binned to interpolated")
    centers = obs_binned.binning.centers[0]
    binned_density = gauss_binned.ext_pdf(obs_binned)
    plt.plot(centers, binned_density, "x", label="binned")
    plt.plot(grid, original, label="original")
    plt.plot(grid, interpolated, ".", label="interpolated")
    plt.legend()
    pytest.zfit_savefig()

    np.testing.assert_allclose(interpolated, original, atol=50)
def test_unbinned_from_binned_from_unbinned():
    """Round-trip a Gauss through binning and ``UnbinnedFromBinnedPDF``.

    Two things are verified:

    * the extended pdf of the re-unbinned model agrees with the original
      Gauss on a grid (absolute tolerance 50);
    * large samples drawn from both models, binned with the same binning,
      agree bin by bin within 7 standard deviations.

    Comparison plots are saved along the way.
    """
    n_events = 1004
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(n_events)
    grid = znp.linspace(-5, 10, n_events // 5)

    binned_sample = gauss_binned.sample(n_events, limits=obs_binned)

    plt.figure()
    plt.title("Comparison of binned gaussian and sample")
    mplhep.histplot(binned_sample.to_hist(), label="sampled binned")
    centers = obs_binned.binning.centers[0]
    expected_counts = gauss_binned.counts(obs_binned)
    plt.plot(centers, expected_counts, label="counts binned")
    plt.legend()
    pytest.zfit_savefig()

    unbinned = zfit.pdf.UnbinnedFromBinnedPDF(gauss_binned, obs=obs)
    reconstructed = unbinned.ext_pdf(grid)
    original = gauss.ext_pdf(grid)

    plt.figure()
    plt.title("Comparison of unbinned gauss to binned to unbinned again")
    centers = obs_binned.binning.centers[0]
    binned_density = gauss_binned.ext_pdf(obs_binned)
    plt.plot(centers, binned_density, "x", label="binned")
    plt.plot(grid, original, label="original")
    plt.plot(grid, reconstructed, ".", label="unbinned")
    plt.legend()
    pytest.zfit_savefig()
    np.testing.assert_allclose(reconstructed, original, atol=50)

    # Sampling comparison; the re-unbinned model is sampled first, then the
    # original one, so the order of random draws stays fixed.
    nsample = 500000
    hist_roundtrip = unbinned.sample(nsample).to_binned(obs_binned).to_hist()
    hist_original = gauss.sample(nsample).to_binned(obs_binned).to_hist()

    plt.figure()
    plt.title("Comparison of unbinned gaussian and unbinned from binned sampled")
    mplhep.histplot(hist_roundtrip, label="unbinned from binned")
    mplhep.histplot(hist_original, label="original")
    plt.legend()
    pytest.zfit_savefig()

    # Pull per bin; the +1 in the variance keeps the denominator non-zero.
    difference = hist_roundtrip.values() - hist_original.values()
    uncertainty = (hist_original.variances() + 1) ** 0.5
    pull = difference / uncertainty
    np.testing.assert_allclose(pull, 0, atol=7)  # 7 sigma away
def test_conv_1d_shifted(interpolation):
    """Compare ``FFTConvPDFV1`` with a numpy convolution for a shifted function.

    The function lives on (5, 15) while the kernel lives on a symmetric
    interval around zero, so the convolution has to handle the offset between
    the two spaces correctly.

    Args:
        interpolation: Interpolation method forwarded to ``FFTConvPDFV1``
            (presumably supplied through a parametrization that is not part of
            this block). It was previously accepted but never used, so every
            parametrized case silently tested the default interpolation.
    """
    kerlim = (-3, 3)  # symmetric to make the np conv comparison simple
    obs_kernel = zfit.Space("obs1", limits=kerlim)
    obs = zfit.Space("obs1", limits=(5, 15))

    # Function to be convolved: a KDE of uniform random data plus a narrow box.
    func1 = zfit.pdf.GaussianKDE1DimV1(
        obs=obs, data=np.random.uniform(6, 12, size=100)
    )
    func2 = zfit.pdf.Uniform(11, 11.5, obs=obs)
    func = zfit.pdf.SumPDF([func1, func2], 0.5)

    # Kernel: a sum of two Gaussians, one of them off-center.
    func1k = zfit.pdf.Gauss(0.0, 1, obs=obs_kernel)
    func2k = zfit.pdf.Gauss(1.0, 0.4, obs=obs_kernel)
    funck = zfit.pdf.SumPDF([func1k, func2k], 0.5)

    conv = zfit.pdf.FFTConvPDFV1(
        func=func,
        kernel=funck,
        n=200,
        interpolation=interpolation,
    )

    # One common grid spanning kernel and function space, then split by space.
    xnp = tf.linspace(obs_kernel.rect_lower, obs.rect_upper, 4023)
    kernel_points = obs_kernel.filter(xnp)
    x = obs.filter(xnp)

    probs = conv.pdf(x=x)
    # true convolution
    true_conv = true_conv_np(func, funck, obs, x=x, xkernel=kernel_points)
    integral = conv.integrate(limits=obs)
    probs_np = probs.numpy()

    np.testing.assert_allclose(probs_np, true_conv, rtol=0.01, atol=0.01)
    # The convolution pdf has to stay normalized over the function space.
    assert pytest.approx(1, rel=1e-3) == integral.numpy()

    plt.figure()
    plt.title("Conv FFT 1Dim shift testing")
    plt.plot(x, probs_np, label="zfit")
    plt.plot(x, true_conv, label="numpy")
    plt.legend()
    pytest.zfit_savefig()
def test_binned_from_unbinned_2D():
    """Validate ``BinnedFromUnbinnedPDF`` for a 2D product of Gaussians.

    The x axis uses a variable binning (finer above 5), the y axis a regular
    one. The relative counts of the binned pdf are compared with a large
    histogram filled from numpy normal samples, and a sample drawn from the
    binned pdf is compared with the pdf's own histogram. The average time of
    ``rel_counts`` is printed and two 2D plots are saved.
    """
    zfit.run.set_graph_mode(True)
    n = 100000

    mu = zfit.Parameter("mu", 1, 0, 19)
    sigma = zfit.Parameter("sigma", 6, 0, 120)
    obsx = zfit.Space("x", (-5, 10))
    obsy = zfit.Space("y", (-50, 100))
    gaussx = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obsx)
    muy = mu + 3
    sigmay = sigma * 20
    gaussy = zfit.pdf.Gauss(mu=muy, sigma=sigmay, obs=obsy)
    gauss2D = zfit.pdf.ProductPDF([gaussx, gaussy])

    # 42 bins on [-5, 5] followed by 29 bins on [5, 10]; the duplicated edge
    # at 5 is dropped from the second piece.
    x_edges = np.concatenate(
        [np.linspace(-5, 5, 43), np.linspace(5, 10, 30)[1:]], axis=0
    )
    axisx = zfit.binned.VariableBinning(x_edges, name="x")
    axisxhist = hist.axis.Variable(x_edges, name="x")
    axisy = zfit.binned.RegularBinning(15, -50, 100, name="y")
    axisyhist = hist.axis.Regular(15, -50, 100, name="y")

    obs_binnedx = zfit.Space(["x"], binning=axisx)
    obs_binnedy = zfit.Space("y", binning=axisy)
    obs_binned = obs_binnedx * obs_binnedy

    gauss_binned = BinnedFromUnbinnedPDF(pdf=gauss2D, space=obs_binned, extended=n)
    # First call happens outside of the timed loop below.
    values = gauss_binned.rel_counts(obs_binned)  # TODO: good test?

    ntrial = 10
    start = time.time()
    for _ in range(ntrial):
        values = gauss_binned.rel_counts(obs_binned)
    print(f"Time taken {(time.time() - start) / ntrial}")

    # Reference histogram filled from numpy with the same means and widths.
    hist2d = hist.Hist(axisxhist, axisyhist)
    nruns = 5
    npoints = 5_000_000
    means = [float(mu), float(muy)]
    widths = [float(sigma), float(sigmay)]
    for _ in range(nruns):
        normal2d = np.random.normal(means, widths, size=(npoints, 2))
        hist2d.fill(*normal2d.T, threads=4)

    # Negative everywhere if each bin deviates by less than 6.5 times the
    # square root of its reference counts.
    expected_counts = values * hist2d.sum()
    allowed = 6.5 * np.sqrt(hist2d.counts())
    diff = np.abs(expected_counts - hist2d.counts()) - allowed
    print(diff)
    np.testing.assert_array_less(diff, 0)

    sample = gauss_binned.sample(n, limits=obs_binned)
    hist_sampled = sample.to_hist()
    hist_pdf = gauss_binned.to_hist()
    # Squared deviation has to stay below (6 sigma)^2 with sigma^2 = counts.
    max_error = hist_sampled.values() * 6**2
    squared_deviation = (hist_sampled.values() - hist_pdf.values()) ** 2
    np.testing.assert_array_less(squared_deviation, max_error)

    plots = (
        ("Gauss 2D binned sampled.", hist_sampled),
        (
            "Gauss 2D binned plot, irregular (x<4.5 larger bins than x>4.5) binning.",
            hist_pdf,
        ),
    )
    for title, histogram in plots:
        plt.figure()
        plt.title(title)
        mplhep.hist2dplot(histogram)
        pytest.zfit_savefig()
def test_morphing_templates(alphas):
    """Check that ``SplineMorphingPDF`` reproduces and interpolates its templates.

    Random template histograms are built around a common base. At every
    template value of ``alpha`` the morphed counts have to equal the template
    counts; halfway between two neighbouring templates the morphed counts
    have to stay close to both neighbours. Finally the morphing is plotted
    once as a 3D step plot and once as overlaid histograms.

    Args:
        alphas: Template positions, or ``None``. When given, a fourth template
            is generated and the pdfs are passed to the morphing pdf as a
            mapping ``alpha -> pdf``; when ``None`` three templates are passed
            as a list and the positions ``[-1, 0, 1]`` are used for the checks.
    """
    bins1 = 15
    irregular_str = "irregular templates" if alphas is not None else ""

    counts1 = np.random.uniform(70, high=100, size=bins1)  # generate counts
    counts = [
        counts1 - np.random.uniform(high=20, size=bins1),
        counts1,
        counts1 + np.random.uniform(high=20, size=bins1),
    ]
    if alphas is not None:
        counts.append(counts1 + np.random.uniform(high=5, size=bins1))

    binning = zfit.binned.RegularBinning(bins1, 0, 10, name="obs1")
    obs = zfit.Space(obs="obs1", binning=binning)
    histos = [BinnedData.from_tensor(obs, count) for count in counts]
    pdfs = [zfit.pdf.HistogramPDF(h) for h in histos]
    if alphas is not None:
        pdfs = dict(zip(alphas, pdfs))

    alpha = zfit.Parameter("alpha", 0, -5, 5)
    morph = SplineMorphingPDF(alpha=alpha, hists=pdfs)
    if alphas is None:
        alphas = [-1, 0, 1]

    for i, a in enumerate(alphas):
        # Exactly on a template the morphing has to return that template.
        alpha.set_value(a)
        np.testing.assert_allclose(morph.counts(), counts[i])
        if len(alphas) > i + 1:
            # Halfway to the next template: stay close to both neighbours.
            alpha.set_value((a + alphas[i + 1]) / 2)
            max_dist = (counts[i] - counts[i + 1]) ** 2 + 5  # tolerance
            max_dist *= 1.1  # not strict, it can be a bit higher
            np.testing.assert_array_less(
                (morph.counts() - counts[i]) ** 2, max_dist
            )
            np.testing.assert_array_less(
                (morph.counts() - counts[i + 1]) ** 2, max_dist
            )

    amin, amax = -2, 2
    n = 5
    template_alphas = np.array(list(alphas))
    # ``matplotlib.cm.get_cmap`` was removed in Matplotlib 3.9; the pyplot
    # accessor is still available, so look the colormap up once through it.
    colormap = plt.get_cmap("winter")

    for do_3d in [True, False]:
        plt.figure()
        if do_3d:
            ax = plt.gcf().add_subplot(111, projection="3d")
        else:
            ax = plt.gca()
        plotstyle = "3d plot" if do_3d else "hist plot"
        plt.title(f"Morphing with splines {irregular_str} {plotstyle}")

        for a in list(znp.linspace(amin, amax, n * 2)) + list(template_alphas):
            # Map alpha onto the colormap; 1.3 is a scaling factor that
            # compresses the range of colors used.
            normed_a = (a - amin) / (amax - amin) / 1.3
            color = colormap(normed_a)
            alpha.set_value(a)
            histo = morph.ext_pdf(None)
            histo = BinnedData.from_tensor(obs, histo)
            histo = histo.to_hist()
            values = histo.values()
            x = histo.axes.edges[0][:-1]
            y = np.broadcast_to(a, values.shape)
            z = values
            label = None
            if do_3d:
                ax.step(x, y, z, color=color, where="pre", label=label)
            else:
                # Only label the curves that sit on a template position.
                if np.min((a - template_alphas) ** 2) < 0.0001:
                    label = f"alpha={a}"
                mplhep.histplot(histo, label=label, color=color)

        ax.set_xlabel("observable")
        ax.set_ylabel("alpha")
        if do_3d:
            ax.set_zlabel("ext_pdf")
        plt.legend()
        pytest.zfit_savefig()
def test_binned_loss(weights, Loss, simultaneous):
    """Fit a binned Gauss with the given binned loss and check values and errors.

    Args:
        weights: Weights passed to ``zfit.Data.from_tensor``; ``None`` means
            unweighted data.
        Loss: Binned loss class under test. It is called both with
            ``model=``/``data=`` keywords and with a list of models.
        simultaneous: If true, the loss is built from two binned pdfs with
            different binnings (32 and 14 bins) that share their parameters.

    After the first fit the Hesse and asymmetric errors as well as the fitted
    values are compared with reference numbers. A second fit with a Gaussian
    constraint on ``mu2``/``sigma2`` is then run, and for unweighted data the
    fitted values are checked against the sample mean and standard deviation.
    """
    obs = zfit.Space("obs1", limits=(-15, 25))
    gaussian1, mu1, sigma1 = create_gauss1(obs=obs)
    gaussian2, mu2, sigma2 = create_gauss2(obs=obs)

    # Shift and stretch the module-level test values; the subtraction creates
    # a new array, so the in-place scaling does not touch the original.
    test_values_np_shifted = test_values_np - 1.8
    test_values_np_shifted *= 1.2
    test_values = zfit.Data.from_tensor(
        obs=obs, tensor=znp.array(test_values_np_shifted), weights=weights
    )

    init_yield = test_values_np.shape[0] * 1.2
    scale = zfit.Parameter("yield", init_yield, 0, init_yield * 4, step_size=1)

    binning = zfit.binned.RegularBinning(32, obs.lower[0], obs.upper[0], name="obs1")
    obs_binned = obs.with_binning(binning)
    test_values_binned = test_values.to_binned(obs_binned)
    binned_gauss = zfit.pdf.BinnedFromUnbinnedPDF(gaussian1, obs_binned, extended=scale)

    if not simultaneous:
        loss = Loss(model=binned_gauss, data=test_values_binned)
    else:
        obs_binned2 = obs.with_binning(14)
        test_values_binned2 = test_values.to_binned(obs_binned2)
        binned_gauss2 = zfit.pdf.BinnedFromUnbinnedPDF(
            gaussian1, obs_binned2, extended=scale
        )
        loss = Loss(
            [binned_gauss, binned_gauss2],
            data=[test_values_binned, test_values_binned2],
        )

    weight_note = " (randomly weighted)" if weights is not None else ""
    plt.figure()
    plt.title(f"Binned gaussian fit{weight_note} with {loss.name}")
    mplhep.histplot(binned_gauss.to_hist(), label="PDF before fit")
    mplhep.histplot(test_values_binned.to_hist(), label="Data")

    # A manual timing loop over loss.value() / loss.gradient() can be put
    # here when profiling the loss.

    minimizer = zfit.minimize.Minuit(gradient=False)
    result = minimizer.minimize(loss=loss)
    params = result.params
    mplhep.histplot(binned_gauss.to_hist(), label="PDF after fit")
    plt.legend()
    pytest.zfit_savefig()

    result.hesse(name="hesse")
    result.errors(name="asymerr")
    print(result)

    # Reference uncertainties, paired with the parameter they belong to.
    rel_tol_errors = 0.1
    expected_errors = [
        (mu1, 0.021 if simultaneous else 0.03),
        (sigma1, 0.0156 if simultaneous else 0.022),
    ]
    if loss.is_extended:
        expected_errors.append((scale, 122 if simultaneous else 170))

    for param, errorval in expected_errors:
        fitted = result.params[param]
        hesse_error = fitted["hesse"]["error"]
        lower_error = fitted["asymerr"]["lower"]
        upper_error = fitted["asymerr"]["upper"]
        assert pytest.approx(hesse_error, rel=rel_tol_errors) == errorval
        assert pytest.approx(lower_error, rel=rel_tol_errors) == -errorval
        assert pytest.approx(upper_error, rel=rel_tol_errors) == errorval

    abs_tol_val = 0.15 if weights is None else 0.08
    if isinstance(loss, zfit.loss.BinnedChi2):
        abs_tol_val *= 2

    assert params[mu1]["value"] == pytest.approx(
        np.mean(test_values_np_shifted), abs=abs_tol_val
    )
    assert params[sigma1]["value"] == pytest.approx(
        np.std(test_values_np_shifted), abs=abs_tol_val
    )
    if loss.is_extended:
        nexpected = test_values_np_shifted.shape[0]
        assert params[scale]["value"] == pytest.approx(
            nexpected, abs=3 * nexpected**0.5
        )

    # Second fit: a different Gauss with a Gaussian constraint on its
    # parameters, minimizing only mu2 and sigma2.
    constraints = zfit.constraint.GaussianConstraint(
        params=[mu2, sigma2],
        observation=[mu_constr[0], sigma_constr[0]],
        uncertainty=[mu_constr[1], sigma_constr[1]],
    )
    gaussian2 = zfit.models.tobinned.BinnedFromUnbinnedPDF(
        gaussian2, obs_binned, extended=scale
    )
    loss = Loss(model=gaussian2, data=test_values_binned, constraints=constraints)
    minimizer = zfit.minimize.Minuit(gradient=False)
    result = minimizer.minimize(loss=loss, params=[mu2, sigma2])
    params = result.params

    if weights is None:
        assert params[mu2]["value"] > np.mean(test_values_np_shifted)
        assert params[sigma2]["value"] < np.std(test_values_np_shifted)