def test_z_numpy_ndarray_is_tensorflow_tensor():
    """In TensorFlow 2.4.1, tf.experimental.numpy.ndarray was a wrapper around tf.Tensor.

    This concept has since been scrapped and tf.experimental.numpy.ndarray is now just
    an alias for tf.Tensor. See the commit history of
    https://github.com/tensorflow/tensorflow/commits/master/tensorflow/python/ops/numpy_ops/np_arrays.py
    """
    assert znp.ndarray is tf.Tensor
    assert isinstance(znp.array(1), tf.Tensor)
    assert isinstance(znp.sum(znp.array(0)), tf.Tensor)
def unbinned_to_hist_eager(values, edges, weights=None):
    if weights is not None and weights.shape == () and None in weights:
        weights = None
    binning = [
        hist.axis.Variable(np.reshape(edge, (-1,)), flow=False) for edge in edges
    ]
    h = hist.Hist(*binning, storage=hist.storage.Weight())
    h.fill(*(values[:, i] for i in range(values.shape[1])), weight=weights)
    return znp.array(h.values(flow=False), znp.float64), znp.array(
        h.variances(flow=False), znp.float64
    )
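# Usage sketch (illustrative, not part of the original code base): unbinned_to_hist_eager
# runs eagerly on concrete arrays, so it can be exercised directly with NumPy inputs.
# The toy values and the single-axis edges below are invented for demonstration; the
# module-level imports (np, hist, znp) are assumed as in the surrounding code.
def example_unbinned_to_hist_eager():
    values_demo = np.array([[0.1], [0.5], [0.7], [1.5]])  # shape (n_events, n_dims)
    edges_demo = (np.array([0.0, 1.0, 2.0]),)  # one axis with two bins
    counts, variances = unbinned_to_hist_eager(values_demo, edges_demo)
    # counts == [3., 1.]; without weights, the variances equal the counts
    return counts, variances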
def _rel_counts(self, x, norm):
    pdf = self.pdfs[0]
    edges = [znp.array(edge) for edge in self.axes.edges]
    edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
    lowers = [edge[:-1] for edge in edges_flat]
    uppers = [edge[1:] for edge in edges_flat]
    lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
    uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
    shape = tf.shape(lowers_meshed[0])
    lowers_meshed_flat = [
        znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed
    ]
    uppers_meshed_flat = [
        znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed
    ]
    lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
    upper_flat = znp.stack(uppers_meshed_flat, axis=-1)
    options = {"type": "bins"}

    @z.function
    def integrate_one(limits):
        l, u = tf.unstack(limits)
        limits_space = zfit.Space(obs=self.obs, limits=[l, u])
        return pdf.integrate(limits_space, norm=False, options=options)

    limits = znp.stack([lower_flat, upper_flat], axis=1)
    values = tf.vectorized_map(integrate_one, limits)
    values = znp.reshape(values, shape)
    if norm:
        values /= pdf.normalization(norm)
    return values
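# Standalone NumPy sketch (illustrative) of the edge-meshing idea used in _rel_counts:
# per-axis bin lowers/uppers are meshed and flattened so that every row of the stacked
# arrays describes the lower/upper corner of one bin, ready for a vectorized integral.
# All names below are made up for the example.
def example_mesh_bin_limits():
    edges_x = np.array([0.0, 1.0, 2.0])
    edges_y = np.array([-1.0, 0.0, 1.0])
    lowers = [edges_x[:-1], edges_y[:-1]]
    uppers = [edges_x[1:], edges_y[1:]]
    lowers_meshed = np.meshgrid(*lowers, indexing="ij")
    uppers_meshed = np.meshgrid(*uppers, indexing="ij")
    lower_flat = np.stack([m.reshape(-1) for m in lowers_meshed], axis=-1)
    upper_flat = np.stack([m.reshape(-1) for m in uppers_meshed], axis=-1)
    # lower_flat[k], upper_flat[k] are the corners of the k-th bin (4 bins here)
    return lower_flat, upper_flat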
def test_binned_chi2_loss(Loss, empty, errors):
    # TODO: add test with zeros in bins
    obs = zfit.Space("obs1", limits=(-1, 2))
    gaussian1, mu1, sigma1 = create_gauss1(obs=obs)
    test_values_np_shifted = test_values_np - 1.8  # shift them a bit
    test_values_np_shifted *= 1.2
    test_values = znp.array(test_values_np_shifted)
    test_values = zfit.Data.from_tensor(obs=obs, tensor=test_values)
    init_yield = test_values_np.shape[0] * 1.2
    scale = zfit.Parameter("yield", init_yield, 0, init_yield * 4, step_size=1)
    binning = zfit.binned.RegularBinning(32, obs.lower[0], obs.upper[0], name="obs1")
    obs_binned = obs.with_binning(binning)
    test_values_binned = test_values.to_binned(obs_binned)
    binned_gauss = zfit.models.tobinned.BinnedFromUnbinnedPDF(
        gaussian1, obs_binned, extended=scale
    )

    loss = Loss(
        model=binned_gauss,
        data=test_values_binned,
        options={"empty": empty, "errors": errors},
    )
    loss.value_gradient(loss.get_params())
def _check_init_values(self, space, values, variances):
    value_shape = tf.shape(values)
    edges_shape = znp.array(
        [tf.shape(znp.reshape(edge, (-1,)))[0] for edge in space.binning.edges]
    )
    values_rank = value_shape.shape[0]
    if variances is not None:
        variances_shape = tf.shape(variances)
        variances_rank = variances_shape.shape[0]
        if values_rank != variances_rank:
            raise ShapeIncompatibleError(
                f"Values {values} and variances {variances} differ in rank:"
                f" {values_rank} vs {variances_rank}"
            )
        tf.assert_equal(
            variances_shape,
            value_shape,
            message=f"Variances and values do not have the same shape:"
            f" {variances_shape} vs {value_shape}",
        )
    binning_rank = len(space.binning.edges)
    if binning_rank != values_rank:
        raise ShapeIncompatibleError(
            f"Values and binning differ in rank: {values_rank} vs {binning_rank}"
        )
    tf.assert_equal(
        edges_shape - 1,
        value_shape,
        message=f"Edges (minus one) and values do not have the same shape:"
        f" {edges_shape} vs {value_shape}",
    )
def _sample(self, n, limits: ZfitSpace):
    pdf = self.pdfs[0]
    # TODO: use real limits, currently not supported in binned sample
    sample = pdf.sample(n=n)

    edges = sample.space.binning.edges
    ndim = len(edges)
    edges = [znp.array(edge) for edge in edges]
    edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
    lowers = [edge[:-1] for edge in edges_flat]
    uppers = [edge[1:] for edge in edges_flat]
    lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
    uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
    lowers_meshed_flat = [
        znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed
    ]
    uppers_meshed_flat = [
        znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed
    ]
    lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
    upper_flat = znp.stack(uppers_meshed_flat, axis=-1)

    counts_flat = znp.reshape(sample.values(), (-1,))
    counts_flat = tf.cast(counts_flat, znp.int32)  # TODO: what if we have fractions?
    lower_flat_repeated = tf.repeat(lower_flat, counts_flat, axis=0)
    upper_flat_repeated = tf.repeat(upper_flat, counts_flat, axis=0)
    sample_unbinned = tf.random.uniform(
        (znp.sum(counts_flat), ndim),
        minval=lower_flat_repeated,
        maxval=upper_flat_repeated,
        dtype=self.dtype,
    )
    return sample_unbinned
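# NumPy sketch (illustrative) of the binned-to-unbinned sampling trick used in _sample:
# repeat each bin's corners once per count, then draw uniformly between the repeated
# lower and upper corners so every event lands inside its bin. The names and numbers
# below are invented for the example.
def example_uniform_in_bins():
    lower_flat = np.array([[0.0], [1.0]])
    upper_flat = np.array([[1.0], [2.0]])
    counts = np.array([3, 1])
    lower_rep = np.repeat(lower_flat, counts, axis=0)
    upper_rep = np.repeat(upper_flat, counts, axis=0)
    # 3 events in [0, 1) and 1 event in [1, 2)
    return np.random.uniform(lower_rep, upper_rep)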
def test_unbinned_data():
    n = 751
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(n, 70)
    x = znp.linspace(-5, 10, 200)
    centers = obs_binned.binning.centers[0]
    y_binned = gauss_binned.pdf(x)
    y_true = gauss.pdf(x)
    max_error = np.max(y_true) / 10
    np.testing.assert_allclose(y_true, y_binned, atol=max_error)

    ycenter_binned = gauss_binned.pdf(centers)
    ycenter_true = gauss.pdf(centers)
    np.testing.assert_allclose(ycenter_binned, ycenter_true, atol=max_error / 10)

    x_outside = znp.array([-7.0, 3.0, 12])
    y_outside = gauss_binned.pdf(x_outside)
    assert y_outside[0] == 0
    assert y_outside[1] > 0
    assert y_outside[2] == 0

    plt.figure()
    plt.title("Binned Gauss evaluated on unbinned edges")
    plt.plot(centers, ycenter_true, label="unbinned pdf")
    plt.plot(centers, ycenter_binned, "--", label="binned pdf")
    plt.legend()
    pytest.zfit_savefig()
    # plt.show()

    plt.figure()
    plt.title("Binned Gauss evaluated on unbinned data")
    plt.plot(x, y_true, label="unbinned pdf")
    plt.plot(x, y_binned, "--", label="binned pdf")
    plt.legend()
    pytest.zfit_savefig()
def test_binned_loss_hist(weights, Loss):
    obs = zfit.Space("obs1", limits=(-15, 25))
    gaussian1, mu1, sigma1 = create_gauss1(obs=obs)
    test_values_np_shifted = test_values_np - 1.8  # shift them a bit
    test_values_np_shifted *= 1.2
    test_values = znp.array(test_values_np_shifted)
    test_values = zfit.Data.from_tensor(obs=obs, tensor=test_values, weights=weights)
    init_yield = test_values_np.shape[0] * 1.2
    scale = zfit.Parameter("yield", init_yield, 0, init_yield * 4, step_size=1)
    binning = zfit.binned.RegularBinning(32, obs.lower[0], obs.upper[0], name="obs1")
    obs_binned = obs.with_binning(binning)
    test_values_binned = test_values.to_binned(obs_binned)
    h = test_values_binned.to_hist()
    binned_gauss = zfit.models.tobinned.BinnedFromUnbinnedPDF(
        gaussian1, obs_binned, extended=scale
    )

    loss = Loss(model=binned_gauss, data=h)
    loss2 = Loss(model=binned_gauss, data=test_values_binned)
    # compare the two losses; note that pytest.approx must be on one side of the
    # comparison, otherwise the assert is always truthy
    assert float(loss.value()) == pytest.approx(float(loss2.value()))
def test_variance():
    import zfit
    import zfit.z.numpy as znp

    binning1 = zfit.binned.RegularBinning(3, -3.5, 3, name="x")
    obs = zfit.Space("x", binning=binning1)
    values = znp.array([100.0, 200, 50])
    data = zfit.data.BinnedData.from_tensor(obs, values=values, variances=True)
    data2 = zfit.data.BinnedData.from_tensor(obs, values=values, variances=values**0.5)
    np.testing.assert_allclose(data.variances(), data2.variances())
def _counts(self, x, norm):
    pdf = self.pdfs[0]
    edges = [znp.array(edge) for edge in self.axes.edges]
    edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
    lowers = [edge[:-1] for edge in edges_flat]
    uppers = [edge[1:] for edge in edges_flat]
    lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
    uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
    shape = tf.shape(lowers_meshed[0])
    lowers_meshed_flat = [
        znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed
    ]
    uppers_meshed_flat = [
        znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed
    ]
    lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
    upper_flat = znp.stack(uppers_meshed_flat, axis=-1)
    options = {"type": "bins"}

    if pdf.is_extended:

        @z.function
        def integrate_one(limits):
            l, u = tf.unstack(limits)
            limits_space = zfit.Space(obs=self.obs, limits=[l, u])
            return pdf.ext_integrate(limits_space, norm=False, options=options)

        missing_yield = False
    else:

        @z.function
        def integrate_one(limits):
            l, u = tf.unstack(limits)
            limits_space = zfit.Space(obs=self.obs, limits=[l, u])
            return pdf.integrate(limits_space, norm=False, options=options)

        missing_yield = True

    limits = znp.stack([lower_flat, upper_flat], axis=1)
    try:
        values = tf.vectorized_map(integrate_one, limits)[:, 0]
    except ValueError:
        values = tf.map_fn(integrate_one, limits)
    values = znp.reshape(values, shape)
    if missing_yield:
        values *= self.get_yield()
    if norm:
        values /= pdf.normalization(norm)
    return values
def unbinned_to_binned(data, space, binned_class=None):
    if binned_class is None:
        from zfit._data.binneddatav1 import BinnedData

        binned_class = BinnedData
    values = data.value()
    weights = data.weights
    if weights is not None:
        weights = znp.array(weights)
    edges = tuple(space.binning.edges)
    values, variances = tf.numpy_function(
        unbinned_to_hist_eager,
        inp=[values, edges, weights],
        Tout=[tf.float64, tf.float64],
    )
    binned = binned_class.from_tensor(space=space, values=values, variances=variances)
    return binned
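# Usage sketch (illustrative; assumes zfit's public Space/Data API as used in the
# surrounding tests): convert unbinned zfit.Data into binned data on a space that
# carries a binning. The observable name and sample are made up for the example.
def example_unbinned_to_binned():
    obs = zfit.Space("x", limits=(-3, 3))
    data = zfit.Data.from_numpy(obs=obs, array=np.random.normal(size=(1000, 1)))
    obs_binned = obs.with_binning(zfit.binned.RegularBinning(10, -3, 3, name="x"))
    return unbinned_to_binned(data, obs_binned)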
def test_unbinned_data2D():
    n = 751
    gauss, gauss_binned, obs, obs_binned = create_gauss2d_binned(n, 50)

    data = znp.random.uniform([-5, 50], [10, 600], size=(1000, 2))
    y_binned = gauss_binned.pdf(data)
    y_true = gauss.pdf(data)
    max_error = np.max(y_true) / 10
    np.testing.assert_allclose(y_true, y_binned, atol=max_error)

    centers = obs_binned.binning.centers
    X, Y = znp.meshgrid(*centers, indexing="ij")
    centers = znp.stack([znp.reshape(t, (-1,)) for t in (X, Y)], axis=-1)
    ycenter_binned = gauss_binned.pdf(centers)
    ycenter_true = gauss.pdf(centers)
    np.testing.assert_allclose(ycenter_binned, ycenter_true, atol=max_error / 10)

    # for the extended case
    y_binned_ext = gauss_binned.ext_pdf(data)
    y_true_ext = gauss.ext_pdf(data)
    max_error_ext = np.max(y_true_ext) / 10
    np.testing.assert_allclose(y_true_ext, y_binned_ext, atol=max_error_ext)

    ycenter_binned_ext = gauss_binned.ext_pdf(centers)
    ycenter_true_ext = gauss.ext_pdf(centers)
    np.testing.assert_allclose(
        ycenter_binned_ext, ycenter_true_ext, atol=max_error_ext / 10
    )

    x_outside = znp.array([[-7.0, 55], [3.0, 13], [2, 150], [12, 30], [14, 1000]])
    y_outside = gauss_binned.pdf(x_outside)
    assert y_outside[0] == 0
    assert y_outside[1] == 0
    assert y_outside[2] > 0
    assert y_outside[3] == 0
    assert y_outside[4] == 0

    y_outside_ext = gauss_binned.ext_pdf(x_outside)
    assert y_outside_ext[0] == 0
    assert y_outside_ext[1] == 0
    assert y_outside_ext[2] > 0
    assert y_outside_ext[3] == 0
    assert y_outside_ext[4] == 0
def test_binned_loss(weights, Loss, simultaneous):
    obs = zfit.Space("obs1", limits=(-15, 25))
    gaussian1, mu1, sigma1 = create_gauss1(obs=obs)
    gaussian2, mu2, sigma2 = create_gauss2(obs=obs)
    test_values_np_shifted = test_values_np - 1.8  # shift them a bit
    test_values_np_shifted *= 1.2
    test_values = znp.array(test_values_np_shifted)
    test_values = zfit.Data.from_tensor(obs=obs, tensor=test_values, weights=weights)
    init_yield = test_values_np.shape[0] * 1.2
    scale = zfit.Parameter("yield", init_yield, 0, init_yield * 4, step_size=1)
    binning = zfit.binned.RegularBinning(32, obs.lower[0], obs.upper[0], name="obs1")
    obs_binned = obs.with_binning(binning)
    test_values_binned = test_values.to_binned(obs_binned)
    binned_gauss = zfit.pdf.BinnedFromUnbinnedPDF(gaussian1, obs_binned, extended=scale)

    if simultaneous:
        obs_binned2 = obs.with_binning(14)
        test_values_binned2 = test_values.to_binned(obs_binned2)
        binned_gauss2 = zfit.pdf.BinnedFromUnbinnedPDF(
            gaussian1, obs_binned2, extended=scale
        )
        loss = Loss(
            [binned_gauss, binned_gauss2],
            data=[test_values_binned, test_values_binned2],
        )
    else:
        loss = Loss(model=binned_gauss, data=test_values_binned)

    title = (
        f"Binned gaussian fit"
        f"{' (randomly weighted)' if weights is not None else ''} with "
        f"{loss.name}"
    )
    plt.figure()
    plt.title(title)
    mplhep.histplot(binned_gauss.to_hist(), label="PDF before fit")
    mplhep.histplot(test_values_binned.to_hist(), label="Data")

    # timing, uncomment to test
    # loss.value_gradient(params=loss.get_params())
    # loss.value()
    # loss.gradient()
    # import time, progressbar
    # start = time.time()
    # for _ in progressbar.progressbar(range(1000)):
    #     loss.value()
    #     loss.gradient()
    # print(f"Needed: {time.time() - start}")

    minimizer = zfit.minimize.Minuit(gradient=False)
    result = minimizer.minimize(loss=loss)
    params = result.params
    mplhep.histplot(binned_gauss.to_hist(), label="PDF after fit")
    plt.legend()
    pytest.zfit_savefig()

    result.hesse(name="hesse")
    result.errors(name="asymerr")
    print(result)
    rel_tol_errors = 0.1
    mu_error = 0.03 if not simultaneous else 0.021
    sigma_error = 0.0156 if simultaneous else 0.022
    params_list = [mu1, sigma1]
    errors = [mu_error, sigma_error]
    if loss.is_extended:
        params_list.append(scale)
        errors.append(122 if simultaneous else 170)
    for param, errorval in zip(params_list, errors):
        assert (
            pytest.approx(result.params[param]["hesse"]["error"], rel=rel_tol_errors)
            == errorval
        )
        assert (
            pytest.approx(result.params[param]["asymerr"]["lower"], rel=rel_tol_errors)
            == -errorval
        )
        assert (
            pytest.approx(result.params[param]["asymerr"]["upper"], rel=rel_tol_errors)
            == errorval
        )

    abs_tol_val = 0.15 if weights is None else 0.08  # more fluctuating with weights
    abs_tol_val *= 2 if isinstance(loss, zfit.loss.BinnedChi2) else 1
    assert params[mu1]["value"] == pytest.approx(
        np.mean(test_values_np_shifted), abs=abs_tol_val
    )
    assert params[sigma1]["value"] == pytest.approx(
        np.std(test_values_np_shifted), abs=abs_tol_val
    )
    if loss.is_extended:
        nexpected = test_values_np_shifted.shape[0]
        assert params[scale]["value"] == pytest.approx(
            nexpected, abs=3 * nexpected**0.5
        )

    constraints = zfit.constraint.GaussianConstraint(
        params=[mu2, sigma2],
        observation=[mu_constr[0], sigma_constr[0]],
        uncertainty=[mu_constr[1], sigma_constr[1]],
    )
    gaussian2 = zfit.models.tobinned.BinnedFromUnbinnedPDF(
        gaussian2, obs_binned, extended=scale
    )
    loss = Loss(model=gaussian2, data=test_values_binned, constraints=constraints)
    minimizer = zfit.minimize.Minuit(gradient=False)
    result = minimizer.minimize(loss=loss, params=[mu2, sigma2])
    params = result.params
    if weights is None:
        assert params[mu2]["value"] > np.mean(test_values_np_shifted)
        assert params[sigma2]["value"] < np.std(test_values_np_shifted)
def binned_rect_integration(
    *,
    limits: ZfitSpace,
    edges: Iterable[znp.array] | znp.array,
    counts: znp.array | None = None,
    density: znp.array | None = None,
    axis: Iterable[int] | int | None = None,
) -> znp.array:
    """Integrate a histogram over *limits*.

    This integrator takes into account that the limits do not need to coincide with
    the bin edges: partially covered bins contribute proportionally to the covered
    fraction.

    Args:
        limits: Limits to integrate over. A possible binning is ignored.
        edges: The edges per axis. They should have the shape ``(1, ..., 1, n, 1, ..., 1)``,
            where ``n`` is the number of edges along the *i*-th axis. ``ZfitBinning``
            provides this format on the ``edges`` attribute.
        counts: Counts of the histogram. This is what most histograms have and is equal
            to the density multiplied by the binwidth. Exactly one of ``counts`` or
            ``density`` has to be provided.
        density: The density of a histogram is the bincount divided by the binwidth.
            Exactly one of ``counts`` or ``density`` has to be provided.
        axis: Which axes to integrate over. Defaults to all.

    Returns:
        Integral with shape corresponding to the non-integrated axes (or a scalar in
        case all axes are integrated).
    """
    edges = convert_to_container(edges)
    if not isinstance(limits, ZfitSpace):
        raise TypeError(f"limits has to be a ZfitSpace, not {limits}.")
    if counts is not None:
        if density is not None:
            raise ValueError("Either specify 'counts' or 'density', but not both.")
        is_density = False
        values = counts
    elif density is not None:
        is_density = True
        values = density
    else:
        raise ValueError("Need to specify either 'counts' or 'density', not None.")
    ndims = z._get_ndims(values)
    # partial = axis is not None and len(axis) < ndims
    if axis is not None:
        axis = convert_to_container(axis)
        if len(axis) > ndims:
            raise ValueError(
                f"axis {axis} is larger than values has ndims {values.shape}."
            )
    else:
        axis = list(range(ndims))

    scaled_edges, (lower_bins, upper_bins), unscaled_edges = cut_edges_and_bins(
        edges=edges, limits=limits, axis=axis, unscaled=True
    )

    values_cut = tf.slice(
        values, lower_bins, (upper_bins - lower_bins)
    )  # since limits are inclusive

    rank = values.shape.rank
    binwidths = []
    if not is_density:
        binwidths_unscaled = []
    # calculate the binwidth in each dimension
    for i, edge in enumerate(scaled_edges):
        edge_lower_index = [0] * rank
        # int32 is needed! Otherwise the gradient will fail
        edge_lowest_index = znp.array(edge_lower_index, dtype=znp.int32)
        edge_lower_index[i] = 1
        edge_lower_index = znp.array(edge_lower_index, dtype=znp.int32)
        edge_upper_index = [1] * rank
        edge_highest_index = edge_upper_index.copy()
        len_edge = tf.shape(edge)[i]
        edge_highest_index[i] = len_edge
        edge_highest_index = znp.asarray(edge_highest_index, dtype=znp.int32)
        edge_upper_index[i] = len_edge - 1  # len n -> index max is n - 1
        edge_upper_index = znp.asarray(edge_upper_index, dtype=znp.int32)
        lower_edge = tf.slice(
            edge, edge_lowest_index, (edge_upper_index - edge_lowest_index)
        )
        upper_edge = tf.slice(
            edge, edge_lower_index, (edge_highest_index - edge_lower_index)
        )
        binwidths.append(upper_edge - lower_edge)

        if not is_density:
            # unscaled edges to get the ratio
            lower_edge_unscaled = tf.slice(
                unscaled_edges[i],
                edge_lowest_index,
                (edge_upper_index - edge_lowest_index),
            )
            upper_edge_unscaled = tf.slice(
                unscaled_edges[i],
                edge_lower_index,
                (edge_highest_index - edge_lower_index),
            )
            binwidths_unscaled.append(upper_edge_unscaled - lower_edge_unscaled)

    binareas = reduce(operator.mul, binwidths)
    if not is_density:
        # scale the counts by the covered fraction of each bin; this is mostly one.
        # needs to be np, as znp or tf can't broadcast otherwise
        binareas_uncut = np.prod(binwidths_unscaled, axis=0)
        binareas /= binareas_uncut
    values_cut *= binareas
    integral = tf.reduce_sum(values_cut, axis=axis)
    return integral
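# Usage sketch (illustrative; assumes zfit's Space/binning API and that partially
# covered bins scale linearly, as the function above implements): integrate histogram
# counts over limits that do not coincide with the bin edges. Names and numbers are
# made up for the example.
def example_binned_rect_integration():
    binning = zfit.binned.RegularBinning(4, 0.0, 4.0, name="x")
    space = zfit.Space("x", binning=binning)
    counts = znp.array([1.0, 2.0, 3.0, 4.0])
    limits = zfit.Space("x", limits=(0.5, 2.5))
    # covers half of bin 0, all of bin 1 and half of bin 2: 0.5 + 2.0 + 1.5 = 4.0
    return binned_rect_integration(
        limits=limits, edges=space.binning.edges, counts=counts
    )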
def _pdf(self, x, norm):
    densities = [hist.pdf(x, norm=norm) for hist in self.hists.values()]
    alphas = znp.array(list(self.hists.keys()), dtype=znp.float64)
    alpha = self.params["alpha"]
    y = self._morphing_interpolator(alpha, alphas, densities)
    return y
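# Conceptual sketch (illustrative, using SciPy instead of zfit's internal
# _morphing_interpolator): the morphing evaluates every template at x and then
# interpolates bin by bin across the alpha axis with a spline to obtain the density
# at the requested alpha. The template densities below are invented.
def example_spline_morphing(alpha=0.3):
    from scipy.interpolate import CubicSpline

    alphas = np.array([-1.0, 0.0, 1.0])
    densities = np.array(
        [[0.1, 0.8, 0.1], [0.2, 0.6, 0.2], [0.3, 0.4, 0.3]]
    )  # one row per template, one column per bin
    spline = CubicSpline(alphas, densities, axis=0)
    return spline(alpha)  # interpolated density for each bin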
def test_projection_pdf(test_values):
    import numpy as np
    import zfit
    import zfit.z.numpy as znp

    x = zfit.Space("x", limits=(-1, 1))
    y = zfit.Space("y", limits=(-1, 1))

    def correlated_func(self, x):
        x, y = x.unstack_x()
        value = ((x - y**3) ** 2) + 0.1
        return value

    def correlated_func_integrate_x(y, limits):
        lower, upper = limits.rect_limits

        def integ(x, y):
            return (
                0.333333333333333 * x**3 - 1.0 * x**2 * y**3 + x * (1.0 * y**6 + 0.1)
            )

        return integ(y, upper) - integ(y, lower)

    def correlated_func_integrate_y(x, limits):
        lower, upper = limits.rect_limits

        def integ(x, y):
            return (
                -0.5 * x * y**4 + 0.142857142857143 * y**7 + y * (1.0 * x**2 + 0.1)
            )

        return (integ(x, upper) - integ(x, lower))[0]

    obs = x * y
    from zfit.models.special import SimplePDF

    gauss_xy = SimplePDF(func=correlated_func, obs=obs)
    assert gauss_xy.create_projection_pdf(limits=y).norm_range == x
    proj_pdf = gauss_xy.create_projection_pdf(limits=y)
    test_values = znp.array(
        [
            -0.95603563,
            -0.84636306,
            -0.83895759,
            2.62608006,
            1.02336499,
            -0.99631608,
            -1.22185623,
            0.83838586,
            2.77894762,
            -2.48259488,
            1.5440374,
            0.1109899,
            0.20873491,
            -2.45271623,
            2.04510553,
            0.31566277,
            -1.55696965,
            0.36304538,
            0.77765786,
            3.92630088,
        ]
    )
    true_probs = correlated_func_integrate_y(test_values, y) / gauss_xy.integrate(
        limits=obs,
        norm=False,
    )
    probs = proj_pdf.pdf(x=test_values)
    probs = probs.numpy()
    np.testing.assert_allclose(probs, true_probs, rtol=1e-3)  # MC normalization
def __init__(
    self,
    alpha: ztyping.ParamTypeInput,
    hists: (
        Mapping[float | int, Iterable[ZfitBinnedPDF]]
        | list[ZfitBinnedPDF]
        | tuple[ZfitBinnedPDF]
    ),
    extended: ztyping.ExtendedInputType = None,
    norm: ztyping.NormInputType = None,
):
    """Morphing a set of histograms with a spline interpolation.

    Args:
        alpha: Parameter for the spline interpolation.
        hists: A mapping of alpha values to histograms. This allows for arbitrary
            interpolation points. If a list or tuple of exactly three PDFs is given,
            they correspond to the histograms at alpha equal to -1, 0 and 1,
            respectively.
        extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               `ext_*` methods and the `counts` (for binned PDFs). |@docend:pdf.init.extended|
        norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
    """
    if isinstance(hists, (list, tuple)):
        if len(hists) != 3:
            raise ValueError(
                "If hists is a list, it is assumed to correspond to an alpha of -1, 0 and 1."
                f" hists is {hists} and has length {len(hists)}."
            )
        else:
            hists = {
                float(i - 1): hist for i, hist in enumerate(hists)
            }  # mapping to -1, 0, 1
    self.hists = hists
    self.alpha = alpha
    obs = list(hists.values())[0].space
    if extended is None:  # TODO: yields?
        extended = all(hist.is_extended for hist in hists.values())
        if extended:
            alphas = znp.array(list(self.hists.keys()), dtype=znp.float64)

            def interpolated_yield(params):
                alpha = params["alpha"]
                densities = tuple(
                    params[f"{i}"] for i in range(len(params) - 1)
                )  # minus alpha, we don't want it
                return spline_interpolator(
                    alpha=alpha, alphas=alphas, densities=densities
                )

            number = parameter.get_auto_number()
            yields = {
                f"{i}": hist.get_yield() for i, hist in enumerate(hists.values())
            }
            yields["alpha"] = alpha
            new_yield = parameter.ComposedParameter(
                f"AUTOGEN_{number}_interpolated_yield",
                interpolated_yield,
                params=yields,
            )
            extended = new_yield
    super().__init__(
        obs=obs,
        extended=extended,
        norm=norm,
        params={"alpha": alpha},
        name="LinearMorphing",
    )
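# Usage sketch (illustrative; assumes zfit's public zfit.pdf.HistogramPDF and
# zfit.pdf.SplineMorphingPDF, which this constructor presumably backs): build three
# template histogram PDFs and morph between them with a single alpha parameter. The
# shifts and sample sizes are invented; module-level np and hist imports are assumed.
def example_spline_morphing_pdf():
    templates = []
    for shift in (-1.0, 0.0, 1.0):
        h = hist.Hist(
            hist.axis.Regular(5, -3, 3, name="x"), storage=hist.storage.Weight()
        )
        h.fill(np.random.normal(loc=shift, size=1000))
        templates.append(zfit.pdf.HistogramPDF(h, extended=True))
    alpha = zfit.Parameter("alpha", 0.0, -1, 1)
    # a list of exactly three templates is interpreted as alpha = -1, 0, 1
    return zfit.pdf.SplineMorphingPDF(alpha=alpha, hists=templates)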