def from_hist(cls, hist: hist.NamedHist) -> BinnedData:
    """Build a binned dataset from the content of a `hist` histogram.

    Args:
        hist: A NamedHist. Its axes are converted to the binning of the zfit space; its values
            and, if available, its variances become the content of the dataset.

    Returns:
        An instance of `cls` wrapping a `BinnedHolder` that holds the histogram content.
    """
    from zfit import Space

    # NOTE(review): `flow` is not defined in this function; presumably a module-level flag that
    # controls whether under-/overflow bins are included -- confirm.
    space = Space(binning=histaxes_to_binning(hist.axes))
    bin_values = znp.asarray(hist.values(flow=flow))
    raw_variances = hist.variances(flow=flow)
    if raw_variances is None:
        bin_variances = None
    else:
        bin_variances = znp.asarray(raw_variances)
    return cls(
        holder=BinnedHolder(space=space, values=bin_values, variances=bin_variances)
    )
def hessian(params, hess):
    """Compute second derivatives of ``func`` with respect to ``params``.

    ``func`` is taken from the enclosing scope and is called with the stacked parameters.

    Args:
        params: Parameters to differentiate with respect to. They are stacked into a single
            tensor with ``tf.stack`` before ``func`` is evaluated (assumes scalar parameters,
            so that the stacked tensor is one-dimensional -- TODO confirm).
        hess: If ``'diag'``, only the diagonal of the Hessian is returned. Any other value
            returns the full Hessian matrix.

    Returns:
        The diagonal of the Hessian if ``hess == 'diag'``, otherwise the full Hessian.
    """
    params = tf.stack(params)
    with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape:
        tape.watch(params)
        with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape2:
            tape2.watch(params)
            y = func(params)
        # Taken while the outer tape is still recording, so the gradient can be differentiated again.
        gradients = tape2.gradient(y, params)

    # The Jacobian of the gradient is the Hessian. Previously this branch referenced the
    # undefined name `gradients_tf` and raised a NameError.
    full_hessian = tape.jacobian(
        gradients, sources=params, experimental_use_pfor=True  # causes TF bug? Slow..
    )
    if hess == 'diag':
        # The former element-wise `tf.map_fn` attempt differentiated one gradient entry with
        # respect to another one, which is not connected on the tape. Taking the diagonal of
        # the full Hessian is not cheaper than the full computation, but it is correct.
        return tf.linalg.diag_part(full_hessian)
    return znp.asarray(full_hessian)
def ext_pdf(
    self, x: ztyping.XType, norm: ztyping.LimitsType = None, *, norm_range=None
) -> ztyping.XType:
    """Evaluate the extended PDF at `x` or in the bins of `x`.

    Args:
        x: Values or space to evaluate at. If `x` is unbinned data, the PDF is evaluated on its
            own space and the value of the bin that each point falls into is gathered.
        norm: Normalization; it is converted and forwarded to the implementation.
        norm_range: If not `None`, it is used in place of `norm`.

    Returns:
        The result of the implementation as an array, either gathered per data point (unbinned
        input) or with the axes moved from the order of the PDF to the order of the input.

    Raises:
        NotExtendedPDFError: If the PDF is not extended.
    """
    if norm_range is not None:
        norm = norm_range
    if not self.is_extended:
        raise NotExtendedPDFError

    # bring the arguments into a standardized form
    x = self._convert_input_binned_x(x, none_is_space=True)
    norm = self._check_convert_norm(norm, none_is_error=True)

    # remember the ordering of the input before reordering to the PDF's observables
    input_space = x if isinstance(x, ZfitSpace) else x.space
    x = x.with_obs(self.space)

    if not isinstance(x, ZfitUnbinnedData):
        binned_result = self._call_ext_pdf(x, norm=norm)
        return znp.asarray(move_axis_obs(self.space, input_space, binned_result))

    # unbinned: evaluate on the full binned space and pick the bin of every data point
    bin_indices = unbinned_to_binindex(x, self.space, flow=True)
    binned_result = self._call_ext_pdf(self.space, norm=norm)
    # the indices include the flow, so one bin is padded on each side of every axis
    pad_width = znp.ones((z._get_ndims(binned_result), 2), dtype=znp.float64)
    padded_result = znp.pad(binned_result, pad_width, mode="constant")
    return znp.asarray(tf.gather_nd(padded_result, indices=bin_indices))
def pdf(
    self,
    x: ztyping.XTypeInput,
    norm: ztyping.LimitsTypeInput = None,
    *,
    norm_range=None,
) -> ztyping.XType:
    """Probability density function, normalized over `norm`.

    Args:
        x: `float` or `double` `Tensor`.
        norm: :py:class:`~zfit.Space` to normalize over.
        norm_range: Has to be `None` when this method is reached.

    Returns:
        :py:class:`tf.Tensor` of type `self.dtype`.
    """
    assert norm_range is None
    norm = self._check_input_norm(norm, none_is_error=True)
    with self._convert_sort_x(x) as sorted_x:
        density = self._single_hook_pdf(x=sorted_x, norm=norm)
        if run.numeric_checks:
            z.check_numerics(
                density, message="Check if pdf output contains any NaNs of Infs"
            )
        real_density = z.to_real(density)
        return znp.asarray(real_density)
def from_tensor(
    cls,
    space: ZfitSpace,
    values: znp.array,
    variances: znp.array | None = None,
) -> BinnedData:
    """Create a binned dataset defined in *space* where values are considered to be the counts.

    Args:
        space: The space of the data. Variables need to match the values dimensions. The space has
            to be binned and carry the information about the edges.
        values: Actual counts of the histogram. They are converted to `float64`.
        variances: Variances of the histogram values. If `True`, the counts are taken to be
            Poisson distributed, which means the variance of each bin equals its count.
            If `None`, no variances are stored.

    Returns:
        An instance of `cls` wrapping a `BinnedHolder` with the given content.
    """
    values = znp.asarray(values, znp.float64)
    if variances is True:
        # For a Poisson distribution the variance equals the expected count; sqrt(values) would
        # be the standard deviation, not the variance that is stored here.
        variances = values
    elif variances is not None:
        variances = znp.asarray(variances)
    return cls(holder=BinnedHolder(space=space, values=values, variances=variances))
def _unbinned_nll_tf(
    model: ztyping.PDFInputType,
    data: ztyping.DataInputType,
    fit_range: ZfitSpace,
    log_offset=None,
):
    """Return the unbinned negative log likelihood for a PDF.

    Args:
        model: |@doc:loss.init.model| PDFs that return the normalized probability for
               *data* under the given parameters.
               If multiple model and data are given, they will be used
               in the same order to do a simultaneous fit. |@docend:loss.init.model|
        data: |@doc:loss.init.data| Dataset that will be given to the *model*.
               If multiple model and data are given, they will be used
               in the same order to do a simultaneous fit. |@docend:loss.init.data|
        fit_range: If not `None`, the data range is set to it while the pdf is evaluated and it
            is passed to the model as the normalization range. For a container of models, one
            entry per model is expected.
        log_offset: Forwarded unchanged to the nll calculation.

    Returns:
        The unbinned nll
    """
    if is_container(model):
        # simultaneous fit: one nll per (model, data, range) triple, summed up
        individual_nlls = [
            _unbinned_nll_tf(
                model=single_model,
                data=single_data,
                fit_range=single_range,
                log_offset=log_offset,
            )
            for single_model, single_data, single_range in zip(model, data, fit_range)
        ]
        return znp.sum(individual_nlls, axis=0)

    if fit_range is None:
        probs = model.pdf(data)
    else:
        with data.set_data_range(fit_range):
            probs = model.pdf(data, norm_range=fit_range)

    # minor offset to avoid NaNs from log(0)
    tiny_offset = znp.asarray(1e-307, dtype=znp.float64)
    log_probs = znp.log(probs + tiny_offset)
    return _nll_calc_unbinned_tf(
        log_probs=log_probs,
        weights=data.weights,
        log_offset=log_offset,
    )
def pdf(
    self, x: ztyping.XType, norm: ztyping.LimitsType = None, *, norm_range=None
) -> ztyping.XType:
    """Probability density function, evaluated at `x` or in the bins of `x`

    Args:
        x: values to evaluate the PDF at.
            If this is a `ZfitBinnedData`-like object, a histogram of *densities*
            will be returned. If x is a `ZfitUnbinnedData`-like object, the densities will be
            evaluated at the points of `x`.
        norm: |@doc:pdf.pdf.norm| Normalization of the function.
               By default, this is the `norm` of the PDF (which by default is the same as
               the space of the PDF). |@docend:pdf.pdf.norm|
        norm_range: If not `None`, it is used in place of `norm`.

    Returns:
        `Array-like`: probability density
    """
    if norm_range is not None:
        norm = norm_range

    # bring the arguments into a standardized form
    x = self._convert_input_binned_x(x, none_is_space=True)
    norm = self._check_convert_norm(norm, none_is_error=True)

    # remember the ordering of the input before reordering to the PDF's observables
    input_space = x if isinstance(x, ZfitSpace) else x.space
    x = x.with_obs(self.space)

    if not isinstance(x, ZfitUnbinnedData):
        densities = self._call_pdf(x, norm=norm)
        return znp.asarray(move_axis_obs(self.space, input_space, densities))

    # unbinned: evaluate on the full binned space and pick the bin of every data point
    bin_indices = unbinned_to_binindex(x, self.space, flow=True)
    densities = self._call_pdf(self.space, norm=norm)
    # the indices include the flow, so one bin is padded on each side of every axis
    pad_width = znp.ones((z._get_ndims(densities), 2), dtype=znp.float64)
    padded_densities = znp.pad(densities, pad_width, mode="constant")
    return znp.asarray(tf.gather_nd(padded_densities, indices=bin_indices))
def unbinned_to_binindex(data, space, flow=False):
    """Find, for every data point, the index of the bin of *space* that it falls into.

    Args:
        data: Unbinned data; `data.value(ob)` is read for every observable of *space*.
        space: Binned space; the edges are taken from `space.binning.edges`.
        flow: If true, a warning is emitted, the indices are shifted up by one, entries for
            which no bin was found (NaN) are set to 0 and the result is cast to `int32`.

    Returns:
        The bin indices stacked along the last axis, one column per observable. Without `flow`,
        this is the unmodified stacked output of `tfp.stats.find_bins`.
    """
    if flow:
        warnings.warn(
            "Flow currently not fully supported. Values outside the edges are all 0."
        )

    def _flatten(array):
        return znp.reshape(array, (-1,))

    flat_values = [_flatten(data.value(ob)) for ob in space.obs]
    flat_edges = [_flatten(edge) for edge in space.binning.edges]
    per_axis_bins = [
        tfp.stats.find_bins(x=axis_values, edges=axis_edges)
        for axis_values, axis_edges in zip(flat_values, flat_edges)
    ]
    stacked_bins = znp.stack(per_axis_bins, axis=-1)
    if not flow:
        return stacked_bins

    # shift by one to make room for the underflow bin at index 0
    shifted_bins = stacked_bins + 1
    outside = tf.math.is_nan(shifted_bins)
    cleaned_bins = znp.where(outside, znp.zeros_like(shifted_bins), shifted_bins)
    return znp.asarray(cleaned_bins, dtype=znp.int32)
def log_pdf(
    self, x: ztyping.XType, norm: ztyping.LimitsType = None, *, norm_range=None
) -> ztyping.XType:
    """Log probability density function normalized over `norm`.

    Args:
        x: `float` or `double` `Tensor`.
        norm: :py:class:`~zfit.Space` to normalize over.
        norm_range: Has to be `None` when this method is reached.

    Returns:
        A `Tensor` of type `self.dtype`.
    """
    assert norm_range is None
    norm = self._check_input_norm(norm)
    with self._convert_sort_x(x) as sorted_x:
        log_density = self._single_hook_log_pdf(x=sorted_x, norm=norm)
        real_log_density = z.to_real(log_density)
        return znp.asarray(real_log_density)
def _pdf(self, x, norm_range):
    """Evaluate the first wrapped pdf, going through the value cache.

    The cache is flagged as valid only if caching is switched on and all current parameter values
    of the wrapped pdf are within `self.cache_tolerance` of the values stored by the previous call.

    Args:
        x: Data to evaluate the pdf at; the size of its first dimension sizes a new cache.
        norm_range: Normalization range passed on to the wrapped pdf.

    Returns:
        The value obtained from `cache_value` as an array.
    """
    cache = self.valcache
    if cache is None:
        # lazily created on first use; the shape is not validated so that it can change later
        cache = tf.Variable(
            znp.zeros(shape=tf.shape(x)[0]),
            trainable=False,
            validate_shape=False,
            dtype=tf.float64,
        )
        self.valcache = cache

    wrapped_pdf_params = list(self.pdfs[0].get_params())
    current_values = tf.stack(wrapped_pdf_params)
    deviation = tf.math.abs(current_values - self.param_values)
    unchanged = tf.math.reduce_all(deviation < self.cache_tolerance)
    cache_is_valid = tf.math.logical_and(unchanged, self.do_caching)
    self.valcache_valid.assign(cache_is_valid, read_value=False)
    # store the current values as the reference for the next call
    self.param_values.assign(current_values, read_value=False)

    # NOTE(review): presumably `cache_value` returns the cached value if the flag is set and
    # otherwise evaluates the function and stores the result -- confirm against its definition.
    result = cache_value(
        cache, self.valcache_valid, lambda: self.pdfs[0].pdf(x, norm_range)
    )
    return znp.asarray(result)
def value(self, obs: ztyping.ObsTypeInput = None):
    """Return the values for `obs` as an array.

    Args:
        obs: Observables to get the values for; passed on to `_value_internal`.

    Returns:
        The result of `_value_internal` converted with `znp.asarray`.
    """
    internal_value = self._value_internal(obs=obs)
    return znp.asarray(internal_value)
def to_real(x, dtype=ztypes.float):
    """Cast `x` to `dtype` and return it as an array.

    Args:
        x: Value to cast.
        dtype: Target dtype, `ztypes.float` by default.

    Returns:
        `x` cast with `tf.cast` and converted with `znp.asarray`.
    """
    casted = tf.cast(x, dtype=dtype)
    return znp.asarray(casted)
def cut_edges_and_bins(
    edges: Iterable[znp.array], limits: ZfitSpace, axis=None, unscaled=None
) -> tuple[list[znp.array], tuple[znp.array, znp.array], list | None]:
    """Cut the *edges* according to *limits* and calculate the bins inside.

    The edges within limits are calculated and returned together with the corresponding bin indices. The indices
    mark the lowest and the highest index of the edges that are returned. Additionally, the unscaled edges are returned.

    If the limits are between two edges, this will be treated as the new edge. If the limits are outside the edges,
    all edges in this direction will be returned (but not extended to the limit). For example:
    [0, 0.5, 1., 1.5, 2.] and the limits (0.8, 3.) will return [0.8, 1., 1.5, 2.], ([1], [4])

    .. code-block::

        cut_edges_and_bins([[0., 0.5, 1., 1.5, 2.]], ([[0.8]], [[3]]))

    Args:
        edges: Iterable of tensor-like objects that describe the edges of a histogram. Every object should have rank n
            (where n is the length of *edges*) but only have the dimension i filled out. These are
            tensors that are ready to be broadcasted together.
        limits: The limits that will be used to confine the edges. Either a `ZfitSpace`, whose `limits` attribute is
            used, or a `(lower, upper)` pair.
        axis: Indices of the edges that are cut. The entries of *limits* are consumed in order, one per cut axis.
            Edges whose index is not in *axis* are returned in full, with the lower bin 0 and the upper bin the
            number of edges minus one. If `None`, all edges are cut.
        unscaled: If true, the unscaled edges are returned as the third element, otherwise `None` is returned in
            their place. Defaults to false.

    Returns:
        edges, (lower bins, upper bins), unscaled_edges: The edges and the bins are returned.
            The upper bin number corresponds
            to the highest bin which was still (partially) inside the limits **plus one** (so it's the index of the
            edge that is right outside). The unscaled edges are like *edges* but the last edge is the edge
            that is lying not inside anymore, so the actual edge of the last bin number returned.
            This can be used to determine the fraction cut away. Every returned edge is reshaped so that only its
            own dimension has a length different from one.
    """
    if axis is not None:
        axis = convert_to_container(axis)
    if unscaled is None:
        unscaled = False
    if unscaled:
        cut_unscaled_edges = []
    else:
        cut_unscaled_edges = None
    cut_scaled_edges = []

    all_lower_bins = []
    all_upper_bins = []
    if isinstance(limits, ZfitSpace):
        lower, upper = limits.limits
    else:
        lower, upper = limits
        lower = znp.asarray(lower)
        upper = znp.asarray(upper)
    # only the first entry along the leading dimension of the limits is used
    lower_all = lower[0]
    upper_all = upper[0]
    rank = len(edges)
    # position inside the limits; only advanced for the axes that are actually cut
    current_axis = 0
    for i, edge in enumerate(edges):
        edge = znp.asarray(edge)
        edge = znp.reshape(edge, (-1,))  # work on the flat edge, the shape is restored below
        if axis is None or i in axis:
            # clip the limits to the range covered by the edges
            lower_i = lower_all[current_axis, None]
            edge_minimum = edge[0]
            lower_i = znp.maximum(lower_i, edge_minimum)
            upper_i = upper_all[current_axis, None]
            edge_maximum = edge[-1]
            upper_i = znp.minimum(upper_i, edge_maximum)
            # we get the bins that are just one too far. Then we update this whole bin tensor with the actual edge.
            # The bins index is the index below the value.
            lower_bin_float = tfp.stats.find_bins(
                lower_i, edge, extend_lower_interval=True, extend_upper_interval=True
            )
            lower_bin = tf.reshape(tf.cast(lower_bin_float, dtype=znp.int32), [-1])
            # +1 below because the outer bin is searched, meaning the one that is higher than the value
            upper_bin_float = tfp.stats.find_bins(
                upper_i, edge, extend_lower_interval=True, extend_upper_interval=True
            )
            upper_bin = tf.reshape(tf.cast(upper_bin_float, dtype=znp.int32), [-1]) + 1
            size = upper_bin - lower_bin
            new_edge = tf.slice(
                edge, lower_bin, size + 1
            )  # +1 because stop is exclusive
            # replace the outermost edges by the (clipped) limits
            new_edge = tf.tensor_scatter_nd_update(
                new_edge, [tf.constant([0]), size], [lower_i[0], upper_i[0]]
            )
            if unscaled:
                # same slice as above but without replacing the outermost edges
                new_edge_unscaled = tf.slice(
                    edge, lower_bin, size + 1
                )  # +1 because stop is exclusive
            current_axis += 1
        else:
            # this axis is not cut: keep all edges and all bins
            lower_bin = [0]
            upper_bin = znp.asarray([edge.shape[0] - 1], dtype=znp.int32)
            new_edge = edge
            if unscaled:
                new_edge_unscaled = edge
        # restore the broadcastable shape (1, ..., 1, n, 1, ..., 1) with n on axis i
        new_shape = [1] * rank
        new_shape[i] = -1
        new_edge = znp.reshape(new_edge, new_shape)
        all_lower_bins.append(lower_bin)
        all_upper_bins.append(upper_bin)
        cut_scaled_edges.append(new_edge)
        if unscaled:
            new_edge_unscaled = znp.reshape(new_edge_unscaled, new_shape)
            cut_unscaled_edges.append(new_edge_unscaled)

    # one entry per axis, concatenated into a single tensor each
    all_lower_bins = tf.concat(all_lower_bins, axis=0)
    all_upper_bins = tf.concat(all_upper_bins, axis=0)
    return cut_scaled_edges, (all_lower_bins, all_upper_bins), cut_unscaled_edges
def binned_rect_integration(
    *,
    limits: ZfitSpace,
    edges: Iterable[znp.array] | znp.array,
    counts: znp.array | None = None,
    density: znp.array | None = None,
    axis: Iterable[int] | int | None = None,
) -> znp.array:
    """Integrate a histogram over *limits*.

    This integrator does take into account that limits do not match the edges.

    Args:
        limits: Limits to integrate over. A possible binning is ignored.
        edges: The edges per axis. They should have the shape `(1,..., 1, n, 1, ..., 1)`, where n is the *ith* axis.
            `ZfitBinning` provides this format on the `edges` attribute.
        counts: Counts of the histogram. This is what most histograms have and is equal to the density multiplied by
            the binwidth.
            Exactly one of counts or density has to be provided.
        density: The density of a histogram is the bincount divided by the binwidth.
            Exactly one of counts or density has to be provided.
        axis: Which axes to integrate over. Defaults to all.

    Returns:
        Integral with shape corresponding to the non-integrated axes (or a scalar in case of all axes integrated).

    Raises:
        TypeError: If *limits* is not a `ZfitSpace`.
        ValueError: If both or none of *counts* and *density* are given or if *axis* has more entries than the
            values have dimensions.
    """
    edges = convert_to_container(edges)
    if not isinstance(limits, ZfitSpace):
        raise TypeError(f"limits has to be a ZfitSpace, not {limits}.")
    if counts is not None:
        if density is not None:
            raise ValueError("Either specify 'counts' or 'density' but not both.")
        is_density = False
        values = counts
    elif density is not None:
        is_density = True
        values = density
    else:
        raise ValueError("Need to specify either 'counts' or 'density', not None.")
    ndims = z._get_ndims(values)
    if axis is not None:
        axis = convert_to_container(axis)
        if len(axis) > ndims:
            raise ValueError(
                f"axis {axis} is larger than values has ndims {values.shape}."
            )
    else:
        axis = list(range(ndims))

    scaled_edges, (lower_bins, upper_bins), unscaled_edges = cut_edges_and_bins(
        edges=edges, limits=limits, axis=axis, unscaled=True
    )

    # keep only the bins that are (partially) inside the limits
    values_cut = tf.slice(
        values, lower_bins, (upper_bins - lower_bins)
    )  # since limits are inclusive

    rank = values.shape.rank
    binwidths = []
    if not is_density:
        binwidths_unscaled = []
    # calculate the binwidth in each dimension
    for i, edge in enumerate(scaled_edges):
        # Begin/end indices that select all edges but the last ("lower" edges) and all edges but
        # the first ("upper" edges) along axis i; their difference is the binwidth.
        edge_lower_index = [0] * rank
        # int32 is needed! Otherwise the gradient will fail
        edge_lowest_index = znp.array(edge_lower_index, dtype=znp.int32)

        edge_lower_index[i] = 1
        edge_lower_index = znp.array(edge_lower_index, dtype=znp.int32)
        edge_upper_index = [1] * rank
        edge_highest_index = edge_upper_index.copy()
        len_edge = tf.shape(edge)[i]
        edge_highest_index[i] = len_edge
        edge_highest_index = znp.asarray(edge_highest_index, dtype=znp.int32)
        edge_upper_index[i] = len_edge - 1  # len n -> index max is n - 1
        edge_upper_index = znp.asarray(edge_upper_index, dtype=znp.int32)
        lower_edge = tf.slice(
            edge, edge_lowest_index, (edge_upper_index - edge_lowest_index)
        )
        upper_edge = tf.slice(
            edge, edge_lower_index, (edge_highest_index - edge_lower_index)
        )
        binwidths.append(upper_edge - lower_edge)

        if not is_density:
            # unscaled edges to get the ratio
            lower_edge_unscaled = tf.slice(
                unscaled_edges[i],
                edge_lowest_index,
                (edge_upper_index - edge_lowest_index),
            )
            upper_edge_unscaled = tf.slice(
                unscaled_edges[i],
                edge_lower_index,
                (edge_highest_index - edge_lower_index),
            )
            binwidths_unscaled.append(upper_edge_unscaled - lower_edge_unscaled)

    # The widths have shape (1, ..., n_i, ..., 1) and are multiplied with broadcasting.
    binareas = reduce(operator.mul, binwidths)
    if not is_density:  # scale the counts by the fraction. This is mostly one.
        # Multiply the tensors directly instead of going through `np.prod`: the per-axis widths
        # have different shapes, which cannot be packed into one NumPy array, and a conversion to
        # NumPy is not possible for symbolic tensors.
        binareas_uncut = reduce(operator.mul, binwidths_unscaled)
        binareas /= binareas_uncut
    values_cut *= binareas
    integral = tf.reduce_sum(values_cut, axis=axis)
    return integral