Example #1
    def _sample(self, n, limits: ZfitSpace):

        pdf = self.pdfs[0]
        # TODO: use real limits, currently not supported in binned sample
        sample = pdf.sample(n=n)

        edges = sample.space.binning.edges
        ndim = len(edges)
        edges = [znp.array(edge) for edge in edges]
        edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
        lowers = [edge[:-1] for edge in edges_flat]
        uppers = [edge[1:] for edge in edges_flat]
        lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
        uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
        lowers_meshed_flat = [
            znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed
        ]
        uppers_meshed_flat = [
            znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed
        ]
        lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
        upper_flat = znp.stack(uppers_meshed_flat, axis=-1)

        counts_flat = znp.reshape(sample.values(), (-1,))
        counts_flat = tf.cast(counts_flat, znp.int32)  # TODO: what if we have fractions?
        lower_flat_repeated = tf.repeat(lower_flat, counts_flat, axis=0)
        upper_flat_repeated = tf.repeat(upper_flat, counts_flat, axis=0)
        sample_unbinned = tf.random.uniform(
            (znp.sum(counts_flat), ndim),
            minval=lower_flat_repeated,
            maxval=upper_flat_repeated,
            dtype=self.dtype,
        )
        return sample_unbinned
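A minimal NumPy sketch of the same trick with toy numbers (names here are illustrative, not zfit API): each bin's (lower, upper) corner pair is repeated once per count, then a point is drawn uniformly inside each repeated box.

import numpy as np

edges = [np.array([0.0, 1.0, 2.0]), np.array([0.0, 0.5, 1.0])]  # 2D histogram edges
counts = np.array([[2, 0], [1, 3]])

lowers = np.meshgrid(*[e[:-1] for e in edges], indexing="ij")
uppers = np.meshgrid(*[e[1:] for e in edges], indexing="ij")
lower_flat = np.stack([low.ravel() for low in lowers], axis=-1)
upper_flat = np.stack([up.ravel() for up in uppers], axis=-1)

counts_flat = counts.ravel()
low = np.repeat(lower_flat, counts_flat, axis=0)
high = np.repeat(upper_flat, counts_flat, axis=0)
points = np.random.uniform(low, high)  # shape (counts.sum(), 2)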
Example #2
    def _loss_func(self, model, data, fit_range, constraints, log_offset):
        nll = super()._loss_func(
            model=model,
            data=data,
            fit_range=fit_range,
            constraints=constraints,
            log_offset=log_offset,
        )
        yields = []
        nevents_collected = []
        for mod, dat in zip(model, data):
            if not mod.is_extended:
                raise NotExtendedPDFError(
                    f"The pdf {mod} is not extended but has to be (for an extended fit)"
                )
            nevents = dat.n_events if dat.weights is None else z.reduce_sum(dat.weights)
            nevents = tf.cast(nevents, tf.float64)
            nevents_collected.append(nevents)
            yields.append(mod.get_yield())
        yields = znp.stack(yields, axis=0)
        nevents_collected = znp.stack(nevents_collected, axis=0)

        term_new = tf.nn.log_poisson_loss(nevents_collected, znp.log(yields))
        if log_offset is not None:
            term_new += log_offset
        nll += znp.sum(term_new, axis=0)
        return nll
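For reference, tf.nn.log_poisson_loss(targets, log_input) computes exp(log_input) - targets * log_input (the Stirling approximation term is off by default), i.e. the negative log of a Poisson term up to the constant log(targets!). A toy NumPy check of that formula, with illustrative numbers:

import numpy as np

nevents = np.array([100.0, 250.0])  # observed events per dataset
yields = np.array([95.0, 260.0])    # expected yields
term = yields - nevents * np.log(yields)  # == log_poisson_loss(nevents, log(yields))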
Example #3
    def sumfunc(params):
        values = self.pdfs[0].counts(obs)
        sysshape = list(params.values())
        if sysshape:
            sysshape_flat = tf.stack(sysshape)
            sysshape = znp.reshape(sysshape_flat, values.shape)
            values = values * sysshape
        return znp.sum(values)
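A sketch of what the reshape achieves, in plain NumPy with toy values: a flat stack of one scalar modifier per bin is folded back into the bin layout of the counts and applied bin-wise.

import numpy as np

counts = np.arange(6.0).reshape(2, 3)                 # binned template counts
sysshape = np.array([1.0, 1.1, 0.9, 1.0, 1.2, 0.8])   # one modifier per bin
modified = counts * sysshape.reshape(counts.shape)
total = modified.sum()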
Example #4
def _nll_calc_unbinned_tf(log_probs, weights=None, log_offset=None):
    if weights is not None:
        log_probs *= weights  # because it's prob ** weights
    if log_offset is not None:
        log_probs -= log_offset
    nll = -znp.sum(log_probs, axis=0)
    # nll = -tfp.math.reduce_kahan_sum(input_tensor=log_probs, axis=0)
    return nll
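The weighting step relies on log(prob ** w) = w * log(prob), so multiplying the log probabilities by the weights is the same as raising each probability to its weight. A quick NumPy check:

import numpy as np

probs = np.array([0.2, 0.5, 0.1])
weights = np.array([1.0, 2.0, 0.5])
nll = -np.sum(weights * np.log(probs))
assert np.isclose(nll, -np.sum(np.log(probs ** weights)))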
Example #5
    def __init__(
        self,
        data: ztyping.BinnedDataInputType,
        extended: Optional[ztyping.ExtendedInputType] = None,
        norm: Optional[ztyping.NormInputType] = None,
        name: str = "HistogramPDF",
    ) -> None:
        """Binned PDF resembling a histogram.

        Simple histogram PDF that can be used to model a histogram as a PDF.


        Args:
            data: Histogram to be used as PDF.
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               `ext_*` methods and the `counts` (for binned PDFs). |@docend:pdf.init.extended|
               |@doc:pdf.init.extended.auto| If `True`,
               the PDF will be extended automatically, using the total
               number of events in the histogram as the yield.
               This is the default. |@docend:pdf.init.extended.auto|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:model.init.name| Human-readable name
               or label of the PDF for better identification.
               Has no programmatic or functional purpose beyond identification. |@docend:model.init.name|
        """
        if extended is None:
            extended = True
        if not isinstance(data, ZfitBinnedData):
            if isinstance(data, PlottableHistogram):
                from zfit._data.binneddatav1 import BinnedData

                data = BinnedData.from_hist(data)
            else:
                raise TypeError(
                    "data must be of type PlottableHistogram (UHI) or ZfitBinnedData"
                )

        params = {}
        if extended is True:
            self._automatically_extended = True
            extended = znp.sum(data.values())
        else:
            self._automatically_extended = False
        super().__init__(obs=data.space,
                         extended=extended,
                         norm=norm,
                         params=params,
                         name=name)
        self._data = data
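A minimal usage sketch based on the signature above, assuming the hist library provides the UHI (PlottableHistogram) input:

import numpy as np
import hist
import zfit

# toy 1D histogram; any PlottableHistogram or ZfitBinnedData works
h = hist.Hist(hist.axis.Regular(10, -3, 3, name="x"))
h.fill(x=np.random.normal(size=1_000))

pdf = zfit.pdf.HistogramPDF(h)  # extended=True by default: yield = total count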
Example #6
def test_z_numpy_ndarray_is_tensorflow_tensor():
    """In tensorflow 2.4.1 tf.experimental.numpy.ndarray was a wrapper around tf.Tensor.

    Now this concept seems to
    have been scratched and tf.experimental.numpy.ndarray is just an alias for tf.Tensor.
    See the commit history of
    https://github.com/tensorflow/tensorflow/commits/master/tensorflow/python/ops/numpy_ops/np_arrays.py
    """
    assert znp.ndarray is tf.Tensor
    assert isinstance(znp.array(1), tf.Tensor)
    assert isinstance(znp.sum(znp.array(0)), tf.Tensor)
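In practice this means NumPy-style calls through zfit.z.numpy return plain TensorFlow tensors, for example:

import zfit.z.numpy as znp

x = znp.linspace(0.0, 1.0, 5)
print(type(x))         # a tf.Tensor (an EagerTensor when running eagerly)
print(znp.sum(x * x))  # also a tf.Tensor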
Example #7
def _unbinned_nll_tf(
    model: ztyping.PDFInputType,
    data: ztyping.DataInputType,
    fit_range: ZfitSpace,
    log_offset=None,
):
    """Return the unbinned negative log likelihood for a PDF.

    Args:
        model: |@doc:loss.init.model| PDFs that return the normalized probability for
               *data* under the given parameters.
               If multiple model and data are given, they will be used
               in the same order to do a simultaneous fit. |@docend:loss.init.model|
        data: |@doc:loss.init.data| Dataset that will be given to the *model*.
               If multiple model and data are given, they will be used
               in the same order to do a simultaneous fit. |@docend:loss.init.data|
        fit_range: Range to which the data is restricted and over which the
            PDF is normalized.
        log_offset: Constant subtracted from each (weighted) log probability
            to reduce the magnitude of the summed terms.

    Returns:
        The unbinned NLL.
    """

    if is_container(model):
        nlls = [
            _unbinned_nll_tf(model=p,
                             data=d,
                             fit_range=r,
                             log_offset=log_offset)
            for p, d, r in zip(model, data, fit_range)
        ]
        # nlls_total = [nll.total for nll in nlls]
        # nlls_correction = [nll.correction for nll in nlls]
        # nlls_total_summed = znp.sum(input_tensor=nlls_total, axis=0)
        nlls_summed = znp.sum(nlls, axis=0)

        # nlls_correction_summed = znp.sum(input_tensor=nlls_correction, axis=0)
        # nll_finished = (nlls_total_summed, nlls_correction_summed)
        nll_finished = nlls_summed
    else:
        if fit_range is not None:
            with data.set_data_range(fit_range):
                probs = model.pdf(data, norm_range=fit_range)
        else:
            probs = model.pdf(data)
        log_probs = znp.log(
            probs + znp.asarray(1e-307, dtype=znp.float64)
        )  # minor offset to avoid NaNs from log(0)
        nll = _nll_calc_unbinned_tf(
            log_probs=log_probs,
            weights=data.weights,  # already None for unweighted data
            log_offset=log_offset,
        )
        nll_finished = nll
    return nll_finished
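The 1e-307 offset matters because an event falling where the PDF evaluates to zero would give log(0) = -inf (with NaN gradients) and poison the whole sum; the additive constant caps the contribution near log(1e-307) ≈ -707 instead. A toy illustration:

import numpy as np

probs = np.array([0.5, 0.0, 0.25])
print(-np.sum(np.log(probs)))            # inf (and a log-of-zero warning)
print(-np.sum(np.log(probs + 1e-307)))   # finite, dominated by ~707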
Example #8
def test_sum_histogram_pdf():
    bins1 = 5
    bins2 = 7
    counts = znp.random.uniform(high=1, size=(bins1, bins2))  # generate counts
    counts2 = np.random.normal(loc=5, size=(bins1, bins2))
    counts3 = (znp.linspace(0, 10, num=bins1)[:, None] *
               znp.linspace(0, 5, num=bins2)[None, :])
    binnings = [
        zfit.binned.RegularBinning(bins1, 0, 10, name="obs1"),
        zfit.binned.RegularBinning(bins2, -10, 7, name="obs2"),
    ]
    obs = zfit.Space(obs=["obs1", "obs2"], binning=binnings)

    data = BinnedData.from_tensor(space=obs,
                                  values=counts,
                                  variances=znp.ones_like(counts) * 1.3)
    data2 = BinnedData.from_tensor(obs, counts2)
    data3 = BinnedData.from_tensor(obs, counts3)

    pdf = zfit.pdf.HistogramPDF(data=data, extended=znp.sum(counts))
    pdf2 = zfit.pdf.HistogramPDF(data=data2, extended=znp.sum(counts2))
    pdf3 = zfit.pdf.HistogramPDF(data=data3, extended=znp.sum(counts3))
    assert len(pdf.ext_pdf(data)) > 0
    pdf_sum = zfit.pdf.BinnedSumPDF(pdfs=[pdf, pdf2, pdf3], obs=obs)

    probs = pdf_sum.counts(data)
    true_sum_counts = counts + counts2 + counts3
    np.testing.assert_allclose(true_sum_counts, probs)
    nsamples = 100_000_000
    sample = pdf_sum.sample(n=nsamples)
    np.testing.assert_allclose(true_sum_counts,
                               sample.values() / nsamples *
                               pdf_sum.get_yield(),
                               rtol=0.03)

    # integrate
    true_integral = znp.sum(true_sum_counts)
    integral = pdf_sum.ext_integrate(limits=obs)
    assert pytest.approx(float(true_integral)) == float(integral)
Example #9
    def _precompile(self):
        do_subtr = self._options.get("subtr_const", False)
        if do_subtr:
            if do_subtr is not True:
                self._options["subtr_const_value"] = do_subtr
            log_offset = self._options.get("subtr_const_value")
            if log_offset is None:
                from zfit import run

                run.assert_executing_eagerly()  # the first offset determination runs eagerly
                nevents_tot = znp.sum([d._approx_nevents for d in self.data])
                log_offset_sum = (
                    self._call_value(
                        data=self.data,
                        model=self.model,
                        fit_range=self.fit_range,
                        constraints=self.constraints,
                        # we're presumably not at the minimum yet,
                        # so the loss will still decrease
                        log_offset=z.convert_to_tensor(0.0),
                    )
                    - 1000.0
                )
                log_offset = tf.stop_gradient(-znp.divide(log_offset_sum, nevents_tot))
                self._options["subtr_const_value"] = log_offset
Example #10
    def _approx_nevents(self):
        return znp.sum(self.values())
Example #11
    def nevents(self):
        return znp.sum(self.values())
Example #12
    def _rel_counts(self, x, norm=None):
        values = self._counts_with_modifiers(x, norm)
        return values / znp.sum(values)
Example #13
    def tot_variances(x):
        nonlocal count
        count += 1
        return znp.sum(x.variances)
Example #14
def test_hypotest(benchmark, n_bins, hypotest, eager):
    """Benchmark the performance of pyhf.utils.hypotest() for various numbers of bins and different backends.

    Args:
        benchmark: pytest benchmark
        backend: `pyhf` tensorlib given by pytest parameterization
        n_bins: `list` of number of bins given by pytest parameterization

    Returns:
        None
    """
    source = generate_source_static(n_bins)

    signp = source["bindata"]["sig"]
    bkgnp = source["bindata"]["bkg"]
    uncnp = source["bindata"]["bkgerr"]
    datanp = source["bindata"]["data"]

    if "pyhf" in hypotest:
        hypotest = hypotest_pyhf
        if eager:
            pyhf.set_backend("numpy")
        else:
            pyhf.set_backend("jax")

        pdf = uncorrelated_background(signp, bkgnp, uncnp)
        data = datanp + pdf.config.auxdata
        benchmark(hypotest, pdf, data)
    elif hypotest == "zfit":

        with zfit.run.set_graph_mode(not eager):

            hypotest = hypotest_zfit
            obs = zfit.Space(
                "signal",
                binning=zfit.binned.RegularBinning(n_bins,
                                                   -0.5,
                                                   n_bins + 0.5,
                                                   name="signal"),
            )
            zdata = zfit.data.BinnedData.from_tensor(obs, datanp)
            zmcsig = zfit.data.BinnedData.from_tensor(obs, signp)
            zmcbkg = zfit.data.BinnedData.from_tensor(obs, bkgnp)

            shapesys = {
                f"shapesys_{i}": zfit.Parameter(f"shapesys_{i}", 1, 0.1, 10)
                for i in range(n_bins)
            }
            bkgmodel = BinnedTemplatePDFV1(zmcbkg, sysshape=shapesys)
            # sigyield = zfit.Parameter('sigyield', znp.sum(zmcsig.values()))
            mu = zfit.Parameter("mu", 1, 0.1, 10)
            # sigmodeltmp = BinnedTemplatePDFV1(zmcsig)
            sigyield = zfit.ComposedParameter(
                "sigyield",
                lambda params: params["mu"] * znp.sum(zmcsig.values()),
                params={"mu": mu},
            )
            sigmodel = BinnedTemplatePDFV1(zmcsig, extended=sigyield)
            zmodel = BinnedSumPDF([sigmodel, bkgmodel])
            unc = np.array(uncnp) / np.array(bkgnp)
            constraint = zfit.constraint.GaussianConstraint(
                list(shapesys.values()),
                np.ones_like(unc).tolist(), unc)
            nll = zfit.loss.ExtendedBinnedNLL(zmodel,
                                              zdata,
                                              constraints=constraint)

            minimizer = zfit.minimize.Minuit(tol=1e-3, gradient=False)

            # warm up: build and cache the compiled graph so the benchmark
            # measures steady-state performance
            nll.value()
            nll.value()
            nll.gradient()
            nll.gradient()
            benchmark(hypotest, minimizer, nll)
    assert True
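For context, the pytest-benchmark fixture used above calls the supplied function many times and records timing statistics while returning the function's result; a minimal standalone sketch:

def test_sum_benchmark(benchmark):
    result = benchmark(sum, range(1000))  # benchmark(callable, *args)
    assert result == 499500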
Example #15
    def _rel_counts(self, x, norm=None):
        values = self._data.values()
        return values / znp.sum(values)
Example #16
def create_poly(x, polys, coeffs, recurrence):
    degree = len(coeffs) - 1
    polys = do_recurrence(x, polys=polys, degree=degree, recurrence=recurrence)
    sum_polys = znp.sum([coeff * poly for coeff, poly in zip(coeffs, polys)],
                        axis=0)
    return sum_polys
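A self-contained sketch of the pattern (this do_recurrence is a simplified stand-in for illustration, not zfit's helper): the recurrence extends the first two polynomials up to the requested degree, e.g. Chebyshev T_{n+1}(x) = 2 x T_n(x) - T_{n-1}(x), and create_poly then sums them with their coefficients.

import numpy as np

def do_recurrence(x, polys, degree, recurrence):
    polys = list(polys)
    while len(polys) <= degree:
        polys.append(recurrence(polys[-2], polys[-1], x))
    return polys

chebyshev = lambda pprev, pcur, x: 2 * x * pcur - pprev
x = np.linspace(-1, 1, 5)
polys = do_recurrence(x, [np.ones_like(x), x], degree=3, recurrence=chebyshev)
coeffs = [1.0, 0.5, 0.2, 0.1]
sum_polys = np.sum([c * p for c, p in zip(coeffs, polys)], axis=0)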
Example #17
def test_simple_examples_1D():
    import zfit.data
    import zfit.z.numpy as znp

    bkgnp = [50.0, 60.0]
    signp = [5.0, 10.0]
    datanp = [60.0, 80.0]
    uncnp = [5.0, 12.0]

    serialized = ("""{
                            "channels": [
                                { "name": "singlechannel",
                                  "samples": [
                                    { "name": "signal",
                                    """ + f"""

              "data": {signp},
              """
                  """
                                                          "modifiers": [ { "name": "mu", "type": "normfactor", "data": null} ]
                                                        },
                                                        { "name": "background",
                                                        """
                  f'"data": {bkgnp},'
                  """
                                                          "modifiers": [ {"name": "uncorr_bkguncrt", "type": "shapesys",
                                                          """
                  f'"data": {uncnp}'
                  """
                                                      } ]
                                                    }
                                                  ]
                                                }
                                            ],
                                            "observations": [
                                                {
                                                """
                  f'"name": "singlechannel", "data": {datanp}'
                  """
                                                        }
                                                    ],
                                                    "measurements": [
                                                        { "name": "Measurement", "config": {"poi": "mu", "parameters": []} }
                                                    ],
                                                    "version": "1.0.0"
                                                    }""")

    obs = zfit.Space("signal",
                     binning=zfit.binned.RegularBinning(2, 0, 2,
                                                        name="signal"))
    zdata = zfit.data.BinnedData.from_tensor(obs, datanp)
    zmcsig = zfit.data.BinnedData.from_tensor(obs, signp)
    zmcbkg = zfit.data.BinnedData.from_tensor(obs, bkgnp)

    shapesys = {
        f"shapesys_{i}": zfit.Parameter(f"shapesys_{i}", 1, 0.1, 10)
        for i in range(2)
    }
    bkgmodel = BinnedTemplatePDFV1(zmcbkg, sysshape=shapesys)
    # sigyield = zfit.Parameter('sigyield', znp.sum(zmcsig.values()))
    mu = zfit.Parameter("mu", 1, 0.1, 10)
    # sigmodeltmp = BinnedTemplatePDFV1(zmcsig)
    sigyield = zfit.ComposedParameter(
        "sigyield",
        lambda params: params["mu"] * znp.sum(zmcsig.values()),
        params={"mu": mu},
    )
    sigmodel = BinnedTemplatePDFV1(zmcsig, extended=sigyield)
    zmodel = BinnedSumPDF([sigmodel, bkgmodel])
    unc = np.array(uncnp) / np.array(bkgnp)
    nll = zfit.loss.ExtendedBinnedNLL(
        zmodel,
        zdata,
        constraints=zfit.constraint.GaussianConstraint(list(shapesys.values()),
                                                       [1, 1], unc),
    )
    # print(nll.value())
    # print(nll.gradient())
    # minimizer = zfit.minimize.ScipyLBFGSBV1()
    # minimizer = zfit.minimize.IpyoptV1()
    minimizer = zfit.minimize.Minuit(tol=1e-5, gradient=False)
    result = minimizer.minimize(nll)
    result.hesse(method="hesse_np")
    # result.errors()
    print(result)
    # mu_z = sigmodel.get_yield() / znp.sum(zmcsig.values())
    zbestfit = zfit.run(result.params)
    errors = [p["hesse"]["error"] for p in result.params.values()]
    # print('minval actual:', nll.value(), nll.gradient())
    # errors = np.ones(3) * 0.1
    # print('mu:', mu_z)

    spec = json.loads(serialized)

    workspace = pyhf.Workspace(spec)
    model = workspace.model(poi_name="mu")

    pars = model.config.suggested_init()
    data = workspace.data(model)

    model.logpdf(pars, data)

    bestfit_pars, twice_nll = pyhf.infer.mle.fit(data,
                                                 model,
                                                 return_fitted_val=True)
    diff = (bestfit_pars - zbestfit) / errors
    # print(bestfit_pars)
    np.testing.assert_allclose(diff, 0, atol=1e-3)