Example #1
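These snippets are excerpted from the zfit test suite and omit their module-level imports. A rough sketch of the imports they rely on is given below; the exact module path for BinnedTemplatePDFV1 is an assumption and may differ between zfit versions.

import numpy as np
import numpy
import hist
import boost_histogram as bh
import matplotlib.pyplot as plt
import mplhep
import pytest

import zfit
import zfit.z.numpy as znp  # numpy-like interface of the zfit backend
from zfit._data.binneddatav1 import BinnedData
from zfit.loss import ExtendedBinnedNLL
from zfit.pdf import BinnedSumPDF, HistogramPDF, SplineMorphingPDF

# assumed location; BinnedTemplatePDFV1 may live elsewhere depending on the zfit version
from zfit.models.template import BinnedTemplatePDFV1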
def test_binned_extended_simple(Loss):
    # zfit.run.set_graph_mode(False)
    counts = np.random.uniform(high=1, size=(10, 20))  # generate counts
    counts2 = np.random.normal(loc=5, size=(10, 20))
    counts3 = np.linspace(0, 10, num=10)[:, None] * np.linspace(
        0, 5, num=20)[None, :]
    binning = [
        zfit.binned.VariableBinning(
            [-10, -5, 5, 7, 10, 15, 17, 21, 23.5, 27, 30], name="obs1"),
        zfit.binned.RegularBinning(20, 0, 10, name="obs2"),
    ]
    obs = zfit.Space(obs=["obs1", "obs2"], binning=binning)

    mc1 = BinnedData.from_tensor(space=obs,
                                 values=counts,
                                 variances=znp.ones_like(counts) * 1.3)
    mc2 = BinnedData.from_tensor(obs, counts2)
    mc3 = BinnedData.from_tensor(obs, counts3)
    sum_counts = counts + counts2 + counts3
    observed_data = BinnedData.from_tensor(space=obs,
                                           values=sum_counts,
                                           variances=(sum_counts + 0.5)**2)

    pdf = BinnedTemplatePDFV1(data=mc1)
    pdf2 = BinnedTemplatePDFV1(data=mc2)
    pdf3 = BinnedTemplatePDFV1(data=mc3)
    pdf_sum = BinnedSumPDF(pdfs=[pdf, pdf2, pdf3], obs=obs)

    nll = Loss(pdf_sum, data=observed_data)
    nll.value(), nll.gradient()  # TODO: add some check?
Example #2
def test_from_and_to_hist():
    h3 = hist.NamedHist(
        hist.axis.Regular(25, -3.5, 3, name="x", flow=False),
        hist.axis.Regular(21, -4, 5, name="y", flow=False),
        storage=hist.storage.Weight(),
    )

    x2 = np.random.randn(1_000)
    y2 = 0.5 * np.random.randn(1_000)

    h3.fill(x=x2, y=y2)

    from zfit._data.binneddatav1 import BinnedData

    for _ in range(10):  # make sure this works many times
        h1 = BinnedData.from_hist(h3)
        np.testing.assert_allclose(h1.variances(), h3.variances())
        np.testing.assert_allclose(h1.values(), h3.values())
        unbinned = h1.to_unbinned()
        assert unbinned.value().shape[1] == 2
        assert unbinned.value().shape[0] == unbinned.weights.shape[0]

        h3recreated = h1.to_hist()
        assert h3recreated == h3

    bh3 = bh.Histogram(h1)
    np.testing.assert_allclose(h1.variances(), bh3.variances())
    np.testing.assert_allclose(h1.values(), bh3.values())
Example #3
    def sample(
        self, n: int = None, limits: ztyping.LimitsType = None
    ) -> ZfitBinnedData:
        """Draw a random binned sample from the PDF.

        Args:
            n: |@doc:pdf.sample.n| Number of samples to draw.
               For an extended PDF, the argument is optional and defaults to
               the Poisson-fluctuated expected number of events, i.e. the yield. |@docend:pdf.sample.n|
            limits: |@doc:pdf.sample.limits| Limits of the sampling.
               By default, this is the same as the default space of the PDF. |@docend:pdf.sample.limits|

        Returns:
            ``ZfitBinnedData``: Sampled dataset
        """
        if n is None:
            if self.is_extended:
                n = znp.random.poisson(self.get_yield(), size=1)
            else:
                raise ValueError(
                    f"n cannot be None when sampling from {self} unless the PDF is extended."
                )
        original_limits = limits
        limits = self._check_convert_limits(limits)
        values = self._call_sample(n, limits)
        if not isinstance(values, ZfitBinnedData):
            values = BinnedData.from_tensor(space=limits, values=values, variances=None)
        if isinstance(original_limits, ZfitSpace):
            values = values.with_obs(original_limits)
        return values
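A minimal usage sketch of this method, assuming `pdf` is an extended binned zfit PDF such as the ones constructed in the other examples (all names here are illustrative):

# n omitted: for an extended PDF the sample size is Poisson-fluctuated around the yield
sample_fluct = pdf.sample()
# explicit n: roughly 10_000 events distributed over the bins
sample_fixed = pdf.sample(n=10_000)
print(sample_fluct.values().shape)  # one count per bin, matching the PDF's binning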
Example #4
    def __init__(
        self,
        data: ztyping.BinnedDataInputType,
        extended: Optional[ztyping.ExtendedInputType] = None,
        norm: Optional[ztyping.NormInputType] = None,
        name: str = "HistogramPDF",
    ) -> None:
        """Binned PDF resembling a histogram.

        Simple histogram PDF that can be used to model a histogram as a PDF.


        Args:
            data: Histogram to be used as PDF.
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               `ext_*` methods and the `counts` (for binned PDFs). |@docend:pdf.init.extended|
               |@doc:pdf.init.extended.auto| If `True`,
               the PDF will be extended automatically, using the total number
               of events in the histogram as the yield.
               This is the default. |@docend:pdf.init.extended.auto|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:model.init.name| Human-readable name
               or label of the PDF for better identification.
               It serves no programmatic purpose beyond identification. |@docend:model.init.name|
        """
        if extended is None:
            extended = True
        if not isinstance(data, ZfitBinnedData):
            if isinstance(data, PlottableHistogram):
                from zfit._data.binneddatav1 import BinnedData

                data = BinnedData.from_hist(data)
            else:
                raise TypeError(
                    "data must be of type PlottableHistogram (UHI) or ZfitBinnedData"
                )

        params = {}
        if extended is True:
            self._automatically_extended = True
            extended = znp.sum(data.values())
        else:
            self._automatically_extended = False
        super().__init__(obs=data.space,
                         extended=extended,
                         norm=norm,
                         params=params,
                         name=name)
        self._data = data
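For illustration, a short sketch of constructing such a PDF directly from a `hist` histogram; the histogram filled here is made up and only serves as input:

import numpy as np
import hist
import zfit

h = hist.Hist(hist.axis.Regular(30, -3, 3, name="x"), storage=hist.storage.Weight())
h.fill(x=np.random.normal(size=5_000))

# extended=True (the default) uses the total histogram content as the yield
pdf = zfit.pdf.HistogramPDF(h, extended=True)
print(pdf.is_extended)  # True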
Example #5
def test_binned_template_pdf():
    bins1 = 5
    bins2 = 7
    counts = np.random.uniform(high=1, size=(bins1, bins2))  # generate counts
    counts2 = np.random.normal(loc=5, size=(bins1, bins2))
    counts3 = (np.linspace(0, 10, num=bins1)[:, None] *
               np.linspace(0, 5, num=bins2)[None, :])
    binning = [
        zfit.binned.RegularBinning(bins1, 0, 10, name="obs1"),
        zfit.binned.RegularBinning(bins2, -10, 7, name="obs2"),
    ]
    obs = zfit.Space(obs=["obs1", "obs2"], binning=binning)

    data = BinnedData.from_tensor(space=obs,
                                  values=counts,
                                  variances=znp.ones_like(counts) * 1.3)
    data2 = BinnedData.from_tensor(obs, counts2)
    data3 = BinnedData.from_tensor(obs, counts3)

    pdf = BinnedTemplatePDFV1(data=data, extended=np.sum(counts))
    pdf2 = BinnedTemplatePDFV1(data=data2, extended=np.sum(counts2))
    pdf3 = BinnedTemplatePDFV1(data=data3, extended=np.sum(counts3))
    assert len(pdf.ext_pdf(data)) > 0
    pdf_sum = BinnedSumPDF(pdfs=[pdf, pdf2, pdf3], obs=obs)

    probs = pdf_sum.counts(data)
    true_sum_counts = counts + counts2 + counts3
    np.testing.assert_allclose(true_sum_counts, probs)
    nsamples = 100_000_000
    sample = pdf_sum.sample(n=nsamples)
    np.testing.assert_allclose(true_sum_counts,
                               sample.values() / nsamples *
                               pdf_sum.get_yield(),
                               rtol=0.03)

    # integrate
    true_integral = znp.sum(true_sum_counts)
    integral = pdf_sum.ext_integrate(limits=obs)
    assert pytest.approx(float(true_integral)) == float(integral)
Example #6
    def to_binneddata(self, **kwargs) -> zfit.data.BinnedData:
        """Create an Asimov dataset as `BinnedData` using either `counts` (for extended PDFs) or `rel_counts`.

        Args:
            **kwargs: Arguments passed to `counts` or `rel_counts`.

        Returns:
            BinnedData: Binned data representing the Asimov dataset of this PDF.
        """
        values = self.values(**kwargs)
        data = BinnedData.from_tensor(space=self.space, values=values)
        return data
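A sketch of how this can be used to build an Asimov dataset, assuming `pdf` is a binned zfit PDF that exposes this method (for instance the HistogramPDF from the sketch above):

asimov = pdf.to_binneddata()    # expected counts per bin, no statistical fluctuations
print(np.sum(asimov.values()))  # for an extended PDF this equals the yield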
Example #7
def test_morphing_templates2D(alphas):
    bins1 = 10
    bins2 = 7
    shape = (bins1, bins2)
    counts1 = np.random.uniform(70, high=100, size=shape)  # generate counts
    # counts1 = np.random.uniform(70, high=100, size=bins1)  # generate counts
    counts = [
        counts1 - np.random.uniform(high=20, size=shape),
        counts1,
        counts1 + np.random.uniform(high=20, size=shape),
    ]
    if alphas is not None:
        counts.append(counts1 + np.random.uniform(high=5, size=shape))
    binning1 = zfit.binned.VariableBinning(
        sorted(np.random.uniform(0, 10, size=bins1 + 1)), name="obs1"
    )
    binning2 = zfit.binned.RegularBinning(bins2, 0, 10, name="obs2")
    obs1 = zfit.Space(obs="obs1", binning=binning1)
    obs2 = zfit.Space(obs="obs2", binning=binning2)
    obs = obs1 * obs2
    datasets = [BinnedData.from_tensor(obs, count) for count in counts]
    pdfs = [
        BinnedTemplatePDFV1(data=data, extended=np.sum(data.values()))
        for data in datasets
    ]
    if alphas is not None:
        pdfs = {a: p for a, p in zip(alphas, pdfs)}
    alpha = zfit.Parameter("alpha", 0, -5, 5)
    morph = SplineMorphingPDF(alpha=alpha, hists=pdfs)
    if alphas is None:
        alphas = [-1, 0, 1]
    for i, a in enumerate(alphas):
        alpha.set_value(a)
        np.testing.assert_allclose(morph.counts(), counts[i])
        assert pytest.approx(np.sum(counts[i])) == zfit.run(morph.get_yield())
        if len(alphas) > i + 1:
            alpha.set_value((a + alphas[i + 1]) / 2)
            max_dist = (counts[i] - counts[i + 1])**2 + 5  # tolerance
            max_dist *= 1.1  # not strict, it can be a bit higher
            numpy.testing.assert_array_less((morph.counts() - counts[i])**2,
                                            max_dist)
            numpy.testing.assert_array_less(
                (morph.counts() - counts[i + 1])**2, max_dist)
Example #8
    def __init__(
        self,
        model: ztyping.BinnedPDFInputType,
        data: ztyping.BinnedDataInputType,
        constraints: ConstraintsInputType = None,
        options: OptionsInputType = None,
    ):
        model = convert_to_container(model)
        data = convert_to_container(data)
        from zfit._data.binneddatav1 import BinnedData

        data = [
            BinnedData.from_hist(d) if
            (isinstance(d, PlottableHistogram)
             and not isinstance(d, ZfitBinnedData)) else d for d in data
        ]
        not_binned_pdf = [
            mod for mod in model if not isinstance(mod, ZfitBinnedPDF)
        ]
        not_binned_data = [
            dat for dat in data if not isinstance(dat, ZfitBinnedData)
        ]
        not_binned_pdf_msg = (
            "The following PDFs are not binned but need to be. They can be wrapped in a "
            f"BinnedFromUnbinnedPDF: {not_binned_pdf} ")
        not_binned_data_msg = (
            "The following datasets are not binned but need to be. They can be converted to binned data "
            f"using the `to_binned` method: {not_binned_data}")
        error_msg = ""
        if not_binned_pdf:
            error_msg += not_binned_pdf_msg
        if not_binned_data:
            error_msg += not_binned_data_msg
        if error_msg:
            raise ValueError(error_msg)

        super().__init__(
            model=model,
            data=data,
            constraints=constraints,
            fit_range=None,
            options=options,
        )
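A short sketch of constructing such a binned loss; uhi-compatible histograms passed as data are converted to BinnedData automatically. Here `pdf` is assumed to be an extended binned zfit PDF and `h` a hist.Hist whose axes match `pdf.space`, as in the earlier sketches:

nll = zfit.loss.ExtendedBinnedNLL(model=pdf, data=h)  # h is converted to BinnedData internally
print(nll.value())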
Example #9
    def _convert_input_binned_x(self, x, none_is_space=None):
        if x is None and none_is_space:
            return self.space
        if isinstance(x, uhi.typing.plottable.PlottableHistogram) and not isinstance(
            x, ZfitBinnedData
        ):
            x = BinnedData.from_hist(x)
        if not isinstance(x, ZfitBinnedData):
            if not isinstance(x, ZfitSpace):
                if not isinstance(x, ZfitUnbinnedData):
                    try:
                        x = Data.from_tensor(obs=self.obs, tensor=x)
                    except Exception as error:
                        raise TypeError(
                            f"Data for {self} has to be binned data, not {x}."
                            f" Unbinned data is also accepted, but the conversion to it failed"
                            f" with the following error: {error}"
                        ) from error

            # TODO: should we allow spaces here?
        return x
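As a consequence of this conversion, binned PDF methods accept several input types for `x`. A sketch, with `pdf` and `h` as assumed in the earlier sketches:

pdf.pdf(h)                    # uhi PlottableHistogram, converted via BinnedData.from_hist
pdf.pdf(pdf.to_binneddata())  # ZfitBinnedData is used as-is
pdf.counts()                  # None falls back to the PDF's own (binned) space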
Example #10
def test_from_and_to_binned():
    h3 = hist.Hist(
        hist.axis.Regular(3, -3, 3, name="x", flow=False),
        hist.axis.Regular(2, -5, 5, name="y", flow=False),
        storage=hist.storage.Weight(),
    )

    x2 = np.random.randn(1_000)
    y2 = 0.5 * np.random.randn(1_000)

    h3.fill(x=x2, y=y2)

    from zfit._data.binneddatav1 import BinnedData

    h1 = BinnedData.from_hist(h3)
    for _ in range(10):  # make sure this works many times
        unbinned = h1.to_unbinned()
        binned = unbinned.to_binned(space=h1.space)
        np.testing.assert_allclose(binned.values(), h1.values())
        # we can't test the variances, this info is lost
        h1 = binned
    bh3 = bh.Histogram(h1)
    np.testing.assert_allclose(h1.values(), bh3.values())
Example #11
def test_with_obs():
    from zfit._data.binneddatav1 import BinnedData

    h1 = hist.NamedHist(
        hist.axis.Regular(25, -3.5, 3, name="x", flow=False),
        hist.axis.Regular(21, -4, 5, name="y", flow=False),
        hist.axis.Regular(15, -2, 1, name="z", flow=False),
        storage=hist.storage.Weight(),
    )

    x2 = np.random.randn(1_000)
    y2 = 0.5 * np.random.randn(1_000)
    z2 = 0.3 * np.random.randn(1_000)

    h1.fill(x=x2, y=y2, z=z2)
    h = BinnedData.from_hist(h1)
    obs = ("x", "y", "z")
    obs2 = ("y", "x", "z")
    assert obs == h.obs
    h2 = h.with_obs(obs2)
    assert h2.obs == obs2
    np.testing.assert_allclose(h.values()[:, 3, 5], h2.values()[3, :, 5])
    np.testing.assert_allclose(h.variances()[:, 3, 5], h2.variances()[3, :, 5])
Example #12
def test_morphing_templates(alphas):
    bins1 = 15
    irregular_str = "irregular templates" if alphas is not None else ""

    counts1 = np.random.uniform(70, high=100, size=bins1)  # generate counts
    counts = [
        counts1 - np.random.uniform(high=20, size=bins1),
        counts1,
        counts1 + np.random.uniform(high=20, size=bins1),
    ]
    if alphas is not None:
        counts.append(counts1 + np.random.uniform(high=5, size=bins1))
    binning = zfit.binned.RegularBinning(bins1, 0, 10, name="obs1")
    obs = zfit.Space(obs="obs1", binning=binning)
    histos = [BinnedData.from_tensor(obs, count) for count in counts]
    pdfs = [zfit.pdf.HistogramPDF(h) for h in histos]
    if alphas is not None:
        pdfs = {a: p for a, p in zip(alphas, pdfs)}
    alpha = zfit.Parameter("alpha", 0, -5, 5)
    morph = SplineMorphingPDF(alpha=alpha, hists=pdfs)
    if alphas is None:
        alphas = [-1, 0, 1]
    for i, a in enumerate(alphas):
        alpha.set_value(a)
        np.testing.assert_allclose(morph.counts(), counts[i])
        if len(alphas) > i + 1:
            alpha.set_value((a + alphas[i + 1]) / 2)
            max_dist = (counts[i] - counts[i + 1])**2 + 5  # tolerance
            max_dist *= 1.1  # not strict, it can be a bit higher
            numpy.testing.assert_array_less((morph.counts() - counts[i])**2,
                                            max_dist)
            numpy.testing.assert_array_less(
                (morph.counts() - counts[i + 1])**2, max_dist)

    import matplotlib.cm as cm

    amin, amax = -2, 2
    n = 5

    template_alphas = np.array(list(alphas))

    for do_3d in [True, False]:
        plt.figure()
        if do_3d:
            ax = plt.gcf().add_subplot(111, projection="3d")
        else:
            ax = plt.gca()
        plotstyle = "3d plot" if do_3d else "hist plot"
        plt.title(f"Morphing with splines {irregular_str} {plotstyle}")

        for a in list(znp.linspace(amin, amax, n * 2)) + list(template_alphas):
            normed_a = (a - amin) / (amax - amin) / 1.3  # 1.3 is a scaling factor
            color = cm.get_cmap("winter")(normed_a)
            alpha.set_value(a)
            histo = morph.ext_pdf(None)
            histo = BinnedData.from_tensor(obs, histo)
            histo = histo.to_hist()
            values = histo.values()
            x = histo.axes.edges[0][:-1]
            y = np.broadcast_to(a, values.shape)
            z = values
            label = None
            if do_3d:
                ax.step(x, y, z, color=color, where="pre", label=label)
            else:
                if np.min((a - template_alphas)**2) < 0.0001:
                    label = f"alpha={a}"
                mplhep.histplot(histo, label=label, color=color)
        ax.set_xlabel("observable")
        ax.set_ylabel("alpha")
        if do_3d:
            ax.set_zlabel("ext_pdf")
        plt.legend()
        pytest.zfit_savefig()
Example #13
def test_binned_template_pdf_bbfull(TemplateLikePDF):
    bins1 = 15
    bins2 = 10

    counts1 = np.random.uniform(high=150,
                                size=(bins1, bins2))  # generate counts
    counts2 = np.random.normal(loc=50, size=(bins1, bins2))
    counts3 = (np.linspace(10, 100, num=bins1)[:, None] *
               np.linspace(10, 500, num=bins2)[None, :])
    binnings = [
        zfit.binned.RegularBinning(bins1, 0, 10, name="obs1"),
        zfit.binned.RegularBinning(bins2, -10, 7, name="obs2"),
    ]
    binning = binnings
    obs = zfit.Space(obs=["obs1", "obs2"], binning=binning)

    mc1 = BinnedData.from_tensor(space=obs,
                                 values=counts1,
                                 variances=znp.ones_like(counts1) * 1.3)
    mc2 = BinnedData.from_tensor(obs, counts2)
    mc3 = BinnedData.from_tensor(obs, counts3)

    counts_mc = counts1 + counts2 + counts3

    counts1_data = np.random.uniform(high=150,
                                     size=(bins1, bins2))  # generate counts
    counts2_data = np.random.normal(loc=50, size=(bins1, bins2))
    counts3_data = (np.linspace(10, 100, num=bins1)[:, None] *
                    np.linspace(20, 490, num=bins2)[None, :])
    counts_data = counts1_data + counts2_data + counts3_data
    counts_data *= 1.1
    data = BinnedData.from_tensor(space=obs, values=counts_data)

    pdf1 = TemplateLikePDF(data=mc1, sysshape=True)
    pdf2 = TemplateLikePDF(data=mc2, sysshape=True)
    pdf3 = TemplateLikePDF(data=mc3, sysshape=True)
    assert len(pdf1.counts()) > 0
    pdf_sum = BinnedSumPDF(pdfs=[pdf1, pdf2, pdf3], obs=obs)
    counts1_flat = np.reshape(counts1, -1)
    constraints1 = zfit.constraint.GaussianConstraint(
        pdf1.params.values(),
        observation=np.ones_like(counts1_flat),
        uncertainty=np.sqrt(counts1_flat) / counts1_flat,
    )
    counts2_flat = np.reshape(counts2, -1)
    constraints2 = zfit.constraint.GaussianConstraint(
        pdf2.params.values(),
        observation=np.ones_like(counts2_flat),
        uncertainty=np.sqrt(counts2_flat) / counts2_flat,
    )
    counts3_flat = np.reshape(counts3, -1)
    constraints3 = zfit.constraint.GaussianConstraint(
        pdf3.params.values(),
        observation=np.ones_like(counts3_flat),
        uncertainty=np.sqrt(counts3_flat) / counts3_flat,
    )
    # constraints2 = zfit.constraint.PoissonConstraint(pdf2.params.values(), np.reshape(counts2, -1))
    # constraints3 = zfit.constraint.PoissonConstraint(pdf3.params.values(), np.reshape(counts3, -1))
    loss = ExtendedBinnedNLL(
        pdf_sum, data, constraints=[constraints1, constraints2, constraints3])
    # for i in progressbar.progressbar(range(1000000)):
    loss.value()
    loss.gradient()
    print("start minimization")
    minimizer = zfit.minimize.Minuit(verbosity=8, gradient=False)
    minimizer.minimize(loss)

    counts = pdf_sum.counts()
    np.testing.assert_array_less(
        counts_mc, counts_data
    )  # this is an assumption, if that is wrong, the test is flawed
    np.testing.assert_array_less(counts, counts_data)
    np.testing.assert_array_less(counts_mc, counts)
Example #14
File: data.py Project: zfit/zfit
    def to_binned(self, space):
        from zfit._data.binneddatav1 import BinnedData

        return BinnedData.from_unbinned(space=space, data=self)
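A minimal sketch of converting an unbinned dataset to binned data with this method; the observable, binning, and sample here are illustrative:

import numpy as np
import zfit

obs = zfit.Space("x", limits=(-3, 3))
data = zfit.Data.from_numpy(obs=obs, array=np.random.normal(size=(1_000, 1)))

binned_space = zfit.Space("x", binning=zfit.binned.RegularBinning(20, -3, 3, name="x"))
binned = data.to_binned(binned_space)  # BinnedData with 20 bins
print(np.sum(binned.values()))         # total number of (weighted) entries, ~1_000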