Example #1
0
 def __init__(self, rate):
     """
     Store the rate and create the backend Poisson distribution object.

     Args:
         rate (:obj:`tensor` or :obj:`float`): The mean of the Poisson
             distribution (the expected number of events)
     """
     backend, _ = get_backend()
     self.rate = rate
     # the active tensor backend supplies the underlying distribution
     self._pdf = backend.poisson_dist(rate)
Example #2
0
 def _precompute(self):
     """
     Cache the backend tensors derived from ``self._lumi_mask``.

     Nothing is computed when the parameter viewer has no index selection.
     """
     if self.param_viewer.index_selection:
         backend, _ = get_backend()
         raw_mask = backend.astensor(self._lumi_mask)
         # a falsy batch_size (e.g. None) tiles the mask exactly once
         tiling = (1, 1, self.batch_size or 1, 1)
         self.lumi_mask = backend.tile(raw_mask, tiling)
         self.lumi_mask_bool = backend.astensor(self.lumi_mask, dtype="bool")
         self.lumi_default = backend.ones(self.lumi_mask.shape)
Example #3
0
 def _joint_logpdf(terms, batch_size=None):
     """
     Add up a sequence of log-pdf terms.

     Args:
         terms: indexable sequence of log-pdf terms.
         batch_size: when ``None`` and exactly two terms are given, the
             terms are added directly with ``+``.

     Returns:
         The only term if there is just one, the direct sum of two terms
         in the unbatched case, otherwise the backend sum over axis 0 of
         the stacked terms.
     """
     backend, _ = get_backend()
     n_terms = len(terms)
     if n_terms == 1:
         return terms[0]
     if batch_size is None and n_terms == 2:
         return terms[0] + terms[1]
     stacked = backend.stack(terms)
     return backend.sum(stacked, axis=0)
Example #4
0
 def expected_data(self, pars, include_auxdata=True):
     """
     Return the expected data for the given parameter values.

     Args:
         pars: parameter values handed to the main (and, if requested,
             constraint) pdf builders.
         include_auxdata (:obj:`bool`): when ``False`` only the expected
             main data are returned. Default is ``True``.

     Returns:
         Tensor: the expected main data, concatenated with the expected
         auxiliary data when ``include_auxdata`` is true.
     """
     tensorlib, _ = get_backend()
     expected_main = tensorlib.astensor(
         [self._make_main_pdf(pars).expected_data()])
     # Return early so the constraint pdf is only built and evaluated when
     # its expectation is actually part of the result.
     if not include_auxdata:
         return expected_main
     aux_data = tensorlib.astensor(
         [self._make_constraint_pdf(pars).expected_data()])
     return tensorlib.concatenate([expected_main, aux_data])
Example #5
0
    def expected_value(self, nsigma):
        """
        Return the expected value of the test statistic.

        The value is the percentile of the stored samples at the rank given
        by the Normal CDF of ``nsigma``, using linear interpolation.

        Examples:

            >>> import pyhf
            >>> import numpy.random as random
            >>> random.seed(0)
            >>> pyhf.set_backend("numpy")
            >>> mean = pyhf.tensorlib.astensor([5])
            >>> std = pyhf.tensorlib.astensor([1])
            >>> normal = pyhf.probability.Normal(mean, std)
            >>> samples = normal.sample((100,))
            >>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples)
            >>> dist.expected_value(nsigma=1)
            6.15094381...

            >>> import pyhf
            >>> import numpy.random as random
            >>> random.seed(0)
            >>> pyhf.set_backend("numpy")
            >>> model = pyhf.simplemodels.uncorrelated_background(
            ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
            ... )
            >>> init_pars = model.config.suggested_init()
            >>> par_bounds = model.config.suggested_bounds()
            >>> fixed_params = model.config.suggested_fixed()
            >>> mu_test = 1.0
            >>> pdf = model.make_pdf(pyhf.tensorlib.astensor(init_pars))
            >>> samples = pdf.sample((100,))
            >>> dist = pyhf.infer.calculators.EmpiricalDistribution(
            ...     pyhf.tensorlib.astensor(
            ...         [
            ...             pyhf.infer.test_statistics.qmu_tilde(
            ...                 mu_test, sample, model, init_pars, par_bounds, fixed_params
            ...             )
            ...             for sample in samples
            ...         ]
            ...     )
            ... )
            >>> n_sigma = pyhf.tensorlib.astensor([-2, -1, 0, 1, 2])
            >>> dist.expected_value(n_sigma)
            array([0.00000000e+00, 0.00000000e+00, 5.53671231e-04, 8.29987137e-01,
                   2.99592664e+00])

        Args:
            nsigma (:obj:`int` or :obj:`tensor`): The number of standard deviations.

        Returns:
            Float: The expected value of the test statistic.
        """
        backend, _ = get_backend()
        samples = self.samples
        # translate the number of standard deviations into a percentile rank
        percentile_rank = backend.normal_cdf(nsigma) * 100
        return backend.percentile(
            samples, percentile_rank, interpolation="linear"
        )
Example #6
0
    def expected_pvalues(self, sig_plus_bkg_distribution,
                         bkg_only_distribution):
        r"""
        Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
        median significance of variations of the signal strength from the
        background only hypothesis :math:`\left(\mu=0\right)` at
        :math:`(-2,-1,0,1,2)\sigma`.

        The :math:`p`-values are evaluated for every sample of the
        background-only distribution and their percentiles at the Normal
        :math:`(-2,-1,0,1,2)\sigma` ranks are returned.

        Example:

            >>> import pyhf
            >>> import numpy.random as random
            >>> random.seed(0)
            >>> pyhf.set_backend("numpy")
            >>> model = pyhf.simplemodels.uncorrelated_background(
            ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
            ... )
            >>> observations = [51, 48]
            >>> data = observations + model.config.auxdata
            >>> mu_test = 1.0
            >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
            ...     data, model, ntoys=100, track_progress=False
            ... )
            >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
            >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
            >>> CLs_exp_band
            [array(0.), array(0.), array(0.08403955), array(0.21892596), array(0.86072977)]

        Args:
            sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
              The distribution for the signal + background hypothesis.
            bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
              The distribution for the background-only hypothesis.

        Returns:
            Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
            corresponding to the :math:`\mathrm{CL}_{s+b}`,
            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
        """
        backend, _ = get_backend()

        # p-values evaluated at every background-only sample
        per_sample = []
        for sampled_stat in bkg_only_distribution.samples:
            per_sample.append(
                self.pvalues(sampled_stat, sig_plus_bkg_distribution,
                             bkg_only_distribution))
        per_sample = backend.astensor(per_sample)

        # percentiles for -2, -1, 0, 1, 2 standard deviations of the Normal distribution
        sigma_ranks = backend.astensor(
            [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681])

        bands = backend.transpose(
            backend.percentile(per_sample, sigma_ranks, axis=0))

        expected = []
        for band in bands:
            expected.append([backend.astensor(entry) for entry in band])
        return expected
Example #7
0
    def __init__(self, loc, scale):
        """
        Store the parameters and create the backend Normal distribution object.

        Args:
            loc (:obj:`tensor` or :obj:`float`): The mean of the Normal distribution
            scale (:obj:`tensor` or :obj:`float`): The standard deviation of the Normal distribution
        """
        backend, _ = get_backend()
        self.loc, self.scale = loc, scale
        # the active tensor backend supplies the underlying distribution
        self._pdf = backend.normal_dist(loc, scale)
Example #8
0
def _final_objective(pars, data, fixed_values, fixed_idx, variable_idx,
                     do_stitch, objective, pdf):
    """
    Evaluate ``objective`` on the (optionally stitched) parameters.

    Args:
        pars: values of the parameters being varied.
        data: data forwarded to ``objective``.
        fixed_values: values of the fixed parameters (used when stitching).
        fixed_idx: indices of the fixed parameters.
        variable_idx: indices of the varied parameters.
        do_stitch: when true, fixed and varied values are stitched into one
            parameter tensor before calling ``objective``.
        objective: callable invoked as ``objective(pars, data, pdf)``.
        pdf: model forwarded to ``objective``.

    Returns:
        The first element of the value returned by ``objective``.
    """
    log.debug('jitting function')
    backend, _ = get_backend()
    free_pars = backend.astensor(pars)
    if not do_stitch:
        return objective(free_pars, data, pdf)[0]

    viewer = _TensorViewer([fixed_idx, variable_idx])
    fixed_pars = backend.astensor(fixed_values, dtype='float')
    full_pars = viewer.stitch([fixed_pars, free_pars])
    return objective(full_pars, data, pdf)[0]
Example #9
0
    def pvalue(self, value):
        """
        Compute the :math:`p`-value for a given value of the test statistic.

        This is the fraction of stored samples that are greater than or
        equal to ``value``.

        Examples:

            >>> import pyhf
            >>> import numpy.random as random
            >>> random.seed(0)
            >>> pyhf.set_backend("numpy")
            >>> mean = pyhf.tensorlib.astensor([5])
            >>> std = pyhf.tensorlib.astensor([1])
            >>> normal = pyhf.probability.Normal(mean, std)
            >>> samples = normal.sample((100,))
            >>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples)
            >>> dist.pvalue(7)
            array(0.02)

            >>> import pyhf
            >>> import numpy.random as random
            >>> random.seed(0)
            >>> pyhf.set_backend("numpy")
            >>> model = pyhf.simplemodels.uncorrelated_background(
            ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
            ... )
            >>> init_pars = model.config.suggested_init()
            >>> par_bounds = model.config.suggested_bounds()
            >>> fixed_params = model.config.suggested_fixed()
            >>> mu_test = 1.0
            >>> pdf = model.make_pdf(pyhf.tensorlib.astensor(init_pars))
            >>> samples = pdf.sample((100,))
            >>> test_stat_dist = pyhf.infer.calculators.EmpiricalDistribution(
            ...     pyhf.tensorlib.astensor(
            ...         [pyhf.infer.test_statistics.qmu_tilde(mu_test, sample, model, init_pars, par_bounds, fixed_params) for sample in samples]
            ...     )
            ... )
            >>> test_stat_dist.pvalue(test_stat_dist.samples[9])
            array(0.3)

        Args:
            value (:obj:`float`): The test statistic value.

        Returns:
            Tensor: The integrated probability to observe a value at least as large as the observed one.

        """
        backend, _ = get_backend()
        # 1 where a sample is at least as large as ``value``, 0 elsewhere
        indicator = backend.where(
            self.samples >= value, backend.astensor(1), backend.astensor(0)
        )
        n_at_least = backend.sum(indicator)
        n_samples = backend.shape(self.samples)[0]
        return backend.astensor(n_at_least / n_samples)
Example #10
0
def wrap_objective(objective,
                   data,
                   pdf,
                   stitch_pars,
                   do_grad=False,
                   jit_pieces=None):
    """
    Wrap the objective function for the minimization.

    Args:
        objective (:obj:`func`): objective function
        data (:obj:`list`): observed data
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
        stitch_pars (:obj:`func`): callable that stitches parameters, see :func:`pyhf.optimize.common.shim`.
          It is not called by this wrapper.
        do_grad (:obj:`bool`): enable autodifferentiation mode. Default is off.
        jit_pieces (:obj:`dict`): mapping providing the ``'fixed_values'``,
          ``'fixed_idx'``, ``'variable_idx'`` and ``'do_stitch'`` entries that
          are forwarded to the jitted objective on every call.

    Returns:
        objective_and_grad (:obj:`func`): tensor backend wrapped objective,gradient pair
    """
    # the backend handle itself is not needed by the closure below
    _tensorlib, _ = get_backend()

    def func(pars):
        # pick the jitted callable matching the requested mode
        if do_grad:
            jitted = _jitted_objective_and_grad
        else:
            jitted = _jitted_objective
        # NB: the index collections are converted to tuples so that they are
        # hashable (static_argnums)
        return jitted(
            pars,
            data,
            jit_pieces['fixed_values'],
            tuple(jit_pieces['fixed_idx']),
            tuple(jit_pieces['variable_idx']),
            jit_pieces['do_stitch'],
            objective,
            pdf,
        )

    return func
Example #11
0
    def __init__(self, samples):
        """
        Empirical distribution.

        The given samples are flattened with the backend ``ravel`` and kept
        on the instance as ``samples``.

        Args:
            samples (:obj:`tensor`): The test statistics sampled from the distribution.

        Returns:
            ~pyhf.infer.calculators.EmpiricalDistribution: The empirical distribution of the test statistic.

        """
        backend, _ = get_backend()
        flattened = backend.ravel(samples)
        self.samples = flattened
Example #12
0
 def _precompute(self):
     """
     Cache the tiled normsys mask, its default tensor and, in the
     unbatched case, the reshaped parameter indices.

     Nothing is computed when the parameter viewer has no index selection.
     """
     if self.param_viewer.index_selection:
         backend, _ = get_backend()
         bool_mask = backend.astensor(self._normsys_mask, dtype="bool")
         # a falsy batch_size (e.g. None) tiles the mask exactly once
         tiling = (1, 1, self.batch_size or 1, 1)
         self.normsys_mask = backend.tile(bool_mask, tiling)
         self.normsys_default = backend.ones(self.normsys_mask.shape)
         if self.batch_size is None:
             # store the concatenated indices as a single column
             flat_indices = self.param_viewer.indices_concatenated
             self.indices = backend.reshape(flat_indices, (-1, 1))
Example #13
0
    def expected_pvalues(self, sig_plus_bkg_distribution,
                         bkg_only_distribution):
        r"""
        Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
        median significance of variations of the signal strength from the
        background only hypothesis :math:`\left(\mu=0\right)` at
        :math:`(-2,-1,0,1,2)\sigma`.

        Example:

            >>> import pyhf
            >>> pyhf.set_backend("numpy")
            >>> model = pyhf.simplemodels.uncorrelated_background(
            ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
            ... )
            >>> observations = [51, 48]
            >>> data = observations + model.config.auxdata
            >>> mu_test = 1.0
            >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
            ...     data, model, test_stat="qtilde"
            ... )
            >>> _ = asymptotic_calculator.teststatistic(mu_test)
            >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
            >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
            >>> CLs_exp_band
            [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]

        Args:
            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
              The distribution for the signal + background hypothesis.
            bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
              The distribution for the background-only hypothesis.

        Returns:
            Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
            corresponding to the :math:`\mathrm{CL}_{s+b}`,
            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
        """
        # the backend handle itself is not needed below
        _tensorlib, _ = get_backend()

        # Reusing ``pvalues`` is easier than repeating the CLs calculation here
        expected_stats = [
            bkg_only_distribution.expected_value(n_sigma)
            for n_sigma in (2, 1, 0, -1, -2)
        ]
        per_stat = [
            self.pvalues(expected_stat, sig_plus_bkg_distribution,
                         bkg_only_distribution)
            for expected_stat in expected_stats
        ]
        # regroup from one tuple per test statistic to one list per p-value kind
        return [list(band) for band in zip(*per_stat)]
Example #14
0
    def pvalues(self, teststat, sig_plus_bkg_distribution,
                bkg_only_distribution):
        r"""
        Calculate the :math:`p`-values for the observed test statistic under the
        signal + background and background-only model hypotheses.

        :math:`\mathrm{CL}_{s}` is the ratio of the signal + background
        :math:`p`-value to the background-only :math:`p`-value.

        Example:

            >>> import pyhf
            >>> import numpy.random as random
            >>> random.seed(0)
            >>> pyhf.set_backend("numpy")
            >>> model = pyhf.simplemodels.uncorrelated_background(
            ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
            ... )
            >>> observations = [51, 48]
            >>> data = observations + model.config.auxdata
            >>> mu_test = 1.0
            >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
            ...     data, model, ntoys=100, track_progress=False
            ... )
            >>> q_tilde = toy_calculator.teststatistic(mu_test)
            >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
            >>> CLsb, CLb, CLs = toy_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist)
            >>> CLsb, CLb, CLs
            (array(0.03), array(0.37), array(0.08108108))

        Args:
            teststat (:obj:`tensor`): The test statistic.
            sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
              The distribution for the signal + background hypothesis.
            bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
              The distribution for the background-only hypothesis.

        Returns:
            Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
            corresponding to the :math:`\mathrm{CL}_{s+b}`,
            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
        """
        backend, _ = get_backend()

        p_sig_plus_bkg = sig_plus_bkg_distribution.pvalue(teststat)
        p_bkg_only = bkg_only_distribution.pvalue(teststat)
        cls_ratio = backend.astensor(p_sig_plus_bkg / p_bkg_only)
        return (p_sig_plus_bkg, p_bkg_only, cls_ratio)
Example #15
0
    def pvalue(self, value):
        r"""
        The :math:`p`-value for a given value of the test statistic corresponding
        to signal strength :math:`\mu` and Asimov strength :math:`\mu'` as
        defined in Equations (59) and (57) of :xref:`arXiv:1007.1727`

        .. math::

            p_{\mu} = 1-F\left(q_{\mu}\middle|\mu'\right) = 1- \Phi\left(\sqrt{q_{\mu}} - \frac{\left(\mu-\mu'\right)}{\sigma}\right)

        with Equation (29)

        .. math::

            \frac{(\mu-\mu')}{\sigma} = \sqrt{\Lambda}= \sqrt{q_{\mu,A}}

        given the observed test statistics :math:`q_{\mu}` and :math:`q_{\mu,A}`.

        Entries for which ``value`` is below ``self.cutoff`` are returned as NaN.

        Example:

            >>> import pyhf
            >>> pyhf.set_backend("numpy")
            >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0)
            >>> bkg_dist.pvalue(0.0)
            array(0.5)

        Args:
            value (:obj:`float`): The test statistic value.

        Returns:
            Tensor: The integrated probability to observe a value at least as large as the observed one.

        """
        backend, _ = get_backend()

        # cdf(-x) is used in place of 1 - cdf(x) for the right-tail p-value
        # because it is numerically more stable
        tail_prob = backend.normal_cdf(-(value - self.shift))
        nan_fill = backend.ones(backend.shape(tail_prob)) * float("nan")
        above_cutoff = backend.astensor(value >= self.cutoff, dtype="bool")
        return backend.where(above_cutoff, tail_prob, nan_fill)
Example #16
0
    def apply(self, pars):
        """
        Scale the lumi mask by the lumi parameter values selected from ``pars``.

        Positions where the boolean lumi mask is false take the value of
        ``self.lumi_default``. ``None`` is returned when the parameter viewer
        has no index selection.

        Returns:
            modification tensor: Shape (n_modifiers, n_global_samples, n_alphas, n_global_bin)
        """
        if not self.param_viewer.index_selection:
            return None

        backend, _ = get_backend()
        lumi_values = self.param_viewer.get(pars)
        # batched parameters carry an additional axis in the contraction
        subscripts = (
            'msab,x->msab' if self.batch_size is None else 'msab,xa->msab'
        )
        scaled = backend.einsum(subscripts, self.lumi_mask, lumi_values)
        return backend.where(self.lumi_mask_bool, scaled, self.lumi_default)
Example #17
0
def wrap_objective(objective,
                   data,
                   pdf,
                   stitch_pars,
                   do_grad=False,
                   jit_pieces=None):
    """
    Wrap the objective function for the minimization.

    Args:
        objective (:obj:`func`): objective function
        data (:obj:`list`): observed data
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
        stitch_pars (:obj:`func`): callable that stitches parameters, see :func:`pyhf.optimize.common.shim`.
        do_grad (:obj:`bool`): enable autodifferentiation mode. Default is off.
        jit_pieces: accepted but not used by this wrapper.

    Returns:
        objective_and_grad (:obj:`func`): tensor backend wrapped objective,gradient pair
    """
    tensorlib, _ = get_backend()

    def func(pars):
        tensor_pars = tensorlib.astensor(pars)
        if not do_grad:
            # value-only mode: just evaluate the objective
            return objective(stitch_pars(tensor_pars), data, pdf)[0]

        with tf.GradientTape() as tape:
            tape.watch(tensor_pars)
            nll = objective(stitch_pars(tensor_pars), data, pdf)
        # NB: tape.gradient can return a sparse gradient (tf.IndexedSlices)
        # when tf.gather is used and this needs to be converted back to a
        # tensor to be usable as a value
        raw_grad = tape.gradient(nll, tensor_pars)
        return nll.numpy()[0], tf.convert_to_tensor(raw_grad)

    return func
Example #18
0
    def pvalues(self, teststat, sig_plus_bkg_distribution,
                bkg_only_distribution):
        r"""
        Calculate the :math:`p`-values for the observed test statistic under the
        signal + background and background-only model hypotheses.

        :math:`\mathrm{CL}_{s}` is the ratio of the signal + background
        :math:`p`-value to the background-only :math:`p`-value.

        Example:

            >>> import pyhf
            >>> pyhf.set_backend("numpy")
            >>> model = pyhf.simplemodels.uncorrelated_background(
            ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
            ... )
            >>> observations = [51, 48]
            >>> data = observations + model.config.auxdata
            >>> mu_test = 1.0
            >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
            ...     data, model, test_stat="qtilde"
            ... )
            >>> q_tilde = asymptotic_calculator.teststatistic(mu_test)
            >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
            >>> CLsb, CLb, CLs = asymptotic_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist)
            >>> CLsb, CLb, CLs
            (array(0.02332502), array(0.4441594), array(0.05251497))

        Args:
            teststat (:obj:`tensor`): The test statistic.
            sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
              The distribution for the signal + background hypothesis.
            bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
              The distribution for the background-only hypothesis.

        Returns:
            Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
            corresponding to the :math:`\mathrm{CL}_{s+b}`,
            :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
        """
        backend, _ = get_backend()

        p_sig_plus_bkg = sig_plus_bkg_distribution.pvalue(teststat)
        p_bkg_only = bkg_only_distribution.pvalue(teststat)
        cls_ratio = backend.astensor(p_sig_plus_bkg / p_bkg_only)
        return (p_sig_plus_bkg, p_bkg_only, cls_ratio)
Example #19
0
    def apply(self, pars):
        """
        Interpolate the normsys parameters selected from ``pars``.

        Positions where ``self.normsys_mask`` is false take the value of
        ``self.normsys_default``. ``None`` is returned when the parameter
        viewer has no index selection.

        Returns:
            modification tensor: Shape (n_modifiers, n_global_samples, n_alphas, n_global_bin)
        """
        if not self.param_viewer.index_selection:
            return None

        backend, _ = get_backend()
        # the unbatched case passes the stored indices explicitly
        if self.batch_size is None:
            viewer_args = (pars, self.indices)
        else:
            viewer_args = (pars,)
        alphas = self.param_viewer.get(*viewer_args)
        interpolated = self.interpolator(alphas)

        # masking is applied explicitly instead of relying on a numerical no-op
        return backend.where(
            self.normsys_mask, interpolated, self.normsys_default
        )
Example #20
0
def _qmu_like(
    mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False
):
    """
    Clipped version of _tmu_like where the returned test statistic
    is 0 if muhat > mu else tmu_like_stat.

    If the lower bound of the POI is 0 this automatically implements
    qmu_tilde. Otherwise this is qmu (no tilde).

    When ``return_fitted_pars`` is true the pair of fitted parameter
    tensors obtained from ``_tmu_like`` is returned alongside the statistic.
    """
    backend, _ = get_backend()
    tmu_stat, fitted = _tmu_like(
        mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=True
    )
    first_fit_pars, second_fit_pars = fitted
    # the POI entry of the second fit result decides whether to clip to zero
    fitted_poi = second_fit_pars[pdf.config.poi_index]
    qmu_stat = backend.where(fitted_poi > mu, backend.astensor(0.0), tmu_stat)
    if not return_fitted_pars:
        return qmu_stat
    return qmu_stat, (first_fit_pars, second_fit_pars)
Example #21
0
def wrap_objective(objective,
                   data,
                   pdf,
                   stitch_pars,
                   do_grad=False,
                   jit_pieces=None):
    """
    Wrap the objective function for the minimization.

    Args:
        objective (:obj:`func`): objective function
        data (:obj:`list`): observed data
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
        stitch_pars (:obj:`func`): callable that stitches parameters, see :func:`pyhf.optimize.common.shim`.
        do_grad (:obj:`bool`): enable autodifferentiation mode. Default is off.
        jit_pieces: accepted but not used by this wrapper.

    Returns:
        objective_and_grad (:obj:`func`): tensor backend wrapped objective,gradient pair
    """
    tensorlib, _ = get_backend()

    def func(pars):
        tensor_pars = tensorlib.astensor(pars)
        if do_grad:
            # gradients are requested with respect to the input parameters
            tensor_pars.requires_grad = True
        nll = objective(stitch_pars(tensor_pars), data, pdf)
        if not do_grad:
            return nll[0]
        gradient = torch.autograd.grad(nll, tensor_pars)[0]
        return nll.detach().numpy()[0], gradient

    return func
Example #22
0
    def cdf(self, value):
        """
        Compute the value of the cumulative distribution function for a given value of the test statistic.

        The Normal CDF is evaluated at ``value`` minus ``self.shift``.

        Example:

            >>> import pyhf
            >>> pyhf.set_backend("numpy")
            >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0)
            >>> bkg_dist.cdf(0.0)
            0.5

        Args:
            value (:obj:`float`): The test statistic value.

        Returns:
            Float: The integrated probability to observe a test statistic less than or equal to the observed ``value``.

        """
        backend, _ = get_backend()
        shifted_value = value - self.shift
        return backend.normal_cdf(shifted_value)
Example #23
0
    def slow(self, auxdata, pars):
        """
        Sum the constraint log-pdf terms one parameter set at a time.

        The auxiliary data are consumed in ``self.config.auxdata_order``;
        each parameter set takes the next ``n_parameters`` entries.

        Args:
            auxdata: sliceable auxiliary data, laid out in ``auxdata_order``.
            pars: sliceable parameter values, indexed with the slice reported
                by ``self.config.par_slice``.

        Returns:
            The backend sum over all constraint terms, or ``0`` when there
            are no constrained parameter sets.

        Raises:
            ValueError: if a parameter set reports a ``pdf_type`` other than
                ``'normal'`` or ``'poisson'``.
        """
        tensorlib, _ = pyhf.get_backend()
        # iterate over all constraints; the order doesn't matter
        start_index = 0
        constraint_terms = []
        for cname in self.config.auxdata_order:
            parset = self.config.param_set(cname)
            parslice = self.config.par_slice(cname)
            end_index = start_index + parset.n_parameters
            thisauxdata = auxdata[start_index:end_index]
            start_index = end_index
            if parset.pdf_type == 'normal':
                paralphas = pars[parslice]
                # parameter sets without explicit sigmas use unit widths
                sigmas = (
                    parset.sigmas
                    if hasattr(parset, 'sigmas')
                    else tensorlib.ones(paralphas.shape)
                )
                sigmas = tensorlib.astensor(sigmas)
                constraint_term = tensorlib.normal_logpdf(
                    thisauxdata, paralphas, sigmas
                )
            elif parset.pdf_type == 'poisson':
                # element-wise product of the parameters and their factors
                paralphas = tensorlib.product(
                    tensorlib.stack(
                        [pars[parslice], tensorlib.astensor(parset.factors)]
                    ),
                    axis=0,
                )
                constraint_term = tensorlib.poisson_logpdf(thisauxdata, paralphas)
            else:
                # Fail loudly: falling through would either hit an unbound
                # local or silently re-add the previous iteration's term.
                raise ValueError(
                    f"unsupported constraint pdf_type {parset.pdf_type!r} "
                    f"for parameter set {cname!r}"
                )
            constraint_terms.append(constraint_term)
        if not constraint_terms:
            return 0
        return tensorlib.sum(tensorlib.concatenate(constraint_terms))
Example #24
0
    def apply(self, pars):
        """
        Scale the normfactor mask by the parameter values selected from ``pars``.

        Positions where the boolean normfactor mask is false take the value
        of ``self.normfactor_default``. ``None`` is returned when the
        parameter viewer has no index selection.

        Returns:
            modification tensor: Shape (n_modifiers, n_global_samples, n_alphas, n_global_bin)
        """
        if not self.param_viewer.index_selection:
            return None
        backend, _ = get_backend()
        factors = self.param_viewer.get(pars)
        # batched parameters carry an additional axis in the contraction
        subscripts = (
            'msab,m->msab' if self.batch_size is None else 'msab,ma->msab'
        )
        scaled = backend.einsum(subscripts, self.normfactor_mask, factors)
        return backend.where(
            self.normfactor_mask_bool, scaled, self.normfactor_default
        )
Example #25
0
def _tmu_like(
    mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False
):
    """
    Basic Profile Likelihood test statistic.

    The statistic is the fixed-POI fit value minus the unconstrained fit
    value, clipped from below at 0.

    If the lower bound of the POI is 0 this automatically implements
    tmu_tilde. Otherwise this is tmu (no tilde).

    When ``return_fitted_pars`` is true the fitted parameters of the
    fixed-POI fit and of the unconstrained fit are returned as a pair
    alongside the statistic.
    """
    backend, _ = get_backend()
    fixed_poi_pars, fixed_poi_val = fixed_poi_fit(
        mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
    )
    free_fit_pars, free_fit_val = fit(
        data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
    )
    # negative differences are clipped to zero
    clipped = backend.clip(fixed_poi_val - free_fit_val, 0.0, max_value=None)
    tmu_stat = backend.astensor(clipped)
    if not return_fitted_pars:
        return tmu_stat
    return tmu_stat, (fixed_poi_pars, free_fit_pars)
Example #26
0
 def logpdf(self, pars, data):
     """
     Evaluate the total log-probability of ``data`` for ``pars``.

     Args:
         pars: parameter values handed to the main and constraint pdf
             builders.
         data: pair ``(maindata, auxdata)``.

     Returns:
         Tensor: one-element tensor holding the sum of the main and
         constraint log-probabilities.
     """
     backend, _ = get_backend()
     main_obs, aux_obs = data
     total = self._make_main_pdf(pars).log_prob(main_obs)
     total = total + self._make_constraint_pdf(pars).log_prob(aux_obs)
     return backend.astensor([total])
Example #27
0
def hypotest(
    poi_test,
    data,
    pdf,
    init_pars=None,
    par_bounds=None,
    fixed_params=None,
    calctype="asymptotics",
    return_tail_probs=False,
    return_expected=False,
    return_expected_set=False,
    return_calculator=False,
    **kwargs,
):
    r"""
    Compute :math:`p`-values and test statistics for a single value of the parameter of interest.

    See :py:class:`~pyhf.infer.calculators.AsymptoticCalculator` and :py:class:`~pyhf.infer.calculators.ToyCalculator` on additional keyword arguments to be specified.

    Example:
        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
        >>> mu_test = 1.0
        >>> CLs_obs, CLs_exp_band = pyhf.infer.hypotest(
        ...     mu_test, data, model, return_expected_set=True, test_stat="qtilde"
        ... )
        >>> CLs_obs
        array(0.05251497)
        >>> CLs_exp_band
        [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]

    Args:
        poi_test (Number or Tensor): The value of the parameter of interest (POI)
        data (Number or Tensor): The data considered
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``
        init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization.
            ``None`` (or an empty sequence) selects ``pdf.config.suggested_init()``.
        par_bounds (:obj:`tensor`): The extrema of values the model parameters
            are allowed to reach in the fit.
            The shape should be ``(n, 2)`` for ``n`` model parameters.
            ``None`` (or an empty sequence) selects ``pdf.config.suggested_bounds()``.
        fixed_params (:obj:`tensor` of :obj:`bool`): The flag to set a parameter constant to its starting
            value during minimization.
            ``None`` (or an empty sequence) selects ``pdf.config.suggested_fixed()``.
        calctype (:obj:`str`): The calculator to create. Choose either 'asymptotics' (default) or 'toybased'.
        return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
        return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
        return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
        return_calculator (:obj:`bool`): Bool for returning calculator.
        kwargs: Additional keyword arguments forwarded to the calculator.
            When ``test_stat`` is ``"q0"`` the :math:`\mathrm{CL}_{s+b}` values
            are reported in place of :math:`\mathrm{CL}_{s}` (observed and
            expected) and the tail probabilities reduce to
            :math:`\left[\mathrm{CL}_{b}\right]`.

    Returns:
        Tuple of Floats and lists of Floats and
        a :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
        or :py:class:`~pyhf.infer.calculators.ToyCalculator` instance:

            - :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
              the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
              defined in :xref:`arXiv:1007.1727` as

            .. math::

                \mathrm{CL}_{s} = \frac{\mathrm{CL}_{s+b}}{\mathrm{CL}_{b}} = \frac{p_{s+b}}{1-p_{b}}

            to protect against excluding signal models in which there is little
            sensitivity. In the case that :math:`\mathrm{CL}_{s} \leq \alpha`
            the given signal model is excluded.

            - :math:`\left[\mathrm{CL}_{s+b}, \mathrm{CL}_{b}\right]`: The
              signal + background model hypothesis :math:`p`-value

            .. math::

                \mathrm{CL}_{s+b} = p_{s+b}
                = p\left(q \geq q_{\mathrm{obs}}\middle|s+b\right)
                = \int\limits_{q_{\mathrm{obs}}}^{\infty} f\left(q\,\middle|s+b\right)\,dq
                = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|\mu'\right)

            and 1 minus the background only model hypothesis :math:`p`-value

            .. math::

                \mathrm{CL}_{b} = 1- p_{b}
                = p\left(q \geq q_{\mathrm{obs}}\middle|b\right)
                = 1 - \int\limits_{-\infty}^{q_{\mathrm{obs}}} f\left(q\,\middle|b\right)\,dq
                = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|0\right)

            for signal strength :math:`\mu` and model hypothesis signal strength
            :math:`\mu'`, where the cumulative density functions
            :math:`F\left(q(\mu)\,\middle|\mu'\right)` are given by Equations (57)
            and (65) of :xref:`arXiv:1007.1727` for upper-limit-like test
            statistic :math:`q \in \{q_{\mu}, \tilde{q}_{\mu}\}`.
            Only returned when ``return_tail_probs`` is ``True``.

            .. note::

                The definitions of the :math:`\mathrm{CL}_{s+b}` and
                :math:`\mathrm{CL}_{b}` used are based on profile likelihood
                ratio test statistics.
                This procedure is common in the LHC-era, but differs from
                procedures used in the LEP and Tevatron eras, as briefly
                discussed in :math:`\S` 3.8 of :xref:`arXiv:1007.1727`.

            - :math:`\mathrm{CL}_{s,\mathrm{exp}}`: The expected :math:`\mathrm{CL}_{s}`
              value corresponding to the test statistic under the background
              only hypothesis :math:`\left(\mu=0\right)`.
              Only returned when ``return_expected`` is ``True``.

            - :math:`\mathrm{CL}_{s,\mathrm{exp}}` band: The set of expected
              :math:`\mathrm{CL}_{s}` values corresponding to the median
              significance of variations of the signal strength from the
              background only hypothesis :math:`\left(\mu=0\right)` at
              :math:`(-2,-1,0,1,2)\sigma`.
              That is, the :math:`p`-values that satisfy Equation (89) of
              :xref:`arXiv:1007.1727`

            .. math::

                \mathrm{band}_{N\sigma} = \mu' + \sigma\,\Phi^{-1}\left(1-\alpha\right) \pm N\sigma

            for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`.
            These values define the boundaries of an uncertainty band sometimes
            referred to as the "Brazil band".
            Only returned when ``return_expected_set`` is ``True``.

            - a calculator: The calculator instance used in the computation of the :math:`p`-values.
              Either an instance of :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
              or :py:class:`~pyhf.infer.calculators.ToyCalculator`,
              depending on the value of ``calctype``.
              Only returned when ``return_calculator`` is ``True``.

    """

    def _is_unset(value):
        # ``value or default`` is unusable here: the truth value of a
        # multi-element array/tensor is ambiguous (NumPy raises ValueError)
        # and a one-element falsy array such as ``[0.0]`` would be silently
        # discarded. Decide on None / emptiness instead.
        if value is None:
            return True
        try:
            return len(value) == 0
        except TypeError:
            # Unsized objects (plain scalars, 0-d tensors) have an
            # unambiguous truth value, so keep the historical behaviour.
            return not value

    if _is_unset(init_pars):
        init_pars = pdf.config.suggested_init()
    if _is_unset(par_bounds):
        par_bounds = pdf.config.suggested_bounds()
    if _is_unset(fixed_params):
        fixed_params = pdf.config.suggested_fixed()

    _check_hypotest_prerequisites(pdf, data, init_pars, par_bounds,
                                  fixed_params)

    calc = utils.create_calculator(
        calctype,
        data,
        pdf,
        init_pars,
        par_bounds,
        fixed_params,
        **kwargs,
    )

    teststat = calc.teststatistic(poi_test)
    sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(
        poi_test)

    tb, _ = get_backend()
    CLsb_obs, CLb_obs, CLs_obs = tuple(
        tb.astensor(pvalue) for pvalue in calc.pvalues(
            teststat, sig_plus_bkg_distribution, bkg_only_distribution))
    CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
        sig_plus_bkg_distribution, bkg_only_distribution)

    # For the discovery test statistic q0 the CLs+b values are reported in
    # place of CLs, both for the observed value and for the expected band.
    is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'

    _returns = [CLsb_obs if is_q0 else CLs_obs]
    if return_tail_probs:
        if is_q0:
            _returns.append([CLb_obs])
        else:
            _returns.append([CLsb_obs, CLb_obs])

    pvalues_exp_band = [
        tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp)
    ]
    # Index 2 of the five-entry band is the median (0 sigma) expectation; it
    # always precedes the full band in the returned tuple.
    if return_expected:
        _returns.append(tb.astensor(pvalues_exp_band[2]))
    if return_expected_set:
        _returns.append(pvalues_exp_band)
    if return_calculator:
        _returns.append(calc)
    # Enforce a consistent return type of the observed CLs
    return tuple(_returns) if len(_returns) > 1 else _returns[0]
Example #28
0
def upperlimit(data,
               model,
               scan,
               level=0.05,
               return_results=False,
               **hypotest_kwargs):
    """
    Compute an upper limit interval ``(0, poi_up)`` on a single Parameter
    of Interest (POI) from a fixed scan over POI values.

    Example:
        >>> import numpy as np
        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
        >>> scan = np.linspace(0, 5, 21)
        >>> obs_limit, exp_limits, (scan, results) = pyhf.infer.intervals.upperlimit(
        ...     data, model, scan, return_results=True
        ... )
        >>> obs_limit
        array(1.01764089)
        >>> exp_limits
        [array(0.59576921), array(0.76169166), array(1.08504773), array(1.50170482), array(2.06654952)]

    Args:
        data (:obj:`tensor`): The observed data.
        model (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
        scan: The POI values to test (e.g. a NumPy array). It is iterated
            once and also reversed with ``scan[::-1]`` for the interpolation.
        level (:obj:`float`): The threshold value at which the interpolated
            results are evaluated.
        return_results (:obj:`bool`): Whether to also return the per-point results.
        hypotest_kwargs: Keyword arguments forwarded to every
         :class:`~pyhf.infer.hypotest` call to configure the fits.

    Returns:
        Tuple of Tensors:

            - Tensor: The observed upper limit on the POI.
            - List of five Tensors: The expected upper limits on the POI.
            - Tuple: The given ``scan`` together with the
              :class:`~pyhf.infer.hypotest` result at each tested POI value.
              Only returned when ``return_results`` is ``True``.
    """
    tb, _ = get_backend()

    # One hypothesis test per scan point; each result is
    # (observed value, five-entry expected band).
    results = []
    for poi_value in scan:
        results.append(
            hypotest(poi_value,
                     data,
                     model,
                     return_expected_set=True,
                     **hypotest_kwargs))

    observed_column = tb.astensor([[point[0]] for point in results])
    expected_columns = tb.astensor(
        [[point[1][band] for band in range(5)] for point in results])

    # After the transpose, row 0 holds the observed values and rows 1-5 the
    # expected band, each indexed by scan point.
    pvalue_rows = tb.concatenate([observed_column, expected_columns],
                                 axis=1).T

    # observed limit and the (0, +-1, +-2)sigma expected limits
    reversed_scan = scan[::-1]
    limits = []
    for row in range(6):
        limits.append(_interp(level, pvalue_rows[row][::-1], reversed_scan))
    obs_limit, *exp_limits = limits

    if not return_results:
        return obs_limit, exp_limits
    return obs_limit, exp_limits, (scan, results)
Example #29
0
def _interp(x, xp, fp):
    """
    Interpolate with ``numpy.interp`` and return the result as a backend tensor.

    Args:
        x: The coordinate(s) at which to evaluate the interpolant.
        xp: The sample coordinates. Either an object exposing ``tolist()``
            (array / tensor) or any plain sequence.
        fp: The sample values, same length as ``xp``; accepted forms as for ``xp``.

    Returns:
        The interpolated value(s), wrapped with the backend's ``astensor``.
    """
    tb, _ = get_backend()

    def _as_list(values):
        # Arrays/tensors are converted through their own ``tolist``; plain
        # sequences (list, tuple, ...) have no such method and are copied.
        to_list = getattr(values, "tolist", None)
        return to_list() if callable(to_list) else list(values)

    return tb.astensor(np.interp(x, _as_list(xp), _as_list(fp)))
Example #30
0
def fit(
    workspace,
    output_file,
    measurement,
    patch,
    value,
    backend,
    optimizer,
    optconf,
):
    """
    Perform a maximum likelihood fit for a given pyhf workspace.

    Example:

    .. code-block:: shell

        $ curl -sL https://git.io/JJYDE | pyhf fit --value

        \b
        {
            "mle_parameters": {
                "mu": [
                    0.00017298628839781602
                ],
                "uncorr_bkguncrt": [
                    1.0000015671710816,
                    0.9999665895859197
                ]
            },
            "twice_nll": 23.19636590468879
        }
    """
    # NOTE: the docstring above is deliberately left untouched; it appears to
    # double as command-line help text. Parameters, as used below:
    #   workspace   - path (or stream name) of the JSON workspace to load
    #   output_file - path to write the JSON result to; None echoes it instead
    #   measurement - measurement name passed to ``Workspace.model``
    #   patch       - iterable of JSON patch files applied when building the model
    #   value       - truthy to also report the fitted objective as "twice_nll"
    #   backend     - backend name; unrecognised names keep the current backend
    #   optimizer   - optional optimizer name looked up on ``optimize``
    #   optconf     - iterable of dicts merged into the optimizer's keyword args

    # set the backend if not NumPy
    if backend in ["pytorch", "torch"]:
        set_backend("pytorch", precision="64b")
    elif backend in ["tensorflow", "tf"]:
        set_backend("tensorflow", precision="64b")
    elif backend in ["jax"]:
        set_backend("jax")
    tensorlib, _ = get_backend()

    # Later dicts win when the same option name is given more than once.
    optconf = {
        opt_name: opt_value for item in optconf for opt_name, opt_value in item.items()
    }

    # set the new optimizer
    if optimizer:
        # Accept the attribute name as given, else the "<name>_optimizer"
        # spelling. The ``None`` default lets the fallback run even when the
        # first lookup would raise AttributeError.
        new_optimizer = getattr(optimize, optimizer, None) or getattr(
            optimize, f"{optimizer}_optimizer"
        )
        set_backend(tensorlib, new_optimizer(**optconf))

    with click.open_file(workspace, "r") as specstream:
        spec = json.load(specstream)
    ws = Workspace(spec)

    # Read every patch inside a context manager so no file handle is leaked.
    patches = []
    for pfile in patch:
        with click.open_file(pfile, "r") as patchstream:
            patches.append(json.load(patchstream))

    model = ws.model(
        measurement_name=measurement,
        patches=patches,
    )
    data = ws.data(model)

    fit_result = mle.fit(data, model, return_fitted_val=value)

    # With ``value`` set the fit returns (parameters, fitted value).
    _pars = fit_result[0] if value else fit_result
    bestfit_pars = {
        paramset_name: tensorlib.tolist(_pars[paramset_spec["slice"]])
        for paramset_name, paramset_spec in model.config.par_map.items()
    }

    result = {"mle_parameters": bestfit_pars}
    if value:
        result["twice_nll"] = tensorlib.tolist(fit_result[-1])

    if output_file is None:
        click.echo(json.dumps(result, indent=4, sort_keys=True))
    else:
        with open(output_file, "w+") as out_file:
            json.dump(result, out_file, indent=4, sort_keys=True)
        log.debug(f"Written to {output_file:s}")