def __init__(self, rate):
    """
    Store the rate and build the backend Poisson distribution object.

    Args:
        rate (:obj:`tensor` or :obj:`float`): The mean of the Poisson
         distribution (the expected number of events)
    """
    backend = get_backend()[0]
    self.rate = rate
    # The backend-specific distribution object is created once and kept.
    self._pdf = backend.poisson_dist(rate)
def _precompute(self):
    """
    Cache the backend tensors derived from ``self._lumi_mask``.

    Nothing is done when the parameter viewer has no index selection.
    Otherwise the mask is tiled along its third axis ``batch_size`` times
    (once when ``batch_size`` is falsy) and stored together with a boolean
    copy and an all-ones tensor of the same shape.
    """
    if self.param_viewer.index_selection:
        backend, _ = get_backend()
        repeats = (1, 1, self.batch_size or 1, 1)
        self.lumi_mask = backend.tile(backend.astensor(self._lumi_mask), repeats)
        self.lumi_mask_bool = backend.astensor(self.lumi_mask, dtype="bool")
        self.lumi_default = backend.ones(self.lumi_mask.shape)
def _joint_logpdf(terms, batch_size=None):
    """
    Combine a sequence of log-pdf terms into their sum.

    A single term is returned untouched; two unbatched terms are added
    directly; every other case stacks the terms and sums over the new
    leading axis.

    Args:
        terms (:obj:`list`): The individual log-pdf terms.
        batch_size (:obj:`int` or ``None``): Batch size; only checked against
         ``None`` to decide whether the two-term shortcut applies.

    Returns:
        The summed log-pdf.
    """
    backend, _ = get_backend()
    n_terms = len(terms)
    if n_terms == 1:
        return terms[0]
    if batch_size is None and n_terms == 2:
        return terms[0] + terms[1]
    stacked = backend.stack(terms)
    return backend.sum(stacked, axis=0)
def expected_data(self, pars, include_auxdata=True):
    """
    Return the expected data for the given parameter values.

    Args:
        pars: The parameter values handed to ``_make_main_pdf`` and
         ``_make_constraint_pdf``.
        include_auxdata (:obj:`bool`): When ``False`` only the expected main
         data are returned.

    Returns:
        Tensor: The expected main data, concatenated with the expected
        auxiliary data when ``include_auxdata`` is ``True``.
    """
    tensorlib, _ = get_backend()
    expected_main = tensorlib.astensor([self._make_main_pdf(pars).expected_data()])
    if not include_auxdata:
        return expected_main
    # Only build and evaluate the constraint pdf when its result is used.
    aux_data = tensorlib.astensor(
        [self._make_constraint_pdf(pars).expected_data()]
    )
    return tensorlib.concatenate([expected_main, aux_data])
def expected_value(self, nsigma):
    """
    Return the expected value of the test statistic.

    The value is the percentile of the stored samples at the rank given by
    the standard Normal CDF evaluated at ``nsigma``.

    Examples:

        >>> import pyhf
        >>> import numpy.random as random
        >>> random.seed(0)
        >>> pyhf.set_backend("numpy")
        >>> mean = pyhf.tensorlib.astensor([5])
        >>> std = pyhf.tensorlib.astensor([1])
        >>> normal = pyhf.probability.Normal(mean, std)
        >>> samples = normal.sample((100,))
        >>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples)
        >>> dist.expected_value(nsigma=1)
        6.15094381...

        >>> import pyhf
        >>> import numpy.random as random
        >>> random.seed(0)
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> init_pars = model.config.suggested_init()
        >>> par_bounds = model.config.suggested_bounds()
        >>> fixed_params = model.config.suggested_fixed()
        >>> mu_test = 1.0
        >>> pdf = model.make_pdf(pyhf.tensorlib.astensor(init_pars))
        >>> samples = pdf.sample((100,))
        >>> dist = pyhf.infer.calculators.EmpiricalDistribution(
        ...     pyhf.tensorlib.astensor(
        ...         [
        ...             pyhf.infer.test_statistics.qmu_tilde(
        ...                 mu_test, sample, model, init_pars, par_bounds, fixed_params
        ...             )
        ...             for sample in samples
        ...         ]
        ...     )
        ... )
        >>> n_sigma = pyhf.tensorlib.astensor([-2, -1, 0, 1, 2])
        >>> dist.expected_value(n_sigma)
        array([0.00000000e+00, 0.00000000e+00, 5.53671231e-04, 8.29987137e-01,
               2.99592664e+00])

    Args:
        nsigma (:obj:`int` or :obj:`tensor`): The number of standard deviations.

    Returns:
        Float: The expected value of the test statistic.
    """
    backend, _ = get_backend()
    # Translate the number of standard deviations into a percentile rank.
    percentile_rank = backend.normal_cdf(nsigma) * 100
    return backend.percentile(self.samples, percentile_rank, interpolation="linear")
def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution):
    r"""
    Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
    median significance of variations of the signal strength from the
    background only hypothesis :math:`\left(\mu=0\right)` at
    :math:`(-2,-1,0,1,2)\sigma`.

    Example:

        >>> import pyhf
        >>> import numpy.random as random
        >>> random.seed(0)
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = observations + model.config.auxdata
        >>> mu_test = 1.0
        >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
        ...     data, model, ntoys=100, track_progress=False
        ... )
        >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
        >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
        >>> CLs_exp_band
        [array(0.), array(0.), array(0.08403955), array(0.21892596), array(0.86072977)]

    Args:
        sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
          The distribution for the signal + background hypothesis.
        bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
          The distribution for the background-only hypothesis.

    Returns:
        Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
        corresponding to the :math:`\mathrm{CL}_{s+b}`,
        :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
    """
    backend, _ = get_backend()

    # One row of p-values per sample drawn under the background-only hypothesis
    per_sample_pvalues = []
    for test_stat in bkg_only_distribution.samples:
        per_sample_pvalues.append(
            self.pvalues(test_stat, sig_plus_bkg_distribution, bkg_only_distribution)
        )
    pvalues = backend.astensor(per_sample_pvalues)

    # percentiles for -2, -1, 0, 1, 2 standard deviations of the Normal distribution
    normal_percentiles = backend.astensor(
        [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681]
    )

    percentile_rows = backend.percentile(pvalues, normal_percentiles, axis=0)
    pvalues_exp_band = backend.transpose(percentile_rows)

    bands = []
    for band in pvalues_exp_band:
        bands.append([backend.astensor(pvalue) for pvalue in band])
    return bands
def __init__(self, loc, scale):
    """
    Store the parameters and build the backend Normal distribution object.

    Args:
        loc (:obj:`tensor` or :obj:`float`): The mean of the Normal distribution
        scale (:obj:`tensor` or :obj:`float`): The standard deviation of the
         Normal distribution
    """
    backend = get_backend()[0]
    self.loc, self.scale = loc, scale
    # The backend-specific distribution object is created once and kept.
    self._pdf = backend.normal_dist(loc, scale)
def _final_objective(pars, data, fixed_values, fixed_idx, variable_idx, do_stitch, objective, pdf):
    """
    Evaluate ``objective`` for ``pars``, first stitching in fixed values if asked.

    Args:
        pars: The variable parameter values (converted to a backend tensor).
        data: The data forwarded to ``objective``.
        fixed_values: Values of the fixed parameters; used when ``do_stitch``.
        fixed_idx: Indices of the fixed parameters; used when ``do_stitch``.
        variable_idx: Indices of the variable parameters; used when ``do_stitch``.
        do_stitch (:obj:`bool`): Whether fixed and variable parameters must be
         merged into one tensor before calling ``objective``.
        objective (:obj:`func`): Called as ``objective(pars, data, pdf)``.
        pdf: The model forwarded to ``objective``.

    Returns:
        The first element of the value returned by ``objective``.
    """
    log.debug('jitting function')
    backend, _ = get_backend()
    pars = backend.astensor(pars)
    if not do_stitch:
        return objective(pars, data, pdf)[0]

    # Merge the fixed values and the variable values into one parameter tensor.
    viewer = _TensorViewer([fixed_idx, variable_idx])
    fixed_tensor = backend.astensor(fixed_values, dtype='float')
    stitched_pars = viewer.stitch([fixed_tensor, pars])
    return objective(stitched_pars, data, pdf)[0]
def pvalue(self, value):
    """
    Compute the :math:`p`-value for a given value of the test statistic.

    The result is the fraction of stored samples that are greater than or
    equal to ``value``.

    Examples:

        >>> import pyhf
        >>> import numpy.random as random
        >>> random.seed(0)
        >>> pyhf.set_backend("numpy")
        >>> mean = pyhf.tensorlib.astensor([5])
        >>> std = pyhf.tensorlib.astensor([1])
        >>> normal = pyhf.probability.Normal(mean, std)
        >>> samples = normal.sample((100,))
        >>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples)
        >>> dist.pvalue(7)
        array(0.02)

        >>> import pyhf
        >>> import numpy.random as random
        >>> random.seed(0)
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> init_pars = model.config.suggested_init()
        >>> par_bounds = model.config.suggested_bounds()
        >>> fixed_params = model.config.suggested_fixed()
        >>> mu_test = 1.0
        >>> pdf = model.make_pdf(pyhf.tensorlib.astensor(init_pars))
        >>> samples = pdf.sample((100,))
        >>> test_stat_dist = pyhf.infer.calculators.EmpiricalDistribution(
        ...     pyhf.tensorlib.astensor(
        ...         [pyhf.infer.test_statistics.qmu_tilde(mu_test, sample, model, init_pars, par_bounds, fixed_params) for sample in samples]
        ...     )
        ... )
        >>> test_stat_dist.pvalue(test_stat_dist.samples[9])
        array(0.3)

    Args:
        value (:obj:`float`): The test statistic value.

    Returns:
        Tensor: The integrated probability to observe a value at least as large as the observed one.
    """
    backend, _ = get_backend()
    # 1 for every sample at or above the threshold, 0 otherwise
    at_or_above = backend.where(
        self.samples >= value, backend.astensor(1), backend.astensor(0)
    )
    n_samples = backend.shape(self.samples)[0]
    return backend.astensor(backend.sum(at_or_above) / n_samples)
def wrap_objective(objective, data, pdf, stitch_pars, do_grad=False, jit_pieces=None):
    """
    Wrap the objective function for the minimization.

    The returned callable forwards to ``_jitted_objective_and_grad`` when
    ``do_grad`` is ``True`` and to ``_jitted_objective`` otherwise.

    Args:
        objective (:obj:`func`): objective function
        data (:obj:`list`): observed data
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
        stitch_pars (:obj:`func`): callable that stitches parameters, see :func:`pyhf.optimize.common.shim`.
         Not used by this wrapper; the stitching information is taken from ``jit_pieces``.
        do_grad (:obj:`bool`): enable autodifferentiation mode. Default is off.
        jit_pieces (:obj:`dict`): Mapping with the keys ``'fixed_values'``,
         ``'fixed_idx'``, ``'variable_idx'`` and ``'do_stitch'``. It is read each
         time the returned function is called, so it must be provided before then.

    Returns:
        objective_and_grad (:obj:`func`): tensor backend wrapped objective,gradient pair
    """

    # NB: tuple arguments that need to be hashable (static_argnums)
    def func(pars):
        # The jitted callable is looked up at call time, as before.
        jitted = _jitted_objective_and_grad if do_grad else _jitted_objective
        # need to convert to tuple to make args hashable
        return jitted(
            pars,
            data,
            jit_pieces['fixed_values'],
            tuple(jit_pieces['fixed_idx']),
            tuple(jit_pieces['variable_idx']),
            jit_pieces['do_stitch'],
            objective,
            pdf,
        )

    return func
def __init__(self, samples):
    """
    Empirical distribution.

    The given samples are flattened with the backend's ``ravel`` and stored
    on ``self.samples``.

    Args:
        samples (:obj:`tensor`): The test statistics sampled from the distribution.

    Returns:
        ~pyhf.infer.calculators.EmpiricalDistribution: The empirical distribution of the test statistic.
    """
    backend = get_backend()[0]
    flattened = backend.ravel(samples)
    self.samples = flattened
def _precompute(self):
    """
    Cache the backend tensors derived from ``self._normsys_mask``.

    Nothing is done when the parameter viewer has no index selection.
    Otherwise the boolean mask is tiled along its third axis ``batch_size``
    times (once when ``batch_size`` is falsy) and an all-ones tensor of the
    same shape is stored. In the unbatched case the concatenated parameter
    indices are also stored, reshaped to a single column.
    """
    if self.param_viewer.index_selection:
        backend, _ = get_backend()
        bool_mask = backend.astensor(self._normsys_mask, dtype="bool")
        self.normsys_mask = backend.tile(bool_mask, (1, 1, self.batch_size or 1, 1))
        self.normsys_default = backend.ones(self.normsys_mask.shape)
        if self.batch_size is None:
            flat_indices = self.param_viewer.indices_concatenated
            self.indices = backend.reshape(flat_indices, (-1, 1))
def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution):
    r"""
    Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the
    median significance of variations of the signal strength from the
    background only hypothesis :math:`\left(\mu=0\right)` at
    :math:`(-2,-1,0,1,2)\sigma`.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = observations + model.config.auxdata
        >>> mu_test = 1.0
        >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
        ...     data, model, test_stat="qtilde"
        ... )
        >>> _ = asymptotic_calculator.teststatistic(mu_test)
        >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
        >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist)
        >>> CLs_exp_band
        [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]

    Args:
        sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
          The distribution for the signal + background hypothesis.
        bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
          The distribution for the background-only hypothesis.

    Returns:
        Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
        corresponding to the :math:`\mathrm{CL}_{s+b}`,
        :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
        Concretely a list of three lists, each holding one entry per
        expected test statistic.
    """
    # Calling pvalues is easier than repeating the CLs calculation here.
    # The expected test statistics are evaluated at n_sigma = 2, 1, 0, -1, -2
    # (in that order) under the background-only distribution.
    expected_test_stats = [
        bkg_only_distribution.expected_value(n_sigma)
        for n_sigma in [2, 1, 0, -1, -2]
    ]
    pvalue_triplets = [
        self.pvalues(test_stat, sig_plus_bkg_distribution, bkg_only_distribution)
        for test_stat in expected_test_stats
    ]
    # Regroup the per-test-statistic (CLsb, CLb, CLs) triplets into three bands.
    return [list(band) for band in zip(*pvalue_triplets)]
def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution):
    r"""
    Calculate the :math:`p`-values for the observed test statistic under the
    signal + background and background-only model hypotheses.

    Example:

        >>> import pyhf
        >>> import numpy.random as random
        >>> random.seed(0)
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = observations + model.config.auxdata
        >>> mu_test = 1.0
        >>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
        ...     data, model, ntoys=100, track_progress=False
        ... )
        >>> q_tilde = toy_calculator.teststatistic(mu_test)
        >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test)
        >>> CLsb, CLb, CLs = toy_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist)
        >>> CLsb, CLb, CLs
        (array(0.03), array(0.37), array(0.08108108))

    Args:
        teststat (:obj:`tensor`): The test statistic.
        sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
          The distribution for the signal + background hypothesis.
        bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution):
          The distribution for the background-only hypothesis.

    Returns:
        Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
        corresponding to the :math:`\mathrm{CL}_{s+b}`,
        :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
    """
    backend, _ = get_backend()
    p_sig_plus_bkg = sig_plus_bkg_distribution.pvalue(teststat)
    p_bkg_only = bkg_only_distribution.pvalue(teststat)
    # CLs is the ratio of the two tail probabilities
    ratio = backend.astensor(p_sig_plus_bkg / p_bkg_only)
    return p_sig_plus_bkg, p_bkg_only, ratio
def pvalue(self, value):
    r"""
    The :math:`p`-value for a given value of the test statistic corresponding
    to signal strength :math:`\mu` and Asimov strength :math:`\mu'` as
    defined in Equations (59) and (57) of :xref:`arXiv:1007.1727`

    .. math::

        p_{\mu} = 1-F\left(q_{\mu}\middle|\mu'\right) = 1- \Phi\left(\sqrt{q_{\mu}} - \frac{\left(\mu-\mu'\right)}{\sigma}\right)

    with Equation (29)

    .. math::

        \frac{(\mu-\mu')}{\sigma} = \sqrt{\Lambda}= \sqrt{q_{\mu,A}}

    given the observed test statistics :math:`q_{\mu}` and :math:`q_{\mu,A}`.

    Entries where ``value`` lies below ``self.cutoff`` are returned as NaN.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0)
        >>> bkg_dist.pvalue(0.0)
        array(0.5)

    Args:
        value (:obj:`float`): The test statistic value.

    Returns:
        Tensor: The integrated probability to observe a value at least as large as the observed one.
    """
    backend, _ = get_backend()
    # computing cdf(-x) instead of 1-cdf(x) for right-tail p-value for improved numerical stability
    tail_probability = backend.normal_cdf(-(value - self.shift))
    nan_fill = backend.ones(backend.shape(tail_probability)) * float("nan")
    is_valid = backend.astensor(value >= self.cutoff, dtype="bool")
    return backend.where(is_valid, tail_probability, nan_fill)
def apply(self, pars):
    """
    Compute the lumi modification tensor for the given parameters.

    Returns ``None`` when the parameter viewer has no index selection.

    Returns:
        modification tensor: Shape (n_modifiers, n_global_samples, n_alphas, n_global_bin)
    """
    if not self.param_viewer.index_selection:
        return

    backend, _ = get_backend()
    lumis = self.param_viewer.get(pars)
    # Batched parameters carry an extra axis that is matched to the 'a' axis.
    subscripts = 'msab,x->msab' if self.batch_size is None else 'msab,xa->msab'
    scaled = backend.einsum(subscripts, self.lumi_mask, lumis)
    # Entries outside the mask fall back to the default tensor.
    return backend.where(self.lumi_mask_bool, scaled, self.lumi_default)
def wrap_objective(objective, data, pdf, stitch_pars, do_grad=False, jit_pieces=None):
    """
    Wrap the objective function for the minimization.

    Args:
        objective (:obj:`func`): objective function
        data (:obj:`list`): observed data
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
        stitch_pars (:obj:`func`): callable that stitches parameters, see :func:`pyhf.optimize.common.shim`.
        do_grad (:obj:`bool`): enable autodifferentiation mode. Default is off.
        jit_pieces: Accepted for signature compatibility; not used by this wrapper.

    Returns:
        objective_and_grad (:obj:`func`): tensor backend wrapped objective,gradient pair
    """
    tensorlib, _ = get_backend()

    if not do_grad:

        def func(pars):
            stitched = stitch_pars(tensorlib.astensor(pars))
            return objective(stitched, data, pdf)[0]

        return func

    def func(pars):
        pars = tensorlib.astensor(pars)
        with tf.GradientTape() as tape:
            tape.watch(pars)
            stitched = stitch_pars(pars)
            constr_nll = objective(stitched, data, pdf)
        # NB: tape.gradient can return a sparse gradient (tf.IndexedSlices)
        # when tf.gather is used and this needs to be converted back to a
        # tensor to be usable as a value
        sparse_or_dense_grad = tape.gradient(constr_nll, pars)
        nll_value = constr_nll.numpy()[0]
        return nll_value, tf.convert_to_tensor(sparse_or_dense_grad)

    return func
def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution):
    r"""
    Calculate the :math:`p`-values for the observed test statistic under the
    signal + background and background-only model hypotheses.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = observations + model.config.auxdata
        >>> mu_test = 1.0
        >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(
        ...     data, model, test_stat="qtilde"
        ... )
        >>> q_tilde = asymptotic_calculator.teststatistic(mu_test)
        >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test)
        >>> CLsb, CLb, CLs = asymptotic_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist)
        >>> CLsb, CLb, CLs
        (array(0.02332502), array(0.4441594), array(0.05251497))

    Args:
        teststat (:obj:`tensor`): The test statistic.
        sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
          The distribution for the signal + background hypothesis.
        bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution):
          The distribution for the background-only hypothesis.

    Returns:
        Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic
        corresponding to the :math:`\mathrm{CL}_{s+b}`,
        :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`.
    """
    backend, _ = get_backend()
    p_sig_plus_bkg = sig_plus_bkg_distribution.pvalue(teststat)
    p_bkg_only = bkg_only_distribution.pvalue(teststat)
    # CLs is the ratio of the two tail probabilities
    ratio = backend.astensor(p_sig_plus_bkg / p_bkg_only)
    return p_sig_plus_bkg, p_bkg_only, ratio
def apply(self, pars):
    """
    Compute the normsys modification tensor for the given parameters.

    Returns ``None`` when the parameter viewer has no index selection.

    Returns:
        modification tensor: Shape (n_modifiers, n_global_samples, n_alphas, n_global_bin)
    """
    if not self.param_viewer.index_selection:
        return

    backend, _ = get_backend()
    if self.batch_size is not None:
        alphas = self.param_viewer.get(pars)
    else:
        # The unbatched case reads the parameters through the cached indices.
        alphas = self.param_viewer.get(pars, self.indices)

    interpolated = self.interpolator(alphas)

    # either rely on numerical no-op or force with line below
    return backend.where(self.normsys_mask, interpolated, self.normsys_default)
def _qmu_like(
    mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False
):
    """
    Clipped version of _tmu_like where the returned test statistic
    is 0 if the best-fit POI value (muhat) is greater than the tested ``mu``,
    else tmu_like_stat.

    If the lower bound of the POI is 0 this automatically implements
    qmu_tilde. Otherwise this is qmu (no tilde).
    """
    backend, _ = get_backend()
    tmu_stat, (fixed_poi_pars, free_fit_pars) = _tmu_like(
        mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=True
    )
    # The POI value is read from the second parameter set returned by _tmu_like.
    poi_above_test_value = free_fit_pars[pdf.config.poi_index] > mu
    qmu_stat = backend.where(poi_above_test_value, backend.astensor(0.0), tmu_stat)

    if not return_fitted_pars:
        return qmu_stat
    return qmu_stat, (fixed_poi_pars, free_fit_pars)
def wrap_objective(objective, data, pdf, stitch_pars, do_grad=False, jit_pieces=None):
    """
    Wrap the objective function for the minimization.

    Args:
        objective (:obj:`func`): objective function
        data (:obj:`list`): observed data
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
        stitch_pars (:obj:`func`): callable that stitches parameters, see :func:`pyhf.optimize.common.shim`.
        do_grad (:obj:`bool`): enable autodifferentiation mode. Default is off.
        jit_pieces: Accepted for signature compatibility; not used by this wrapper.

    Returns:
        objective_and_grad (:obj:`func`): tensor backend wrapped objective,gradient pair
    """
    tensorlib, _ = get_backend()

    if not do_grad:

        def func(pars):
            stitched = stitch_pars(tensorlib.astensor(pars))
            return objective(stitched, data, pdf)[0]

        return func

    def func(pars):
        pars = tensorlib.astensor(pars)
        # Track operations on the parameters so the gradient can be taken.
        pars.requires_grad = True
        stitched = stitch_pars(pars)
        constr_nll = objective(stitched, data, pdf)
        gradients = torch.autograd.grad(constr_nll, pars)
        nll_value = constr_nll.detach().numpy()[0]
        return nll_value, gradients[0]

    return func
def cdf(self, value):
    """
    Compute the value of the cumulative distribution function for a given value of the test statistic.

    The standard Normal CDF is evaluated at ``value - self.shift``.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0)
        >>> bkg_dist.cdf(0.0)
        0.5

    Args:
        value (:obj:`float`): The test statistic value.

    Returns:
        Float: The integrated probability to observe a test statistic less than or equal to the observed ``value``.
    """
    backend = get_backend()[0]
    shifted_value = value - self.shift
    return backend.normal_cdf(shifted_value)
def slow(self, auxdata, pars):
    """
    Sum the constraint log-pdf terms by looping over every constrained
    parameter set in ``self.config.auxdata_order``.

    Args:
        auxdata: The auxiliary data, sliced consecutively per parameter set.
        pars: The parameter values, sliced with the per-set parameter slice.

    Returns:
        The summed constraint log-pdf, or ``0`` when there is nothing to sum.
    """
    backend, _ = pyhf.get_backend()

    # iterate over all constraints order doesn't matter....
    offset = 0
    collected = None
    for cname in self.config.auxdata_order:
        parset = self.config.param_set(cname)
        parslice = self.config.par_slice(cname)

        # Each parameter set owns the next n_parameters entries of auxdata.
        stop = offset + parset.n_parameters
        set_auxdata = auxdata[offset:stop]
        offset = stop

        if parset.pdf_type == 'normal':
            means = pars[parslice]
            if hasattr(parset, 'sigmas'):
                widths = parset.sigmas
            else:
                widths = backend.ones(means.shape)
            widths = backend.astensor(widths)
            constraint_term = backend.normal_logpdf(set_auxdata, means, widths)
        elif parset.pdf_type == 'poisson':
            # Element-wise product of the parameters and their factors
            rates = backend.product(
                backend.stack([pars[parslice], backend.astensor(parset.factors)]),
                axis=0,
            )
            constraint_term = backend.poisson_logpdf(set_auxdata, rates)

        if collected is None:
            collected = constraint_term
        else:
            collected = backend.concatenate([collected, constraint_term])

    if collected is None:
        return 0
    return backend.sum(collected)
def apply(self, pars):
    """
    Compute the normfactor modification tensor for the given parameters.

    Returns ``None`` when the parameter viewer has no index selection.

    Returns:
        modification tensor: Shape (n_modifiers, n_global_samples, n_alphas, n_global_bin)
    """
    if not self.param_viewer.index_selection:
        return

    backend, _ = get_backend()
    normfactors = self.param_viewer.get(pars)
    # Batched parameters carry an extra axis that is matched to the 'a' axis.
    subscripts = 'msab,m->msab' if self.batch_size is None else 'msab,ma->msab'
    scaled = backend.einsum(subscripts, self.normfactor_mask, normfactors)
    # Entries outside the mask fall back to the default tensor.
    return backend.where(self.normfactor_mask_bool, scaled, self.normfactor_default)
def _tmu_like(
    mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False
):
    """
    Basic Profile Likelihood test statistic.

    The statistic is the difference between the fitted objective value of
    the fixed-POI fit and that of the unconstrained fit, clipped from below
    at 0.

    If the lower bound of the POI is 0 this automatically implements
    tmu_tilde. Otherwise this is tmu (no tilde).
    """
    backend, _ = get_backend()

    fixed_poi_pars, fixed_poi_val = fixed_poi_fit(
        mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
    )
    free_fit_pars, free_fit_val = fit(
        data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
    )

    difference = fixed_poi_val - free_fit_val
    # Negative differences are clipped to zero.
    tmu_stat = backend.astensor(backend.clip(difference, 0.0, max_value=None))

    if not return_fitted_pars:
        return tmu_stat
    return tmu_stat, (fixed_poi_pars, free_fit_pars)
def logpdf(self, pars, data):
    """
    Evaluate the log-pdf as the sum of the main and constraint terms.

    Args:
        pars: The parameter values used to build both pdfs.
        data: A pair ``(maindata, auxdata)``.

    Returns:
        Tensor: A one-element tensor holding the summed log-probability.
    """
    backend, _ = get_backend()
    maindata, auxdata = data
    main_term = self._make_main_pdf(pars).log_prob(maindata)
    constraint_term = self._make_constraint_pdf(pars).log_prob(auxdata)
    total = main_term + constraint_term
    return backend.astensor([total])
def hypotest(
    poi_test,
    data,
    pdf,
    init_pars=None,
    par_bounds=None,
    fixed_params=None,
    calctype="asymptotics",
    return_tail_probs=False,
    return_expected=False,
    return_expected_set=False,
    return_calculator=False,
    **kwargs,
):
    r"""
    Compute :math:`p`-values and test statistics for a single value of the parameter of interest.

    See :py:class:`~pyhf.infer.calculators.AsymptoticCalculator` and :py:class:`~pyhf.infer.calculators.ToyCalculator` on additional keyword arguments to be specified.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
        >>> mu_test = 1.0
        >>> CLs_obs, CLs_exp_band = pyhf.infer.hypotest(
        ...     mu_test, data, model, return_expected_set=True, test_stat="qtilde"
        ... )
        >>> CLs_obs
        array(0.05251497)
        >>> CLs_exp_band
        [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]

    Args:
        poi_test (Number or Tensor): The value of the parameter of interest (POI)
        data (Number or Tensor): The data considered
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``
        init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization.
         When ``None`` or empty, ``pdf.config.suggested_init()`` is used.
        par_bounds (:obj:`tensor`): The extrema of values the model parameters
         are allowed to reach in the fit.
         The shape should be ``(n, 2)`` for ``n`` model parameters.
         When ``None`` or empty, ``pdf.config.suggested_bounds()`` is used.
        fixed_params (:obj:`tensor` of :obj:`bool`): The flag to set a parameter constant to its starting
         value during minimization.
         When ``None`` or empty, ``pdf.config.suggested_fixed()`` is used.
        calctype (:obj:`str`): The calculator to create. Choose either 'asymptotics' (default) or 'toybased'.
        return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
        return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
        return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
        return_calculator (:obj:`bool`): Bool for returning calculator.
        kwargs: Additional keyword arguments forwarded to the calculator.
         The ``test_stat`` entry (default ``'qtilde'``) is also read here: for
         ``'q0'`` the :math:`\mathrm{CL}_{s+b}` values are reported in place of
         the :math:`\mathrm{CL}_{s}` values.

    Returns:
        Tuple of Floats and lists of Floats and
        a :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
        or :py:class:`~pyhf.infer.calculators.ToyCalculator` instance:

            - :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
              the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
              defined in :xref:`arXiv:1007.1727` as

            .. math::

                \mathrm{CL}_{s} = \frac{\mathrm{CL}_{s+b}}{\mathrm{CL}_{b}} = \frac{p_{s+b}}{1-p_{b}}

            to protect against excluding signal models in which there is little
            sensitivity. In the case that :math:`\mathrm{CL}_{s} \leq \alpha`
            the given signal model is excluded.

            - :math:`\left[\mathrm{CL}_{s+b}, \mathrm{CL}_{b}\right]`: The
              signal + background model hypothesis :math:`p`-value

            .. math::

                \mathrm{CL}_{s+b} = p_{s+b}
                = p\left(q \geq q_{\mathrm{obs}}\middle|s+b\right)
                = \int\limits_{q_{\mathrm{obs}}}^{\infty} f\left(q\,\middle|s+b\right)\,dq
                = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|\mu'\right)

            and 1 minus the background only model hypothesis :math:`p`-value

            .. math::

                \mathrm{CL}_{b} = 1- p_{b}
                = p\left(q \geq q_{\mathrm{obs}}\middle|b\right)
                = 1 - \int\limits_{-\infty}^{q_{\mathrm{obs}}} f\left(q\,\middle|b\right)\,dq
                = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|0\right)

            for signal strength :math:`\mu` and model hypothesis signal strength
            :math:`\mu'`, where the cumulative density functions
            :math:`F\left(q(\mu)\,\middle|\mu'\right)` are given by Equations (57)
            and (65) of :xref:`arXiv:1007.1727` for upper-limit-like test
            statistic :math:`q \in \{q_{\mu}, \tilde{q}_{\mu}\}`.
            Only returned when ``return_tail_probs`` is ``True``.

            .. note::

                The definitions of the :math:`\mathrm{CL}_{s+b}` and
                :math:`\mathrm{CL}_{b}` used are based on profile likelihood
                ratio test statistics.
                This procedure is common in the LHC-era, but differs from
                procedures used in the LEP and Tevatron eras, as briefly
                discussed in :math:`\S` 3.8 of :xref:`arXiv:1007.1727`.

            - :math:`\mathrm{CL}_{s,\mathrm{exp}}`: The expected :math:`\mathrm{CL}_{s}`
              value corresponding to the test statistic under the background
              only hypothesis :math:`\left(\mu=0\right)`.
              Only returned when ``return_expected`` is ``True``.

            - :math:`\mathrm{CL}_{s,\mathrm{exp}}` band: The set of expected
              :math:`\mathrm{CL}_{s}` values corresponding to the median
              significance of variations of the signal strength from the
              background only hypothesis :math:`\left(\mu=0\right)` at
              :math:`(-2,-1,0,1,2)\sigma`.
              That is, the :math:`p`-values that satisfy Equation (89) of
              :xref:`arXiv:1007.1727`

            .. math::

                \mathrm{band}_{N\sigma} = \mu' + \sigma\,\Phi^{-1}\left(1-\alpha\right) \pm N\sigma

            for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`.
            These values define the boundaries of an uncertainty band sometimes
            referred to as the "Brazil band".
            Only returned when ``return_expected_set`` is ``True``.

            - a calculator: The calculator instance used in the computation of the :math:`p`-values.
              Either an instance of :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
              or :py:class:`~pyhf.infer.calculators.ToyCalculator`,
              depending on the value of ``calctype``.
              Only returned when ``return_calculator`` is ``True``.

    """
    # Fall back to the model's suggestions only when nothing (``None`` or an
    # empty sequence) was given. A plain ``x or default`` cannot be used here:
    # the truth value of a multi-element array is ambiguous and raises, and a
    # one-element array holding 0 would be silently discarded.
    if init_pars is None or len(init_pars) == 0:
        init_pars = pdf.config.suggested_init()
    if par_bounds is None or len(par_bounds) == 0:
        par_bounds = pdf.config.suggested_bounds()
    if fixed_params is None or len(fixed_params) == 0:
        fixed_params = pdf.config.suggested_fixed()

    _check_hypotest_prerequisites(pdf, data, init_pars, par_bounds, fixed_params)

    calc = utils.create_calculator(
        calctype,
        data,
        pdf,
        init_pars,
        par_bounds,
        fixed_params,
        **kwargs,
    )

    teststat = calc.teststatistic(poi_test)
    sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test)

    tb, _ = get_backend()
    CLsb_obs, CLb_obs, CLs_obs = tuple(
        tb.astensor(pvalue)
        for pvalue in calc.pvalues(
            teststat, sig_plus_bkg_distribution, bkg_only_distribution
        )
    )
    CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
        sig_plus_bkg_distribution, bkg_only_distribution
    )

    # For the discovery test statistic the CLs+b values are reported instead of CLs.
    is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'

    _returns = [CLsb_obs if is_q0 else CLs_obs]
    if return_tail_probs:
        if is_q0:
            _returns.append([CLb_obs])
        else:
            _returns.append([CLsb_obs, CLb_obs])

    pvalues_exp_band = [
        tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp)
    ]
    # The median (index 2 of the band) comes before the full band when both are requested.
    if return_expected:
        _returns.append(tb.astensor(pvalues_exp_band[2]))
    if return_expected_set:
        _returns.append(pvalues_exp_band)
    if return_calculator:
        _returns.append(calc)

    # Enforce a consistent return type of the observed CLs
    return tuple(_returns) if len(_returns) > 1 else _returns[0]
def upperlimit(data, model, scan, level=0.05, return_results=False, **hypotest_kwargs):
    """
    Calculate an upper limit interval ``(0, poi_up)`` for a single
    Parameter of Interest (POI) using a fixed scan through POI-space.

    Example:

        >>> import numpy as np
        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.uncorrelated_background(
        ...     signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
        >>> scan = np.linspace(0, 5, 21)
        >>> obs_limit, exp_limits, (scan, results) = pyhf.infer.intervals.upperlimit(
        ...     data, model, scan, return_results=True
        ... )
        >>> obs_limit
        array(1.01764089)
        >>> exp_limits
        [array(0.59576921), array(0.76169166), array(1.08504773), array(1.50170482), array(2.06654952)]

    Args:
        data (:obj:`tensor`): The observed data.
        model (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
        scan (:obj:`iterable`): Iterable of POI values.
        level (:obj:`float`): The threshold value to evaluate the interpolated results at.
        return_results (:obj:`bool`): Whether to return the per-point results.
        hypotest_kwargs (:obj:`string`): Kwargs for the calls to
         :class:`~pyhf.infer.hypotest` to configure the fits.

    Returns:
        Tuple of Tensors:

            - Tensor: The observed upper limit on the POI.
            - Tensor: The expected upper limits on the POI.
            - Tuple of Tensors: The given ``scan`` along with the
              :class:`~pyhf.infer.hypotest` results at each test POI.
              Only returned when ``return_results`` is ``True``.
    """
    tb, _ = get_backend()

    results = []
    for mu in scan:
        results.append(
            hypotest(mu, data, model, return_expected_set=True, **hypotest_kwargs)
        )

    # One column for the observed value followed by five columns for the band
    obs = tb.astensor([[point[0]] for point in results])
    exp = tb.astensor([[point[1][idx] for idx in range(5)] for point in results])
    result_array = tb.concatenate([obs, exp], axis=1).T

    # observed limit and the (0, +-1, +-2)sigma expected limits
    reversed_scan = scan[::-1]
    limits = []
    for idx in range(6):
        limits.append(_interp(level, result_array[idx][::-1], reversed_scan))
    obs_limit = limits[0]
    exp_limits = limits[1:]

    if not return_results:
        return obs_limit, exp_limits
    return obs_limit, exp_limits, (scan, results)
def _interp(x, xp, fp):
    """
    Linearly interpolate with :func:`numpy.interp` and return a backend tensor.

    Args:
        x: The coordinate(s) at which to evaluate the interpolated values.
        xp: The x-coordinates of the data points; must provide ``tolist()``.
        fp: The y-coordinates of the data points; must provide ``tolist()``.

    Returns:
        Tensor: The interpolated value(s).
    """
    backend = get_backend()[0]
    x_points = xp.tolist()
    y_points = fp.tolist()
    interpolated = np.interp(x, x_points, y_points)
    return backend.astensor(interpolated)
def fit(
    workspace,
    output_file,
    measurement,
    patch,
    value,
    backend,
    optimizer,
    optconf,
):
    """
    Perform a maximum likelihood fit for a given pyhf workspace.

    The best-fit parameters (and, when ``value`` is set, the fitted objective
    value) are echoed as JSON, or written to ``output_file`` when one is given.

    Example:

    .. code-block:: shell

        $ curl -sL https://git.io/JJYDE | pyhf fit --value

        \b
        {
            "mle_parameters": {
                "mu": [
                    0.00017298628839781602
                ],
                "uncorr_bkguncrt": [
                    1.0000015671710816,
                    0.9999665895859197
                ]
            },
            "twice_nll": 23.19636590468879
        }
    """
    # set the backend if not NumPy
    if backend in ["pytorch", "torch"]:
        set_backend("pytorch", precision="64b")
    elif backend in ["tensorflow", "tf"]:
        set_backend("tensorflow", precision="64b")
    elif backend in ["jax"]:
        set_backend("jax")
    tensorlib, _ = get_backend()

    # Flatten the sequence of option mappings into a single dict.
    optconf = {
        opt_name: opt_value
        for item in optconf
        for opt_name, opt_value in item.items()
    }

    # set the new optimizer
    if optimizer:
        # Try the name as given first, then with the "_optimizer" suffix.
        # The explicit default keeps the fallback reachable when the first
        # lookup would raise AttributeError instead of returning a falsy value.
        new_optimizer = getattr(optimize, optimizer, None) or getattr(
            optimize, f"{optimizer}_optimizer"
        )
        set_backend(tensorlib, new_optimizer(**optconf))

    with click.open_file(workspace, "r") as specstream:
        spec = json.load(specstream)

    ws = Workspace(spec)

    # Read every patch inside a context manager so the file handles are closed.
    patches = []
    for pfile in patch:
        with click.open_file(pfile, "r") as patchstream:
            patches.append(json.load(patchstream))

    model = ws.model(
        measurement_name=measurement,
        patches=patches,
    )
    data = ws.data(model)

    fit_result = mle.fit(data, model, return_fitted_val=value)

    # With ``value`` set the fit returns the parameters first and the fitted value last.
    _pars = fit_result if not value else fit_result[0]
    bestfit_pars = {
        paramset_name: tensorlib.tolist(_pars[paramset_spec["slice"]])
        for paramset_name, paramset_spec in model.config.par_map.items()
    }
    result = {"mle_parameters": bestfit_pars}
    if value:
        result["twice_nll"] = tensorlib.tolist(fit_result[-1])

    if output_file is None:
        click.echo(json.dumps(result, indent=4, sort_keys=True))
    else:
        with open(output_file, "w+") as out_file:
            json.dump(result, out_file, indent=4, sort_keys=True)
        log.debug(f"Written to {output_file:s}")