Exemple #1
0
    def build_histogram(self):
        """Histogram the density-estimation events over the analysis space.

        The axis names and bin edges are taken from
        ``self.config['analysis_space']``. Besides returning the raw count
        histogram, this sets the following attributes:

        - ``fraction_in_range``: ``mh.n`` divided by the summed number of
          simulated events reported by the event source
        - ``_pdf_histogram``: the counts divided by ``mh.n`` and by the bin volumes
        - ``_bin_volumes``: outer product of the bin widths along every axis
        - ``_n_events_histogram``: the raw count histogram itself

        :return: the raw count histogram (``Histdd``)
        """
        # Get the events to estimate the PDF
        dimnames, bins = zip(*self.config['analysis_space'])
        mh = Histdd(bins=bins, axis_names=dimnames)

        # Get a generator function which will give us the events.
        # A plain (non-generator) event source is wrapped so that both kinds
        # can be consumed by the same loop below.
        get = self.get_events_for_density_estimate
        if not inspect.isgeneratorfunction(get):
            def get():
                return [self.get_events_for_density_estimate()]

        n_events = 0
        for events, n_simulated in get():
            n_events += n_simulated
            mh.add(*utils._events_to_analysis_dimensions(events, self.config['analysis_space']))

        # NOTE(review): no guard for n_events == 0 (or mh.n == 0 below);
        # presumably the event source always yields events -- confirm.
        self.fraction_in_range = mh.n / n_events

        # Convert the histogram to a density estimate
        # This means we have to divide by
        #  - the number of events IN RANGE received
        #    (fraction_in_range keeps track of how many events were not in range)
        #  - the bin sizes
        self._pdf_histogram = mh.similar_blank_hist()
        # Builtin float instead of np.float: the alias was removed in NumPy 1.24
        self._pdf_histogram.histogram = mh.histogram.astype(float) / mh.n

        # For the bin widths we need to take an outer product of several vectors, for which numpy has no builtin
        # This reduce trick does the job instead, see http://stackoverflow.com/questions/17138393
        self._bin_volumes = reduce(np.multiply, np.ix_(*[np.diff(bs) for bs in bins]))
        self._pdf_histogram.histogram /= self._bin_volumes

        self._n_events_histogram = mh

        return mh
Exemple #2
0
    def build_histogram(self):
        """Build the count histogram and the derived density estimate.

        Events from ``self.get_events_for_density_estimate`` are binned in a
        ``Histdd`` whose axis names and bin edges come from
        ``self.config['analysis_space']``.

        Attributes set as a side effect:

        - ``fraction_in_range``: ``mh.n`` over the total number of simulated
          events reported by the event source
        - ``_pdf_histogram``: counts normalised by ``mh.n`` and the bin volumes
        - ``_bin_volumes``: product of the bin widths over all axes
        - ``_n_events_histogram``: the unnormalised count histogram

        :return: the unnormalised count histogram (``Histdd``)
        """
        # Get the events to estimate the PDF
        dimnames, bins = zip(*self.config['analysis_space'])
        mh = Histdd(bins=bins, axis_names=dimnames)

        # Get a generator function which will give us the events.
        # If the source is an ordinary function, wrap its single result in a
        # list so the loop below works for both cases.
        get = self.get_events_for_density_estimate
        if not inspect.isgeneratorfunction(get):

            def get():
                return [self.get_events_for_density_estimate()]

        n_events = 0
        for events, n_simulated in get():
            n_events += n_simulated
            mh.add(*utils._events_to_analysis_dimensions(
                events, self.config['analysis_space']))

        # NOTE(review): divides by n_events (and by mh.n below) without a
        # zero check; presumably at least one event is always in range -- confirm.
        self.fraction_in_range = mh.n / n_events

        # Convert the histogram to a density estimate
        # This means we have to divide by
        #  - the number of events IN RANGE received
        #    (fraction_in_range keeps track of how many events were not in range)
        #  - the bin sizes
        self._pdf_histogram = mh.similar_blank_hist()
        # np.float no longer exists (removed in NumPy 1.24); it was an alias
        # of the builtin float, which is used here instead.
        self._pdf_histogram.histogram = mh.histogram.astype(float) / mh.n

        # For the bin widths we need to take an outer product of several vectors, for which numpy has no builtin
        # This reduce trick does the job instead, see http://stackoverflow.com/questions/17138393
        self._bin_volumes = reduce(np.multiply,
                                   np.ix_(*[np.diff(bs) for bs in bins]))
        self._pdf_histogram.histogram /= self._bin_volumes

        self._n_events_histogram = mh

        return mh
Exemple #3
0
class BinnedLogLikelihood(LogLikelihoodBase):
    """Binned Poisson log likelihood evaluated on the bins of the analysis space.

    The data is histogrammed in ``set_data``; the likelihood compares the
    observed count in each bin with the summed expectation of all sources.
    If the likelihood config contains ``model_statistical_uncertainty_handling``
    set to ``'bb_single'``, ``adjust_expectations`` corrects the expectation of
    one source (named by ``bb_single_source``) using ``beeston_barlow_roots``.
    """

    def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
        """Force piecewise PDF interpolation and initialise the base class.

        :param pdf_base_config: base model configuration dict; it is modified
            in place (``pdf_interpolation_method`` is set to ``'piecewise'``)
        :param likelihood_config: passed through to ``LogLikelihoodBase``
        :param kwargs: passed through to ``LogLikelihoodBase``
        """
        # Set this BEFORE handing the config to the base class, so that
        # whatever the base initialiser builds from it sees the setting.
        pdf_base_config['pdf_interpolation_method'] = 'piecewise'
        LogLikelihoodBase.__init__(self, pdf_base_config, likelihood_config, **kwargs)

        # None when the option is absent, which disables the adjustment
        self.model_statistical_uncertainty_handling = self.config.get('model_statistical_uncertainty_handling')

    @inherit_docstring_from(LogLikelihoodBase)
    def prepare(self, *args):
        # Delegate to the direct base class, like every other method here
        LogLikelihoodBase.prepare(self, *args)
        self.ps, self.n_model_events = self.base_model.pmf_grids()

        # Interpolators are only needed when there are shape parameters
        if len(self.shape_parameters):
            self.ps_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[0],
                                                                  extra_dims=list(self.ps.shape),
                                                                  anchor_models=self.anchor_models)

            # The per-bin model event counts are only interpolated when some
            # statistical uncertainty handling is requested
            if self.model_statistical_uncertainty_handling is not None:
                self.n_model_events_interpolator = self.morpher.make_interpolator(f=lambda m: m.pmf_grids()[1],
                                                                                  extra_dims=list(self.ps.shape),
                                                                                  anchor_models=self.anchor_models)

    @inherit_docstring_from(LogLikelihoodBase)
    def set_data(self, d):
        LogLikelihoodBase.set_data(self, d)
        # Bin the data in the analysis space
        dimnames, bins = zip(*self.base_model.config['analysis_space'])
        self.data_events_per_bin = Histdd(bins=bins, axis_names=dimnames)
        self.data_events_per_bin.add(*self.base_model.to_analysis_dimensions(d))

    @inherit_docstring_from(LogLikelihoodBase)
    def _compute_single_pdf(self, **kwargs):
        # Returns (expected events, pmf grids, model event counts) for the
        # model built from the given parameter values
        model = self._compute_single_model(**kwargs)
        mus = model.expected_events()
        ps, n_model_events = model.pmf_grids()
        return mus, ps, n_model_events

    @_needs_data
    @inherit_docstring_from(LogLikelihoodBase)
    def adjust_expectations(self, mus, pmfs, n_model_events):
        # Only the 'bb_single' mode changes anything; otherwise mus and pmfs
        # are returned untouched. In 'bb_single' mode, mus and pmfs are
        # modified IN PLACE for the selected source.
        if self.model_statistical_uncertainty_handling == 'bb_single':

            source_i = self.config.get('bb_single_source')
            if source_i is None:
                raise ValueError("You need to specify bb_single_source to use bb_single_source expectation adjustment")
            # Convert the configured source reference to its index
            source_i = self.base_model.get_source_i(source_i)

            assert pmfs.shape == n_model_events.shape

            # Get the number of events expected for the sources we will NOT adjust
            # (rows of the copy are views, so the in-place multiplications
            # below modify counts_per_bin and leave pmfs alone)
            counts_per_bin = pmfs.copy()
            for i, (mu, _x) in enumerate(zip(mus, counts_per_bin)):
                if i != source_i:
                    _x *= mu
                else:
                    _x *= 0.
            u_bins = np.sum(counts_per_bin, axis=0)

            # Expected events per model event for the adjusted source
            p_calibration = mus[source_i] / n_model_events[source_i].sum()

            a_bins = n_model_events[source_i]

            # beeston_barlow_roots is defined elsewhere; presumably it returns
            # the two per-bin roots of the Beeston-Barlow equation -- confirm.
            A_bins_1, A_bins_2 = beeston_barlow_roots(a_bins, p_calibration, u_bins, self.data_events_per_bin.histogram)
            assert np.all(A_bins_1 <= 0)  # it seems(?) the 1st root is always negative

            # For U=0, the solution above is singular; we need to use a special case instead
            A_bins_special = (self.data_events_per_bin.histogram + a_bins) / (1. + p_calibration)
            # Picks A_bins_special where u_bins == 0, A_bins_2 elsewhere
            A_bins = np.choose(u_bins == 0, [A_bins_2, A_bins_special])

            assert np.all(0 <= A_bins)
            pmfs[source_i] = A_bins / A_bins.sum()
            mus[source_i] = A_bins.sum() * p_calibration

        return mus, pmfs

    def _compute_likelihood(self, mus, pmfs):
        """Return binned Poisson log likelihood
        :param mus: numpy array with expected rates for each source
        :param pmfs: array (sources, *analysis_space) of PMFs for each source in each bin
        """
        expected_counts = pmfs.copy()
        for mu, _p_bin_source in zip(mus, expected_counts):
            _p_bin_source *= mu         # Works because of numpy view magic...
        expected_total = np.sum(expected_counts, axis=0)

        observed_counts = self.data_events_per_bin.histogram

        # Per bin: n * log(mu) - mu - log(n!), with log(n!) = gammaln(n + 1)
        ret = observed_counts * np.log(expected_total) - expected_total - gammaln(observed_counts + 1.).real
        return np.sum(ret)
Exemple #4
0
class TestHistdd(TestCase):
    """Tests for the two-dimensional use of ``Histdd``.

    Every test starts from a fresh, empty histogram with axes named
    ``foo`` and ``bar``, built from ``test_range_2d`` and ``test_bins_2d``.
    """

    def setUp(self):
        self.m = Histdd(range=test_range_2d,
                        bins=test_bins_2d,
                        axis_names=['foo', 'bar'])

    def test_is_instance(self):
        self.assertIsInstance(self.m, Histdd)

    def test_add_data(self):
        """Adding data accumulates counts exactly like ``np.histogram2d``."""
        m = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]
        m.add(x, y)
        self.assertEqual(
            m.histogram.tolist(),
            np.histogram2d(x, y, range=test_range_2d,
                           bins=test_bins_2d)[0].tolist())
        # x and y are plain lists, so `x * 2` repeats the data: adding the
        # same points twice must equal histogramming the doubled sample.
        m.add(x, y)
        self.assertEqual(m.histogram.tolist(), (np.histogram2d(
            x * 2, y * 2, range=test_range_2d, bins=test_bins_2d)[0].tolist()))
        # Adding these far-away points is expected to leave the counts as they were
        m.add([999, 999], [111, 111])
        self.assertEqual(
            m.histogram.tolist(),
            np.histogram2d(x * 2,
                           y * 2,
                           range=test_range_2d,
                           bins=test_bins_2d)[0].tolist())

    def test_pandas(self):
        """A DataFrame whose columns match the axis names can be added directly."""
        import pandas as pd
        m = self.m
        test_data = pd.DataFrame([{'foo': 0, 'bar': 0}, {'foo': 0, 'bar': 5}])
        m.add(test_data)
        self.assertEqual(
            m.histogram.tolist(),
            np.histogram2d([0, 0], [0, 5],
                           range=test_range_2d,
                           bins=test_bins_2d)[0].tolist())

    def test_projection(self):
        """Projections keep the counts and the bin edges of the chosen axis."""
        m = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]
        m.add(x, y)
        # Reference edges for both axes, from numpy with the same binning.
        # The edges are compared element-wise: summing the signed differences
        # would let errors of opposite sign cancel and pass unnoticed.
        _, foo_edges, bar_edges = np.histogram2d(x, y,
                                                 range=test_range_2d,
                                                 bins=test_bins_2d)
        p1 = m.projection(0)
        self.assertEqual(p1.histogram.tolist(), [1, 1, 1])
        np.testing.assert_allclose(p1.bin_edges, foo_edges)
        p2 = m.projection(1)
        self.assertEqual(p2.histogram.tolist(), [0, 3, 0])
        np.testing.assert_allclose(p2.bin_edges, bar_edges)
        # Selecting the axis by name must give the same result as by index
        p_2 = m.projection('bar')
        self.assertEqual(p2.histogram.tolist(), p_2.histogram.tolist())
        self.assertEqual(p2.bin_edges.tolist(), p_2.bin_edges.tolist())

    def test_cumulate(self):
        """``cumulate`` sums along one axis; ``cumulative_density`` normalises it."""
        self.m.add([-1, 0, 1], [-10, 0, 10])
        np.testing.assert_equal(self.m.histogram,
                                np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))
        np.testing.assert_equal(
            self.m.cumulate(0).histogram,
            np.array([[1, 0, 0], [1, 1, 0], [1, 1, 1]]))
        np.testing.assert_equal(
            self.m.cumulate(1).histogram,
            np.array([[1, 1, 1], [0, 1, 1], [0, 0, 1]]))
        # With one event per slice the two coincide ...
        np.testing.assert_equal(
            self.m.cumulate(1).histogram,
            self.m.cumulative_density(1).histogram)
        # ... and with two events per slice the raw cumulation is twice as large
        self.m.add([-1, 0, 1], [-10, 0, 10])
        np.testing.assert_equal(
            self.m.cumulate(1).histogram,
            2 * self.m.cumulative_density(1).histogram)
class TestHistdd(TestCase):
    """Exercise ``Histdd`` with a two-dimensional histogram.

    ``setUp`` creates an empty histogram with axes ``foo`` and ``bar`` using
    ``test_range_2d`` and ``test_bins_2d``.
    """

    def setUp(self):
        self.m = Histdd(range=test_range_2d, bins=test_bins_2d, axis_names=['foo', 'bar'])

    def test_is_instance(self):
        self.assertIsInstance(self.m, Histdd)

    def test_add_data(self):
        """Counts after ``add`` agree with ``np.histogram2d`` on the same data."""
        m = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]
        m.add(x, y)
        self.assertEqual(m.histogram.tolist(),
                         np.histogram2d(x, y,
                                        range=test_range_2d,
                                        bins=test_bins_2d)[0].tolist())
        # A second add of the same points; x*2 is list repetition, i.e. the
        # doubled sample.
        m.add(x, y)
        self.assertEqual(m.histogram.tolist(),
                         (np.histogram2d(x*2, y*2,
                                         range=test_range_2d,
                                         bins=test_bins_2d)[0].tolist()))
        # These far-away points are expected not to change any bin content
        m.add([999, 999], [111, 111])
        self.assertEqual(m.histogram.tolist(),
                         np.histogram2d(x*2, y*2,
                                        range=test_range_2d,
                                        bins=test_bins_2d)[0].tolist())

    def test_pandas(self):
        """A DataFrame with columns named after the axes can be added as-is."""
        import pandas as pd
        m = self.m
        test_data = pd.DataFrame([{'foo': 0, 'bar': 0}, {'foo': 0, 'bar': 5}])
        m.add(test_data)
        self.assertEqual(m.histogram.tolist(),
                         np.histogram2d([0, 0], [0, 5],
                                        range=test_range_2d,
                                        bins=test_bins_2d)[0].tolist())

    def test_projection(self):
        """A projection carries the counts and bin edges of its own axis."""
        m = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]
        m.add(x, y)
        # Take the expected edges of each axis from numpy and compare them
        # element by element; a sum of signed differences can be zero even
        # when the individual edges are wrong.
        _, foo_edges, bar_edges = np.histogram2d(x, y,
                                                 range=test_range_2d,
                                                 bins=test_bins_2d)
        p1 = m.projection(0)
        self.assertEqual(p1.histogram.tolist(), [1, 1, 1])
        np.testing.assert_allclose(p1.bin_edges, foo_edges)
        p2 = m.projection(1)
        self.assertEqual(p2.histogram.tolist(), [0, 3, 0])
        np.testing.assert_allclose(p2.bin_edges, bar_edges)
        # Axis name and axis index must select the same projection
        p_2 = m.projection('bar')
        self.assertEqual(p2.histogram.tolist(), p_2.histogram.tolist())
        self.assertEqual(p2.bin_edges.tolist(), p_2.bin_edges.tolist())

    def test_cumulate(self):
        """Check cumulation along each axis and its normalised counterpart."""
        self.m.add([-1, 0, 1], [-10, 0, 10])
        np.testing.assert_equal(self.m.histogram,
                                np.array([[1, 0, 0],
                                          [0, 1, 0],
                                          [0, 0, 1]]))
        np.testing.assert_equal(self.m.cumulate(0).histogram,
                                np.array([[1, 0, 0],
                                          [1, 1, 0],
                                          [1, 1, 1]]))
        np.testing.assert_equal(self.m.cumulate(1).histogram,
                                np.array([[1, 1, 1],
                                          [0, 1, 1],
                                          [0, 0, 1]]))
        # One event per slice: cumulated counts equal the cumulative density
        np.testing.assert_equal(self.m.cumulate(1).histogram,
                                self.m.cumulative_density(1).histogram)
        # Two events per slice: the counts double, the density does not
        self.m.add([-1, 0, 1], [-10, 0, 10])
        np.testing.assert_equal(self.m.cumulate(1).histogram,
                                2 * self.m.cumulative_density(1).histogram)