def setup(self):
        """Create the shared input data, expected values and Histogram fixtures."""
        # Raw input values that the tests feed into the histograms.
        self.data = [1.0, 1.1, 1.2, 1.3, 2.0, 1.4, 2.3, 2.5, 3.1, 3.5]

        # Reference binning values.
        self.nbins = 5
        self.bins = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]
        self.left_bin_edges = (1.0,)
        self.bin_widths = (0.5,)

        # Expected counts; keys are presumably 1-tuples of bin indices.
        expected_counts = {
            (0,): 5,
            (2,): 2,
            (3,): 1,
            (4,): 1,
            (5,): 1,
        }
        self.hist = collections.Counter(expected_counts)

        # Histogram objects built from different parameter combinations.
        full_range = (1.0, 3.5)
        self.default_hist = Histogram()
        self.hist_nbins = Histogram(n_bins=5)
        self.hist_nbins_binwidth = Histogram(n_bins=5, bin_width=1.0)
        self.hist_nbins_range = Histogram(n_bins=5, bin_range=full_range)
        self.hist_binwidth_range = Histogram(bin_width=0.5,
                                             bin_range=full_range)
 def test_cumulative(self):
     """Check cumulative values with maximum=None and with maximum=1.0."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # fill the histogram; result not needed

     expected_raw = [5.0, 5.0, 7.0, 8.0, 9.0, 10.0]
     raw_values = list(hist_obj.cumulative(None).values())
     assert_items_almost_equal(raw_values, expected_raw)

     expected_scaled = [0.5, 0.5, 0.7, 0.8, 0.9, 1.0]
     scaled = hist_obj.cumulative(maximum=1.0)
     assert_items_almost_equal(scaled, expected_scaled)
 def test_normalized(self):
     """Check normalized values, by default and with raw_probability=True."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # fill the histogram; result not needed

     default_values = list(hist_obj.normalized().values())
     assert default_values == [1.0, 0.0, 0.4, 0.2, 0.2, 0.2]

     raw_prob_values = list(
         hist_obj.normalized(raw_probability=True).values())
     assert raw_prob_values == [0.5, 0.0, 0.2, 0.1, 0.1, 0.1]
    def _ensemble_statistics(self,
                             ensemble,
                             samples,
                             weights=None,
                             force=False):
        """Calculate stats for a given ensemble: path length, crossing prob

        In general we do all of these at once because the extra cost of
        running through the samples twice is worse than doing the extra
        calculations.

        Parameters
        ----------
        ensemble: Ensemble
        samples : iterator over samples
        """
        # figure out which histograms need to updated for this ensemble
        run_it = []
        if not force:
            # TODO figure out which need to be rerun
            pass
        else:
            run_it = list(self.ensemble_histogram_info.keys())

        for hist in run_it:
            hist_info = self.ensemble_histogram_info[hist]
            if hist_info.hist_args == {} and self.hist_args[hist] != {}:
                hist_info.hist_args = self.hist_args[hist]

            if hist not in self.histograms.keys():
                self.histograms[hist] = {}
            self.histograms[hist][ensemble] = Histogram(
                **(hist_info.hist_args))

        in_ens_samples = (s for s in samples
                          if s.ensemble.__uuid__ == ensemble.__uuid__)
        hist_data = {}
        buflen = -1
        sample_buf = []
        prev_sample = {h: None for h in run_it}
        prev_result = {h: None for h in run_it}
        for sample in in_ens_samples:
            for hist in run_it:
                if sample is prev_sample[hist]:
                    hist_data_sample = prev_result[hist]
                else:
                    hist_info = self.ensemble_histogram_info[hist]
                    hist_data_sample = hist_info.f(sample, **hist_info.f_args)
                prev_result[hist] = hist_data_sample
                prev_sample[hist] = sample
                try:
                    hist_data[hist].append(hist_data_sample)
                except KeyError:
                    hist_data[hist] = [hist_data_sample]

        for hist in run_it:
            self.histograms[hist][ensemble].histogram(hist_data[hist], weights)
            self.histograms[hist][ensemble].name = (hist + " " + self.name +
                                                    " " + ensemble.name)
 def test_xvals(self):
     """Check bin edges/widths and the "l", "r" and "m" x-values."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # need this to set the bins

     assert hist_obj.left_bin_edges == self.left_bin_edges
     assert hist_obj.bin_widths == self.bin_widths

     expected_by_mode = (
         ("l", [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]),
         ("r", [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]),
         ("m", [1.25, 1.75, 2.25, 2.75, 3.25, 3.75]),
     )
     for mode, expected in expected_by_mode:
         assert all(hist_obj.xvals(mode) == expected)
 def test_reverse_cumulative_all_zero_warn(self):
     """All-zero counts: expect a UserWarning and an all-zero result."""
     zero_counts = collections.Counter({(0,): 0, (1,): 0})
     hist_obj = Histogram(bin_width=0.5, bin_range=(1.0, 3.5))
     # inject the counts directly instead of histogramming data
     hist_obj._histogram = zero_counts

     with pytest.warns(UserWarning, match=r"No non-zero"):
         rev_cumulative = hist_obj.reverse_cumulative()

     assert rev_cumulative(3.12) == 0
     for entry in rev_cumulative.values():
         assert entry == 0
 def test_reverse_cumulative(self):
     """Check reverse cumulative values for maximum=None and maximum=1.0."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # fill the histogram; result not needed

     unscaled = hist_obj.reverse_cumulative(maximum=None)
     assert_items_almost_equal(unscaled.values(), [10, 5, 5, 3, 2, 1])

     scaled = hist_obj.reverse_cumulative(maximum=1.0)
     expected_scaled = [1.0, 0.5, 0.5, 0.3, 0.2, 0.1]
     assert_items_almost_equal(scaled.values(), expected_scaled)
 def test_normalized(self):
     """Check normalized values, by default and with raw_probability=True."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # fill the histogram; result not needed

     default_values = hist_obj.normalized().values()
     assert_items_equal(default_values, [1.0, 0.0, 0.4, 0.2, 0.2, 0.2])

     raw_prob_values = hist_obj.normalized(raw_probability=True).values()
     assert_items_equal(raw_prob_values, [0.5, 0.0, 0.2, 0.1, 0.1, 0.1])
    def test_add_data_to_histogram(self):
        """Adding the same data twice should double the counts."""
        accumulator = Histogram(n_bins=5, bin_range=(1.0, 3.5))

        # first pass: counts match the reference histogram
        first_pass = accumulator.add_data_to_histogram(self.data)
        assert_equal(accumulator.count, 10)
        assert_items_equal(first_pass, self.hist)

        # second pass: everything is counted twice
        second_pass = accumulator.add_data_to_histogram(self.data)
        assert_items_equal(second_pass, first_pass + first_pass)
        assert_equal(accumulator.count, 20)
 def test_xvals(self):
     """Check bin edges/widths and the "l", "r" and "m" x-values."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # need this to set the bins

     assert_equal(hist_obj.left_bin_edges, self.left_bin_edges)
     assert_equal(hist_obj.bin_widths, self.bin_widths)

     expected_by_mode = (
         ("l", [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]),
         ("r", [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]),
         ("m", [1.25, 1.75, 2.25, 2.75, 3.25, 3.75]),
     )
     for mode, expected in expected_by_mode:
         assert_items_equal(hist_obj.xvals(mode), expected)
    def test_add_data_to_histogram(self):
        """Adding the same data twice should double the counts."""
        accumulator = Histogram(n_bins=5, bin_range=(1.0, 3.5))

        # first pass: counts match the reference histogram
        first_pass = accumulator.add_data_to_histogram(self.data)
        assert accumulator.count == 10
        assert first_pass == self.hist

        # second pass: everything is counted twice
        second_pass = accumulator.add_data_to_histogram(self.data)
        assert second_pass == first_pass + first_pass
        assert accumulator.count == 20
 def test_compare_parameters(self):
     """Check compare_parameters across the fixture histograms."""
     nbins_only = self.hist_nbins
     nbins_range = self.hist_nbins_range
     width_range = self.hist_binwidth_range

     assert nbins_only.compare_parameters(None) is False

     # n_bins+range and bin_width+range fixtures compare as equal, both ways
     assert nbins_range.compare_parameters(width_range) is True
     assert width_range.compare_parameters(nbins_range) is True

     # a histogram built only from n_bins, before and after seeing data
     from_data = Histogram(n_bins=5)
     assert nbins_range.compare_parameters(from_data) is False
     from_data.histogram(self.data)
     assert nbins_range.compare_parameters(from_data) is False

     assert nbins_range.compare_parameters(nbins_only) is False
     assert from_data.compare_parameters(nbins_only) is False
     assert nbins_only.compare_parameters(from_data) is False
    def test_histograms_to_pandas_dataframe(self):
        """Sorted dataframe columns must follow integer, not string, order."""
        values = [1.0, 1.1, 1.2, 1.3, 2.0, 1.4, 2.3, 2.5, 3.1, 3.5]

        # More than 10 histograms are needed: with fewer, string ordering
        # and integer ordering of the column labels look the same.
        n_hists = 11
        hists = [Histogram(n_bins=5) for _ in range(n_hists)]
        for single_hist in hists:
            single_hist.histogram(values)

        # sort like is done in analysis
        frame = histograms_to_pandas_dataframe(hists).sort_index(axis=1)

        # String-based sorting would give [0, 1, 10, ...] rather than
        # [0, 1, 2, ...], which makes this comparison fail.
        for position, column in enumerate(frame.columns):
            assert str(column) == str(position)
 def test_compare_parameters(self):
     """Check compare_parameters across the fixture histograms."""
     nbins_only = self.hist_nbins
     nbins_range = self.hist_nbins_range
     width_range = self.hist_binwidth_range

     assert_equal(nbins_only.compare_parameters(None), False)

     # n_bins+range and bin_width+range fixtures compare as equal, both ways
     assert_equal(nbins_range.compare_parameters(width_range), True)
     assert_equal(width_range.compare_parameters(nbins_range), True)

     # a histogram built only from n_bins, before and after seeing data
     from_data = Histogram(n_bins=5)
     assert_equal(nbins_range.compare_parameters(from_data), False)
     from_data.histogram(self.data)
     assert_equal(nbins_range.compare_parameters(from_data), False)

     assert_equal(nbins_range.compare_parameters(nbins_only), False)
     assert_equal(from_data.compare_parameters(nbins_only), False)
     assert_equal(nbins_only.compare_parameters(from_data), False)
 def test_normalization(self):
     """The normalization of the test data with 5 bins should be 5.0."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # fill the histogram; result not needed
     normalization = hist_obj._normalization()
     assert normalization == 5.0
 def test_build_from_data_fail(self):
     """Calling histogram() with no data must raise RuntimeError."""
     empty_hist = Histogram(n_bins=5)
     expect_failure = pytest.raises(RuntimeError,
                                    match="called without data")
     with expect_failure:
         empty_hist.histogram()
 def test_build_from_data_fail(self):
     """Call histogram() with no data on a Histogram built from n_bins only."""
     # NOTE(review): no assertion is visible in this test body; presumably an
     # expected-exception decorator outside this view checks the outcome --
     # confirm before relying on this test.
     empty_hist = Histogram(n_bins=5)
     empty_hist.histogram()
 def test_left_bin_error(self):
     """Reverse cumulative of a single value at the upper range edge."""
     hist_obj = Histogram(bin_width=0.5, bin_range=(-1.0, 3.5))
     hist_obj.histogram([3.5])  # single data point equal to the range maximum
     rev_cumulative = hist_obj.reverse_cumulative()
     assert rev_cumulative != 0
 def test_normalization(self):
     """The normalization of the test data with 5 bins should be 5.0."""
     hist_obj = Histogram(n_bins=5)
     hist_obj.histogram(self.data)  # fill the histogram; result not needed
     normalization = hist_obj._normalization()
     assert_equal(normalization, 5.0)