def setup(self):
    """Build the shared fixtures used by the histogram tests.

    Stores the sample data, the values the tests expect for that data, and
    several ``Histogram`` objects created with different constructor
    arguments.
    """
    # Sample values and the expected description of their binning.
    self.data = [1.0, 1.1, 1.2, 1.3, 2.0, 1.4, 2.3, 2.5, 3.1, 3.5]
    self.nbins = 5
    self.bins = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]
    self.left_bin_edges = (1.0,)
    self.bin_widths = (0.5,)

    # Expected counts, keyed by one-element tuples (presumably bin indices).
    expected_counts = {(0,): 5, (2,): 2, (3,): 1, (4,): 1, (5,): 1}
    self.hist = collections.Counter(expected_counts)

    # One histogram per combination of constructor arguments under test.
    full_range = (1.0, 3.5)
    self.default_hist = Histogram()
    self.hist_nbins = Histogram(n_bins=5)
    self.hist_nbins_binwidth = Histogram(n_bins=5, bin_width=1.0)
    self.hist_nbins_range = Histogram(n_bins=5, bin_range=full_range)
    self.hist_binwidth_range = Histogram(bin_width=0.5, bin_range=full_range)
def test_cumulative(self):
    """Check the cumulative histogram, raw and with ``maximum=1.0``."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)

    raw_values = list(histogram.cumulative(None).values())
    assert_items_almost_equal(raw_values, [5.0, 5.0, 7.0, 8.0, 9.0, 10.0])

    # NOTE(review): the returned object is compared directly here, not its
    # ``.values()`` as above -- confirm this is intended.
    scaled = histogram.cumulative(maximum=1.0)
    assert_items_almost_equal(scaled, [0.5, 0.5, 0.7, 0.8, 0.9, 1.0])
def test_normalized(self):
    """Check normalized values with and without ``raw_probability``."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)

    default_values = list(histogram.normalized().values())
    assert default_values == [1.0, 0.0, 0.4, 0.2, 0.2, 0.2]

    raw_values = list(histogram.normalized(raw_probability=True).values())
    assert raw_values == [0.5, 0.0, 0.2, 0.1, 0.1, 0.1]
def _ensemble_statistics(self, ensemble, samples, weights=None, force=False):
    """Calculate stats for a given ensemble: path length, crossing prob

    In general we do all of these at once because the extra cost of
    running through the samples twice is worse than doing the extra
    calculations.

    Parameters
    ----------
    ensemble : Ensemble
        ensemble to analyze; only samples whose ``ensemble.__uuid__``
        equals this ensemble's ``__uuid__`` contribute
    samples : iterator over samples
        samples to take the histogram data from
    weights : optional
        passed unchanged as the second argument of each
        ``Histogram.histogram`` call
    force : bool
        if True, rebuild every histogram listed in
        ``self.ensemble_histogram_info``; if False, no histogram is
        rebuilt (selecting the stale ones is not implemented yet)

    Raises
    ------
    KeyError
        if a histogram has to be built but no sample matched ``ensemble``
    """
    # figure out which histograms need to be updated for this ensemble
    if force:
        run_it = list(self.ensemble_histogram_info)
    else:
        # TODO figure out which need to be rerun
        run_it = []

    for hist in run_it:
        hist_info = self.ensemble_histogram_info[hist]
        # use self.hist_args[hist] when the histogram's own arguments are
        # empty
        if hist_info.hist_args == {} and self.hist_args[hist] != {}:
            hist_info.hist_args = self.hist_args[hist]
        if hist not in self.histograms:
            self.histograms[hist] = {}
        self.histograms[hist][ensemble] = Histogram(**hist_info.hist_args)

    in_ens_samples = (s for s in samples
                      if s.ensemble.__uuid__ == ensemble.__uuid__)
    hist_data = {}
    # one-entry cache per histogram: when the same sample object shows up
    # again right away, its previous result is reused
    prev_sample = {h: None for h in run_it}
    prev_result = {h: None for h in run_it}
    for sample in in_ens_samples:
        for hist in run_it:
            if sample is prev_sample[hist]:
                hist_data_sample = prev_result[hist]
            else:
                hist_info = self.ensemble_histogram_info[hist]
                hist_data_sample = hist_info.f(sample, **hist_info.f_args)
            prev_result[hist] = hist_data_sample
            prev_sample[hist] = sample
            hist_data.setdefault(hist, []).append(hist_data_sample)

    for hist in run_it:
        histogram = self.histograms[hist][ensemble]
        histogram.histogram(hist_data[hist], weights)
        histogram.name = hist + " " + self.name + " " + ensemble.name
def test_xvals(self):
    """Check the bin geometry and the "l", "r" and "m" x-values."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)  # need this to set the bins

    assert histogram.left_bin_edges == self.left_bin_edges
    assert histogram.bin_widths == self.bin_widths

    expected_by_mode = (
        ("l", [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]),
        ("r", [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]),
        ("m", [1.25, 1.75, 2.25, 2.75, 3.25, 3.75]),
    )
    for mode, expected in expected_by_mode:
        assert all(histogram.xvals(mode) == expected)
def test_reverse_cumulative_all_zero_warn(self):
    """An all-zero histogram warns and yields a zero reverse cumulative."""
    histogram = Histogram(bin_width=0.5, bin_range=(1.0, 3.5))
    # Set the counts directly so that every stored bin is zero.
    zero_counts = {(0,): 0, (1,): 0}
    histogram._histogram = collections.Counter(zero_counts)

    with pytest.warns(UserWarning, match=r"No non-zero"):
        rcumul = histogram.reverse_cumulative()

    assert rcumul(3.12) == 0
    assert all(value == 0 for value in rcumul.values())
def test_reverse_cumulative(self):
    """Check the reverse cumulative, raw and with ``maximum=1.0``."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)

    raw = histogram.reverse_cumulative(maximum=None)
    assert_items_almost_equal(raw.values(), [10, 5, 5, 3, 2, 1])

    scaled = histogram.reverse_cumulative(maximum=1.0)
    assert_items_almost_equal(scaled.values(),
                              [1.0, 0.5, 0.5, 0.3, 0.2, 0.1])
def test_normalized(self):
    """Check normalized values with and without ``raw_probability``."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)

    default_values = histogram.normalized().values()
    assert_items_equal(default_values, [1.0, 0.0, 0.4, 0.2, 0.2, 0.2])

    raw_values = histogram.normalized(raw_probability=True).values()
    assert_items_equal(raw_values, [0.5, 0.0, 0.2, 0.1, 0.1, 0.1])
def test_add_data_to_histogram(self):
    """Adding data accumulates the bin counts and the total count."""
    histogram = Histogram(n_bins=5, bin_range=(1.0, 3.5))
    hist = histogram.add_data_to_histogram(self.data)
    assert_equal(histogram.count, 10)
    # Compare with equality rather than item by item: iterating a Counter
    # yields only its keys, so the counts themselves would go unchecked.
    assert_equal(hist, self.hist)

    hist2 = histogram.add_data_to_histogram(self.data)
    # Adding the same data again must double every count.
    assert_equal(hist2, hist + hist)
    assert_equal(histogram.count, 20)
def test_xvals(self):
    """Check the bin geometry and the "l", "r" and "m" x-values."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)  # need this to set the bins

    assert_equal(histogram.left_bin_edges, self.left_bin_edges)
    assert_equal(histogram.bin_widths, self.bin_widths)

    expected_by_mode = (
        ("l", [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]),
        ("r", [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]),
        ("m", [1.25, 1.75, 2.25, 2.75, 3.25, 3.75]),
    )
    for mode, expected in expected_by_mode:
        assert_items_equal(histogram.xvals(mode), expected)
def test_add_data_to_histogram(self):
    """Adding data accumulates the bin counts and the total count."""
    histogram = Histogram(n_bins=5, bin_range=(1.0, 3.5))

    first = histogram.add_data_to_histogram(self.data)
    assert histogram.count == 10
    assert first == self.hist

    # Adding the same data a second time must double every count.
    second = histogram.add_data_to_histogram(self.data)
    assert second == first + first
    assert histogram.count == 20
def test_compare_parameters(self):
    """Check ``compare_parameters`` across the fixture histograms."""
    nbins = self.hist_nbins
    nbins_range = self.hist_nbins_range
    binwidth_range = self.hist_binwidth_range

    assert nbins.compare_parameters(None) is False
    assert nbins_range.compare_parameters(binwidth_range) is True
    assert binwidth_range.compare_parameters(nbins_range) is True

    # The comparison is checked both before and after data is added.
    fresh = Histogram(n_bins=5)
    assert nbins_range.compare_parameters(fresh) is False
    fresh.histogram(self.data)
    assert nbins_range.compare_parameters(fresh) is False
    assert nbins_range.compare_parameters(nbins) is False
    assert fresh.compare_parameters(nbins) is False
    assert nbins.compare_parameters(fresh) is False
def test_histograms_to_pandas_dataframe(self):
    """Columns of the combined dataframe must sort in integer order."""
    data = [1.0, 1.1, 1.2, 1.3, 2.0, 1.4, 2.3, 2.5, 3.1, 3.5]
    # This length needs to be larger than 10 to see a difference between
    # str ordering and int ordering
    n_hists = 11
    hists = [Histogram(n_bins=5) for _ in range(n_hists)]
    for histogram in hists:
        histogram.histogram(data)

    # sort like is done in analysis
    df = histograms_to_pandas_dataframe(hists).sort_index(axis=1)

    # This breaks if the sorting is done based on strings as that will
    # return [0, 1, 10 ...] instead of [0, 1, 2, ...]
    for position, column in enumerate(df.columns):
        assert str(column) == str(position)
def test_compare_parameters(self):
    """Check ``compare_parameters`` across the fixture histograms."""
    nbins = self.hist_nbins
    nbins_range = self.hist_nbins_range
    binwidth_range = self.hist_binwidth_range

    assert_equal(nbins.compare_parameters(None), False)
    assert_equal(nbins_range.compare_parameters(binwidth_range), True)
    assert_equal(binwidth_range.compare_parameters(nbins_range), True)

    # The comparison is checked both before and after data is added.
    fresh = Histogram(n_bins=5)
    assert_equal(nbins_range.compare_parameters(fresh), False)
    fresh.histogram(self.data)
    assert_equal(nbins_range.compare_parameters(fresh), False)
    assert_equal(nbins_range.compare_parameters(nbins), False)
    assert_equal(fresh.compare_parameters(nbins), False)
    assert_equal(nbins.compare_parameters(fresh), False)
def test_normalization(self):
    """``_normalization()`` must give 5.0 for the fixture data."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)
    normalization = histogram._normalization()
    assert normalization == 5.0
def test_build_from_data_fail(self):
    """Calling ``histogram()`` without data must raise ``RuntimeError``."""
    empty = Histogram(n_bins=5)
    expected_failure = pytest.raises(RuntimeError,
                                     match="called without data")
    with expected_failure:
        empty.histogram()
def test_build_from_data_fail(self):
    """Call ``histogram()`` on a histogram that was never given data.

    NOTE(review): nothing in this body asserts an outcome, so an expected
    failure would have to be declared outside of it (e.g. by a decorator)
    -- confirm against the surrounding file.
    """
    Histogram(n_bins=5).histogram()
def test_left_bin_error(self):
    """A single value at the top of the range gives a non-zero result."""
    histogram = Histogram(bin_width=0.5, bin_range=(-1.0, 3.5))
    # The only data point equals the upper bound of ``bin_range``.
    histogram.histogram([3.5])
    reverse = histogram.reverse_cumulative()
    assert reverse != 0
def test_normalization(self):
    """``_normalization()`` must give 5.0 for the fixture data."""
    histogram = Histogram(n_bins=5)
    histogram.histogram(self.data)
    normalization = histogram._normalization()
    assert_equal(normalization, 5.0)