def get_test_histograms2():
    """Get set 2 of test histograms.

    Builds a small mixed-type dataframe, fills two 1d and two 2d histograms
    from it and wraps each histogram in a ``HistogramContainer``.

    :return: tuple ``(df, hc1, hc2, hc3, hc4)`` where ``hc1`` wraps a
        categorical histogram of column C, ``hc2`` a binned histogram of A
        with C as sub-histogram, ``hc3`` a binned histogram of A, and ``hc4``
        a categorical histogram of C with A as sub-histogram.
    """
    # dummy dataset with mixed types
    # Spelled out explicitly instead of calling the private helper
    # pd.util.testing.makeMixedDataFrame(): pandas.util.testing was deprecated
    # in pandas 1.0 and is no longer available in recent releases.
    df = pd.DataFrame(
        {
            'A': [0.0, 1.0, 2.0, 3.0, 4.0],
            'B': [0.0, 1.0, 0.0, 1.0, 0.0],
            'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            'D': pd.bdate_range('1/1/2009', periods=5),
        }
    )

    # building 1d-, 2d-histogram (iteratively)
    hist1 = hg.Categorize(unit('C'))
    hist2 = hg.Bin(5, 0, 5, unit('A'), value=hist1)
    hist3 = hg.Bin(5, 0, 5, unit('A'))
    hist4 = hg.Categorize(unit('C'), value=hist3)

    # fill them
    hists = [hist1, hist2, hist3, hist4]
    for hist in hists:
        hist.fill.numpy(df)

    hc1, hc2, hc3, hc4 = [HistogramContainer(hist) for hist in hists]

    return df, hc1, hc2, hc3, hc4
def test_project_on_x():
    """Check that projecting a 2d histogram on its x-axis matches the 1d histogram.

    The projection of the (A, C) histogram is compared with the plain A
    histogram, and the projection of the (C, A) histogram with the plain C
    histogram.
    """
    data = get_test_data()

    cat_c = hg.Categorize(unit('C'))
    bin_a_by_c = hg.Bin(5, 0, 5, unit('A'), value=cat_c)
    bin_a = hg.Bin(5, 0, 5, unit('A'))
    cat_c_by_a = hg.Categorize(unit('C'), value=bin_a)

    cat_c.fill.numpy(data)
    bin_a_by_c.fill.numpy(data)
    bin_a.fill.numpy(data)
    cat_c_by_a.fill.numpy(data)

    projected_a = project_on_x(bin_a_by_c)
    projected_c = project_on_x(cat_c_by_a)

    # numeric axis: edges and entries of projection vs. reference
    edges_projected = projected_a.bin_edges()
    entries_projected = projected_a.bin_entries()
    edges_reference = bin_a.bin_edges()
    entries_reference = bin_a.bin_entries()

    # categorical axis: labels and entries of projection vs. reference
    labels_projected = projected_c.bin_labels()
    entries_projected_c = projected_c.bin_entries()
    labels_reference = cat_c.bin_labels()
    # request the reference entries in the projection's label order
    entries_reference_c = cat_c.bin_entries(labels_projected)

    np.testing.assert_array_equal(edges_projected, edges_reference)
    np.testing.assert_array_equal(entries_projected, entries_reference)
    np.testing.assert_array_equal(sorted(labels_projected), sorted(labels_reference))
    np.testing.assert_array_equal(entries_projected_c, entries_reference_c)
def test_assert_similar_hists():
    """Test assert on similarity of list of histograms.

    Check similarity of: type, n-dim, sub-hists, specific type attributes.

    Every histogram must be similar to itself, and ``assert_similar_hists``
    must raise an ``AssertionError`` with a fixed message for each pair of
    histograms that differ in type or in sub-histogram layout.
    """
    # dummy dataset with mixed types
    # Spelled out explicitly instead of calling the private helper
    # pd.util.testing.makeMixedDataFrame(): pandas.util.testing was deprecated
    # in pandas 1.0 and is no longer available in recent releases.
    df = pd.DataFrame(
        {
            'A': [0.0, 1.0, 2.0, 3.0, 4.0],
            'B': [0.0, 1.0, 0.0, 1.0, 0.0],
            'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            'D': pd.bdate_range('1/1/2009', periods=5),
        }
    )
    # convert timestamp (col D) to nanosec since 1970-1-1
    df['date'] = df['D'].apply(to_ns)

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist0 = hg.Bin(5, 0, 5, unit('A'))
    hist1 = hg.Categorize(unit('C'))
    hist2 = hg.Bin(5, 0, 5, unit('A'), value=hist1)
    hist3 = hg.Categorize(unit('C'), value=hist0)
    hist4 = hg.SparselyBin(
        origin=pd.Timestamp('2009-01-01').value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit('date'),
        value=hist2,
    )
    hist5 = hg.SparselyBin(
        origin=pd.Timestamp('2009-01-01').value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit('date'),
        value=hist3,
    )

    # fill them
    hists = [hist0, hist1, hist2, hist3, hist4, hist5]
    for hist in hists:
        hist.fill.numpy(df)

    hc0, hc1, hc2, hc3, hc4, hc5 = [HistogramContainer(hist) for hist in hists]

    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
        assert check_similar_hists([hc, hc])

    # Run all dissimilar pairs first and collect the exception arguments;
    # the placeholder [''] makes the final comparison fail if nothing raised.
    collected_args = []
    for pair in ([hc0, hc1], [hc2, hc3], [hc4, hc5]):
        args = ['']
        try:
            assert_similar_hists(pair)
        except AssertionError as e:
            args = e.args
        collected_args.append(args)

    for args in collected_args:
        assert args[0] == 'Input histograms are not all similar.'
def get_test_histograms1():
    """Get set 1 of test histograms.

    Builds a small mixed-type dataframe (with an extra ``date`` column and
    two constant boolean columns), fills a 1d, 2d and 3d histogram from it
    and wraps each histogram in a ``HistogramContainer``.

    :return: tuple ``(df, hc1, hc2, hc3)`` where ``hc1`` wraps the
        categorical histogram of C, ``hc2`` the (A, C) histogram and ``hc3``
        the (date, A, C) histogram.
    """
    # dummy dataset with mixed types
    # Spelled out explicitly instead of calling the private helper
    # pd.util.testing.makeMixedDataFrame(): pandas.util.testing was deprecated
    # in pandas 1.0 and is no longer available in recent releases.
    df = pd.DataFrame(
        {
            'A': [0.0, 1.0, 2.0, 3.0, 4.0],
            'B': [0.0, 1.0, 0.0, 1.0, 0.0],
            'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            'D': pd.bdate_range('1/1/2009', periods=5),
        }
    )
    # convert timestamp (col D) to nanosec since 1970-1-1
    df['date'] = df['D'].apply(to_ns)
    df['boolT'] = True
    df['boolF'] = False

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist1 = hg.Categorize(unit('C'))
    hist2 = hg.Bin(5, 0, 5, unit('A'), value=hist1)
    hist3 = hg.SparselyBin(
        origin=pd.Timestamp('2009-01-01').value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit('date'),
        value=hist2,
    )

    # fill them
    hists = [hist1, hist2, hist3]
    for hist in hists:
        hist.fill.numpy(df)

    hc1, hc2, hc3 = [HistogramContainer(hist) for hist in hists]

    return df, hc1, hc2, hc3
def test_prepare_2dgrid():
    """Test preparation of grid for extraction of number of entries for 2d hists.

    A 1d histogram must give empty x and y keys; the 2d and 3d histograms
    must give the keys of their first two axes.
    """
    # only the dataframe is needed; the returned containers are not used here
    df, _hc1, _hc2, _hc3 = get_test_histograms1()

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    one_dim = hg.Categorize(unit('C'))
    two_dim = hg.Bin(5, 0, 5, unit('A'), value=one_dim)
    day_origin = pd.Timestamp('2009-01-01').value
    day_width = pd.Timedelta(days=1).value
    three_dim = hg.SparselyBin(
        origin=day_origin,
        binWidth=day_width,
        quantity=unit('date'),
        value=two_dim,
    )

    # fill them
    histograms = (one_dim, two_dim, three_dim)
    for histogram in histograms:
        histogram.fill.numpy(df)

    grids = [prepare_2dgrid(histogram) for histogram in histograms]

    # expected (xkeys, ykeys) per histogram, in the same order as above
    expectations = [
        ([], []),
        ([0, 1, 2, 3, 4], ['foo1', 'foo2', 'foo3', 'foo4', 'foo5']),
        ([0, 1, 4, 5, 6], [0, 1, 2, 3, 4]),
    ]

    for (xkeys, ykeys), (expected_x, expected_y) in zip(grids, expectations):
        np.testing.assert_array_equal(xkeys, expected_x)
        np.testing.assert_array_equal(ykeys, expected_y)
def test_check_similar_hists():
    """Test similarity of list of histograms.

    Check similarity of: type, n-dim, sub-hists, specific type attributes.

    Every histogram must be reported as similar to itself, and pairs that
    differ in type or in sub-histogram layout must be reported as not similar.
    """
    # dummy dataset with mixed types
    # Spelled out explicitly instead of calling the private helper
    # pd.util.testing.makeMixedDataFrame(): pandas.util.testing was deprecated
    # in pandas 1.0 and is no longer available in recent releases.
    df = pd.DataFrame(
        {
            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
            "D": pd.bdate_range("1/1/2009", periods=5),
        }
    )
    # convert timestamp (col D) to nanosec since 1970-1-1
    df["date"] = df["D"].apply(to_ns)

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist0 = hg.Bin(5, 0, 5, unit("A"))
    hist1 = hg.Categorize(unit("C"))
    hist2 = hg.Bin(5, 0, 5, unit("A"), value=hist1)
    hist3 = hg.Categorize(unit("C"), value=hist0)
    hist4 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist2,
    )
    hist5 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist3,
    )

    # fill them
    hists = [hist0, hist1, hist2, hist3, hist4, hist5]
    for hist in hists:
        hist.fill.numpy(df)

    hc0, hc1, hc2, hc3, hc4, hc5 = [HistogramContainer(hist) for hist in hists]

    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
        assert check_similar_hists([hc, hc])

    # pairs with the same dimensionality but different axis types/order
    for pair in ([hc0, hc1], [hc2, hc3], [hc4, hc5]):
        assert not check_similar_hists(pair)
def get_histograms():
    """Build and fill a 1d, 2d and 3d test histogram from the test data.

    :return: tuple of the filled histograms: categorical C, binned A with C
        as sub-histogram, and sparsely binned date with (A, C) as
        sub-histogram.
    """
    data = get_test_data()

    by_category = hg.Categorize(unit('C'))
    by_value = hg.Bin(5, 0, 5, unit('A'), value=by_category)

    # one sparse bin per day, counted from 2009-01-01
    day_origin = pd.Timestamp('2009-01-01').value
    day_width = pd.Timedelta(days=1).value
    by_date = hg.SparselyBin(
        origin=day_origin,
        binWidth=day_width,
        quantity=unit('date'),
        value=by_value,
    )

    by_category.fill.numpy(data)
    by_value.fill.numpy(data)
    by_date.fill.numpy(data)

    return by_category, by_value, by_date
def test_project_split2dhist_on_axis():
    """Check projections of date-split 2d histograms against date-split 1d histograms.

    3d histograms (date, A, C) and (date, C, A) are split along the date
    axis and projected on each remaining axis; the results must agree with
    the directly built (date, A) and (date, C) histograms split the same way.
    """
    data = get_test_data()

    bin_a = hg.Bin(5, 0, 5, unit('A'))
    cat_c = hg.Categorize(unit('C'))
    c_then_a = hg.Categorize(unit('C'), value=bin_a)
    a_then_c = hg.Bin(5, 0, 5, unit('A'), value=cat_c)

    def over_date(sub_hist):
        # sparse daily binning on the date column with the given sub-histogram
        return hg.SparselyBin(
            origin=pd.Timestamp('2009-01-01').value,
            binWidth=pd.Timedelta(days=1).value,
            quantity=unit('date'),
            value=sub_hist,
        )

    date_c_a = over_date(c_then_a)
    date_a_c = over_date(a_then_c)
    date_a = over_date(bin_a)
    date_c = over_date(cat_c)

    date_a.fill.numpy(data)
    date_c.fill.numpy(data)
    date_c_a.fill.numpy(data)
    date_a_c.fill.numpy(data)

    def split_on_date(hist):
        # split along date axis
        return HistogramContainer(hist).split_hist_along_first_dimension(
            xname='x', yname='y', short_keys=True, convert_time_index=True
        )

    split_ac = split_on_date(date_a_c)
    split_ca = split_on_date(date_c_a)
    ref_a = split_on_date(date_a)
    ref_c = split_on_date(date_c)

    proj_a_from_ac = project_split2dhist_on_axis(split_ac, 'x')
    proj_a_from_ca = project_split2dhist_on_axis(split_ca, 'y')
    proj_c_from_ac = project_split2dhist_on_axis(split_ac, 'y')
    proj_c_from_ca = project_split2dhist_on_axis(split_ca, 'x')

    # numeric (A) projections
    assert len(ref_a) == len(proj_a_from_ac)
    assert len(ref_a) == len(proj_a_from_ca)

    for key, reference in ref_a.items():
        assert key in proj_a_from_ac
        assert key in proj_a_from_ca
        trio = (reference, proj_a_from_ac[key], proj_a_from_ca[key])
        edges_ref, edges_first, edges_second = [h.bin_edges() for h in trio]
        entries_ref, entries_first, entries_second = [h.bin_entries() for h in trio]
        np.testing.assert_array_equal(edges_ref, edges_first)
        np.testing.assert_array_equal(edges_ref, edges_second)
        np.testing.assert_array_equal(entries_ref, entries_first)
        np.testing.assert_array_equal(entries_ref, entries_second)

    # categorical (C) projections
    assert len(ref_c) == len(proj_c_from_ac)
    assert len(ref_c) == len(proj_c_from_ca)

    for key, reference in ref_c.items():
        assert key in proj_c_from_ac
        assert key in proj_c_from_ca
        first = proj_c_from_ac[key]
        second = proj_c_from_ca[key]
        labels_ref = reference.bin_labels()
        labels_first = first.bin_labels()
        labels_second = second.bin_labels()
        entries_ref = reference.bin_entries()
        # request entries in the reference label order so they line up
        entries_first = first.bin_entries(labels_ref)
        entries_second = second.bin_entries(labels_ref)
        np.testing.assert_array_equal(sorted(labels_ref), sorted(labels_first))
        np.testing.assert_array_equal(sorted(labels_ref), sorted(labels_second))
        np.testing.assert_array_equal(entries_ref, entries_first)
        np.testing.assert_array_equal(entries_ref, entries_second)