def test_assert_similar_hists():
    """Test assert on similarity of list of histograms

    Check similarity of: type, n-dim, sub-hists, specific type attributes.
    Each histogram must be accepted when paired with itself, while
    assert_similar_hists must raise an AssertionError carrying a fixed message
    for every pair of differently-structured histograms.
    """
    # dummy dataset with mixed types: the frame that the deprecated
    # pd.util.testing.makeMixedDataFrame() helper used to return, spelled out
    # explicitly because that helper is absent from pandas 2.x
    df = pd.DataFrame({
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": pd.bdate_range("1/1/2009", periods=5),
    })
    # convert timestamp (col D) to nanosec since 1970-1-1
    df["date"] = df["D"].apply(to_ns)

    # building 1d-, 2d-, and 3d-histogram (iteratively); each odd/even pair has
    # the same dimensionality but swapped axes
    hist0 = hg.Bin(5, 0, 5, unit("A"))
    hist1 = hg.Categorize(unit("C"))
    hist2 = hg.Bin(5, 0, 5, unit("A"), value=hist1)
    hist3 = hg.Categorize(unit("C"), value=hist0)
    hist4 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist2,
    )
    hist5 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist3,
    )

    # fill them
    for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
        hist.fill.numpy(df)

    hc0 = HistogramContainer(hist0)
    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)
    hc4 = HistogramContainer(hist4)
    hc5 = HistogramContainer(hist5)

    # a histogram is always similar to itself
    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
        assert check_similar_hists([hc, hc])

    # collect the exception arguments per pair; the [""] sentinel makes the
    # message comparison below fail when no AssertionError was raised at all
    captured = []
    for pair in ([hc0, hc1], [hc2, hc3], [hc4, hc5]):
        args = [""]
        try:
            assert_similar_hists(pair)
        except AssertionError as e:
            args = e.args
        captured.append(args)

    for args in captured:
        assert args[0] == "Input histograms are not all similar."
def test_get_consistent_numpy_entries():
    """Test extraction of number of entries

    The bins of the input histograms are first made consistent with each
    other. Two categorical histograms (column C) and two sparsely-binned
    histograms (column A) are filled from different frames, and the entries
    returned per pair are compared against hand-computed values over the
    combined labels / bin centers.
    """
    frame1 = pd.DataFrame({
        "A": [0, 1, 2, 3, 4, 3, 2, 1, 1, 1],
        "C": ["f1", "f3", "f4", "f3", "f4", "f2", "f2", "f1", "f3", "f4"],
    })
    frame2 = pd.DataFrame({
        "A": [2, 3, 4, 5, 7, 4, 6, 5, 7, 8],
        "C": ["f7", "f3", "f5", "f8", "f9", "f2", "f3", "f6", "f7", "f7"],
    })

    def categorical():
        # empty categorical histogram of column C, wrapped in a container
        return HistogramContainer(hg.Categorize(unit("C")))

    def sparse():
        # empty unit-width sparsely-binned histogram of column A, wrapped in a container
        return HistogramContainer(
            hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A")))

    cat1, cat2 = categorical(), categorical()
    sparse1, sparse2 = sparse(), sparse()

    # fill each container's underlying histogram from its own frame
    cat1.hist.fill.numpy(frame1)
    cat2.hist.fill.numpy(frame2)
    sparse1.hist.fill.numpy(frame1)
    sparse2.hist.fill.numpy(frame2)

    cat_pair = [cat1, cat2]
    sparse_pair = [sparse1, sparse2]
    e0, e1 = get_consistent_numpy_entries(cat_pair, get_bin_labels=False)
    _, labels01 = get_consistent_numpy_entries(cat_pair, get_bin_labels=True)
    e2, e3 = get_consistent_numpy_entries(sparse_pair, get_bin_labels=False)
    _, centers23 = get_consistent_numpy_entries(sparse_pair, get_bin_labels=True)

    # (actual, expected) pairs, checked in order
    checks = [
        (e0, [2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
        (e1, [0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0]),
        (labels01, ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9"]),
        (e2, [1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0]),
        (e3, [0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0]),
        (centers23, [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]),
    ]
    for actual, expected in checks:
        np.testing.assert_array_equal(actual, expected)
def test_get_consistent_numpy_entries():
    """Test extraction of number of entries

    get_consistent_numpy_entries is called on a pair of categorical histograms
    and on a pair of sparsely-binned histograms, each pair filled from two
    different frames. The returned entries, labels and bin centers are compared
    with the values expected once both histograms share the same bins.
    """
    data_first = pd.DataFrame({
        "A": [0, 1, 2, 3, 4, 3, 2, 1, 1, 1],
        "C": ["f1", "f3", "f4", "f3", "f4", "f2", "f2", "f1", "f3", "f4"],
    })
    data_second = pd.DataFrame({
        "A": [2, 3, 4, 5, 7, 4, 6, 5, 7, 8],
        "C": ["f7", "f3", "f5", "f8", "f9", "f2", "f3", "f6", "f7", "f7"],
    })

    # two categorical (column C) and two sparsely-binned (column A) 1d histograms
    cat_first = HistogramContainer(hg.Categorize(unit("C")))
    cat_second = HistogramContainer(hg.Categorize(unit("C")))
    num_first = HistogramContainer(
        hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A")))
    num_second = HistogramContainer(
        hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A")))

    # fill them: the "first" histograms see data_first, the "second" data_second
    fill_plan = [
        (cat_first, data_first),
        (cat_second, data_second),
        (num_first, data_first),
        (num_second, data_second),
    ]
    for container, data in fill_plan:
        container.hist.fill.numpy(data)

    cat_e0, cat_e1 = get_consistent_numpy_entries(
        [cat_first, cat_second], get_bin_labels=False)
    _, cat_labels = get_consistent_numpy_entries(
        [cat_first, cat_second], get_bin_labels=True)
    num_e0, num_e1 = get_consistent_numpy_entries(
        [num_first, num_second], get_bin_labels=False)
    _, num_centers = get_consistent_numpy_entries(
        [num_first, num_second], get_bin_labels=True)

    # categorical pair: entries over the combined labels f1..f9
    np.testing.assert_array_equal(
        cat_e0, [2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    np.testing.assert_array_equal(
        cat_e1, [0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0])
    np.testing.assert_array_equal(
        cat_labels, ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9"])

    # numeric pair: entries over the combined unit-width bins
    np.testing.assert_array_equal(
        num_e0, [1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0])
    np.testing.assert_array_equal(
        num_e1, [0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0])
    np.testing.assert_array_equal(
        num_centers, [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5])
def get_test_histograms1():
    """Get set 1 of test histograms

    Builds a small mixed-type frame and fills a 1d (column C), a 2d (A x C)
    and a 3d (date x A x C) histogram from it.

    :return: tuple (df, hc1, hc2, hc3) holding the input frame and the three
        filled histograms, each wrapped in a HistogramContainer
    """
    # dummy dataset with mixed types: the frame that the deprecated
    # pd.util.testing.makeMixedDataFrame() helper used to return, spelled out
    # explicitly because that helper is absent from pandas 2.x
    df = pd.DataFrame({
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": pd.bdate_range("1/1/2009", periods=5),
    })
    # convert timestamp (col D) to nanosec since 1970-1-1
    df["date"] = df["D"].apply(to_ns)
    # constant boolean columns
    df["boolT"] = True
    df["boolF"] = False

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist1 = hg.Categorize(unit("C"))
    hist2 = hg.Bin(5, 0, 5, unit("A"), value=hist1)
    hist3 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist2,
    )

    # fill them
    for hist in (hist1, hist2, hist3):
        hist.fill.numpy(df)

    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)

    return df, hc1, hc2, hc3
def test_prepare_2dgrid():
    """Test preparation of grid for extraction of number of entries for 2d hists

    prepare_2dgrid is applied to a freshly filled 1d, 2d and 3d histogram; the
    returned x- and y-keys are compared with the expected values (both empty
    for the 1d histogram).
    """
    # only the frame is needed here; the ready-made containers are ignored
    df, _, _, _ = get_test_histograms1()

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    cat_hist = hg.Categorize(unit("C"))
    bin_hist = hg.Bin(5, 0, 5, unit("A"), value=cat_hist)
    date_hist = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=bin_hist,
    )
    histograms = (cat_hist, bin_hist, date_hist)

    # fill them
    for hist in histograms:
        hist.fill.numpy(df)

    # compute every grid first, then verify them one by one
    grids = [prepare_2dgrid(hist) for hist in histograms]
    expected = [
        ([], []),
        ([0, 1, 2, 3, 4], ["foo1", "foo2", "foo3", "foo4", "foo5"]),
        ([0, 1, 4, 5, 6], [0, 1, 2, 3, 4]),
    ]
    for (xkeys, ykeys), (want_x, want_y) in zip(grids, expected):
        np.testing.assert_array_equal(xkeys, want_x)
        np.testing.assert_array_equal(ykeys, want_y)
def test_check_similar_hists():
    """Test similarity of list of histograms

    Check similarity of: type, n-dim, sub-hists, specific type attributes.
    Each histogram must be reported as similar to itself, and each pair of
    differently-structured histograms as not similar.
    """
    # dummy dataset with mixed types: the frame that the deprecated
    # pd.util.testing.makeMixedDataFrame() helper used to return, spelled out
    # explicitly because that helper is absent from pandas 2.x
    df = pd.DataFrame({
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": pd.bdate_range("1/1/2009", periods=5),
    })
    # convert timestamp (col D) to nanosec since 1970-1-1
    df["date"] = df["D"].apply(to_ns)

    # building 1d-, 2d-, and 3d-histogram (iteratively); each odd/even pair has
    # the same dimensionality but swapped axes
    hist0 = hg.Bin(5, 0, 5, unit("A"))
    hist1 = hg.Categorize(unit("C"))
    hist2 = hg.Bin(5, 0, 5, unit("A"), value=hist1)
    hist3 = hg.Categorize(unit("C"), value=hist0)
    hist4 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist2,
    )
    hist5 = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=hist3,
    )

    # fill them
    for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
        hist.fill.numpy(df)

    hc0 = HistogramContainer(hist0)
    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)
    hc4 = HistogramContainer(hist4)
    hc5 = HistogramContainer(hist5)

    # a histogram is always similar to itself
    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
        assert check_similar_hists([hc, hc])

    # same dimensionality, different structure: must not be reported as similar
    for pair in ([hc0, hc1], [hc2, hc3], [hc4, hc5]):
        assert not check_similar_hists(pair)
def test_get_consistent_numpy_1dhists():
    """Test extraction of number of entries and bin-edges/labels

    The bin_edges/bin-labels of the input histograms are first made consistent
    with each other. Two sparsely-binned histograms of column A are filled from
    different frames; both must come back with the same bin edges, and the
    bin centers must be returned when get_bin_labels=True.
    """
    frame1 = pd.DataFrame({"A": [0, 1, 2, 3, 4, 3, 2, 1, 1, 1]})
    frame2 = pd.DataFrame({"A": [2, 3, 4, 5, 7, 4, 6, 5, 7, 8]})

    # two identically configured 1d histograms, filled from different frames
    sparse1 = hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A"))
    sparse2 = hg.SparselyBin(origin=0.0, binWidth=1.0, quantity=unit("A"))
    sparse1.fill.numpy(frame1)
    sparse2.fill.numpy(frame2)

    containers = [HistogramContainer(sparse1), HistogramContainer(sparse2)]

    nphist1, nphist2 = get_consistent_numpy_1dhists(containers, get_bin_labels=False)
    nphist_list, centers = get_consistent_numpy_1dhists(containers, get_bin_labels=True)

    expected_entries = [
        [1.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0],
    ]
    bin_edges = [float(edge) for edge in range(10)]  # 0.0, 1.0, ..., 9.0
    bin_centers = [low + 0.5 for low in range(9)]  # 0.5, 1.5, ..., 8.5

    # without labels: the (entries, edges) pairs are returned directly
    for nphist, entries in zip((nphist1, nphist2), expected_entries):
        np.testing.assert_array_equal(nphist[0], entries)
        np.testing.assert_array_equal(nphist[1], bin_edges)

    # with labels: the same pairs arrive as a list, next to the bin centers
    for index, entries in enumerate(expected_entries):
        np.testing.assert_array_equal(nphist_list[index][0], entries)
        np.testing.assert_array_equal(nphist_list[index][1], bin_edges)
    np.testing.assert_array_equal(centers, bin_centers)
def get_histograms():
    """Get a filled 1d, 2d and 3d test histogram

    The histograms are nested iteratively (C, then A x C, then date x A x C)
    and filled from the frame returned by get_test_data().

    :return: tuple (hist1, hist2, hist3) of the filled histograms
    """
    data = get_test_data()

    categories = hg.Categorize(unit("C"))
    binned = hg.Bin(5, 0, 5, unit("A"), value=categories)
    by_date = hg.SparselyBin(
        origin=pd.Timestamp("2009-01-01").value,
        binWidth=pd.Timedelta(days=1).value,
        quantity=unit("date"),
        value=binned,
    )

    # fill them, lowest dimension first
    categories.fill.numpy(data)
    binned.fill.numpy(data)
    by_date.fill.numpy(data)

    return categories, binned, by_date
def test_get_consistent_numpy_2dgrids():
    """Test extraction of number of entries for 2d hists

    The bin_edges of the input histograms are first made consistent with each
    other. Also checks that passing 1d histograms is rejected with an
    AssertionError carrying the expected message.
    """
    frame1 = pd.DataFrame({
        "A": [0, 1, 2, 3, 4, 3, 2, 1, 1, 1],
        "C": ["f1", "f3", "f4", "f3", "f4", "f2", "f2", "f1", "f3", "f4"],
    })
    frame2 = pd.DataFrame({
        "A": [2, 3, 4, 5, 7, 4, 6, 5, 7, 8],
        "C": ["f7", "f3", "f5", "f8", "f9", "f2", "f3", "f6", "f7", "f7"],
    })

    # one 1d histogram (C) and two 2d histograms (A x C) built on top of it
    cat_hist = hg.Categorize(unit("C"))
    grid_hist1 = hg.SparselyBin(
        origin=0.0, binWidth=1.0, quantity=unit("A"), value=cat_hist)
    grid_hist2 = hg.SparselyBin(
        origin=0.0, binWidth=1.0, quantity=unit("A"), value=cat_hist)

    # fill them
    cat_hist.fill.numpy(frame1)
    grid_hist1.fill.numpy(frame1)
    grid_hist2.fill.numpy(frame2)

    hc0 = HistogramContainer(cat_hist)
    hc1 = HistogramContainer(grid_hist1)
    hc2 = HistogramContainer(grid_hist2)

    # 1d input must be refused; the [""] sentinel makes the message check
    # below fail if no AssertionError was raised
    error_args = [""]
    try:
        get_consistent_numpy_2dgrids([hc0, hc0])
    except AssertionError as err:
        error_args = err.args

    grid2d_list = get_consistent_numpy_2dgrids([hc1, hc2])

    # expected 9x9 grids; for the first one only the top four rows are populated
    empty_row = [0.0] * 9
    expected1 = np.asarray(
        [
            [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
        ]
        + [list(empty_row) for _ in range(5)]
    )
    expected2 = np.asarray([
        list(empty_row),
        [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        list(empty_row),
        [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
    ])

    assert error_args[0] == 'Input histogram only has 1 dimensions (<2). Cannot compute 2d-grid.'

    for index, expected in enumerate([expected1, expected2]):
        assert (grid2d_list[index] == expected).all()
def test_project_split2dhist_on_axis():
    """Test projection of date-split 2d histograms onto a single axis

    3d histograms (date x A x C and date x C x A) are split along the date
    axis and the resulting 2d histograms are projected on 'x' or 'y'. The
    projections must agree with the 1d histograms obtained by splitting the
    2d histograms (date x A, date x C) directly.
    """
    df = get_test_data()

    day_origin = pd.Timestamp("2009-01-01").value
    day_width = pd.Timedelta(days=1).value

    def by_date(sub_hist):
        # sparsely-binned date axis (bin width of one day) around sub_hist
        return hg.SparselyBin(
            origin=day_origin,
            binWidth=day_width,
            quantity=unit("date"),
            value=sub_hist,
        )

    def split_on_date(hist):
        # split along date axis
        return HistogramContainer(hist).split_hist_along_first_dimension(
            xname="x", yname="y", short_keys=True, convert_time_index=True)

    hist_a = hg.Bin(5, 0, 5, unit("A"))
    hist_c = hg.Categorize(unit("C"))
    hist_ca = hg.Categorize(unit("C"), value=hist_a)
    hist_ac = hg.Bin(5, 0, 5, unit("A"), value=hist_c)

    hist_dca = by_date(hist_ca)
    hist_dac = by_date(hist_ac)
    hist_da = by_date(hist_a)
    hist_dc = by_date(hist_c)

    for hist in (hist_da, hist_dc, hist_dca, hist_dac):
        hist.fill.numpy(df)

    split_ac = split_on_date(hist_dac)
    split_ca = split_on_date(hist_dca)
    split_a_ref = split_on_date(hist_da)
    split_c_ref = split_on_date(hist_dc)

    # A sits on 'x' in the A x C split and on 'y' in the C x A split (and
    # the other way round for C)
    split_a1 = project_split2dhist_on_axis(split_ac, "x")
    split_a2 = project_split2dhist_on_axis(split_ca, "y")
    split_c1 = project_split2dhist_on_axis(split_ac, "y")
    split_c2 = project_split2dhist_on_axis(split_ca, "x")

    # numeric axis: compare bin edges and entries per date key
    assert len(split_a_ref) == len(split_a1)
    assert len(split_a_ref) == len(split_a2)
    for key, reference in split_a_ref.items():
        assert key in split_a1
        assert key in split_a2
        projections = (split_a1[key], split_a2[key])

        ref_edges = reference.bin_edges()
        proj_edges = [proj.bin_edges() for proj in projections]
        ref_entries = reference.bin_entries()
        proj_entries = [proj.bin_entries() for proj in projections]

        for edges in proj_edges:
            np.testing.assert_array_equal(ref_edges, edges)
        for entries in proj_entries:
            np.testing.assert_array_equal(ref_entries, entries)

    # categorical axis: compare labels (order-independent) and entries per date key
    assert len(split_c_ref) == len(split_c1)
    assert len(split_c_ref) == len(split_c2)
    for key, reference in split_c_ref.items():
        assert key in split_c1
        assert key in split_c2
        projections = (split_c1[key], split_c2[key])

        ref_labels = reference.bin_labels()
        proj_labels = [proj.bin_labels() for proj in projections]
        ref_entries = reference.bin_entries()
        # request the projected entries in the reference's label order
        proj_entries = [proj.bin_entries(ref_labels) for proj in projections]

        for labels in proj_labels:
            np.testing.assert_array_equal(sorted(ref_labels), sorted(labels))
        for entries in proj_entries:
            np.testing.assert_array_equal(ref_entries, entries)
def test_get_consistent_numpy_2dgrids():
    """Test extraction of number of entries for 2d hists

    The bin_edges of the input histograms are first made consistent with each
    other. Also checks that passing 1d histograms is rejected with a
    ValueError carrying the expected message.
    """
    data_first = pd.DataFrame({
        "A": [0, 1, 2, 3, 4, 3, 2, 1, 1, 1],
        "C": ["f1", "f3", "f4", "f3", "f4", "f2", "f2", "f1", "f3", "f4"],
    })
    data_second = pd.DataFrame({
        "A": [2, 3, 4, 5, 7, 4, 6, 5, 7, 8],
        "C": ["f7", "f3", "f5", "f8", "f9", "f2", "f3", "f6", "f7", "f7"],
    })

    # one 1d histogram (C) and two 2d histograms (A x C) built on top of it
    hist_1d = hg.Categorize(unit("C"))
    hist_2d_first = hg.SparselyBin(
        origin=0.0, binWidth=1.0, quantity=unit("A"), value=hist_1d)
    hist_2d_second = hg.SparselyBin(
        origin=0.0, binWidth=1.0, quantity=unit("A"), value=hist_1d)

    # fill them
    fill_plan = [
        (hist_1d, data_first),
        (hist_2d_first, data_first),
        (hist_2d_second, data_second),
    ]
    for hist, data in fill_plan:
        hist.fill.numpy(data)

    hc0 = HistogramContainer(hist_1d)
    hc1 = HistogramContainer(hist_2d_first)
    hc2 = HistogramContainer(hist_2d_second)

    # 1d input must be refused; the [""] sentinel makes the message check
    # below fail if no ValueError was raised
    error_args = [""]
    try:
        get_consistent_numpy_2dgrids([hc0, hc0])
    except ValueError as err:
        error_args = err.args

    grid2d_list = get_consistent_numpy_2dgrids([hc1, hc2])

    # expected 9x9 grids, written as integer counts and stored as floats
    expected_first = np.asarray(
        [
            [1, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 1, 0, 0, 0, 0, 0],
            [0, 2, 0, 1, 0, 0, 0, 0, 0],
            [0, 1, 1, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
        ],
        dtype=float,
    )
    expected_second = np.asarray(
        [
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 1, 1],
            [0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 1, 0],
        ],
        dtype=float,
    )

    expected_message = (
        "Input histogram only has 1 dimensions (<2). Cannot compute 2d-grid."
    )
    assert error_args[0] == expected_message

    for index, expected in enumerate([expected_first, expected_second]):
        assert (grid2d_list[index] == expected).all()