Exemple #1
0
def get_test_histograms2():
    """ Get set 2 of test histograms

    Builds a small mixed-type dataframe and fills four histograms from it:
    a Categorize hist of unit('C'), a Bin hist of unit('A'), and the two
    nested combinations of those (Bin with Categorize as value, and
    Categorize with Bin as value).

    :return: tuple (df, hc1, hc2, hc3, hc4): the dataframe followed by one
        HistogramContainer per filled histogram, in construction order.
    """
    # Dummy dataset with mixed types. The columns are spelled out here rather
    # than taken from pd.util.testing.makeMixedDataFrame(): that module was
    # deprecated in pandas 1.0 and removed in pandas 2.0. The values are the
    # ones that helper used to generate.
    df = pd.DataFrame({
        'A': [0.0, 1.0, 2.0, 3.0, 4.0],
        'B': [0.0, 1.0, 0.0, 1.0, 0.0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': pd.bdate_range('1/1/2009', periods=5),
    })

    # building 1d-, 2d-histogram (iteratively)
    hist1 = hg.Categorize(unit('C'))
    hist2 = hg.Bin(5, 0, 5, unit('A'), value=hist1)
    hist3 = hg.Bin(5, 0, 5, unit('A'))
    hist4 = hg.Categorize(unit('C'), value=hist3)

    # fill them
    for hist in (hist1, hist2, hist3, hist4):
        hist.fill.numpy(df)

    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)
    hc4 = HistogramContainer(hist4)

    return df, hc1, hc2, hc3, hc4
Exemple #2
0
def test_project_on_x():
    """ Test project_on_x against the matching stand-alone 1d histograms

    The projection of the Bin-with-Categorize hist is compared to the plain
    Bin hist (edges and entries), and the projection of the
    Categorize-with-Bin hist to the plain Categorize hist (labels and
    entries).
    """
    df = get_test_data()

    cat_c = hg.Categorize(unit('C'))
    bin_a_over_c = hg.Bin(5, 0, 5, unit('A'), value=cat_c)
    bin_a = hg.Bin(5, 0, 5, unit('A'))
    cat_c_over_a = hg.Categorize(unit('C'), value=bin_a)

    # fill all four from the same dataframe
    for filled in (cat_c, bin_a_over_c, bin_a, cat_c_over_a):
        filled.fill.numpy(df)

    projected_a = project_on_x(bin_a_over_c)
    projected_c = project_on_x(cat_c_over_a)

    # numeric axis: projection vs. reference
    edges_projected = projected_a.bin_edges()
    entries_projected = projected_a.bin_entries()
    edges_reference = bin_a.bin_edges()
    entries_reference = bin_a.bin_entries()

    # categorical axis: projection vs. reference
    labels_projected = projected_c.bin_labels()
    counts_projected = projected_c.bin_entries()
    labels_reference = cat_c.bin_labels()
    # request the reference entries in the projection's label order
    counts_reference = cat_c.bin_entries(labels_projected)

    np.testing.assert_array_equal(edges_projected, edges_reference)
    np.testing.assert_array_equal(entries_projected, entries_reference)
    np.testing.assert_array_equal(
        sorted(labels_projected), sorted(labels_reference))
    np.testing.assert_array_equal(counts_projected, counts_reference)
Exemple #3
0
def test_assert_similar_hists():
    """ Test assert on similarity of list of histograms

    Check similarity of: type, n-dim, sub-hists, specific type attributes.

    Every container must be similar to itself, and each of the three pairs
    built with the 'A' and 'C' axes swapped must make assert_similar_hists
    raise an AssertionError carrying the expected message.
    """
    # Dummy dataset with mixed types. The columns are spelled out here rather
    # than taken from pd.util.testing.makeMixedDataFrame(): that module was
    # deprecated in pandas 1.0 and removed in pandas 2.0. The values are the
    # ones that helper used to generate.
    df = pd.DataFrame({
        'A': [0.0, 1.0, 2.0, 3.0, 4.0],
        'B': [0.0, 1.0, 0.0, 1.0, 0.0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': pd.bdate_range('1/1/2009', periods=5),
    })
    # convert timestamp (col D) to nanosec since 1970-1-1
    df['date'] = df['D'].apply(to_ns)

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist0 = hg.Bin(5, 0, 5, unit('A'))
    hist1 = hg.Categorize(unit('C'))
    hist2 = hg.Bin(5, 0, 5, unit('A'), value=hist1)
    hist3 = hg.Categorize(unit('C'), value=hist0)

    hist4 = hg.SparselyBin(origin=pd.Timestamp('2009-01-01').value,
                           binWidth=pd.Timedelta(days=1).value,
                           quantity=unit('date'),
                           value=hist2)
    hist5 = hg.SparselyBin(origin=pd.Timestamp('2009-01-01').value,
                           binWidth=pd.Timedelta(days=1).value,
                           quantity=unit('date'),
                           value=hist3)
    # fill them
    for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
        hist.fill.numpy(df)

    hc0 = HistogramContainer(hist0)
    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)
    hc4 = HistogramContainer(hist4)
    hc5 = HistogramContainer(hist5)

    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
        assert check_similar_hists([hc, hc])

    expected_message = 'Input histograms are not all similar.'
    for pair in ([hc0, hc1], [hc2, hc3], [hc4, hc5]):
        # The placeholder survives only when nothing is raised, in which case
        # the comparison below fails and flags the missing AssertionError.
        raised_args = ['']
        try:
            assert_similar_hists(pair)
        except AssertionError as err:
            raised_args = err.args
        assert raised_args[0] == expected_message
Exemple #4
0
def get_test_histograms1():
    """ Get set 1 of test histograms

    Builds a small mixed-type dataframe (extended with a 'date' column and
    two constant boolean columns) and fills three nested histograms from it:
    Categorize of unit('C'), Bin of unit('A') holding that Categorize, and
    SparselyBin of unit('date') holding that Bin.

    :return: tuple (df, hc1, hc2, hc3): the dataframe followed by one
        HistogramContainer per filled histogram, in construction order.
    """
    # Dummy dataset with mixed types. The columns are spelled out here rather
    # than taken from pd.util.testing.makeMixedDataFrame(): that module was
    # deprecated in pandas 1.0 and removed in pandas 2.0. The values are the
    # ones that helper used to generate.
    df = pd.DataFrame({
        'A': [0.0, 1.0, 2.0, 3.0, 4.0],
        'B': [0.0, 1.0, 0.0, 1.0, 0.0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': pd.bdate_range('1/1/2009', periods=5),
    })
    # convert timestamp (col D) to nanosec since 1970-1-1
    df['date'] = df['D'].apply(to_ns)
    df['boolT'] = True
    df['boolF'] = False

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist1 = hg.Categorize(unit('C'))
    hist2 = hg.Bin(5, 0, 5, unit('A'), value=hist1)
    hist3 = hg.SparselyBin(origin=pd.Timestamp('2009-01-01').value,
                           binWidth=pd.Timedelta(days=1).value,
                           quantity=unit('date'),
                           value=hist2)
    # fill them
    for hist in (hist1, hist2, hist3):
        hist.fill.numpy(df)

    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)

    return df, hc1, hc2, hc3
Exemple #5
0
def test_prepare_2dgrid():
    """ Test preparation of grid for extraction of number of entries for 2d hists

    prepare_2dgrid is called on a 1d, a 2d and a 3d histogram filled from the
    dataframe of get_test_histograms1, and the returned x- and y-keys are
    compared with fixed expectations.
    """
    # only the dataframe of the fixture is needed; the containers are ignored
    df, _, _, _ = get_test_histograms1()

    # nested histograms: categorical -> binned -> sparsely binned by date
    start_ns = pd.Timestamp('2009-01-01').value
    day_ns = pd.Timedelta(days=1).value

    categorical = hg.Categorize(unit('C'))
    binned = hg.Bin(5, 0, 5, unit('A'), value=categorical)
    dated = hg.SparselyBin(origin=start_ns, binWidth=day_ns,
                           quantity=unit('date'), value=binned)

    for filled in (categorical, binned, dated):
        filled.fill.numpy(df)

    x_cat, y_cat = prepare_2dgrid(categorical)
    x_bin, y_bin = prepare_2dgrid(binned)
    x_date, y_date = prepare_2dgrid(dated)

    # the 1d histogram is expected to yield empty keys on both axes
    np.testing.assert_array_equal(x_cat, [])
    np.testing.assert_array_equal(y_cat, [])

    np.testing.assert_array_equal(x_bin, [0, 1, 2, 3, 4])
    np.testing.assert_array_equal(
        y_bin, ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'])

    np.testing.assert_array_equal(x_date, [0, 1, 4, 5, 6])
    np.testing.assert_array_equal(y_date, [0, 1, 2, 3, 4])
def test_check_similar_hists():
    """Test similarity of list of histograms

    Check similarity of: type, n-dim, sub-hists, specific type attributes.

    Every container must be similar to itself, and each of the three pairs
    built with the "A" and "C" axes swapped must be reported as not similar.
    """
    # Dummy dataset with mixed types. The columns are spelled out here rather
    # than taken from pd.util.testing.makeMixedDataFrame(): that module was
    # deprecated in pandas 1.0 and removed in pandas 2.0. The values are the
    # ones that helper used to generate.
    df = pd.DataFrame(
        {
            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
            "D": pd.bdate_range("1/1/2009", periods=5),
        }
    )
    # convert timestamp (col D) to nanosec since 1970-1-1
    df["date"] = df["D"].apply(to_ns)

    # building 1d-, 2d-, and 3d-histogram (iteratively)
    hist0 = hg.Bin(5, 0, 5, unit("A"))
    hist1 = hg.Categorize(unit("C"))
    hist2 = hg.Bin(5, 0, 5, unit("A"), value=hist1)
    hist3 = hg.Categorize(unit("C"), value=hist0)

    # both 3d histograms share the same date-axis settings
    origin = pd.Timestamp("2009-01-01").value
    bin_width = pd.Timedelta(days=1).value
    hist4 = hg.SparselyBin(
        origin=origin,
        binWidth=bin_width,
        quantity=unit("date"),
        value=hist2,
    )
    hist5 = hg.SparselyBin(
        origin=origin,
        binWidth=bin_width,
        quantity=unit("date"),
        value=hist3,
    )
    # fill them
    for hist in [hist0, hist1, hist2, hist3, hist4, hist5]:
        hist.fill.numpy(df)

    hc0 = HistogramContainer(hist0)
    hc1 = HistogramContainer(hist1)
    hc2 = HistogramContainer(hist2)
    hc3 = HistogramContainer(hist3)
    hc4 = HistogramContainer(hist4)
    hc5 = HistogramContainer(hist5)

    for hc in [hc0, hc1, hc2, hc3, hc4, hc5]:
        assert check_similar_hists([hc, hc])

    for pair in ([hc0, hc1], [hc2, hc3], [hc4, hc5]):
        assert not check_similar_hists(pair)
Exemple #7
0
def get_histograms():
    """ Build and fill a nested 1d, 2d and 3d test histogram

    The data comes from get_test_data(). Each histogram is used as the
    ``value`` of the next one: Categorize of unit('C'), Bin of unit('A'),
    SparselyBin of unit('date').

    :return: tuple of the three filled histograms, in construction order.
    """
    df = get_test_data()

    # settings of the sparsely-binned date axis
    start_ns = pd.Timestamp('2009-01-01').value
    day_ns = pd.Timedelta(days=1).value

    categorical = hg.Categorize(unit('C'))
    binned = hg.Bin(5, 0, 5, unit('A'), value=categorical)
    dated = hg.SparselyBin(origin=start_ns, binWidth=day_ns,
                           quantity=unit('date'), value=binned)

    hists = (categorical, binned, dated)
    for filled in hists:
        filled.fill.numpy(df)

    return hists
Exemple #8
0
def test_project_split2dhist_on_axis():
    """ Test projection of split 2d histograms onto a single axis

    Four date-indexed histograms are filled and split along their first
    dimension. Projecting the split 2d histograms on 'x' or 'y' must
    reproduce, key by key, the split 1d histograms of the same axis.
    """
    df = get_test_data()

    bin_a = hg.Bin(5, 0, 5, unit('A'))
    cat_c = hg.Categorize(unit('C'))
    c_over_a = hg.Categorize(unit('C'), value=bin_a)
    a_over_c = hg.Bin(5, 0, 5, unit('A'), value=cat_c)

    def over_date(sub_hist):
        # wrap a histogram as the value of a date-binned SparselyBin
        return hg.SparselyBin(origin=pd.Timestamp('2009-01-01').value,
                              binWidth=pd.Timedelta(days=1).value,
                              quantity=unit('date'),
                              value=sub_hist)

    def split_on_date(hist):
        # split a filled histogram along its first (date) dimension
        container = HistogramContainer(hist)
        return container.split_hist_along_first_dimension(
            xname='x', yname='y', short_keys=True, convert_time_index=True)

    date_c_a = over_date(c_over_a)
    date_a_c = over_date(a_over_c)
    date_a = over_date(bin_a)
    date_c = over_date(cat_c)

    for filled in (date_a, date_c, date_c_a, date_a_c):
        filled.fill.numpy(df)

    # split along date axis
    split_ac = split_on_date(date_a_c)
    split_ca = split_on_date(date_c_a)
    ref_a = split_on_date(date_a)
    ref_c = split_on_date(date_c)

    a_from_ac = project_split2dhist_on_axis(split_ac, 'x')
    a_from_ca = project_split2dhist_on_axis(split_ca, 'y')
    c_from_ac = project_split2dhist_on_axis(split_ac, 'y')
    c_from_ca = project_split2dhist_on_axis(split_ca, 'x')

    # numeric axis 'A': same keys, same edges, same entries
    assert len(ref_a) == len(a_from_ac)
    assert len(ref_a) == len(a_from_ca)

    for key, expected in ref_a.items():
        assert key in a_from_ac
        assert key in a_from_ca
        first = a_from_ac[key]
        second = a_from_ca[key]
        edges_expected = expected.bin_edges()
        edges_first = first.bin_edges()
        edges_second = second.bin_edges()
        entries_expected = expected.bin_entries()
        entries_first = first.bin_entries()
        entries_second = second.bin_entries()
        np.testing.assert_array_equal(edges_expected, edges_first)
        np.testing.assert_array_equal(edges_expected, edges_second)
        np.testing.assert_array_equal(entries_expected, entries_first)
        np.testing.assert_array_equal(entries_expected, entries_second)

    # categorical axis 'C': same keys, same labels, same entries
    assert len(ref_c) == len(c_from_ac)
    assert len(ref_c) == len(c_from_ca)

    for key, expected in ref_c.items():
        assert key in c_from_ac
        assert key in c_from_ca
        first = c_from_ac[key]
        second = c_from_ca[key]
        labels_expected = expected.bin_labels()
        labels_first = first.bin_labels()
        labels_second = second.bin_labels()
        entries_expected = expected.bin_entries()
        # request entries in the reference label order so they line up
        entries_first = first.bin_entries(labels_expected)
        entries_second = second.bin_entries(labels_expected)
        np.testing.assert_array_equal(
            sorted(labels_expected), sorted(labels_first))
        np.testing.assert_array_equal(
            sorted(labels_expected), sorted(labels_second))
        np.testing.assert_array_equal(entries_expected, entries_first)
        np.testing.assert_array_equal(entries_expected, entries_second)