Exemple #1
0
def test_get_next_chunk(entityset):
    """Exercise ``get_next_chunk`` on 17-row cutoff-time frames.

    Covers three cases: all-distinct cutoff times, one cutoff time that has
    more rows than the chunk size, and a chunk size bigger than the frame.
    The ``entityset`` fixture is requested but not used in the body.
    """
    # Case 1: 17 distinct cutoff times, chunk size 4 -> five chunks.
    distinct_times = []
    distinct_times.extend(
        datetime(2011, 4, 9, 10, 30, k * 6) for k in range(5))
    distinct_times.extend(
        datetime(2011, 4, 9, 10, 31, k * 9) for k in range(4))
    distinct_times.append(datetime(2011, 4, 9, 10, 40, 0))
    distinct_times.extend(
        datetime(2011, 4, 10, 10, 40, k) for k in range(2))
    distinct_times.extend(
        datetime(2011, 4, 10, 10, 41, k * 3) for k in range(3))
    distinct_times.extend(
        datetime(2011, 4, 10, 11, 10, k * 3) for k in range(2))
    frame = pd.DataFrame({'time': distinct_times, 'instance_id': range(17)})
    chunks = list(get_next_chunk(frame, 'time', 4))
    assert len(chunks) == 5

    # Case 2: one cutoff time occurs 5 times, which exceeds the chunk
    # size of 4, so its rows cannot all fit in a single chunk.
    repeated = datetime(2011, 4, 9, 10, 30, 6)
    times = [repeated] * 5
    times += [datetime(2011, 4, 9, 10, 31, 9)] * 4
    times.append(datetime(2011, 4, 9, 10, 40, 0))
    times.extend(datetime(2011, 4, 10, 10, 40, k) for k in range(2))
    times.extend(datetime(2011, 4, 10, 10, 41, k * 3) for k in range(3))
    times.extend(datetime(2011, 4, 10, 11, 10, k * 3) for k in range(2))
    cutoff_time = pd.DataFrame({'time': times, 'instance_id': range(17)})
    chunks = list(get_next_chunk(cutoff_time, 'time', 4))
    assert len(chunks) == 5
    # The first chunk is expected to hold four rows of the most frequent
    # cutoff time.
    largest = pd.Series([repeated] * 4)
    assert (chunks[0]['time'] == largest).all()
    # The leftover fifth row of that cutoff time is expected in the third
    # chunk (times[4] is the repeated cutoff time).
    assert (chunks[2]['time'] == times[4]).any()

    # Case 3: chunk size (18) larger than the 17-row frame -> one chunk.
    chunks = list(get_next_chunk(cutoff_time, 'time', 18))
    assert len(chunks) == 1
def test_get_next_chunk(entityset):
    """Verify the number and content of chunks from ``get_next_chunk``.

    NOTE: a test with this exact name is also defined earlier in this file;
    this later definition rebinds the name at module level.

    The ``entityset`` fixture argument is accepted but unused here.
    """
    def stamps(day, hour, minute, seconds):
        # One April-2011 timestamp per entry in ``seconds``.
        return [datetime(2011, 4, day, hour, minute, s) for s in seconds]

    # Trailing 8 timestamps are the same in both 17-row scenarios below.
    tail = (stamps(9, 10, 40, [0]) +
            stamps(10, 10, 40, range(2)) +
            stamps(10, 10, 41, range(0, 9, 3)) +
            stamps(10, 11, 10, range(0, 6, 3)))

    # Every cutoff time unique: 17 rows in chunks of 4 gives 5 chunks.
    times = (stamps(9, 10, 30, range(0, 30, 6)) +
             stamps(9, 10, 31, range(0, 36, 9)) +
             tail)
    cutoff_time = pd.DataFrame({'time': times, 'instance_id': range(17)})
    chunk_list = list(get_next_chunk(cutoff_time, 'time', 4))
    assert len(chunk_list) == 5

    # test when a cutoff time is larger than a chunk
    times = stamps(9, 10, 30, [6] * 5) + stamps(9, 10, 31, [9] * 4) + tail
    cutoff_time = pd.DataFrame({'time': times, 'instance_id': range(17)})
    chunk_list = list(get_next_chunk(cutoff_time, 'time', 4))
    assert len(chunk_list) == 5
    first_chunk, third_chunk = chunk_list[0], chunk_list[2]
    # largest cutoff time handled first
    largest = pd.Series(stamps(9, 10, 30, [6] * 4))
    assert (first_chunk['time'] == largest).all()
    # additional part of cutoff time added to another chunk
    overflow_time = times[4]
    assert (third_chunk['time'] == overflow_time).any()

    # test when cutoff_time is smaller than num_per_chunk
    chunk_list = list(get_next_chunk(cutoff_time, 'time', 18))
    assert len(chunk_list) == 1