Example #1
0
def test_random_split(data):
    """Splitting and re-concatenating a frame must be lossless.

    random_split must produce more than one piece, and pd.concat of the
    pieces must reproduce the original rows (same length, same values).
    """
    pieces = list(random_split(data))
    assert len(pieces) > 1
    rejoined = pd.concat(pieces)
    assert len(rejoined) == len(data), "We did not lose anything"
    # Element-wise comparison with float tolerance.
    assert np.allclose(rejoined.values, data.values), "We did not corrupt the data"
Example #2
0
 def test_multiple_chunks(self, data):
     """TimeChopper over a multi-chunk stream must emit MAX_TS groups.

     Every input row must appear in exactly one of the emitted groups.
     """
     split_stream = random_split(data)
     grouped = list(TimeChopper(split_stream, 5))
     assert len(grouped) == MAX_TS
     # Each result item is a (timestamp, frame) pair; rejoin the frames.
     concatinated = pd.concat(group[1] for group in grouped)
     assert len(data) == len(concatinated), "We did not lose anything"
Example #3
0
 def test_multiple_chunks(self, data):
     """Chopping a randomly split frame yields MAX_TS buckets with no row loss."""
     chopper = TimeChopper(random_split(data), 5)
     buckets = [item for item in chopper]
     assert len(buckets) == MAX_TS
     # item[1] holds the per-timestamp DataFrame slice.
     concatinated = pd.concat(item[1] for item in buckets)
     assert len(data) == len(concatinated), "We did not lose anything"
Example #4
0
    def test_partially_reversed_data(self, data):
        """Pipeline must still produce MAX_TS results when chunks arrive out of order.

        Swaps two adjacent chunks to simulate a partially reordered stream,
        then runs the full Aggregator/Drain pipeline and checks the queue size.
        """
        results_queue = Queue()
        chunks = list(random_split(data))
        # Simulate out-of-order delivery of the middle of the stream.
        chunks[5], chunks[6] = chunks[6], chunks[5]

        # NOTE: removed the unused `results = []` local from the original.
        pipeline = Aggregator(
            TimeChopper(DataPoller(source=chunks, poll_period=0.1),
                        cache_size=3), AGGR_CONFIG, False)
        drain = Drain(pipeline, results_queue)
        drain.run()
        assert results_queue.qsize() == MAX_TS
Example #5
0
 def test_partially_reversed_data(self, data):
     """Chopper must tolerate two swapped chunks without losing or corrupting rows."""
     chunks = list(random_split(data))
     # Swap two adjacent chunks to simulate partial reordering.
     chunks[5], chunks[6] = chunks[6], chunks[5]
     chopped = list(TimeChopper(chunks, 5))
     assert len(chopped) == MAX_TS, "DataFrame is splitted into proper number of chunks"
     concatinated = pd.concat(pair[1] for pair in chopped)
     assert len(data) == len(concatinated), "We did not lose anything"
     # Float-tolerant element-wise comparison of the rejoined values.
     assert np.allclose(concatinated.values, data.values), "We did not corrupt the data"
    def test_partially_reversed_data(self, data):
        """Full pipeline run over a partially reordered chunk stream.

        The Drain must still deliver exactly MAX_TS aggregated results.
        """
        results_queue = Queue()
        chunks = list(random_split(data))
        # Introduce a single out-of-order pair in the middle of the stream.
        chunks[5], chunks[6] = chunks[6], chunks[5]

        poller = DataPoller(source=chunks, poll_period=0.1)
        chopper = TimeChopper(poller, cache_size=3)
        pipeline = Aggregator(chopper, AGGR_CONFIG, False)
        Drain(pipeline, results_queue).run()
        assert results_queue.qsize() == MAX_TS
Example #7
0
 def test_partially_reversed_data(self, data):
     """Chopper must tolerate swapped chunks without losing or corrupting rows.

     BUG FIX: the original wrote ``assert (expr, "msg")`` — asserting a
     non-empty tuple, which is always truthy, so the check could never fail.
     The parentheses are removed so the condition is actually tested.
     """
     chunks = list(random_split(data))
     # Simulate out-of-order delivery of two adjacent chunks.
     chunks[5], chunks[6] = chunks[6], chunks[5]
     chopper = TimeChopper(chunks, 5)
     result = list(chopper)
     assert len(result) == MAX_TS, \
         "DataFrame is splitted into proper number of chunks"
     concatinated = pd.concat(r[1] for r in result)
     assert len(data) == len(concatinated), "We did not lose anything"
     assert np.allclose(concatinated.values,
                        data.values), "We did not corrupt the data"
    def test_slow_producer(self, data):
        """Pipeline must cope with a producer that intermittently yields None.

        The producer randomly interleaves None (i.e. "no data yet") between
        real chunks; the Drain must still deliver exactly MAX_TS results.
        """
        results_queue = Queue()
        chunks = list(random_split(data))
        # One out-of-order pair on top of the slow producer.
        chunks[5], chunks[6] = chunks[6], chunks[5]

        def producer():
            # Roughly half the time, emit a None "stall" before the chunk.
            for chunk in chunks:
                if np.random.random() > 0.5:
                    yield None
                yield chunk

        poller = DataPoller(source=producer(), poll_period=0.1)
        pipeline = Aggregator(
            TimeChopper(poller, cache_size=3), AGGR_CONFIG, False)
        Drain(pipeline, results_queue).run()
        assert results_queue.qsize() == MAX_TS
Example #9
0
    def test_slow_producer(self, data):
        """Pipeline must cope with a producer that intermittently yields None.

        A None from the source means "no data available yet"; the pipeline
        must skip it and still deliver exactly MAX_TS aggregated results.
        """
        results_queue = Queue()
        chunks = list(random_split(data))
        # Simulate out-of-order delivery of two adjacent chunks.
        chunks[5], chunks[6] = chunks[6], chunks[5]

        # NOTE: removed the unused `results = []` local from the original.
        def producer():
            # Randomly stall (~50% of the time) before emitting each chunk.
            for chunk in chunks:
                if np.random.random() > 0.5:
                    yield None
                yield chunk

        pipeline = Aggregator(
            TimeChopper(DataPoller(source=producer(), poll_period=0.1), cache_size=3), AGGR_CONFIG, False
        )
        drain = Drain(pipeline, results_queue)
        drain.run()
        assert results_queue.qsize() == MAX_TS