def main():
    """
    Compare a queued pipeline against a plain sequential pipeline.

    Both pipelines apply the same step to ``list(range(100))``: for each data
    input, sleep 0.02 seconds, then multiply by 2. The queued pipeline is
    configured with ``n_workers_per_step=8``, ``max_queue_size=10`` and
    ``batch_size=10``.

    The execution time of each run is printed. The function then asserts that
    the queued run was faster than the sequential one and that both runs
    produced equal outputs.

    :raises AssertionError: if the queued pipeline was not faster, or if the
        outputs of the two pipelines differ.
    """
    sleep_time = 0.02

    # Queued pipeline: the step is wrapped in a SequentialQueuedPipeline.
    queued_pipeline = SequentialQueuedPipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ], n_workers_per_step=8, max_queue_size=10, batch_size=10)

    # perf_counter() is monotonic and high resolution, so the measured interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    outputs_streaming = queued_pipeline.transform(list(range(100)))
    time_queued_pipeline = time.perf_counter() - start

    print('SequentialQueuedPipeline')
    print('execution time: {} seconds'.format(time_queued_pipeline))

    # Baseline: process the data inputs sequentially.
    # For each data input, sleep 0.02 seconds, and then multiply by 2.
    vanilla_pipeline = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ])

    start = time.perf_counter()
    outputs_vanilla = vanilla_pipeline.transform(list(range(100)))
    time_vanilla_pipeline = time.perf_counter() - start

    print('VanillaPipeline')
    print('execution time: {} seconds'.format(time_vanilla_pipeline))

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example #2
0
def test_parallel_queued_parallelize_correctly():
    """
    Check that a four-step queued pipeline beats the equivalent plain pipeline.

    Each of the four steps sleeps 0.001 seconds per data input and then
    multiplies by 2. The queued pipeline receives its steps as
    ``(name, 4, 10, step)`` tuples (presumably step name, number of workers
    and max queue size -- confirm against SequentialQueuedPipeline) with
    ``batch_size=10``. Both pipelines transform ``list(range(100))``.

    The test asserts that the queued run took less time than the plain run
    and that both runs produced equal outputs.
    """
    sleep_time = 0.001

    def make_step():
        # Fresh step instance per call, so no step object is shared between
        # pipelines (mirrors building each step inline).
        return Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])

    queued_pipeline = SequentialQueuedPipeline([
        ('1', 4, 10, make_step()),
        ('2', 4, 10, make_step()),
        ('3', 4, 10, make_step()),
        ('4', 4, 10, make_step()),
    ], batch_size=10)

    # perf_counter() is monotonic and high resolution; time.time() can jump
    # with clock adjustments, which would make the timing comparison unreliable.
    start = time.perf_counter()
    outputs_streaming = queued_pipeline.transform(list(range(100)))
    time_queued_pipeline = time.perf_counter() - start

    vanilla_pipeline = Pipeline([
        make_step(),
        make_step(),
        make_step(),
        make_step(),
    ])

    start = time.perf_counter()
    outputs_vanilla = vanilla_pipeline.transform(list(range(100)))
    time_vanilla_pipeline = time.perf_counter() - start

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example #3
0
def test_queued_pipeline_with_included_incomplete_batch_that_raises_an_exception():
    """
    Expect an AttributeError when the default batch values are ``None``.

    A queued pipeline of four ``MultiplyByN(2)`` steps is built with
    ``keep_incomplete_batch=True`` and ``None`` for both default values, then
    asked to transform 15 inputs with ``batch_size=10``. Construction and
    transform both happen inside the ``pytest.raises`` block.
    """
    queue_settings = dict(
        batch_size=10,
        keep_incomplete_batch=True,
        # this will raise an exception in the worker
        default_value_data_inputs=None,
        # this will raise an exception in the worker
        default_value_expected_outputs=None,
        n_workers_per_step=1,
        max_queue_size=5,
    )

    with pytest.raises(AttributeError):
        pipeline = SequentialQueuedPipeline(
            [MultiplyByN(2) for _ in range(4)],
            **queue_settings
        )
        pipeline.transform(list(range(15)))
Example #4
0
def test_queued_pipeline_with_n_workers_step():
    """
    Run a queued pipeline whose steps are given as ``(1, step)`` tuples.

    Four ``(1, MultiplyByN(2))`` tuples are passed with ``batch_size=10`` and
    ``max_queue_size=5``; the transform of ``list(range(100))`` must equal
    ``EXPECTED_OUTPUTS``.
    """
    steps = [(1, MultiplyByN(2)) for _ in range(4)]
    pipeline = SequentialQueuedPipeline(steps, batch_size=10, max_queue_size=5)

    data_inputs = list(range(100))
    transformed = pipeline.transform(data_inputs)

    assert np.array_equal(transformed, EXPECTED_OUTPUTS)
Example #5
0
def test_queued_pipeline_with_step_name_n_worker_max_queue_size():
    """
    Run a queued pipeline whose steps are given as 4-tuples.

    Each step is passed as ``(name, 1, 5, MultiplyByN(2))`` with names
    ``'1'`` to ``'4'`` and ``batch_size=10``; the transform of
    ``list(range(100))`` must equal ``EXPECTED_OUTPUTS``.
    """
    steps = [(str(index), 1, 5, MultiplyByN(2)) for index in range(1, 5)]
    pipeline = SequentialQueuedPipeline(steps, batch_size=10)

    data_inputs = list(range(100))
    transformed = pipeline.transform(data_inputs)

    assert np.array_equal(transformed, EXPECTED_OUTPUTS)
Example #6
0
def test_parallel_queued_parallelize_correctly(tmpdir, use_processes,
                                               use_savers):
    """
    Check that a four-step queued pipeline beats the equivalent plain pipeline.

    Each of the four steps sleeps 0.01 seconds per data input and then
    multiplies by 2. The queued pipeline receives its steps as
    ``(name, 2, 10, step)`` tuples (presumably step name, number of workers
    and max queue size -- confirm against SequentialQueuedPipeline) with
    ``batch_size=10``, and is bound to ``ExecutionContext(tmpdir)``. Both
    pipelines transform ``list(range(100))``.

    The test asserts that the queued run took less time than the plain run
    and that both runs produced equal outputs.

    :param tmpdir: directory handed to ``ExecutionContext`` (presumably the
        pytest ``tmpdir`` fixture -- the fixture wiring is not visible here).
    :param use_processes: forwarded as ``use_processes`` to the queued pipeline.
    :param use_savers: forwarded as ``use_savers`` to the queued pipeline.
    """
    sleep_time = 0.01

    def make_step():
        # Fresh step instance per call, so no step object is shared between
        # pipelines (mirrors building each step inline).
        return Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])

    queued_pipeline = SequentialQueuedPipeline(
        [
            ('1', 2, 10, make_step()),
            ('2', 2, 10, make_step()),
            ('3', 2, 10, make_step()),
            ('4', 2, 10, make_step()),
        ],
        batch_size=10,
        use_processes=use_processes,
        use_savers=use_savers).with_context(ExecutionContext(tmpdir))

    # perf_counter() is monotonic and high resolution; time.time() can jump
    # with clock adjustments, which would make the timing comparison unreliable.
    start = time.perf_counter()
    outputs_streaming = queued_pipeline.transform(list(range(100)))
    time_queued_pipeline = time.perf_counter() - start

    vanilla_pipeline = Pipeline([
        make_step(),
        make_step(),
        make_step(),
        make_step(),
    ])

    start = time.perf_counter()
    outputs_vanilla = vanilla_pipeline.transform(list(range(100)))
    time_vanilla_pipeline = time.perf_counter() - start

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example #7
0
def test_queued_pipeline_with_excluded_incomplete_batch():
    """
    Check the output when ``include_incomplete_batch=False``.

    Four ``MultiplyByN(2)`` steps transform 15 inputs with ``batch_size=10``;
    the expected output covers only the first 10 inputs, each multiplied by 2
    four times.
    """
    steps = [MultiplyByN(2) for _ in range(4)]
    pipeline = SequentialQueuedPipeline(
        steps,
        batch_size=10,
        include_incomplete_batch=False,
        n_workers_per_step=1,
        max_queue_size=5,
    )

    transformed = pipeline.transform(list(range(15)))

    # Only the first 10 inputs are expected back, each doubled four times.
    expected = np.arange(10) * 2 ** 4
    assert np.array_equal(transformed, expected)
Example #8
0
def test_queued_pipeline_with_included_incomplete_batch():
    """
    Check the output when ``keep_incomplete_batch=True``.

    Four ``MultiplyByN(2)`` steps transform 15 inputs with ``batch_size=10``,
    using ``AbsentValuesNullObject()`` as both default values; the expected
    output covers all 15 inputs, each multiplied by 2 four times.
    """
    steps = [MultiplyByN(2) for _ in range(4)]
    pipeline = SequentialQueuedPipeline(
        steps,
        batch_size=10,
        keep_incomplete_batch=True,
        default_value_data_inputs=AbsentValuesNullObject(),
        default_value_expected_outputs=AbsentValuesNullObject(),
        n_workers_per_step=1,
        max_queue_size=5,
    )

    transformed = pipeline.transform(list(range(15)))

    # All 15 inputs are expected back, each doubled four times.
    expected = np.arange(15) * 2 ** 4
    assert np.array_equal(transformed, expected)