def main():
    """
    Compare a queued, multi-worker pipeline with a plain sequential pipeline.

    The same work (for each data input, sleep 0.02 seconds, then multiply
    by 2) is applied to ``range(100)`` twice: first through a
    SequentialQueuedPipeline built with batch_size=10, n_workers_per_step=8
    and max_queue_size=10, then through a regular Pipeline. Both execution
    times are printed. The function finally asserts that the queued run was
    faster and that both runs produced equal outputs.
    """
    sleep_time = 0.02

    def make_step():
        # Build fresh step instances on every call so the two pipelines
        # never share step objects.
        return Pipeline([
            ForEachDataInput(Sleep(sleep_time=sleep_time)),
            MultiplyByN(2),
        ])

    # Queued execution.
    queued_pipeline = SequentialQueuedPipeline(
        [make_step()],
        n_workers_per_step=8,
        max_queue_size=10,
        batch_size=10,
    )
    queued_start = time.time()
    outputs_streaming = queued_pipeline.transform(list(range(100)))
    time_queued_pipeline = time.time() - queued_start

    print('SequentialQueuedPipeline')
    print('execution time: {} seconds'.format(time_queued_pipeline))

    # Sequential execution: the same step wrapped in a regular Pipeline.
    vanilla_pipeline = Pipeline([make_step()])
    vanilla_start = time.time()
    outputs_vanilla = vanilla_pipeline.transform(list(range(100)))
    time_vanilla_pipeline = time.time() - vanilla_start

    print('VanillaPipeline')
    print('execution time: {} seconds'.format(time_vanilla_pipeline))

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
def test_parallel_queued_parallelize_correctly():
    """
    Check that a four-step queued pipeline beats its sequential equivalent.

    Each step sleeps 0.001 seconds per data input and then multiplies by 2.
    The test asserts that the queued run over ``range(100)`` took less time
    than the plain Pipeline run, and that both produced equal outputs.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this file; the later definition rebinds the name, so this test would
    # not be collected by pytest. Confirm and rename one of them if both
    # are meant to run.
    sleep_time = 0.001

    def make_step():
        # Fresh instances per call: every step of both pipelines is distinct.
        return Pipeline([
            ForEachDataInput(Sleep(sleep_time=sleep_time)),
            MultiplyByN(2),
        ])

    # Tuples are presumably (step name, n_workers, max_queue_size, step).
    queued_pipeline = SequentialQueuedPipeline(
        [(str(step_number), 4, 10, make_step()) for step_number in range(1, 5)],
        batch_size=10,
    )
    queued_start = time.time()
    outputs_streaming = queued_pipeline.transform(list(range(100)))
    time_queued_pipeline = time.time() - queued_start

    vanilla_pipeline = Pipeline([make_step() for _ in range(4)])
    vanilla_start = time.time()
    outputs_vanilla = vanilla_pipeline.transform(list(range(100)))
    time_vanilla_pipeline = time.time() - vanilla_start

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
def test_queued_pipeline_with_included_incomplete_batch_that_raises_an_exception():
    """
    Expect an AttributeError when the incomplete batch is kept but the
    default fill values are None.

    15 data inputs with batch_size=10 leave a final batch of only 5 items.
    """
    with pytest.raises(AttributeError):
        pipeline = SequentialQueuedPipeline(
            [MultiplyByN(2) for _ in range(4)],
            batch_size=10,
            keep_incomplete_batch=True,
            # this will raise an exception in the worker
            default_value_data_inputs=None,
            # this will raise an exception in the worker
            default_value_expected_outputs=None,
            n_workers_per_step=1,
            max_queue_size=5,
        )
        pipeline.transform(list(range(15)))
def test_queued_pipeline_with_n_workers_step():
    """
    Run four MultiplyByN(2) steps declared as 2-tuples and compare the
    result on ``range(100)`` with EXPECTED_OUTPUTS.
    """
    # Each tuple is presumably (n_workers, step); the queue size is given
    # once for the whole pipeline through max_queue_size.
    steps = [(1, MultiplyByN(2)) for _ in range(4)]
    pipeline = SequentialQueuedPipeline(steps, batch_size=10, max_queue_size=5)

    outputs = pipeline.transform(list(range(100)))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
def test_queued_pipeline_with_step_name_n_worker_max_queue_size():
    """
    Run four MultiplyByN(2) steps declared as 4-tuples and compare the
    result on ``range(100)`` with EXPECTED_OUTPUTS.
    """
    # Going by the test name, each tuple is
    # (step name, n_workers, max_queue_size, step); names are '1' to '4'.
    steps = [
        (str(step_number), 1, 5, MultiplyByN(2))
        for step_number in range(1, 5)
    ]
    pipeline = SequentialQueuedPipeline(steps, batch_size=10)

    outputs = pipeline.transform(list(range(100)))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
def test_parallel_queued_parallelize_correctly(tmpdir, use_processes, use_savers):
    """
    Check that a four-step queued pipeline beats its sequential equivalent.

    Each step sleeps 0.01 seconds per data input and then multiplies by 2.
    The queued pipeline is built with the given ``use_processes`` and
    ``use_savers`` values and runs with an ExecutionContext created from
    ``tmpdir``. The test asserts that the queued run over ``range(100)``
    took less time than the plain Pipeline run, and that both produced
    equal outputs.
    """
    # NOTE(review): use_processes / use_savers are presumably supplied by a
    # parametrize decorator or fixtures that are not visible here - confirm.
    # NOTE(review): an earlier function in this file has the same name and
    # is shadowed by this definition - confirm whether both should run.
    sleep_time = 0.01

    def make_step():
        # Fresh instances per call: every step of both pipelines is distinct.
        return Pipeline([
            ForEach(Sleep(sleep_time=sleep_time)),
            MultiplyByN(2),
        ])

    # Tuples are presumably (step name, n_workers, max_queue_size, step).
    queued_steps = [
        (str(step_number), 2, 10, make_step())
        for step_number in range(1, 5)
    ]
    queued_pipeline = SequentialQueuedPipeline(
        queued_steps,
        batch_size=10,
        use_processes=use_processes,
        use_savers=use_savers,
    ).with_context(ExecutionContext(tmpdir))

    queued_start = time.time()
    outputs_streaming = queued_pipeline.transform(list(range(100)))
    time_queued_pipeline = time.time() - queued_start

    vanilla_pipeline = Pipeline([make_step() for _ in range(4)])
    vanilla_start = time.time()
    outputs_vanilla = vanilla_pipeline.transform(list(range(100)))
    time_vanilla_pipeline = time.time() - vanilla_start

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
def test_queued_pipeline_with_excluded_incomplete_batch():
    """
    With the incomplete batch excluded, only the first full batch of 10
    (out of 15 inputs) is expected back, multiplied by 2 four times.
    """
    # NOTE(review): other tests in this file pass `keep_incomplete_batch`
    # where this one passes `include_incomplete_batch`; confirm which
    # keyword the SequentialQueuedPipeline in use accepts.
    pipeline = SequentialQueuedPipeline(
        [MultiplyByN(2) for _ in range(4)],
        batch_size=10,
        include_incomplete_batch=False,
        n_workers_per_step=1,
        max_queue_size=5,
    )

    outputs = pipeline.transform(list(range(15)))

    # Four successive doublings of 0..9.
    expected_outputs = np.arange(10) * 2 ** 4
    assert np.array_equal(outputs, expected_outputs)
def test_queued_pipeline_with_included_incomplete_batch():
    """
    With the incomplete batch kept and AbsentValuesNullObject() as the
    default fill value, all 15 inputs are expected back, multiplied by 2
    four times.
    """
    pipeline = SequentialQueuedPipeline(
        [MultiplyByN(2) for _ in range(4)],
        batch_size=10,
        keep_incomplete_batch=True,
        default_value_data_inputs=AbsentValuesNullObject(),
        default_value_expected_outputs=AbsentValuesNullObject(),
        n_workers_per_step=1,
        max_queue_size=5,
    )

    outputs = pipeline.transform(list(range(15)))

    # Four successive doublings of 0..14.
    expected_outputs = np.arange(15) * 2 ** 4
    assert np.array_equal(outputs, expected_outputs)