def main():
    """
    Process tasks in batches of 10 with 8 queued workers per step that have a max queue size of 10.
    Each task does the following: for each data input, sleep 0.02 seconds, then multiply by 2.
    """
    sleep_time = 0.02
    p = SequentialQueuedPipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ], n_workers_per_step=8, max_queue_size=10, batch_size=10)

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a
    print('SequentialQueuedPipeline')
    print('execution time: {} seconds'.format(time_queued_pipeline))

    """
    Process data inputs sequentially. 
    For each data input, sleep 0.02 seconds, and then multiply by 2.
    """
    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    print('VanillaPipeline')
    print('execution time: {} seconds'.format(time_vanilla_pipeline))

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
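These examples are shown without their module-level imports. Assuming they target the Neuraxle library, the imports would look roughly like the sketch below; the exact module paths are an assumption and can differ between Neuraxle versions.

# Assumed imports for the examples on this page (module paths may vary by Neuraxle version).
import time
import numpy as np
import pytest

from neuraxle.pipeline import Pipeline, MiniBatchSequentialPipeline
from neuraxle.distributed.streaming import SequentialQueuedPipeline
from neuraxle.steps.numpy import MultiplyByN
from neuraxle.steps.misc import Sleep, FitTransformCallbackStep
from neuraxle.steps.loop import ForEach, ForEachDataInput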
Example #2
def test_parallel_queued_parallelize_correctly():
    sleep_time = 0.001
    p = SequentialQueuedPipeline([
        ('1', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('2', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('3', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('4', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]))
    ], batch_size=10)

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a

    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example #3
def test_queued_pipeline_with_n_workers_step():
    p = SequentialQueuedPipeline([(1, MultiplyByN(2)), (1, MultiplyByN(2)),
                                  (1, MultiplyByN(2)), (1, MultiplyByN(2))],
                                 batch_size=10,
                                 max_queue_size=5)

    outputs = p.transform(list(range(100)))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
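EXPECTED_OUTPUTS is a module-level constant that this excerpt does not show. Since each pipeline above applies MultiplyByN(2) four times to list(range(100)), it is presumably defined along the following lines (a reconstruction, not the original definition):

# Presumed definition: four MultiplyByN(2) steps double each input four times,
# so every value in range(100) ends up multiplied by 2 ** 4 = 16.
EXPECTED_OUTPUTS = np.array(list(range(100))) * 2 * 2 * 2 * 2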
Example #4
def test_queued_pipeline_with_step_name_n_worker_max_queue_size():
    p = SequentialQueuedPipeline([('1', 1, 5, MultiplyByN(2)),
                                  ('2', 1, 5, MultiplyByN(2)),
                                  ('3', 1, 5, MultiplyByN(2)),
                                  ('4', 1, 5, MultiplyByN(2))],
                                 batch_size=10)

    outputs = p.transform(list(range(100)))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
Example #5
def test_queued_pipeline_with_excluded_incomplete_batch():
    p = SequentialQueuedPipeline([
        MultiplyByN(2),
        MultiplyByN(2),
        MultiplyByN(2),
        MultiplyByN(2)
    ], batch_size=10, include_incomplete_batch=False, n_workers_per_step=1, max_queue_size=5)

    outputs = p.transform(list(range(15)))

    assert np.array_equal(outputs, np.array(list(range(10))) * 2 * 2 * 2 * 2)
Example #6
def test_queued_pipeline_with_included_incomplete_batch():
    p = SequentialQueuedPipeline(
        [MultiplyByN(2),
         MultiplyByN(2),
         MultiplyByN(2),
         MultiplyByN(2)],
        batch_size=10,
        keep_incomplete_batch=True,
        default_value_data_inputs=AbsentValuesNullObject(),
        default_value_expected_outputs=AbsentValuesNullObject(),
        n_workers_per_step=1,
        max_queue_size=5)

    outputs = p.transform(list(range(15)))

    assert np.array_equal(outputs, np.array(list(range(15))) * 2 * 2 * 2 * 2)
Example #7
def main():
    """
    The task is to sleep 0.02 seconds for each data input and then multiply by 2.
    """
    sleep_time = 0.02
    preprocessing_and_model_steps = [ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]

    # Classical pipeline - all at once with one big batch:
    p = Pipeline(preprocessing_and_model_steps)
    time_vanilla_pipeline, output_classical = eval_run_time(p)
    print(f"Classical 'Pipeline' execution time: {time_vanilla_pipeline} seconds.")

    # Classical minibatch pipeline - minibatch size 10:
    p = MiniBatchSequentialPipeline(preprocessing_and_model_steps,
                                    batch_size=10)
    time_minibatch_pipeline, output_minibatch = eval_run_time(p)
    print(f"Minibatched 'MiniBatchSequentialPipeline' execution time: {time_minibatch_pipeline} seconds.")

    # Parallel pipeline - minibatch size 10 with 16 parallel workers per step that
    # have a max queue size of 10 batches between preprocessing and the model:
    p = SequentialQueuedPipeline(preprocessing_and_model_steps,
                                 n_workers_per_step=16, max_queue_size=10, batch_size=10)
    time_parallel_pipeline, output_parallel = eval_run_time(p)
    print(f"Parallel 'SequentialQueuedPipeline' execution time: {time_parallel_pipeline} seconds.")

    assert time_parallel_pipeline < time_minibatch_pipeline, str((time_parallel_pipeline, time_minibatch_pipeline))
    assert np.array_equal(output_classical, output_minibatch)
    assert np.array_equal(output_classical, output_parallel)
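The eval_run_time helper used above is not defined in this excerpt. Assuming it simply times a single transform call over a fixed input and returns the elapsed time together with the outputs, a minimal sketch could look like this:

def eval_run_time(pipeline, data_inputs=tuple(range(100))):
    # Hypothetical helper: time one transform call and return (duration, outputs).
    a = time.time()
    outputs = pipeline.transform(list(data_inputs))
    b = time.time()
    return b - a, outputs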
Example #8
def test_queued_pipeline_with_step_with_threading():
    p = SequentialQueuedPipeline(
        [MultiplyByN(2),
         MultiplyByN(2),
         MultiplyByN(2),
         MultiplyByN(2)],
        batch_size=10,
        n_workers_per_step=1,
        max_queue_size=5,
        use_processes=False)

    data_container = DataContainer(data_inputs=list(range(100)))
    context = ExecutionContext()

    outputs = p.handle_transform(data_container, context)

    assert np.array_equal(outputs.data_inputs, EXPECTED_OUTPUTS)
Example #9
def test_queued_pipeline_with_included_incomplete_batch_that_raises_an_exception():
    with pytest.raises(AttributeError):
        p = SequentialQueuedPipeline(
            [MultiplyByN(2),
             MultiplyByN(2),
             MultiplyByN(2),
             MultiplyByN(2)],
            batch_size=10,
            keep_incomplete_batch=True,
            default_value_data_inputs=None,  # this will raise an exception in the worker
            default_value_expected_outputs=None,  # this will raise an exception in the worker
            n_workers_per_step=1,
            max_queue_size=5)
        p.transform(list(range(15)))
Example #10
def test_sequential_queued_pipeline_should_fit_transform_without_multiprocessing():
    batch_size = 10
    p = SequentialQueuedPipeline(
        [(1, FitTransformCallbackStep(transform_function=lambda di: np.array(di) * 2)),
         (1, FitTransformCallbackStep(transform_function=lambda di: np.array(di) * 2)),
         (1, FitTransformCallbackStep(transform_function=lambda di: np.array(di) * 2)),
         (1, FitTransformCallbackStep(transform_function=lambda di: np.array(di) * 2))],
        batch_size=batch_size,
        max_queue_size=5)
    queue_joiner_for_test = QueueJoinerForTest(batch_size=batch_size)
    p.steps[-1] = queue_joiner_for_test
    p.steps_as_tuple[-1] = (p.steps_as_tuple[-1][0], queue_joiner_for_test)
    p._refresh_steps()

    p, outputs = p.fit_transform(list(range(100)), list(range(100)))

    assert not p[-1].called_queue_joiner
    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
Example #11
def test_parallel_queued_parallelize_correctly(tmpdir, use_processes, use_savers):
    sleep_time = 0.01
    p = SequentialQueuedPipeline(
        [('1', 2, 10, Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
         ('2', 2, 10, Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
         ('3', 2, 10, Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
         ('4', 2, 10, Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]))],
        batch_size=10,
        use_processes=use_processes,
        use_savers=use_savers
    ).with_context(ExecutionContext(tmpdir))

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a

    p = Pipeline([
        Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example #12
def test_sequential_queued_pipeline_should_fit_without_multiprocessing():
    batch_size = 10
    p = SequentialQueuedPipeline([(1, FitTransformCallbackStep()),
                                  (1, FitTransformCallbackStep()),
                                  (1, FitTransformCallbackStep()),
                                  (1, FitTransformCallbackStep())],
                                 batch_size=batch_size,
                                 max_queue_size=5)
    queue_joiner_for_test = QueueJoinerForTest(batch_size=batch_size)
    p.steps[-1] = queue_joiner_for_test
    p.steps_as_tuple[-1] = (p.steps_as_tuple[-1][0], queue_joiner_for_test)
    p._refresh_steps()

    p = p.fit(list(range(100)), list(range(100)))

    assert not p[-1].called_queue_joiner