def test_queued_pipeline_with_included_incomplete_batch():
    """Queued pipeline must return every item when the last batch is kept.

    15 inputs are transformed with ``batch_size=10`` and
    ``keep_incomplete_batch=True``, so the inputs do not divide evenly into
    batches. The test expects all 15 outputs back, each equal to its input
    multiplied by 2 once per step (four steps).
    """
    data_inputs = list(range(15))
    # Built up front so the expectation does not depend on the list handed
    # to ``transform`` afterwards.
    expected_outputs = np.array(data_inputs) * 2 * 2 * 2 * 2

    steps = [MultiplyByN(2), MultiplyByN(2), MultiplyByN(2), MultiplyByN(2)]
    pipeline = SequentialQueuedPipeline(
        steps,
        batch_size=10,
        keep_incomplete_batch=True,
        default_value_data_inputs=AbsentValuesNullObject(),
        default_value_expected_outputs=AbsentValuesNullObject(),
        n_workers_per_step=1,
        max_queue_size=5,
    )

    outputs = pipeline.transform(data_inputs)

    assert np.array_equal(outputs, expected_outputs)
def __init__(
        self,
        batch_size: int,
        keep_incomplete_batch: bool = True,
        default_value_data_inputs=AbsentValuesNullObject(),
        default_value_expected_outputs=None
):
    """Initialize the barrier and record its batching configuration.

    This constructor only stores its arguments on the instance; the
    batching itself is performed elsewhere.

    :param batch_size: stored as ``self.batch_size``
    :param keep_incomplete_batch: stored as ``self.keep_incomplete_batch``
        (defaults to ``True``); presumably controls whether a final batch
        smaller than ``batch_size`` is kept -- confirm against the code
        that reads it
    :param default_value_data_inputs: stored as
        ``self.default_value_data_inputs``; defaults to an
        ``AbsentValuesNullObject`` instance created once, when the function
        is defined
    :param default_value_expected_outputs: stored as
        ``self.default_value_expected_outputs``; defaults to ``None``
    """
    Barrier.__init__(self)

    # Batch geometry.
    self.batch_size: int = batch_size
    self.keep_incomplete_batch: bool = keep_incomplete_batch

    # Default values for data inputs and expected outputs.
    self.default_value_data_inputs: Union[Any, AbsentValuesNullObject] = default_value_data_inputs
    self.default_value_expected_outputs: Union[Any, AbsentValuesNullObject] = default_value_expected_outputs
def __init__(
        self,
        steps: NamedTupleList,
        batch_size=None,
        keep_incomplete_batch: bool = None,
        default_value_data_inputs=AbsentValuesNullObject(),
        default_value_expected_outputs=None,
        cache_folder=None,
        mute_joiner_batch_size_warning: bool = True
):
    """Build the pipeline and run the barrier set-up helpers.

    The statements below run in a fixed order: the private helpers are
    defined outside this method and may read attributes assigned before
    they are called, so the order must not be changed.

    :param steps: pipeline steps, forwarded to ``Pipeline.__init__``
    :param batch_size: passed to the three private barrier helpers;
        defaults to ``None``
    :param keep_incomplete_batch: forwarded to the missing-barrier helper;
        defaults to ``None``
    :param default_value_data_inputs: stored on the instance and forwarded
        to the missing-barrier helper; defaults to an
        ``AbsentValuesNullObject`` instance created once, when the function
        is defined
    :param default_value_expected_outputs: stored on the instance and
        forwarded to the missing-barrier helper; defaults to ``None``
    :param cache_folder: forwarded to ``Pipeline.__init__``
    :param mute_joiner_batch_size_warning: stored as
        ``self.mute_joiner_batch_size_warning``; defaults to ``True``
    """
    Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
    ForceHandleMixin.__init__(self)

    self.default_value_data_inputs = default_value_data_inputs
    self.default_value_expected_outputs = default_value_expected_outputs

    # NOTE(review): judging by the helper names, this validates the batch
    # size against the barriers in ``steps`` and then adds a barrier when
    # none is present -- confirm against the helper definitions.
    self.__validate_barriers_batch_size(batch_size=batch_size)
    self.__patch_missing_barrier(
        batch_size=batch_size,
        keep_incomplete_batch=keep_incomplete_batch,
        default_value_data_inputs=default_value_data_inputs,
        default_value_expected_outputs=default_value_expected_outputs
    )

    # Assigned before the final helper call, as in the original ordering.
    self.mute_joiner_batch_size_warning = mute_joiner_batch_size_warning
    self.__patch_barriers_batch_size(batch_size)
]), (3, True, 0, [ DataContainer(current_ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]), DataContainer(current_ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]), DataContainer(current_ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]), DataContainer(current_ids=[0, 1, 2], data_inputs=[9, 0, 0], expected_outputs=[19, 0, 0]) ]), (3, True, AbsentValuesNullObject(), [ DataContainer(current_ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]), DataContainer(current_ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]), DataContainer(current_ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]), DataContainer(current_ids=[9], data_inputs=[9], expected_outputs=[19]) ])]) def test_data_container_batching(batch_size, include_incomplete_pass, default_value, expected_data_containers): data_container = DataContainer(current_ids=[str(i) for i in range(10)], data_inputs=np.array(list(range(10))),