def test_pickle_checkpoint_step_should_load_data_container(tmpdir: LocalPath):
    """A fresh pipeline instance should resume from the on-disk checkpoint and
    still produce the fully transformed data container (both data inputs and
    expected outputs doubled twice)."""
    data_inputs = [1, 2]
    expected_outputs = [2, 3]

    def make_pipeline():
        return ResumablePipeline([
            ('output_transformer_1', MultiplyBy2OutputTransformer()),
            ('pickle_checkpoint', DefaultCheckpoint()),
            ('output_transformer_2', MultiplyBy2OutputTransformer()),
        ], cache_folder=tmpdir)

    # First run populates the checkpoint on disk.
    make_pipeline().fit_transform(
        data_inputs=data_inputs, expected_outputs=expected_outputs)

    # A second, fresh pipeline should load that checkpoint when transforming.
    pipeline = make_pipeline()
    result = pipeline.handle_transform(
        DataContainer(
            current_ids=[0, 1],
            data_inputs=data_inputs,
            expected_outputs=expected_outputs),
        ExecutionContext.create_from_root(pipeline, ExecutionMode.TRANSFORM, tmpdir))

    assert np.array_equal(result.data_inputs, [4, 8])
    assert np.array_equal(result.expected_outputs, [8, 12])
def test_output_transformer_should_zip_data_input_and_expected_output_in_the_transformed_output(
        tmpdir):
    """An output transformer inside a Pipeline should transform both the data
    inputs and the expected outputs of the handled container."""
    pipeline = Pipeline([MultiplyBy2OutputTransformer()])

    container = DataContainer(
        current_ids=[0, 1, 2],
        data_inputs=[1, 2, 3],
        expected_outputs=[2, 3, 4])
    context = ExecutionContext.create_from_root(
        pipeline, ExecutionMode.FIT_TRANSFORM, tmpdir)

    pipeline, result = pipeline.handle_fit_transform(container, context)

    assert result.data_inputs == [2, 4, 6]
    assert result.expected_outputs == [4, 6, 8]
def transform(self, data_inputs: Any):
    """
    Transform the given data inputs by delegating to :func:`handle_transform`.

    Builds a :class:`DataContainer` for the raw inputs, stamps it with hashed
    current ids, and runs it through the pipeline in TRANSFORM mode.

    :param data_inputs: the data inputs to transform
    :return: the transformed data inputs
    """
    container = DataContainer(current_ids=None, data_inputs=data_inputs)
    container.set_current_ids(self.hash(container))

    execution_context = ExecutionContext.create_from_root(
        self, ExecutionMode.TRANSFORM, self.cache_folder)
    container = self.handle_transform(container, execution_context)

    return container.data_inputs
def transform(self, data_inputs: Any):
    """
    After loading the last checkpoint, transform each pipeline step.

    Builds a :class:`DataContainer` for the raw inputs, stamps it with hashed
    current ids, and runs the core transform in TRANSFORM mode.

    :param data_inputs: the data inputs to transform
    :return: the transformed data inputs
    """
    container = DataContainer(current_ids=None, data_inputs=data_inputs)
    container.set_current_ids(self.hash(container))

    execution_context = ExecutionContext.create_from_root(
        self, ExecutionMode.TRANSFORM, self.cache_folder)
    container = self._transform_core(container, execution_context)

    return container.data_inputs
def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
    """
    Fit each pipeline step on the given data.

    Builds a :class:`DataContainer`, stamps it with hashed current ids, and
    delegates to :func:`handle_fit`; the fitted pipeline is returned and the
    resulting data container is discarded.

    :param data_inputs: the data inputs to fit on
    :param expected_outputs: the expected data outputs to fit on
    :return: the fitted pipeline itself
    """
    self.setup()

    data_container = DataContainer(
        current_ids=None,
        data_inputs=data_inputs,
        expected_outputs=expected_outputs)
    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    # Bug fix: this method only fits (handle_fit) and never returns transformed
    # data, so the context must be created in FIT mode — matching the sibling
    # resumable fit implementation — not FIT_TRANSFORM.
    context = ExecutionContext.create_from_root(
        self, ExecutionMode.FIT, self.cache_folder)
    new_self, data_container = self.handle_fit(data_container, context)

    return new_self
def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
    """
    After loading the last checkpoint, fit each pipeline step.

    Builds a :class:`DataContainer`, stamps it with hashed current ids, and
    delegates to the core fit in FIT mode; the fitted pipeline is returned
    and the resulting data container is discarded.

    :param data_inputs: the data inputs to fit on
    :param expected_outputs: the expected data outputs to fit on
    :return: the fitted pipeline itself
    """
    container = DataContainer(
        current_ids=None,
        data_inputs=data_inputs,
        expected_outputs=expected_outputs)
    container.set_current_ids(self.hash(container))

    execution_context = ExecutionContext.create_from_root(
        self, ExecutionMode.FIT, self.cache_folder)
    fitted_self, container = self._fit_core(container, execution_context)

    return fitted_self