def fit_data_container(self, data_container: DataContainer, context: ExecutionContext) -> BaseStep:
    """
    Fit all sub pipelines splitted by the Barrier steps.

    Each sub pipeline ends with a Barrier step; the barrier's
    ``join_fit_transform`` both fits the sub pipeline and transforms the
    data, so that the next sub pipeline receives up-to-date inputs.
    The fitted sub pipeline is spliced back into ``self.steps_as_tuple``
    in place of the original steps.

    :param data_container: data container to fit on.
    :param context: execution context
    :return: self, the fitted pipeline (not a data container)
    """
    sub_pipelines = self._create_sub_pipelines()
    index_start = 0
    for sub_pipeline in sub_pipelines:
        # NOTE(review): here setup receives the context, while the sibling
        # handle_fit_transform calls setup() with no argument — confirm
        # which signature BaseStep.setup actually expects.
        sub_pipeline.setup(context=context)
        barrier = sub_pipeline[-1]
        # join_fit_transform fits AND transforms: the transformed data is
        # needed as the input of the following sub pipeline.
        sub_pipeline, data_container = barrier.join_fit_transform(
            step=sub_pipeline,
            data_container=data_container,
            context=context
        )
        # The data changed, so rehash the current ids.
        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)
        # Splice the fitted sub pipeline back into self's step tuple,
        # keeping the not-yet-fitted tail of the pipeline intact.
        new_self = self[:index_start] + sub_pipeline
        if index_start + len(sub_pipeline) < len(self):
            new_self += self[index_start + len(sub_pipeline):]
        self.steps_as_tuple = new_self.steps_as_tuple
        index_start += len(sub_pipeline)
    return self
def handle_fit_transform(self, data_container: DataContainer, context: ExecutionContext) -> \
        Tuple['MiniBatchSequentialPipeline', DataContainer]:
    """
    Fit transform all sub pipelines splitted by the Barrier steps.

    Each sub pipeline ends with a Barrier step; the barrier's
    ``join_fit_transform`` fits the sub pipeline and transforms the data
    so the next sub pipeline receives up-to-date inputs. The fitted sub
    pipeline is spliced back into ``self.steps_as_tuple``.

    :param data_container: data container to fit transform.
    :param context: execution context
    :return: tuple(self, transformed data container)
    """
    sub_pipelines = self._create_sub_pipelines()
    index_start = 0
    for sub_pipeline in sub_pipelines:
        sub_context = context.push(sub_pipeline)
        # NOTE(review): setup() is called without a context here, while the
        # sibling fit_data_container calls setup(context=context) — confirm
        # which signature BaseStep.setup actually expects.
        sub_pipeline.setup()
        barrier = sub_pipeline[-1]
        sub_pipeline, data_container = barrier.join_fit_transform(
            step=sub_pipeline,
            data_container=data_container,
            context=sub_context
        )
        # The data changed, so rehash the current ids.
        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)
        # Splice the fitted sub pipeline back into self's step tuple,
        # keeping the not-yet-fitted tail of the pipeline intact.
        new_self = self[:index_start] + sub_pipeline
        if index_start + len(sub_pipeline) < len(self):
            new_self += self[index_start + len(sub_pipeline):]
        self.steps_as_tuple = new_self.steps_as_tuple
        index_start += len(sub_pipeline)
    return self, data_container
def handle_inverse_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Inverse transform the expected outputs with the wrapped step.

    The expected outputs are handed to the wrapped step as data inputs;
    the wrapped step's resulting data inputs become the new expected
    outputs of the container, and the current ids are rehashed.

    :param context: execution context
    :param data_container: data container whose expected outputs to inverse transform
    :return: data container with updated expected outputs and rehashed ids
    :rtype: DataContainer
    """
    # Wrap the expected outputs so the wrapped step sees them as inputs.
    eo_as_inputs = DataContainer(
        current_ids=data_container.current_ids,
        data_inputs=data_container.expected_outputs,
        expected_outputs=None
    )
    wrapped_context = context.push(self.wrapped)
    inversed = self.wrapped.handle_inverse_transform(eo_as_inputs, wrapped_context)

    data_container.set_expected_outputs(inversed.data_inputs)
    data_container.set_current_ids(self.hash(data_container))
    return data_container
def handle_fit(self, data_container: DataContainer, context: ExecutionContext) -> (BaseStep, DataContainer):
    """
    Fit the wrapped step on the expected outputs.

    The expected outputs are handed to the wrapped step as data inputs;
    only the fitted step is kept, the produced container is discarded.
    The current ids are then rehashed.

    :param data_container: data container whose expected outputs to fit on
    :param context: execution context
    :return: tuple(self, data container)
    """
    # handle_fit returns a (fitted step, data container) tuple by
    # convention in this file — unpack it instead of assigning the whole
    # tuple to self.wrapped (previous bug).
    self.wrapped, _ = self.wrapped.handle_fit(
        DataContainer(
            current_ids=data_container.current_ids,
            data_inputs=data_container.expected_outputs,
            expected_outputs=None
        ),
        context.push(self.wrapped)
    )
    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)
    return self, data_container
def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the expected outputs with the wrapped step.

    The expected outputs are handed to the wrapped step as data inputs;
    the wrapped step's resulting data inputs become the new expected
    outputs of the container, and the current ids are rehashed.

    :param data_container: data container whose expected outputs to transform
    :param context: execution context
    :return: data container with updated expected outputs and rehashed ids
    """
    # Wrap the expected outputs so the wrapped step sees them as inputs.
    eo_as_inputs = DataContainer(
        current_ids=data_container.current_ids,
        data_inputs=data_container.expected_outputs,
        expected_outputs=None
    )
    transformed = self.wrapped.handle_transform(eo_as_inputs, context.push(self.wrapped))

    data_container.set_expected_outputs(transformed.data_inputs)
    data_container.set_current_ids(self.hash(data_container))
    return data_container
def handle_inverse_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Call the inverse transform callback on the data inputs, then delegate
    the inverse transform to the wrapped step and rehash the current ids.

    :param context: execution context
    :type context: ExecutionContext
    :param data_container: data container to inverse transform
    :type data_container: DataContainer
    :return: data container
    :rtype: DataContainer
    """
    # Notify the callback with the raw data inputs before delegating.
    self.inverse_transform_callback_function(
        data_container.data_inputs, *self.more_arguments)

    wrapped_context = context.push(self.wrapped)
    data_container = self.wrapped.handle_inverse_transform(data_container, wrapped_context)
    data_container.set_current_ids(self.hash(data_container))
    return data_container
def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the data container, then rehash the current ids with the
    hyperparams and the transformed data inputs.

    :param data_container: data container to transform
    :param context: execution context
    :return: transformed data container with rehashed ids
    """
    transformed = self._transform_core(data_container, context)
    transformed.set_current_ids(self.hash(transformed))
    return transformed
def handle_fit(self, data_container: DataContainer, context: ExecutionContext) -> ('BaseStep', DataContainer):
    """
    Fit the pipeline core, then rehash the current ids with the
    hyperparams and the resulting data inputs.

    :param data_container: data container to fit on
    :param context: execution context
    :return: tuple(fitted pipeline, data container with rehashed ids)
    """
    fitted_self, data_container = self._fit_core(data_container, context)
    data_container.set_current_ids(self.hash(data_container))
    return fitted_self, data_container
def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
    """
    Fit the pipeline on the given data.

    :param data_inputs: the data input to fit on
    :param expected_outputs: the expected data output to fit on
    :return: the pipeline itself
    """
    self.setup()

    data_container = DataContainer(
        current_ids=None,
        data_inputs=data_inputs,
        expected_outputs=expected_outputs
    )
    data_container.set_current_ids(self.hash(data_container))

    # Use the FIT execution mode and the root-context factory, consistent
    # with the other pipeline entry points in this file (previously a bare
    # ExecutionContext was built with FIT_TRANSFORM mode).
    context = ExecutionContext.create_from_root(self, ExecutionMode.FIT, self.cache_folder)

    # handle_fit returns (fitted pipeline, data container): previously the
    # whole tuple was returned instead of the pipeline alone, breaking the
    # documented 'Pipeline' return type.
    new_self, _data_container = self.handle_fit(data_container, context)
    return new_self
def transform(self, data_inputs: Any):
    """
    Transform the given data inputs through the pipeline.

    :param data_inputs: the data input to transform
    :return: transformed data inputs
    """
    container = DataContainer(current_ids=None, data_inputs=data_inputs)
    container.set_current_ids(self.hash(container))

    context = ExecutionContext.create_from_root(
        self, ExecutionMode.TRANSFORM, self.cache_folder)
    return self.handle_transform(container, context).data_inputs
def transform(self, data_inputs: Any):
    """
    After loading the last checkpoint, transform each pipeline step.

    :param data_inputs: the data input to transform
    :return: transformed data inputs
    """
    container = DataContainer(current_ids=None, data_inputs=data_inputs)
    container.set_current_ids(self.hash(container))

    context = ExecutionContext.create_from_root(
        self, ExecutionMode.TRANSFORM, self.cache_folder)
    container = self._transform_core(container, context)
    return container.data_inputs
def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Handle transform by updating both the data inputs and the expected
    outputs inside the data container, then rehashing the current ids.

    :param context: execution context
    :param data_container: data container to transform
    :return: transformed data container
    """
    # transform receives and returns a (data_inputs, expected_outputs) pair.
    new_di, new_eo = self.transform(
        (data_container.data_inputs, data_container.expected_outputs))

    data_container.set_data_inputs(new_di)
    data_container.set_expected_outputs(new_eo)
    data_container.set_current_ids(self.hash(data_container))
    return data_container
def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
    """
    After loading the last checkpoint, fit each pipeline step.

    :param data_inputs: the data input to fit on
    :param expected_outputs: the expected data output to fit on
    :return: the pipeline itself
    """
    container = DataContainer(
        current_ids=None,
        data_inputs=data_inputs,
        expected_outputs=expected_outputs
    )
    container.set_current_ids(self.hash(container))

    context = ExecutionContext.create_from_root(self, ExecutionMode.FIT, self.cache_folder)
    fitted_self, _ = self._fit_core(container, context)
    return fitted_self
def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the data container through the cache.

    The checkpoint path is created first so cached results can be read or
    written, then the (possibly cached) outputs replace the data inputs
    and the current ids are rehashed.

    :param context: execution context
    :param data_container: the data container to transform
    :type data_container: neuraxle.data_container.DataContainer
    :return: transformed data container
    """
    self.create_checkpoint_path(context.get_path())

    cached_outputs = self._transform_with_cache(data_container)
    data_container.set_data_inputs(cached_outputs)
    data_container.set_current_ids(self.hash(data_container))
    return data_container
def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform all sub pipelines splitted by the Barrier steps.

    Each sub pipeline ends with a Barrier step; the barrier's
    ``join_transform`` transforms the data so the next sub pipeline
    receives up-to-date inputs, and the ids are rehashed after each join.

    :param data_container: data container to transform.
    :param context: execution context
    :return: data container
    """
    for sub_pipeline in self._create_sub_pipelines():
        joiner = sub_pipeline[-1]
        data_container = joiner.join_transform(
            step=sub_pipeline,
            data_container=data_container,
            context=context)
        data_container.set_current_ids(self.hash(data_container))
    return data_container
def handle_fit_transform(
        self, data_container: DataContainer, context: ExecutionContext) -> ('BaseStep', DataContainer):
    """
    Handle fit transform by fitting the step and updating both the data
    inputs and the expected outputs inside the data container, then
    rehashing the current ids.

    :param context: execution context
    :param data_container: data container to fit transform
    :return: tuple(fitted step, transformed data container)
    """
    # fit_transform receives a (data_inputs, expected_outputs) pair and
    # returns (fitted step, (new data_inputs, new expected_outputs)).
    di_eo = (data_container.data_inputs, data_container.expected_outputs)
    fitted_self, (new_di, new_eo) = self.fit_transform(di_eo, None)

    data_container.set_data_inputs(new_di)
    data_container.set_expected_outputs(new_eo)
    data_container.set_current_ids(self.hash(data_container))
    return fitted_self, data_container
def fit_transform(self, data_inputs, expected_outputs=None) -> ('Pipeline', Any):
    """
    Fit transform the pipeline on the given data.

    :param data_inputs: the data input to fit on
    :param expected_outputs: the expected data output to fit on
    :return: tuple(the pipeline itself, transformed data inputs)
    """
    self.setup()

    container = DataContainer(
        current_ids=None,
        data_inputs=data_inputs,
        expected_outputs=expected_outputs
    )
    container.set_current_ids(self.hash(container))

    context = ExecutionContext.create_from_root(
        self, ExecutionMode.FIT_TRANSFORM, self.cache_folder)
    fitted_self, container = self.handle_fit_transform(container, context)
    return fitted_self, container.data_inputs
def handle_fit_transform(
        self, data_container: DataContainer, context: ExecutionContext) -> ('BaseStep', DataContainer):
    """
    Fit the wrapped step, then transform the data container through the cache.

    The checkpoint path is created and the cache flushed first (the fitted
    step invalidates any previously cached outputs), then the wrapped step
    is fitted and its (possibly cached) outputs replace the data inputs.
    The current ids are rehashed at the end.

    :param context: execution context
    :param data_container: the data container to fit transform
    :type data_container: neuraxle.data_container.DataContainer
    :return: tuple(self, data container)
    """
    self.create_checkpoint_path(context.get_path())
    self.flush_cache()

    self.wrapped = self.wrapped.fit(
        data_container.data_inputs, data_container.expected_outputs)

    outputs = self._transform_with_cache(data_container)
    data_container.set_data_inputs(outputs)
    data_container.set_current_ids(self.hash(data_container))
    return self, data_container