def test_list_data_container_concat():
    """
    Concatenating a DataContainer onto a ListDataContainer should append the
    current ids, data inputs, and expected outputs of the second container
    after those of the first.
    """
    # Given: a container holding ids 0..99, inputs 0..99, outputs 100..199.
    data_container = ListDataContainer(
        current_ids=[str(i) for i in range(100)],
        data_inputs=np.array(list(range(100))),
        expected_outputs=np.array(list(range(100, 200))))

    # When: concatenating a second container covering the next 100 items.
    data_container.concat(DataContainer(
        current_ids=[str(i) for i in range(100, 200)],
        data_inputs=np.array(list(range(100, 200))),
        expected_outputs=np.array(list(range(200, 300)))))

    # Then: ids, inputs and outputs are the element-wise concatenations.
    # NOTE: np.str and np.int were deprecated in NumPy 1.20 and removed in
    # 1.24 — the builtin str/int types are the documented replacements.
    assert np.array_equal(
        np.array(data_container.current_ids),
        np.array(list(range(0, 200))).astype(str))

    expected_data_inputs = np.array(list(range(0, 200))).astype(int)
    actual_data_inputs = np.array(data_container.data_inputs).astype(int)
    assert np.array_equal(actual_data_inputs, expected_data_inputs)

    expected_expected_outputs = np.array(list(range(100, 300))).astype(int)
    assert np.array_equal(
        np.array(data_container.expected_outputs).astype(int),
        expected_expected_outputs)
def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> \
        Tuple['Any', DataContainer]:
    """
    Fit transform the pipeline on minibatches of self.batch_size items and
    concatenate every batch output into a single data container.

    :param step: pipeline to fit transform on
    :type step: Pipeline
    :param data_container: data container to fit transform on
    :type data_container: DataContainer
    :param context: execution context
    :return: fitted self, transformed data inputs
    :rtype: Tuple[Any, DataContainer]
    """
    context = context.push(step)

    batches = data_container.minibatches(
        batch_size=self.batch_size,
        include_incomplete_batch=self.include_incomplete_batch,
        default_value_data_inputs=self.default_value_data_inputs,
        default_value_expected_outputs=self.default_value_expected_outputs)

    joined = ListDataContainer.empty()
    for batch in batches:
        # The step may be refitted on every batch; keep the latest version.
        step, batch = step._fit_transform_data_container(batch, context)
        joined.concat(batch)

    return step, joined
def join_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the pipeline on minibatches of self.batch_size items and
    concatenate every batch output into a single data container.

    :param step: pipeline to transform on
    :type step: Pipeline
    :param data_container: data container to transform
    :type data_container: DataContainer
    :param context: execution context
    :return: transformed data container
    :rtype: DataContainer
    """
    context = context.push(step)

    batches = data_container.minibatches(
        batch_size=self.batch_size,
        include_incomplete_batch=self.include_incomplete_batch,
        default_value_data_inputs=self.default_value_data_inputs,
        default_value_expected_outputs=self.default_value_expected_outputs)

    joined = ListDataContainer.empty()
    for batch in batches:
        joined.concat(step._transform_data_container(batch, context))

    return joined
def read_checkpoint(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Rebuild a data container from its saved checkpoints.

    The summary checkpointer lists the current ids that were checkpointed;
    for each id, the matching data input and expected output entries are
    read back with :py:attr:`~data_input_checkpointer` and
    :py:attr:`~expected_output_checkpointer`.

    :param data_container: data container to read checkpoint for
    :type data_container: neuraxle.data_container.DataContainer
    :param context: execution context to read checkpoint from
    :type context: ExecutionContext
    :return: data container checkpoint
    :rtype: neuraxle.data_container.DataContainer
    """
    checkpoint = ListDataContainer.empty(original_data_container=data_container)

    current_ids = self.summary_checkpointer.read_summary(
        checkpoint_path=context.get_path(),
        data_container=data_container)

    for current_id in current_ids:
        checkpoint.append(
            current_id,
            self.data_input_checkpointer.read_checkpoint(
                checkpoint_path=self._get_data_input_checkpoint_path(context),
                current_id=current_id),
            self.expected_output_checkpointer.read_checkpoint(
                checkpoint_path=self._get_expected_output_checkpoint_path(context),
                current_id=current_id))

    return checkpoint
def join_fit_transform(
        self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> Tuple['Any', DataContainer]:
    """
    Fit transform the pipeline batch by batch (stride and kernel size both
    equal to self.batch_size) and concatenate every batch output together.

    :param step: pipeline to fit transform on
    :type step: Pipeline
    :param data_container: data container to fit transform on
    :type data_container: DataContainer
    :param context: execution context
    :return: fitted self, transformed data inputs
    :rtype: Tuple[Any, DataContainer]
    """
    context = context.push(step)

    batches = data_container.convolved_1d(
        stride=self.batch_size,
        kernel_size=self.batch_size)

    joined = ListDataContainer.empty()
    for batch in batches:
        # The step may be refitted on every batch; keep the latest version.
        step, batch = step._fit_transform_data_container(batch, context)
        joined.concat(batch)

    return step, joined
def join(self, original_data_container: DataContainer) -> DataContainer:
    """
    Drain the task queue until every expected batch has been received, then
    return the accumulated results grouped by step name.

    :return: transformed data container
    :rtype: DataContainer
    """
    while self.n_batches_left_to_do > 0:
        task: QueuedPipelineTask = self.queue.get()
        self.n_batches_left_to_do -= 1

        name = task.step_name
        if name not in self.result:
            # First batch seen for this step: start a fresh accumulator.
            self.result[name] = ListDataContainer(
                current_ids=[],
                data_inputs=[],
                expected_outputs=[],
                summary_id=task.data_container.summary_id
            )
        self.result[name].append_data_container_in_data_inputs(task.data_container)

    joined_results = self._join_all_step_results()
    self.result = {}
    return original_data_container.set_data_inputs(joined_results)
def _fit_transform_data_container(self, data_container: DataContainer, context: ExecutionContext):
    """
    Fit transform the wrapped step once per item of the data container and
    collect the per-item outputs.

    :param data_container: data container to fit transform
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self, transformed_data_container
    """
    collected = ListDataContainer.empty()

    for current_id, data_input, expected_output in data_container:
        self.wrapped, item_output = self.wrapped.handle_fit_transform(
            DataContainer(current_ids=None, data_inputs=data_input, expected_outputs=expected_output),
            context
        )
        collected.append(current_id, item_output.data_inputs, item_output.expected_outputs)

    collected.summary_id = data_container.summary_id
    return self, collected
def join_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the pipeline batch by batch (stride and kernel size both equal
    to self.batch_size) and concatenate every batch output together.

    :param step: pipeline to transform on
    :type step: Pipeline
    :param data_container: data container to transform
    :type data_container: DataContainer
    :param context: execution context
    :return: transformed data container
    :rtype: DataContainer
    """
    context = context.push(step)

    batches = data_container.convolved_1d(
        stride=self.batch_size,
        kernel_size=self.batch_size)

    joined = ListDataContainer.empty()
    for batch in batches:
        joined.concat(step._transform_data_container(batch, context))

    return joined
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the wrapped step once per item of the data container and
    collect the per-item outputs.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self
    """
    collected: ListDataContainer = ListDataContainer.empty(original_data_container=data_container)

    for current_id, data_input, expected_output in data_container:
        item_output: DataContainer = self.wrapped.handle_transform(
            DataContainer(data_inputs=data_input, current_ids=None, expected_outputs=expected_output),
            context
        )
        collected.append(current_id, item_output.data_inputs, item_output.expected_outputs)

    collected.summary_id = data_container.summary_id
    return collected
def handle_transform(self, data_container: DataContainer, context: ExecutionContext):
    """
    Transform the wrapped step once per item of the data container, collect
    the per-item outputs, and rehash the current ids of the result.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self
    """
    collected = ListDataContainer.empty()

    for current_id, data_input, expected_output in data_container:
        item_output = self.wrapped.handle_transform(
            DataContainer(current_ids=None, data_inputs=data_input, expected_outputs=expected_output),
            context
        )
        collected.append(current_id, item_output.data_inputs, item_output.expected_outputs)

    # Recompute the ids from the incoming container so the output hashes match.
    collected.set_current_ids(self.hash(data_container))
    return collected
def _fit_transform_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> Tuple[BaseStep, DataContainer]:
    """
    Fit transform the wrapped step once per item of the data container.

    A ContinueInterrupt raised by the wrapped step skips the current item;
    a BreakInterrupt stops processing the remaining items entirely.

    :param data_container: data container to fit transform
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self, transformed_data_container
    """
    collected: DataContainer = ListDataContainer.empty(original_data_container=data_container)

    for current_id, data_input, expected_output in data_container:
        try:
            self.wrapped, item_output = self.wrapped.handle_fit_transform(
                DataContainer(data_inputs=data_input, current_ids=None, expected_outputs=expected_output),
                context
            )
            collected.append(current_id, item_output.data_inputs, item_output.expected_outputs)
        except ContinueInterrupt:
            continue
        except BreakInterrupt:
            break

    collected.summary_id = data_container.summary_id
    return self, collected
def _join_step_results(self, data_containers):
    """
    Reorder the received batch results back into their original summary-id
    order, stamp them all with the last batch's summary id, and concatenate
    them into one ListDataContainer.
    """
    # Workers may finish out of order: restore the order of self.summary_ids.
    data_containers.data_inputs.sort(key=lambda dc: self.summary_ids.index(dc.summary_id))

    # The summary id of the final (ordered) batch is applied to every batch.
    last_summary_id = data_containers.data_inputs[-1].summary_id

    joined = ListDataContainer.empty()
    for dc in data_containers.data_inputs:
        joined.concat(dc.set_summary_id(last_summary_id))

    return joined
def _inverse_transform_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Inverse transform each contained step with its matching slice of the
    data container, then append every result into one ListDataContainer.
    """
    results = [
        self[i].handle_inverse_transform(
            DataContainer(current_ids=ids, data_inputs=di, expected_outputs=eo),
            context)
        for i, (ids, di, eo) in enumerate(data_container)
    ]

    joined = ListDataContainer.empty()
    for result in results:
        joined.append_data_container(result)

    return joined
def _fit_transform_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> Tuple['BaseStep', DataContainer]:
    """
    Fit transform each contained step with its matching slice of the data
    container, collect the refitted steps, and append every transformed
    result into one ListDataContainer.

    :param data_container: data container to fit transform
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self, transformed data container
    :rtype: Tuple[BaseStep, DataContainer]
    """
    # FIX: the return annotation was the tuple literal ('BaseStep', DataContainer),
    # which is not a valid type hint; use typing.Tuple like the sibling methods.
    fitted_steps_data_containers = []
    for i, (current_ids, data_inputs, expected_outputs) in enumerate(data_container):
        fitted_steps_data_containers.append(self.steps[i].handle_fit_transform(
            DataContainer(current_ids=current_ids, data_inputs=data_inputs, expected_outputs=expected_outputs),
            context))

    # Each handle_fit_transform returns (fitted_step, data_container).
    self.steps = [step for step, _ in fitted_steps_data_containers]

    output_data_container = ListDataContainer.empty()
    for _, data_container_batch in fitted_steps_data_containers:
        output_data_container.append_data_container(data_container_batch)

    return self, output_data_container