Example #1
import numpy as np

from neuraxle.data_container import DataContainer, ListDataContainer


def test_list_data_container_concat():
    # Given
    data_container = ListDataContainer(
        current_ids=[str(i) for i in range(100)],
        data_inputs=np.array(list(range(100))),
        expected_outputs=np.array(list(range(100, 200))))

    # When
    data_container.concat(
        DataContainer(current_ids=[str(i) for i in range(100, 200)],
                      data_inputs=np.array(list(range(100, 200))),
                      expected_outputs=np.array(list(range(200, 300)))))

    # Then
    assert np.array_equal(np.array(data_container.current_ids),
                          np.array(list(range(0, 200))).astype(str))

    expected_data_inputs = np.array(list(range(0, 200))).astype(int)
    actual_data_inputs = np.array(data_container.data_inputs).astype(int)
    assert np.array_equal(actual_data_inputs, expected_data_inputs)

    expected_expected_outputs = np.array(list(range(100, 300))).astype(int)
    assert np.array_equal(
        np.array(data_container.expected_outputs).astype(int),
        expected_expected_outputs)
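
The test relies on ListDataContainer behaving as a plain list accumulator: concat extends the stored current ids, data inputs, and expected outputs with those of another container. Below is a minimal sketch of that pattern; the class name ListAccumulator is hypothetical, and this is an illustration rather than Neuraxle's actual implementation.

# Minimal sketch of the list-accumulator pattern; illustration only,
# the real ListDataContainer lives in neuraxle.data_container.
class ListAccumulator:
    def __init__(self, current_ids=None, data_inputs=None, expected_outputs=None):
        self.current_ids = list(current_ids or [])
        self.data_inputs = list(data_inputs or [])
        self.expected_outputs = list(expected_outputs or [])

    def append(self, current_id, data_input, expected_output):
        # Store one (id, data input, expected output) triple.
        self.current_ids.append(current_id)
        self.data_inputs.append(data_input)
        self.expected_outputs.append(expected_output)

    def concat(self, other):
        # Extend this container with every triple of another container.
        self.current_ids.extend(other.current_ids)
        self.data_inputs.extend(other.data_inputs)
        self.expected_outputs.extend(other.expected_outputs)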
Example #2
    def join_fit_transform(
            self, step: Pipeline, data_container: DataContainer,
            context: ExecutionContext) -> Tuple[Any, DataContainer]:
        """
        Concatenate the pipeline fit transform output of each batch of self.batch_size together.

        :param step: pipeline to fit transform on
        :type step: Pipeline
        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :return: fitted step, transformed data container
        :rtype: Tuple[Any, DataContainer]
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            step, data_container_batch = step._fit_transform_data_container(
                data_container_batch, context)
            output_data_container.concat(data_container_batch)

        return step, output_data_container
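
Here data_container.minibatches(...) is expected to yield consecutive batches of batch_size items. The sketch below shows what such a generator could look like; the free function, the single default_value parameter (the real call passes separate defaults for data inputs and expected outputs), and the padding policy are assumptions made for illustration.

def minibatches(items, batch_size, include_incomplete_batch, default_value):
    # Yield consecutive, non-overlapping batches of batch_size items.
    for i in range(0, len(items), batch_size):
        batch = list(items[i:i + batch_size])
        if len(batch) < batch_size:
            if not include_incomplete_batch:
                return  # drop the trailing short batch entirely
            # otherwise pad the short batch up to batch_size
            batch += [default_value] * (batch_size - len(batch))
        yield batch

# For example, list(minibatches(range(5), 2, True, None))
# yields [[0, 1], [2, 3], [4, None]].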
Example #3
    def join_transform(self, step: Pipeline, data_container: DataContainer,
                       context: ExecutionContext) -> DataContainer:
        """
        Concatenate the pipeline transform output of each batch of self.batch_size together.

        :param step: pipeline to transform on
        :type step: Pipeline
        :param data_container: data container to transform
        :type data_container: DataContainer
        :param context: execution context
        :return: transformed data container
        :rtype: DataContainer
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            output_data_container.concat(
                step._transform_data_container(data_container_batch, context))

        return output_data_container
Example #4
    def read_checkpoint(self, data_container: DataContainer,
                        context: ExecutionContext) -> DataContainer:
        """
        Read data container data inputs checkpoint with :py:attr:`~data_input_checkpointer`.
        Read data container expected outputs checkpoint with :py:attr:`~expected_output_checkpointer`.

        :param data_container: data container to read checkpoint for
        :type data_container: neuraxle.data_container.DataContainer
        :param context: execution context to read checkpoint from
        :type context: ExecutionContext
        :return: data container checkpoint
        :rtype: neuraxle.data_container.DataContainer
        """
        data_container_checkpoint = ListDataContainer.empty(
            original_data_container=data_container)

        current_ids = self.summary_checkpointer.read_summary(
            checkpoint_path=context.get_path(), data_container=data_container)

        for current_id in current_ids:
            data_input = self.data_input_checkpointer.read_checkpoint(
                checkpoint_path=self._get_data_input_checkpoint_path(context),
                current_id=current_id)

            expected_output = self.expected_output_checkpointer.read_checkpoint(
                checkpoint_path=self._get_expected_output_checkpoint_path(
                    context),
                current_id=current_id)

            data_container_checkpoint.append(current_id, data_input,
                                             expected_output)

        return data_container_checkpoint
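
Each checkpointer is asked for exactly one value per current_id. As a hedged sketch, a file-per-id reader could look like the following; the pickle file layout is an assumption for illustration, not Neuraxle's actual storage format.

import os
import pickle

def read_checkpoint(checkpoint_path, current_id):
    # Load the value checkpointed for a single id from its own pickle file.
    path = os.path.join(checkpoint_path, current_id + '.pickle')
    with open(path, 'rb') as f:
        return pickle.load(f)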
Example #5
    def join_fit_transform(
            self, step: Pipeline, data_container: DataContainer,
            context: ExecutionContext) -> Tuple[Any, DataContainer]:
        """
        Concatenate the pipeline fit transform output of each batch of self.batch_size together.

        :param step: pipeline to fit transform on
        :type step: Pipeline
        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :return: fitted step, transformed data container
        :rtype: Tuple[Any, DataContainer]
        """
        context = context.push(step)

        data_container_batches = data_container.convolved_1d(
            stride=self.batch_size, kernel_size=self.batch_size)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            step, data_container_batch = step._fit_transform_data_container(
                data_container_batch, context)
            output_data_container.concat(data_container_batch)

        return step, output_data_container
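
With stride == kernel_size, convolved_1d yields non-overlapping windows, so this older batching call behaves like the minibatches call of Example #2 without padding. A sketch of that windowing, written as a free function purely for illustration:

def convolved_1d(items, kernel_size, stride):
    # Slide a window of kernel_size over the items, advancing by stride;
    # when stride == kernel_size the windows are plain consecutive batches.
    for i in range(0, len(items), stride):
        window = list(items[i:i + kernel_size])
        if window:
            yield window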
Example #6
    def join(self, original_data_container: DataContainer) -> DataContainer:
        """
        Return the accumulated results received by the on next method of this observer.

        :return: transformed data container
        :rtype: DataContainer
        """
        while self.n_batches_left_to_do > 0:
            task: QueuedPipelineTask = self.queue.get()
            self.n_batches_left_to_do -= 1
            step_name = task.step_name

            if step_name not in self.result:
                self.result[step_name] = ListDataContainer(
                    current_ids=[],
                    data_inputs=[],
                    expected_outputs=[],
                    summary_id=task.data_container.summary_id
                )

            self.result[step_name].append_data_container_in_data_inputs(task.data_container)

        data_containers = self._join_all_step_results()
        self.result = {}
        return original_data_container.set_data_inputs(data_containers)
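
The observer blocks on a shared queue until every expected batch has been published, grouping results by the name of the step that produced them. A hedged sketch of the same drain-and-group pattern, with QueuedPipelineTask modelled as a plain namedtuple purely for illustration:

import queue
from collections import namedtuple

Task = namedtuple('Task', ['step_name', 'payload'])

def drain(task_queue, n_batches_left_to_do):
    # Block until every expected batch arrives, grouping results per step.
    results = {}
    while n_batches_left_to_do > 0:
        task = task_queue.get()  # blocks until a worker publishes a batch
        n_batches_left_to_do -= 1
        results.setdefault(task.step_name, []).append(task.payload)
    return results

task_queue = queue.Queue()
task_queue.put(Task('step_a', [1, 2]))
task_queue.put(Task('step_b', [3, 4]))
assert drain(task_queue, 2) == {'step_a': [[1, 2]], 'step_b': [[3, 4]]}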
Example #7
    def _fit_transform_data_container(self, data_container: DataContainer,
                                      context: ExecutionContext):
        """
        Fit transform the wrapped step on each data input and expected output pair.

        :param data_container: data container to fit transform
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext

        :return: self, transformed_data_container
        """
        output_data_container = ListDataContainer.empty()

        for current_id, di, eo in data_container:
            self.wrapped, output = self.wrapped.handle_fit_transform(
                DataContainer(current_ids=None,
                              data_inputs=di,
                              expected_outputs=eo), context)

            output_data_container.append(current_id, output.data_inputs,
                                         output.expected_outputs)

        output_data_container.summary_id = data_container.summary_id

        return self, output_data_container
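
The loop above relies on the container's iteration protocol: iterating a DataContainer yields one (current_id, data_input, expected_output) triple per item. A minimal sketch of that protocol, with an illustrative class rather than Neuraxle's:

class TripletContainer:
    def __init__(self, current_ids, data_inputs, expected_outputs):
        self.current_ids = current_ids
        self.data_inputs = data_inputs
        self.expected_outputs = expected_outputs

    def __iter__(self):
        # Yield one (current_id, data_input, expected_output) triple per item.
        return iter(zip(self.current_ids, self.data_inputs,
                        self.expected_outputs))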
Example #8
    def join_transform(self, step: Pipeline, data_container: DataContainer,
                       context: ExecutionContext) -> DataContainer:
        """
        Concatenate the pipeline transform output of each batch of self.batch_size together.

        :param step: pipeline to transform on
        :type step: Pipeline
        :param data_container: data container to transform
        :type data_container: DataContainer
        :param context: execution context
        :return: transformed data container
        :rtype: DataContainer
        """
        context = context.push(step)

        data_container_batches = data_container.convolved_1d(
            stride=self.batch_size,
            kernel_size=self.batch_size
        )

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            output_data_container.concat(
                step._transform_data_container(data_container_batch, context)
            )

        return output_data_container
Example #9
    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext) -> DataContainer:
        """
        Transform the wrapped step on each data input.

        :param data_container: data container
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: transformed data container
        """
        output_data_container: ListDataContainer = ListDataContainer.empty(
            original_data_container=data_container)

        for current_id, di, eo in data_container:
            output: DataContainer = self.wrapped.handle_transform(
                DataContainer(data_inputs=di,
                              current_ids=None,
                              expected_outputs=eo), context)

            output_data_container.append(current_id, output.data_inputs,
                                         output.expected_outputs)
        output_data_container.summary_id = data_container.summary_id

        return output_data_container
Example #10
    def handle_transform(self, data_container: DataContainer,
                         context: ExecutionContext):
        """
        Transform the wrapped step on each data input.

        :param data_container: data container
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: transformed data container
        """
        output_data_container = ListDataContainer.empty()

        for current_id, di, eo in data_container:
            output = self.wrapped.handle_transform(
                DataContainer(current_ids=None,
                              data_inputs=di,
                              expected_outputs=eo), context)

            output_data_container.append(current_id, output.data_inputs,
                                         output.expected_outputs)

        current_ids = self.hash(data_container)
        output_data_container.set_current_ids(current_ids)

        return output_data_container
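
After the per-item transforms, the step recomputes the container's current ids via self.hash(...). As a hedged sketch, an id re-hash could combine each previous id with a step-specific salt; the exact recipe below is an assumption, and Neuraxle typically derives the salt from the step's hyperparameters.

import hashlib

def rehash(current_ids, salt):
    # Derive a new id from each previous id plus a step-specific salt.
    return [hashlib.md5((str(cid) + salt).encode('utf-8')).hexdigest()
            for cid in current_ids]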
Example #11
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> Tuple[BaseStep, DataContainer]:
        """
        Fit transform the wrapped step on each data input and expected output pair.

        :param data_container: data container to fit transform
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext

        :return: self, transformed_data_container
        """
        output_data_container: DataContainer = ListDataContainer.empty(
            original_data_container=data_container)

        for current_id, di, eo in data_container:
            try:
                self.wrapped, output = self.wrapped.handle_fit_transform(
                    DataContainer(data_inputs=di,
                                  current_ids=None,
                                  expected_outputs=eo), context)
                output_data_container.append(current_id, output.data_inputs,
                                             output.expected_outputs)
            except ContinueInterrupt:
                continue
            except BreakInterrupt:
                break

        output_data_container.summary_id = data_container.summary_id

        return self, output_data_container
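
ContinueInterrupt and BreakInterrupt are sentinel exceptions: the wrapped step raises one to skip the current item, the other to stop the loop altogether. A self-contained sketch of the pattern; the exception names mirror those above, but these classes and the process_one callback are illustrative.

class ContinueInterrupt(Exception):
    """Raised by a step to skip the current item."""

class BreakInterrupt(Exception):
    """Raised by a step to stop processing the remaining items."""

def process_all(items, process_one):
    # Apply process_one to each item, honoring the two sentinel exceptions.
    results = []
    for item in items:
        try:
            results.append(process_one(item))
        except ContinueInterrupt:
            continue
        except BreakInterrupt:
            break
    return results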
Example #12
    def _join_step_results(self, data_containers):
        """
        Reorder the results of a single step by summary id, then concatenate them.
        """
        # reorder results by the position of their summary id in the submitted order
        data_containers.data_inputs.sort(key=lambda dc: self.summary_ids.index(dc.summary_id))

        step_results = ListDataContainer.empty()
        for data_container in data_containers.data_inputs:
            data_container = data_container.set_summary_id(data_containers.data_inputs[-1].summary_id)
            step_results.concat(data_container)

        return step_results
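
Because parallel workers can finish out of order, the results are first re-sorted by the position of their summary id in the originally submitted sequence. A tiny self-contained illustration of that reordering trick:

submitted_ids = ['a', 'b', 'c']
finished = [('c', 30), ('a', 10), ('b', 20)]  # (summary_id, result) pairs
# Sort by each result's position in the submitted order.
finished.sort(key=lambda pair: submitted_ids.index(pair[0]))
assert finished == [('a', 10), ('b', 20), ('c', 30)]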
Example #13
    def _inverse_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> DataContainer:
        """
        Inverse transform each step with its corresponding data inputs, then append the results together.
        """
        inverse_transform_results = []
        for i, (current_ids, data_inputs,
                expected_outputs) in enumerate(data_container):
            inverse_transform_result = self[i].handle_inverse_transform(
                DataContainer(current_ids=current_ids,
                              data_inputs=data_inputs,
                              expected_outputs=expected_outputs), context)
            inverse_transform_results.append(inverse_transform_result)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in inverse_transform_results:
            output_data_container.append_data_container(data_container_batch)
        return output_data_container
Example #14
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> Tuple['BaseStep', DataContainer]:
        """
        Fit transform each step with its corresponding data inputs, then append the transformed results together.
        """
        fitted_steps_data_containers = []
        for i, (current_ids, data_inputs,
                expected_outputs) in enumerate(data_container):
            fitted_step_data_container = self.steps[i].handle_fit_transform(
                DataContainer(current_ids=current_ids,
                              data_inputs=data_inputs,
                              expected_outputs=expected_outputs), context)
            fitted_steps_data_containers.append(fitted_step_data_container)
        self.steps = [step for step, _ in fitted_steps_data_containers]

        output_data_container = ListDataContainer.empty()
        for _, data_container_batch in fitted_steps_data_containers:
            output_data_container.append_data_container(data_container_batch)

        return self, output_data_container