Example #1
    def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> \
            Tuple['Any', DataContainer]:
        """
        Concatenate the pipeline fit transform output of each batch of self.batch_size together.
        :param step: pipeline to fit transform on
        :type step: Pipeline
        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :return: fitted self, transformed data inputs
        :rtype: Tuple[Any, DataContainer]
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            step, data_container_batch = step._fit_transform_data_container(
                data_container_batch, context)
            output_data_container.concat(data_container_batch)

        return step, output_data_container
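
The joiner pattern above is simple once stripped of the framework types: split the data into fixed-size batches, fit-transform each batch in turn with the same step, and concatenate the per-batch outputs. A minimal, framework-free sketch of that loop (MeanScaler and fit_transform_batches are illustrative names, not Neuraxle API):

from typing import List, Tuple


def minibatches(data: List[float], batch_size: int) -> List[List[float]]:
    # Split data into consecutive batches; the last one may be shorter.
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]


class MeanScaler:
    """Toy step: learns a running mean across batches, scales inputs by it."""

    def __init__(self):
        self.mean, self.count = 0.0, 0

    def fit_transform(self, batch: List[float]) -> Tuple['MeanScaler', List[float]]:
        self.mean = (self.mean * self.count + sum(batch)) / (self.count + len(batch))
        self.count += len(batch)
        return self, [x / self.mean for x in batch]


def fit_transform_batches(step: MeanScaler, data: List[float], batch_size: int):
    # Same shape as join_fit_transform above: fit-transform batch by batch,
    # then concatenate outputs, like ListDataContainer.concat does.
    outputs: List[float] = []
    for batch in minibatches(data, batch_size):
        step, batch_output = step.fit_transform(batch)
        outputs.extend(batch_output)
    return step, outputs


step, outputs = fit_transform_batches(MeanScaler(), [float(i) for i in range(1, 11)], batch_size=3)
assert len(outputs) == 10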
Example #2
def test_data_container_batching(batch_size, include_incomplete_pass,
                                 default_value, expected_data_containers):
    data_container = DataContainer(current_ids=[str(i) for i in range(10)],
                                   data_inputs=np.array(list(range(10))),
                                   expected_outputs=np.array(
                                       list(range(10, 20))))

    # When
    data_containers = []
    for dc in data_container.minibatches(
            batch_size=batch_size,
            include_incomplete_batch=include_incomplete_pass,
            default_value_data_inputs=default_value):
        data_containers.append(dc)

    # Then
    assert len(expected_data_containers) == len(data_containers)
    for expected_data_container, actual_data_container in zip(
            expected_data_containers, data_containers):
        # np.array_equal returns a bool; without assert these checks are no-ops.
        assert np.array_equal(expected_data_container.current_ids,
                              actual_data_container.current_ids)
        assert np.array_equal(expected_data_container.data_inputs,
                              actual_data_container.data_inputs)
        assert np.array_equal(expected_data_container.expected_outputs,
                              actual_data_container.expected_outputs)
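
What include_incomplete_batch and default_value_data_inputs control in the test above is whether a trailing partial batch is yielded at all, and what it gets padded with when it is. A standalone sketch of that padding rule (padded_minibatches is an illustrative helper, not the real implementation):

from typing import Any, Iterator, List, Optional


def padded_minibatches(data: List[Any], batch_size: int,
                       include_incomplete_batch: bool = False,
                       default_value: Optional[Any] = None) -> Iterator[List[Any]]:
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]
        if len(batch) < batch_size:
            if not include_incomplete_batch:
                return  # drop the trailing partial batch entirely
            if default_value is not None:
                # pad the partial batch up to batch_size with the default value
                batch = batch + [default_value] * (batch_size - len(batch))
        yield batch


assert list(padded_minibatches(list(range(10)), 3)) == [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
assert list(padded_minibatches(list(range(10)), 3, True, 0))[-1] == [9, 0, 0]
assert list(padded_minibatches(list(range(10)), 3, True))[-1] == [9]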
Example #3
    def join_transform(self, step: Pipeline, data_container: DataContainer,
                       context: ExecutionContext) -> DataContainer:
        """
        Concatenate the pipeline transform output of each batch of self.batch_size together.
        :param step: pipeline to transform on
        :type step: Pipeline
        :param data_container: data container to transform
        :type data_container: DataContainer
        :param context: execution context
        :return: transformed data container
        :rtype: DataContainer
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            output_data_container.concat(
                step._transform_data_container(data_container_batch, context))

        return output_data_container
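
In Neuraxle, join_transform and join_fit_transform live on the Joiner step that terminates a MiniBatchSequentialPipeline. The following shows how they typically get wired up; exact class names and import paths shift between Neuraxle releases, so treat this as a sketch rather than a guaranteed recipe:

import numpy as np
from neuraxle.pipeline import Joiner, MiniBatchSequentialPipeline
from neuraxle.steps.numpy import MultiplyByN

# Every step before the Joiner runs once per batch of 10 items;
# the Joiner then concatenates the per-batch outputs back together.
pipeline = MiniBatchSequentialPipeline([
    MultiplyByN(2),
    Joiner(batch_size=10),
])

outputs = pipeline.transform(np.array(range(100)))
assert np.array_equal(outputs, np.array(range(100)) * 2)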
Example #4
    def transform_data_container(self, data_container: DataContainer,
                                 context: ExecutionContext) -> DataContainer:
        """
        Transform data container

        :param data_container: data container to transform.
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: data container
        """
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        # Tell the joiner (the last step) how many batches to wait for.
        n_batches = self.get_n_batches(data_container)
        self[-1].set_n_batches(n_batches)

        # Start every queued worker step before feeding in batches.
        for name, step in self[:-1]:
            step.start(context)

        # Stream each batch into the queued pipeline, tagged with its index.
        batch_index = 0
        for data_container_batch in data_container_batches:
            self.send_batch_to_queued_pipeline(
                batch_index=batch_index, data_container=data_container_batch)
            batch_index += 1

        # Wait for all batches to be processed and join the outputs.
        data_container = self[-1].join(original_data_container=data_container)
        return data_container
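
Stripped of the Neuraxle machinery, the control flow above is an ordinary producer/consumer pattern: batches tagged with an index go into a queue, workers transform them concurrently, and the join reassembles the results in order. A self-contained sketch of that pattern (every name below is illustrative, not part of Neuraxle):

import queue
import threading
from typing import List, Tuple

def worker(in_q: queue.Queue, results: List[Tuple[int, List[int]]], lock: threading.Lock) -> None:
    # Consume (batch_index, batch) pairs until the poison pill (None) arrives.
    while True:
        item = in_q.get()
        if item is None:
            break
        batch_index, batch = item
        transformed = [x * 2 for x in batch]  # stand-in for the step's transform
        with lock:
            results.append((batch_index, transformed))

data = list(range(100))
batch_size = 10
in_q: queue.Queue = queue.Queue()
results: List[Tuple[int, List[int]]] = []
lock = threading.Lock()
threads = [threading.Thread(target=worker, args=(in_q, results, lock)) for _ in range(4)]
for t in threads:
    t.start()

# Producer: send each batch with its index, as send_batch_to_queued_pipeline does above.
for batch_index, start in enumerate(range(0, len(data), batch_size)):
    in_q.put((batch_index, data[start:start + batch_size]))
for _ in threads:
    in_q.put(None)  # one poison pill per worker
for t in threads:
    t.join()

# Join: workers finish out of order, so sort by batch index before concatenating.
results.sort(key=lambda pair: pair[0])
outputs = [x for _, batch in results for x in batch]
assert outputs == [x * 2 for x in data]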
Example #5
    def join_transform(self, step: TruncableSteps, data_container: DataContainer,
                       context: ExecutionContext) -> ZipDataContainer:
        """
        Transform each batch of self.batch_size with the given step, then zip the per-batch outputs into a single ZipDataContainer.
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            keep_incomplete_batch=self.keep_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs
        )

        output_data_container = []
        for data_container_batch in data_container_batches:
            output_data_container.append(step._transform_data_container(data_container_batch, context))

        return ZipDataContainer.create_from(*output_data_container)
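
Unlike the concatenating joiners in examples #1 and #3, this variant keeps the per-batch outputs separate and passes them all to ZipDataContainer.create_from. Assuming create_from mirrors the built-in zip over its arguments (an assumption based on its name, not verified against the Neuraxle source), the aggregation groups the i-th element of every batch together instead of flattening the batches end to end:

batch_outputs = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
zipped = list(zip(*batch_outputs))
assert zipped == [(0, 3, 6), (1, 4, 7), (2, 5, 8)]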
Example #6
def test_data_container_should_iterate_through_batches_using_convolved():
    data_container = DataContainer(current_ids=[str(i) for i in range(100)],
                                   data_inputs=np.array(list(range(100))),
                                   expected_outputs=np.array(
                                       list(range(100, 200))))

    # 100 inputs split evenly into ten batches of 10 items each.
    batches = []
    for b in data_container.minibatches(batch_size=10):
        batches.append(b)

    for i, batch in enumerate(batches):
        assert np.array_equal(np.array(batch.data_inputs),
                              np.array(list(range(i * 10, (i * 10) + 10))))
        assert np.array_equal(
            np.array(batch.expected_outputs),
            np.array(list(range((i * 10) + 100, (i * 10) + 100 + 10))))
Example #7
    def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> \
            Tuple['Any', DataContainer]:
        """
        Fit transform each batch of self.batch_size with the given step, then zip the per-batch outputs into a single ZipDataContainer.
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            keep_incomplete_batch=self.keep_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs
        )

        output_data_container = []
        for data_container_batch in data_container_batches:
            step, data_container_batch = step._fit_transform_data_container(data_container_batch, context)
            output_data_container.append(data_container_batch)

        return step, ZipDataContainer.create_from(*output_data_container)
Example #8
def test_data_container_minibatch_should_be_lazy_and_use_getitem_when_data_is_lazy_loadable(
):
    items = [LoadableItem() for _ in range(10)]
    data_inputs = SomeLazyLoadableCollection(items)
    expected_outputs = SomeLazyLoadableCollection(
        [LoadableItem() for _ in range(10)])
    data_container = DataContainer(data_inputs=data_inputs,
                                   expected_outputs=expected_outputs)

    i = 0
    batch_size = 2
    for batch in data_container.minibatches(batch_size=batch_size):
        assert len(batch) == batch_size
        # Items in batches already yielded must have been loaded by now...
        assert all(item.is_loaded()
                   for item in data_inputs.inner_list[:(i * batch_size)])
        # ...while items past the current batch must still be untouched.
        for y in range((i + 1) * batch_size, len(data_inputs)):
            assert not items[y].is_loaded()
        i += 1
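
The fixtures LoadableItem and SomeLazyLoadableCollection are not shown above. A hypothetical pair that would satisfy this test only needs to record when an item is touched and expose slicing through __getitem__, since DataContainer.minibatches slices rather than iterating the whole collection; both classes below are guesses at the fixtures' shape, not the real ones:

class LoadableItem:
    """Marks itself as loaded the first time it is accessed."""

    def __init__(self):
        self.loaded = False

    def load(self) -> 'LoadableItem':
        self.loaded = True
        return self

    def is_loaded(self) -> bool:
        return self.loaded


class SomeLazyLoadableCollection:
    """Wraps a list and loads only the items a slice actually touches."""

    def __init__(self, inner_list):
        self.inner_list = inner_list

    def __getitem__(self, key):
        if isinstance(key, slice):
            return SomeLazyLoadableCollection(
                [item.load() for item in self.inner_list[key]])
        return self.inner_list[key].load()

    def __len__(self):
        return len(self.inner_list)

Because only sliced items are ever loaded, the assertions in the test hold: items in batches already yielded are loaded, and items beyond the current batch are not.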