예제 #1
0
 def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
     """
     Zip the data containers stored in ``data_container.data_inputs`` into one.

     :param data_container: container whose data inputs are DataContainer instances
     :param context: execution context (not read by this method)
     :return: the ZipDataContainer created from the inner data containers
     :raises ValueError: if any data input is not a DataContainer instance
     """
     inner_containers = data_container.data_inputs
     for candidate in inner_containers:
         # Fail on the first element that is not a DataContainer.
         if not isinstance(candidate, DataContainer):
             raise ValueError("data_inputs given to ZipFeatures must be a list of DataContainer instances")

     zipped = ZipDataContainer.create_from(*inner_containers)
     if self.concatenate_inner_features:
         zipped.concatenate_inner_features()
     return zipped
예제 #2
0
    def join_transform(self, step: TruncableSteps, data_container: DataContainer,
                       context: ExecutionContext) -> ZipDataContainer:
        """
        Transform the data container one minibatch at a time, then zip the results.

        :param step: step whose ``_transform_data_container`` is applied to every minibatch
        :param data_container: data container to split into minibatches
        :param context: execution context; ``step`` is pushed onto it before transforming
        :return: ZipDataContainer created from the transformed minibatches
        """
        context = context.push(step)
        minibatches = data_container.minibatches(
            batch_size=self.batch_size,
            keep_incomplete_batch=self.keep_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs
        )

        # Each minibatch is transformed as soon as it is produced.
        transformed_batches = [
            step._transform_data_container(minibatch, context)
            for minibatch in minibatches
        ]
        return ZipDataContainer.create_from(*transformed_batches)
예제 #3
0
def test_zip_data_container_should_merge_two_data_sources_together():
    """Zipping two containers keeps the first one's ids and pairs their data inputs per item."""
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_2d, eo_2d = _create_data_source((10, 10))
    dc_2d = DataContainer(data_inputs=di_2d, expected_outputs=eo_2d)
    dc_3d = DataContainer(data_inputs=di_3d, expected_outputs=eo_3d)

    zipped = ZipDataContainer.create_from(dc_3d, dc_2d)

    assert zipped.current_ids == dc_3d.current_ids
    for index, pair in enumerate(zipped.data_inputs):
        # Each zipped item holds the first container's item, then the second's.
        assert np.array_equal(pair[0], di_3d[index])
        assert np.array_equal(pair[1], di_2d[index])
예제 #4
0
    def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> \
            Tuple['Any', DataContainer]:
        """
        Fit and transform the step one minibatch at a time, then zip the results.

        :param step: step to fit; it is replaced after every minibatch by the step
            returned from ``_fit_transform_data_container``
        :param data_container: data container to split into minibatches
        :param context: execution context; ``step`` is pushed onto it before fitting
        :return: tuple of the step returned by the last fit and the ZipDataContainer
            created from the transformed minibatches
        """
        context = context.push(step)

        processed_batches = []
        for minibatch in data_container.minibatches(
                batch_size=self.batch_size,
                keep_incomplete_batch=self.keep_incomplete_batch,
                default_value_data_inputs=self.default_value_data_inputs,
                default_value_expected_outputs=self.default_value_expected_outputs
        ):
            # The step returned by one minibatch is the one used on the next.
            step, fitted_batch = step._fit_transform_data_container(minibatch, context)
            processed_batches.append(fitted_batch)

        return step, ZipDataContainer.create_from(*processed_batches)
예제 #5
0
def test_zip_data_container_should_concatenate_inner_features():
    """After concatenating inner features, the last feature comes from the second container."""
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_2d, eo_2d = _create_data_source((10, 10))
    dc_2d = DataContainer(data_inputs=di_2d, expected_outputs=eo_2d)
    dc_3d = DataContainer(data_inputs=di_3d, expected_outputs=eo_3d)

    zipped = ZipDataContainer.create_from(dc_3d, dc_2d)
    zipped.concatenate_inner_features()

    # The last index along the final axis must equal the second container's data inputs.
    merged_inputs = np.array(zipped.data_inputs)
    assert np.array_equal(merged_inputs[..., -1], dc_2d.data_inputs)

    # Expected outputs must equal those of the first container.
    merged_outputs = np.array(zipped.expected_outputs)
    assert np.array_equal(merged_outputs, eo_3d)