Example 1
def test_model_stacking_fit_transform():
    model_stacking = Pipeline([
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])
    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking, outputs = model_stacking.fit_transform(
        data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
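The _create_data helper used above is not shown in this listing. A minimal sketch consistent with how the test calls it (the name comes from the test; the random-data behavior is an assumption):

import numpy as np

def _create_data(shape):
    # Hypothetical helper: random float data of the requested shape.
    return np.random.random(shape)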
Example 2
def test_logger():
    file_path = "test.log"

    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')
    context = ExecutionContext(logger=logger)
    pipeline = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        LoggingStep()
    ])

    # When
    data_container = DataContainer(
        data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
    pipeline.handle_fit(data_container, context)

    # Then
    assert os.path.exists(file_path)
    with open(file_path) as f:
        log_content = f.read()
    assert len(log_content) > 0

    # Teardown
    file_handler.close()
    os.remove(file_path)
Example 3
def __init__(self,
             json_decoder: JSONDataBodyDecoder,
             wrapped: BaseStep,
             json_encoder: JSONDataResponseEncoder,
             route='/'):
    Pipeline.__init__(self, [json_decoder, wrapped, json_encoder])
    self.route: str = route
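This __init__ appears to belong to a REST API wrapper step that chains a JSON decoder, the wrapped step, and a JSON encoder into a single Pipeline. A hedged usage sketch, assuming a FlaskRestApiWrapper-style class and user-defined codec subclasses (none of which are shown above):

# Hypothetical usage: serve a pipeline as a JSON endpoint at /predict.
api = FlaskRestApiWrapper(
    json_decoder=CustomJSONDecoderFor2DArray(),  # assumed JSONDataBodyDecoder subclass
    wrapped=Pipeline([MultiplyByN(2)]),
    json_encoder=CustomJSONEncoderOfOutputs(),  # assumed JSONDataResponseEncoder subclass
    route='/predict')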
Example 4
def main():
    p = Pipeline([
        ForceAlwaysAlwaysHandleMixinStep(),
    ])

    p = p.fit(np.array([0, 1]), np.array([0, 1]))
    outputs = p.transform(np.array([0, 1]))
Example 5
def main():
    """
    Process tasks of batch size 10 with 8 queued workers that have a max queue size of 10.
    Each task does the following: for each data input, sleep 0.02 seconds, then multiply by 2.
    """
    sleep_time = 0.02
    p = SequentialQueuedPipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ], n_workers_per_step=8, max_queue_size=10, batch_size=10)

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a
    print('SequentialQueuedPipeline')
    print('execution time: {} seconds'.format(time_queued_pipeline))

    """
    Process data inputs sequentially. 
    For each data input, sleep 0.02 seconds, and then multiply by 2.
    """
    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    print('VanillaPipeline')
    print('execution time: {} seconds'.format(time_vanilla_pipeline))

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example 6
def test_load_full_dump_from_path(tmpdir):
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    pipeline = Pipeline(
        [('step_a', Identity()),
         ('step_b',
          OutputTransformerWrapper(
              FitTransformCallbackStep(tape_fit_callback_function,
                                       tape_transform_callback_function)))],
        cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    loaded_pipeline = ExecutionContext(tmpdir).load(
        os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    assert np.array_equal(
        loaded_step_b_wrapped_step.transform_callback_function.data[0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS))
Example 7
def test_inner_concatenate_data_should_merge_2d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D)
    data_container_2d = DataContainer(data_inputs=data_inputs_2d,
                                      expected_outputs=expected_outputs_2d)
    data_container_3d = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('2d', data_container_2d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['2d'])])

    data_container_3d = p.handle_transform(data_container_3d,
                                           ExecutionContext())

    # Then
    assert data_container_3d.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                   SHAPE_3D[2] + 1)
    assert data_container_3d.expected_outputs.shape == (SHAPE_3D[0],
                                                        SHAPE_3D[1],
                                                        SHAPE_3D[2] + 1)
    assert np.array_equal(data_container_3d.data_inputs[..., -1],
                          data_container_2d.data_inputs)
    assert np.array_equal(data_container_3d.expected_outputs[..., -1],
                          data_container_2d.expected_outputs)
Example 8
def test_inner_concatenate_data_should_merge_1d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D)
    data_container_1d = DataContainer(data_inputs=data_inputs_1d,
                                      expected_outputs=expected_outputs_1d)
    data_container = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('1d', data_container_1d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['1d'])])

    data_container = p.handle_transform(data_container, ExecutionContext())

    # Then
    broadcasted_data_inputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.data_inputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))
    broadcasted_expected_outputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.expected_outputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))

    assert np.array_equal(data_container.data_inputs[..., -1],
                          broadcasted_data_inputs_1d)
    assert np.array_equal(data_container.expected_outputs[..., -1],
                          broadcasted_expected_outputs_1d)

    assert data_container.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                SHAPE_3D[2] + 1)
    assert data_container.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                     SHAPE_3D[2] + 1)
Example 9
def test_expectedoutputnull_is_fine_when_null(tmpdir):

    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = None

    p = Pipeline([SomeStep()])
    p.fit_transform(data_inputs, expected_outputs)
Example 10
def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    This test was written for a subtle bug at the boundaries of nested
    pipelines that occurred when the Identity steps were not used.
    """
    expected_tape = ["1", "2", "3", "4", "5", "6", "7", "7", "6", "5", "4", "3", "2", "1"]
    tape = TapeCallbackFunction()

    p = Pipeline([
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        Pipeline([
            TransformCallbackStep(tape.callback, ["3"]),
            TransformCallbackStep(tape.callback, ["4"]),
            TransformCallbackStep(tape.callback, ["5"]),
        ]),
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating, inversing, and calling each inverse_transform]")
    reversed(p).transform(np.ones((1, 1)))  # will add reversed(range(1, 8)) to tape, calling inverse_transforms.

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
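TapeCallbackFunction is a small test utility; the test above only relies on it recording, in call order, the name each TransformCallbackStep passes along. A minimal sketch of that behavior (the real implementation may differ):

class TapeCallbackFunction:
    # Records every value and name passed through the callback, in call order.
    def __init__(self):
        self.data = []
        self.name_tape = []

    def callback(self, data_inputs, name=''):
        self.data.append(data_inputs)
        self.name_tape.append(name)

    def get_name_tape(self):
        return self.name_tape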
Example 11
def test_pipeline_setup_incrementally():
    class SomeStepThatFits(NonTransformableMixin, BaseStep):
        def __init__(self):
            BaseStep.__init__(self)
            self.has_fitted = False

        def fit(self, data_inputs, expected_outputs=None) -> _FittableStep:
            self.has_fitted = True
            return self

    class StepWithSensitiveSetup(Identity):
        """ Asserts that step given in argument has fitted before performing setup"""
        def __init__(self):
            Identity.__init__(self)

        def setup(self, context: ExecutionContext = None) -> BaseTransformer:
            assert some_step.has_fitted is True
            assert some_step2.has_fitted is False
            return self

    some_step = SomeStepThatFits()
    some_step2 = SomeStepThatFits()

    p = Pipeline([some_step, StepWithSensitiveSetup(), some_step2])

    p.fit_transform(None, None)
Example 12
def test_apply_on_pipeline_with_positional_argument_should_call_method_on_each_steps():
    pipeline = Pipeline([MultiplyByN(1), MultiplyByN(1)])

    pipeline.apply('set_hyperparams', hyperparams=HyperparameterSamples({'multiply_by': 2}))

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN'].get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN1'].get_hyperparams()['multiply_by'] == 2
Example 13
def test_pipeline_set_one_hyperparam_level_one_flat():
    p = Pipeline([("a", SomeStep()), ("b", SomeStep()), ("c", SomeStep())])

    p.set_hyperparams({"a__learning_rate": 7})

    assert p["a"].hyperparams.to_flat_as_dict_primitive()["learning_rate"] == 7
    assert p["b"].hyperparams.to_flat_as_dict_primitive() == dict()
    assert p["c"].hyperparams.to_flat_as_dict_primitive() == dict()
Example 14
def test_apply_on_pipeline_with_meta_step_and_positional_argument_should_call_method_on_each_steps():
    pipeline = Pipeline([OutputTransformerWrapper(MultiplyByN(1)), MultiplyByN(1)])

    pipeline.apply('set_hyperparams', hyperparams=HyperparameterSamples({'multiply_by': 2}))

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    assert pipeline['OutputTransformerWrapper'].wrapped.get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN'].get_hyperparams()['multiply_by'] == 2
Example 15
def test_pipeline_set_one_hyperparam_level_one_dict():
    p = Pipeline([("a", SomeStep()), ("b", SomeStep()), ("c", SomeStep())])

    p.set_hyperparams({"b": {"learning_rate": 7}})

    assert p["a"].hyperparams == dict()
    assert p["b"].hyperparams["learning_rate"] == 7
    assert p["c"].hyperparams == dict()
Example 16
def test_pipeline_fit_transform(steps_list, pipeline_runner):
    data_input_ = [AN_INPUT]
    expected_output_ = [AN_EXPECTED_OUTPUT]
    p = Pipeline(steps_list, pipeline_runner=pipeline_runner())

    p, result = p.fit_transform(data_input_, expected_output_)

    assert tuple(result) == tuple(expected_output_)
Example 17
def test_expectedoutputnull_raise_exception_when_notnull(tmpdir):
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 2

    p = Pipeline([AssertExpectedOutputIsNone()])

    with pytest.raises(AssertionError) as error_info:
        p.fit_transform(data_inputs, expected_outputs)
Example 18
def test_apply_method_on_pipeline_should_call_method_on_each_steps():
    pipeline = Pipeline([MultiplyByN(1), MultiplyByN(1)])

    pipeline.apply_method(lambda step: step.set_hyperparams(
        HyperparameterSamples({'multiply_by': 2})))

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN'].get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN1'].get_hyperparams()['multiply_by'] == 2
Example 19
def main():
    p = Pipeline([MultiplyByN(multiply_by=2)])

    data_inputs = np.array([1, 2])
    generated_outputs = p.transform(data_inputs)
    regenerated_inputs = p.inverse_transform(generated_outputs)

    assert np.array_equal(regenerated_inputs, data_inputs)
    assert np.array_equal(generated_outputs, 2 * data_inputs)
Example 20
def test_pipeline_fit_then_transform(steps_list):
    data_input_ = [AN_INPUT]
    expected_output_ = [AN_EXPECTED_OUTPUT]
    p = Pipeline(steps_list)

    p = p.fit(data_input_, expected_output_)
    result = p.transform(data_input_)

    assert tuple(result) == tuple(expected_output_)
Example 21
def test_apply_method_on_pipeline_with_meta_step_should_call_method_on_each_steps():
    pipeline = Pipeline([OutputTransformerWrapper(MultiplyByN(1)), MultiplyByN(1)])

    pipeline.apply_method(
        lambda step: step.set_hyperparams(HyperparameterSamples({'multiply_by': 2}))
    )

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    assert pipeline['OutputTransformerWrapper'].wrapped.get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN'].get_hyperparams()['multiply_by'] == 2
Example 22
def main():
    p = Pipeline([
        NonFittableStep(),
        NonTransformableStep(),
        Identity()  # Note: Identity does nothing: it inherits from both NonFittableMixin and NonTransformableMixin.
    ])

    p = p.fit(np.array([0, 1]), np.array([0, 1]))

    out = p.transform(np.array([0, 1]))
Example 23
def test_add_service_assertions_should_fail_when_services_are_missing(tmpdir):
    with pytest.raises(AssertionError) as exception_info:
        context = ExecutionContext(root=tmpdir)
        p = Pipeline([
            SomeStep().assert_has_services(BaseService)
        ]).with_context(context=context)
        data_inputs = np.array([0, 1, 2, 3])

        p.transform(data_inputs=data_inputs)

    assert 'BaseService dependency missing' in exception_info.value.args[0]
Example 24
def main():
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        Pipeline([
            Identity(),
            Identity(),
            PCA(n_components=4)
        ])
    ])

    p.set_hyperparams_space({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3)
    })

    samples = p.get_hyperparams_space().rvs()
    p.set_hyperparams(samples)

    samples = p.get_hyperparams().to_flat_as_dict_primitive()
    assert 42 <= samples['step1__multiply_by'] <= 50
    assert -10 <= samples['step2__multiply_by'] <= 0
    assert samples['Pipeline__PCA__n_components'] in [2, 3]
    assert p['Pipeline']['PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3]
Example 25
def test_parallel_queued_parallelize_correctly():
    sleep_time = 0.001
    p = SequentialQueuedPipeline([
        ('1', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('2', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('3', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('4', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]))
    ], batch_size=10)

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a

    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example 26
def test_localassert_should_assert_dependencies_properly_at_exec(tmpdir):
    data_inputs = np.array([0, 1, 2, 3])
    context = ExecutionContext(root=tmpdir)
    p = Pipeline([
        RegisterServiceDynamically(),
        SomeStep().assert_has_services_at_execution(SomeBaseService)
    ]).with_context(context=context)

    p.transform(data_inputs=data_inputs)
    service = context.get_service(SomeBaseService)
    assert np.array_equal(service.data, data_inputs)
Example 27
def test_wrapped_queued_pipeline_with_n_workers_step():
    p = Pipeline([
        SequentialQueuedPipeline([(1, MultiplyByN(2)), (1, MultiplyByN(2)),
                                  (1, MultiplyByN(2)), (1, MultiplyByN(2))],
                                 batch_size=10,
                                 max_queue_size=5)
    ])

    outputs = p.transform(list(range(100)))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
Example 28
def test_with_context_should_inject_dependencies_properly(tmpdir):
    data_inputs = np.array([0, 1, 2, 3])
    context = ExecutionContext(root=tmpdir)
    service = SomeService()
    context.set_service_locator({BaseService: service})
    p = Pipeline([
        SomeStep().assert_has_services(BaseService)
    ]).with_context(context=context)

    p.transform(data_inputs=data_inputs)

    assert np.array_equal(service.data, data_inputs)
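The BaseService / SomeService pair is not defined in this listing; the test only needs a service that remembers the data it processes. A minimal sketch with assumed names and interface:

class BaseService:
    # Hypothetical abstract service interface.
    def process(self, data_inputs):
        raise NotImplementedError()

class SomeService(BaseService):
    # Hypothetical concrete service: records the data inputs it receives.
    def __init__(self):
        self.data = None

    def process(self, data_inputs):
        self.data = data_inputs
        return data_inputs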
Example 29
def test_output_transformer_should_zip_data_input_and_expected_output_in_the_transformed_output(
        tmpdir: LocalPath):
    pipeline = Pipeline([MultiplyBy2OutputTransformer()])

    pipeline, new_data_container = pipeline.handle_fit_transform(
        DataContainer(data_inputs=[1, 2, 3],
                      current_ids=[0, 1, 2],
                      expected_outputs=[2, 3, 4]), ExecutionContext(tmpdir))

    assert new_data_container.data_inputs == [2, 4, 6]
    assert new_data_container.expected_outputs == [4, 6, 8]
Example 30
def main():
    p = Pipeline([MultiplyByN(2), MultiplyByN(4)])

    outputs = p.transform(list(range(10)))
    print('transform: {}'.format(outputs))  # each input multiplied by 2 * 4 = 8

    p = p.mutate(new_method='inverse_transform',
                 method_to_assign_to='transform')

    outputs = p.transform(list(range(10)))
    print('inverse_transform: {}'.format(outputs))  # each input divided by 8