Example no. 1
0
def test_pipeline_setup_incrementally():
    """Pipeline should set steps up lazily during fit_transform: a middle
    step's setup() runs only after the preceding step has fitted and before
    the following step has fitted."""
    class SomeStepThatFits(NonTransformableMixin, BaseStep):
        # Records whether fit() was called so setup order can be asserted below.
        def __init__(self):
            BaseStep.__init__(self)
            self.has_fitted = False

        def fit(self, data_inputs, expected_outputs=None) -> _FittableStep:
            self.has_fitted = True
            return self

    class StepWithSensitiveSetup(Identity):
        """ Asserts that step given in argument has fitted before performing setup"""
        def __init__(self):
            Identity.__init__(self)

        def setup(self, context: ExecutionContext = None) -> BaseTransformer:
            # Closure over the outer test's locals: by the time this middle
            # step is set up, the first step must already have fitted and the
            # last step must not have fitted yet.
            assert some_step.has_fitted is True
            assert some_step2.has_fitted is False
            return self

    some_step = SomeStepThatFits()
    some_step2 = SomeStepThatFits()

    p = Pipeline([some_step, StepWithSensitiveSetup(), some_step2])

    p.fit_transform(None, None)
Example no. 2
0
def test_expectedoutputnull_is_fine_when_null(tmpdir):
    """Fitting and transforming with expected_outputs=None must not raise."""
    inputs = np.arange(11)

    pipeline = Pipeline([SomeStep()])
    pipeline.fit_transform(inputs, None)
Example no. 3
0
def test_expectedoutputnull_raise_exception_when_notnull(tmpdir):
    """AssertExpectedOutputIsNone must raise when expected outputs are given."""
    inputs = np.arange(11)
    targets = inputs * 2

    pipeline = Pipeline([AssertExpectedOutputIsNone()])

    with pytest.raises(AssertionError) as error_info:
        pipeline.fit_transform(inputs, targets)
Example no. 4
0
def test_forcehandleidentity_does_not_crash(tmpdir):
    """ForceHandleIdentity should survive fit, fit_transform and transform."""
    pipeline = Pipeline([ForceHandleIdentity()])
    inputs = np.array([0, 1, 2, 3])
    targets = inputs * 2

    pipeline.fit(inputs, targets)
    pipeline.fit_transform(inputs, targets)
    pipeline.transform(data_inputs=inputs)
Example no. 5
0
def test_load_full_dump_from_path(tmpdir):
    """After save(full_dump=True), a nested step should be loadable directly
    by its path ('<pipeline name>/step_b') and keep its fitted callback data."""
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    pipeline = Pipeline(
        [('step_a', Identity()),
         ('step_b',
          OutputTransformerWrapper(
              FitTransformCallbackStep(tape_fit_callback_function,
                                       tape_transform_callback_function)))],
        cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then: load only the nested 'step_b' out of the full dump.
    loaded_pipeline = ExecutionContext(tmpdir).load(
        os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    # OutputTransformerWrapper routes the expected outputs through the wrapped
    # step, so the callbacks recorded EXPECTED_OUTPUTS as their data inputs.
    assert np.array_equal(
        loaded_step_b_wrapped_step.transform_callback_function.data[0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS))
Example no. 6
0
def test_pipeline_nested_mutate_inverse_transform():
    """After mutate(inverse_transform -> transform), a nested pipeline should
    replay the callback tape in reverse order."""
    tape = TapeCallbackFunction()
    expected_tape = ["1", "2", "3", "4", "5", "6", "7",
                     "7", "6", "5", "4", "3", "2", "1"]

    inner = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["3"]),
        TransformCallbackStep(tape.callback, ["4"]),
        TransformCallbackStep(tape.callback, ["5"]),
        Identity(),
    ])
    p = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        inner,
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
        Identity(),
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating]")
    p = p.mutate(new_method="inverse_transform", method_to_assign_to="transform")

    p.transform(np.ones((1, 1)))  # will add reversed(range(1, 8)) to tape.

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
Example no. 7
0
def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    Regression test for a strange bug at the border of the pipelines that
    happened when no Identity steps were padding the pipelines.
    """
    tape = TapeCallbackFunction()
    expected_tape = ["1", "2", "3", "4", "5", "6", "7",
                     "7", "6", "5", "4", "3", "2", "1"]

    inner = Pipeline([
        TransformCallbackStep(tape.callback, ["3"]),
        TransformCallbackStep(tape.callback, ["4"]),
        TransformCallbackStep(tape.callback, ["5"]),
    ])
    p = Pipeline([
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        inner,
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating, inversing, and calling each inverse_transform]")
    reversed(p).transform(np.ones((1, 1)))  # will add reversed(range(1, 8)) to tape, calling inverse_transforms.

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
Example no. 8
0
def test_model_stacking_fit_transform():
    """ModelStacking should fit two base regressors, join their predictions
    with a transpose, and have the judge (Ridge) produce one output column."""
    model_stacking = Pipeline([
        ModelStacking(
            [
                # Base models; hyperparameter spaces are declared but not
                # sampled here — this test only exercises fit_transform.
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            # Transposes the stacked base-model outputs before the judge.
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])
    # Shapes mirror the Boston housing dataset: 379 samples, 13 features.
    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking, outputs = model_stacking.fit_transform(
        data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
Example no. 9
0
def test_pipeline_fit_transform(steps_list, pipeline_runner):
    """fit_transform with the given runner should yield the expected outputs."""
    inputs = [AN_INPUT]
    targets = [AN_EXPECTED_OUTPUT]
    pipeline = Pipeline(steps_list, pipeline_runner=pipeline_runner())

    pipeline, result = pipeline.fit_transform(inputs, targets)

    assert tuple(result) == tuple(targets)
Example no. 10
0
def test_tape_callback():
    """Steps run in declaration order; AddFeatures runs its substeps in order."""
    tape = TapeCallbackFunction()
    expected_tape = ["1", "2", "3", "a", "b", "4"]

    def record(label):
        # Small factory to keep the pipeline declaration readable.
        return TransformCallbackStep(tape.callback, [label])

    p = Pipeline([
        Identity(),
        record("1"),
        record("2"),
        record("3"),
        AddFeatures([record("a"), record("b")]),
        record("4"),
        Identity(),
    ])
    p.fit_transform(np.ones((1, 1)))

    assert tape.get_name_tape() == expected_tape
def test_tensorflowv1_saver(tmpdir):
    """Train a TF v1 model step, save it, reload it, train one more epoch,
    and check the final mean squared error is small."""
    data_inputs = np.array([
        3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167, 7.042,
        10.791, 5.313, 7.997, 5.654, 9.27, 3.1
    ])
    expected_outputs = np.array([
        1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221, 2.827,
        3.465, 1.65, 2.904, 2.42, 2.94, 1.3
    ])

    # Train for 50 epochs, then persist the fitted model.
    model = Pipeline([create_model_step()])
    for _ in range(50):
        model, outputs = model.fit_transform(data_inputs, expected_outputs)
    model.save(ExecutionContext(root=tmpdir))

    # Reload into a fresh pipeline and run one more training epoch.
    model = Pipeline([create_model_step()]).load(ExecutionContext(root=tmpdir))
    model, outputs = model.fit_transform(data_inputs, expected_outputs)

    mse = ((outputs - expected_outputs) ** 2).mean()
    assert mse < 0.25
Example no. 12
0
def test_fit_transform(steps: List[BaseStep], expected_tape: List[str]):
    """Parametrized: fitting the given steps should record expected_tape and
    pass the data inputs through unchanged."""
    # Reset the module-level tape shared across parametrized runs.
    tape.data = []
    tape.name_tape = []
    pipeline = Pipeline(steps=steps)

    fitted_pipeline, transformed = pipeline.fit_transform(
        data_inputs, expected_outputs)

    assert isinstance(fitted_pipeline, Pipeline)
    assert tape.get_name_tape() == expected_tape
    assert np.array_equal(transformed, data_inputs)
Example no. 13
0
def test_feature_union_should_fit_transform_with_zip_features():
    """ZipFeatures should stack the two identical branch outputs on axis 1."""
    union = FeatureUnion([
        Identity(),
        Identity(),
    ], joiner=ZipFeatures())
    p = Pipeline([union])
    inputs = np.random.randint(low=0, high=100, size=(2, 20))

    p, outputs = p.fit_transform(inputs, None)

    expected = np.stack([inputs, inputs], axis=1)
    assert np.array_equal(outputs, expected)
Example no. 14
0
def test_feature_union_should_fit_transform_with_numpy_transpose():
    """FeatureUnion with a NumpyTranspose joiner should output the transpose
    of the stacked (identical) branch outputs."""
    p = Pipeline(
        [FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyTranspose())])
    # NOTE(review): np.random.randint((1, 20)) passes the tuple as `low`,
    # yielding a shape-(2,) array whose first element is always 0; the intent
    # was likely size=(1, 20) as in the zip-features test — confirm before
    # changing, since the assertion below depends on the input shape.
    data_inputs = np.random.randint((1, 20))
    expected_outputs = np.random.randint((1, 20))

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    assert np.array_equal(outputs,
                          np.array([data_inputs, data_inputs]).transpose())
Example no. 15
0
def test_feature_union_should_fit_transform_with_concatenate_inner_features():
    """FeatureUnion with NumpyConcatenateInnerFeatures should concatenate the
    two identical branch outputs."""
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ],
                     joiner=NumpyConcatenateInnerFeatures())
    ])
    # NOTE(review): np.random.randint((1, 20)) passes the tuple as `low`,
    # yielding a shape-(2,) array whose first element is always 0; size=(1, 20)
    # was likely intended — confirm before changing, since with 2-D inputs the
    # inner-feature concatenation axis would no longer match np.concatenate's
    # default axis used in the assertion below.
    data_inputs = np.random.randint((1, 20))
    expected_outputs = np.random.randint((1, 20))

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    assert np.array_equal(outputs, np.concatenate([data_inputs, data_inputs]))
Example no. 16
0
def test_fit_transform_should_fit_then_use_cache(tmpdir):
    """Value caching should transform each distinct input value only once,
    while fit still sees the full data."""
    transform_tape = TapeCallbackFunction()
    fit_tape = TapeCallbackFunction()
    cached_step = JoblibValueCachingWrapper(
        LogFitTransformCallbackStep(transform_tape,
                                    fit_tape,
                                    transform_function=np.log), tmpdir)
    p = Pipeline([cached_step])

    p, outputs = p.fit_transform([1, 1, 2, 2], [2, 2, 4, 4])

    assert outputs == EXPECTED_OUTPUTS
    # Each distinct value (1 and 2) is transformed exactly once; duplicates
    # come from the cache.
    assert transform_tape.data == [[1], [2]]
    assert fit_tape.data == [([1, 1, 2, 2], [2, 2, 4, 4])]
Example no. 17
0
def test_should_flush_cache_on_every_fit(tmpdir):
    """Pre-seeded cache entries must be flushed when the wrapper fits again."""
    transform_tape = TapeCallbackFunction()
    fit_tape = TapeCallbackFunction()
    wrapper = JoblibValueCachingWrapper(LogFitTransformCallbackStep(
        transform_tape, fit_tape, transform_function=np.log),
                                        cache_folder=tmpdir)
    p = Pipeline([wrapper])

    # Seed the cache with stale values that must not survive the fit below.
    wrapper.create_checkpoint_path()
    wrapper.write_cache(1, 10)
    wrapper.write_cache(2, 20)

    p, outputs = p.fit_transform([1, 1, 2, 2], [2, 2, 4, 4])

    # Outputs come from the recomputed values, not the stale 10/20 entries.
    assert outputs == EXPECTED_OUTPUTS
    assert transform_tape.data == [[1], [2]]
    assert fit_tape.data == [([1, 1, 2, 2], [2, 2, 4, 4])]
Example no. 18
0
def test_data_shuffling_should_shuffle_data_inputs_and_expected_outputs():
    """DataShuffler should reorder both the data inputs and the expected
    outputs seen by the downstream callback step."""
    callback_fit = TapeCallbackFunction()
    callback_transform = TapeCallbackFunction()
    data_shuffler = Pipeline([
        DataShuffler(seed=42, increment_seed_after_each_fit=True),
        FitTransformCallbackStep(callback_transform, callback_fit)
    ])
    data_inputs = np.array(range(10))
    expected_outputs = np.array(range(10, 20))

    # BUG FIX: fit_transform returns a (fitted_pipeline, outputs) tuple. The
    # original bound the whole tuple to `outputs`, which made the first
    # assertion vacuously true (a tuple never array-equals an ndarray).
    # Unpack it as every sibling test does so `outputs` is the real data.
    data_shuffler, outputs = data_shuffler.fit_transform(
        data_inputs, expected_outputs)

    assert not np.array_equal(outputs, data_inputs)
    assert not np.array_equal(callback_fit.data[0][0], data_inputs)
    assert not np.array_equal(callback_fit.data[0][1], expected_outputs)
    assert not np.array_equal(callback_transform.data, data_inputs)
Example no. 19
0
def test_fit_transform_should_fit_transform_all_steps_for_each_data_inputs_expected_outputs():
    """ForEachDataInput should run the wrapped pipeline once per
    (data_input, expected_output) pair, fitting and transforming both inner
    steps each time."""
    tape = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()
    p = Pipeline([
        ForEachDataInput(Pipeline([
            FitTransformCallbackStep(tape.callback, tape_fit, ["1"]),
            FitTransformCallbackStep(tape.callback, tape_fit, ["2"]),
        ]))
    ])
    # Two (di, eo) pairs; each pair goes through both inner steps.
    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    # Steps "1" then "2" run for the first pair, then again for the second.
    assert tape.get_name_tape() == ["1", "2", "1", "2"]
    assert tape_fit.get_name_tape() == ["1", "2", "1", "2"]
    assert tape_fit.data == [([0, 1], [2, 3]), ([0, 1], [2, 3]), ([1, 2], [4, 5]), ([1, 2], [4, 5])]
Example no. 20
0
def test_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """A PickleCheckpointStep inside a pipeline must write its checkpoint
    file during fit_transform while leaving the data untouched."""
    tape = TapeCallbackFunction()
    checkpoint = PickleCheckpointStep('1', tmpdir)
    pipeline = Pipeline(steps=[
        TransformCallbackStep(tape.callback, ["1"]),
        checkpoint,
        TransformCallbackStep(tape.callback, ["2"]),
        TransformCallbackStep(tape.callback, ["3"]),
    ])

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs)

    assert actual_data_inputs == data_inputs
    assert tape.get_name_tape() == ["1", "2", "3"]
    checkpoint_file = checkpoint.get_checkpoint_file_path(data_inputs)
    assert os.path.exists(checkpoint_file)
Example no. 21
0
def main():
    """Demonstrate nested pipelines and retrieving a deeply nested sklearn PCA."""
    np.random.seed(42)
    data = np.random.randint(5, size=(100, 5))

    # Create and fit the pipeline. Identity steps do nothing; they are here
    # purely to demonstrate nesting.
    innermost = Pipeline([Identity(), PCA(n_components=2)])
    pipeline = Pipeline([
        StandardScaler(),
        Identity(),
        Pipeline([
            Identity(),
            Identity(),
            Identity(),
            innermost,
        ])
    ])
    pipeline, transformed = pipeline.fit_transform(data)

    # Index nested pipelines by name, then take the last step (the PCA)
    # and unwrap it to reach the fitted sklearn estimator.
    nested_step = pipeline["Pipeline"]["Pipeline"][-1]
    pca_components = nested_step.get_wrapped_sklearn_predictor().components_
    assert pca_components.shape == (2, 5)
Example no. 22
0
def test_pipeline_simple_mutate_inverse_transform():
    """After mutate(inverse_transform -> transform), a flat pipeline should
    replay the callback tape in reverse order."""
    tape = TapeCallbackFunction()
    expected_tape = ["1", "2", "3", "4", "4", "3", "2", "1"]

    steps = [Identity()]
    steps += [TransformCallbackStep(tape.callback, [str(i)]) for i in range(1, 5)]
    steps.append(Identity())
    p = Pipeline(steps)

    p, _ = p.fit_transform(np.ones((1, 1)))  # records "1".."4"

    print("[mutating]")
    p = p.mutate(new_method="inverse_transform", method_to_assign_to="transform")

    p.transform(np.ones((1, 1)))  # records "4".."1"

    assert expected_tape == tape.get_name_tape()
Example no. 23
0
def test_should_load_checkpoint_pickle(tmpdir: LocalPath):
    """When a checkpoint file already exists, the pipeline should resume from
    it and skip every step before the checkpoint (only "3" runs)."""
    tape = TapeCallbackFunction()
    force_checkpoint_name = 'checkpoint_a'
    pickle_checkpoint_step = PickleCheckpointStep(
        force_checkpoint_name=force_checkpoint_name, cache_folder=tmpdir)
    pickle_checkpoint_step.set_checkpoint_path(force_checkpoint_name)
    # Pre-write the checkpoint file so the pipeline finds an existing
    # checkpoint for these data inputs and resumes from it.
    with open(pickle_checkpoint_step.get_checkpoint_file_path(data_inputs),
              'wb') as file:
        pickle.dump(data_inputs, file)

    pipeline = Pipeline(
        steps=[('a', TransformCallbackStep(tape.callback, ["1"])
                ), ('b', TransformCallbackStep(tape.callback, ["2"])
                    ), (force_checkpoint_name, pickle_checkpoint_step
                        ), ('c', TransformCallbackStep(tape.callback, ["3"]))])

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs)

    actual_tape = tape.get_name_tape()
    assert actual_data_inputs == data_inputs
    # Steps "1" and "2" were skipped because the checkpoint was hit.
    assert actual_tape == ["3"]
Example no. 24
0
def test_expand_dim_fit_transform():
    """ExpandDim should wrap the whole data container into one item, so the
    handled step receives a single summarized id and nested data arrays."""
    handle_fit_callback = TapeCallbackFunction()
    handle_transform_callback = TapeCallbackFunction()
    handle_fit_transform_callback = TapeCallbackFunction()
    p = Pipeline([
        ExpandDim(
            HandleCallbackStep(handle_fit_callback, handle_transform_callback,
                               handle_fit_transform_callback))
    ])
    # Force a deterministic summary id so it can be asserted below.
    p['ExpandDim'].hashers = [SomeSummaryHasher(fake_summary_id=SUMMARY_ID)]

    p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10)))

    assert np.array_equal(outputs, np.array(range(10)))
    # Only handle_fit_transform was invoked; the other handlers stayed idle.
    assert handle_transform_callback.data == []
    assert handle_fit_callback.data == []
    assert handle_fit_transform_callback.data[0][0].current_ids == [SUMMARY_ID]
    assert handle_fit_transform_callback.data[0][0].summary_id == SUMMARY_ID
    # The data was expanded by one dimension: a single item holding the arrays.
    assert np.array_equal(
        np.array(handle_fit_transform_callback.data[0][0].data_inputs),
        np.array([np.array(range(10))]))
    assert np.array_equal(
        np.array(handle_fit_transform_callback.data[0][0].expected_outputs),
        np.array([np.array(range(10))]))
Example no. 25
0
def test_expand_dim_fit_transform():
    """ExpandDim with the default hasher should produce a single hashed
    current id for the expanded data container.

    NOTE(review): this function has the same name as the previous
    test_expand_dim_fit_transform; if both live in one module the later
    definition shadows the earlier one — confirm they come from different
    files before renaming.
    """
    handle_fit_callback = TapeCallbackFunction()
    handle_transform_callback = TapeCallbackFunction()
    handle_fit_transform_callback = TapeCallbackFunction()
    p = Pipeline([
        ExpandDim(
            HandleCallbackStep(handle_fit_callback, handle_transform_callback,
                               handle_fit_transform_callback))
    ])

    p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10)))

    assert np.array_equal(outputs, np.array(range(10)))
    # Only handle_fit_transform was invoked; the other handlers stayed idle.
    assert handle_transform_callback.data == []
    assert handle_fit_callback.data == []
    # Default hasher yields this fixed digest for the expanded container.
    assert handle_fit_transform_callback.data[0][0].current_ids == [
        '781e5e245d69b566979b86e28d23f2c7'
    ]
    assert np.array_equal(
        np.array(handle_fit_transform_callback.data[0][0].data_inputs),
        np.array([np.array(range(10))]))
    assert np.array_equal(
        np.array(handle_fit_transform_callback.data[0][0].expected_outputs),
        np.array([np.array(range(10))]))
Example no. 26
0
def main(chosen_device):
    """Train a seq2seq signal-prediction pipeline on generated data, then plot
    metrics and a few validation predictions.

    NOTE(review): `chosen_device` is not used anywhere in this body —
    presumably the device selection happens elsewhere; confirm with callers.
    """
    exercice_number = 1
    print('exercice {}\n=================='.format(exercice_number))

    # Generate windows of past/future signal values (None = library defaults).
    data_inputs, expected_outputs = generate_data(
        # See: https://github.com/guillaume-chevalier/seq2seq-signal-prediction/blob/master/datasets.py
        exercice_number=exercice_number,
        n_samples=None,
        window_size_past=None,
        window_size_future=None)

    print('data_inputs shape: {} => (n_samples, window_size_past, input_dim)'.
          format(data_inputs.shape))
    print(
        'expected_outputs shape: {} => (n_samples, window_size_future, output_dim)'
        .format(expected_outputs.shape))

    sequence_length = data_inputs.shape[1]
    input_dim = data_inputs.shape[2]
    output_dim = expected_outputs.shape[2]

    # Training configuration.
    batch_size = 100
    epochs = 3
    validation_size = 0.15
    max_plotted_validation_predictions = 10

    seq2seq_pipeline_hyperparams = HyperparameterSamples({
        'hidden_dim':
        100,
        'layers_stacked_count':
        2,
        'lambda_loss_amount':
        0.0003,
        'learning_rate':
        0.006,
        'window_size_future':
        sequence_length,
        'output_dim':
        output_dim,
        'input_dim':
        input_dim
    })
    # Collapse the 3D (samples, window, features) outputs to 2D for sklearn's MSE.
    feature_0_metric = metric_3d_to_2d_wrapper(mean_squared_error)
    metrics = {'mse': feature_0_metric}

    # Inner model: per-sample normalization, then the TF2 seq2seq model,
    # wrapped so its predictions can be plotted.
    signal_prediction_pipeline = Pipeline([
        ForEachDataInput(MeanStdNormalizer()),
        ToNumpy(),
        PlotPredictionsWrapper(
            Tensorflow2ModelStep(
                # See: https://github.com/Neuraxio/Neuraxle-TensorFlow
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer,
                expected_outputs_dtype=tf.dtypes.float32,
                data_inputs_dtype=tf.dtypes.float32,
                print_loss=True).set_hyperparams(seq2seq_pipeline_hyperparams))
    ]).set_name('SignalPrediction')

    # Outer training loop: repeat for `epochs`, split train/validation,
    # shuffle only while training, and mini-batch the inner model while
    # tracking batch-level and epoch-level metrics.
    pipeline = Pipeline([
        EpochRepeater(ValidationSplitWrapper(
            MetricsWrapper(Pipeline([
                TrainOnlyWrapper(DataShuffler()),
                MiniBatchSequentialPipeline([
                    MetricsWrapper(signal_prediction_pipeline,
                                   metrics=metrics,
                                   name='batch_metrics')
                ],
                                            batch_size=batch_size)
            ]),
                           metrics=metrics,
                           name='epoch_metrics',
                           print_metrics=True),
            test_size=validation_size,
            scoring_function=feature_0_metric),
                      epochs=epochs)
    ])

    pipeline, outputs = pipeline.fit_transform(data_inputs, expected_outputs)

    plot_metrics(pipeline=pipeline, exercice_number=exercice_number)
    plot_predictions(data_inputs, expected_outputs, pipeline,
                     max_plotted_validation_predictions)