Example #1
0
def test_model_stacking_fit_transform():
    """Fit-transform a ``ModelStacking`` pipeline and check the output shape.

    A gradient boosting regressor and a k-means model are stacked, joined
    with ``NumpyTranspose`` and judged by a ridge regressor. The outputs of
    ``fit_transform`` must have the same shape as the expected outputs.
    """
    boosting_space = HyperparameterSpace({
        "n_estimators": RandInt(50, 600),
        "max_depth": RandInt(1, 10),
        "learning_rate": LogUniform(0.07, 0.7),
    })
    kmeans_space = HyperparameterSpace({"n_clusters": RandInt(5, 10)})
    ridge_space = HyperparameterSpace({
        "alpha": LogUniform(0.7, 1.4),
        "fit_intercept": Boolean(),
    })

    stacked_models = [
        SKLearnWrapper(GradientBoostingRegressor(), boosting_space),
        SKLearnWrapper(KMeans(), kmeans_space),
    ]
    pipeline = Pipeline([
        ModelStacking(
            stacked_models,
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(Ridge(), ridge_space),
        )
    ])

    n_samples, n_features = 379, 13
    expected_outputs_shape = (n_samples, 1)
    data_inputs = _create_data((n_samples, n_features))
    expected_outputs = _create_data(expected_outputs_shape)

    _, outputs = pipeline.fit_transform(data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
Example #2
0
def test_automl_should_shallow_copy_data_before_each_epoch():
    """Run a small AutoML loop and check that the best model is a ``Pipeline``.

    Regression test for https://github.com/Neuraxio/Neuraxle/issues/332.
    The loop runs 10 trials of 10 epochs on random integer data with a
    ``StandardScaler`` followed by a ``LinearSVC``.
    """
    # see issue #332 https://github.com/Neuraxio/Neuraxle/issues/332
    data_inputs = np.random.randint(0, 100, (100, 3))
    expected_outputs = np.random.randint(0, 3, 100)

    from sklearn.preprocessing import StandardScaler
    p = Pipeline([
        SKLearnWrapper(StandardScaler()),
        # LinearSVC requires C to be strictly positive. Starting the range
        # at 1 avoids sampling C == 0, which would abort the loop because
        # continue_loop_on_error is False below.
        SKLearnWrapper(LinearSVC(),
                       HyperparameterSpace({'C': RandInt(1, 10000)})),
    ])

    auto_ml = AutoML(p,
                     validation_splitter=ValidationSplitter(0.20),
                     refit_trial=True,
                     n_trials=10,
                     epochs=10,
                     cache_folder_when_no_handle='cache',
                     scoring_callback=ScoringCallback(
                         mean_squared_error, higher_score_is_better=False),
                     callbacks=[
                         MetricCallback('mse',
                                        metric_function=mean_squared_error,
                                        higher_score_is_better=False)
                     ],
                     hyperparams_repository=InMemoryHyperparamsRepository(
                         cache_folder='cache'),
                     continue_loop_on_error=False)

    random_search = auto_ml.fit(data_inputs, expected_outputs)

    best_model = random_search.get_best_model()

    assert isinstance(best_model, Pipeline)
Example #3
0
def test_sklearn_wrapper_fit_transform_with_predict():
    """Check that a wrapped ``LinearRegression`` reproduces a linear target.

    The inputs are 0..9 and the targets are 10..19, both as column vectors,
    so a fitted linear regression should predict the targets themselves.
    """
    p = SKLearnWrapper(LinearRegression())
    data_inputs = np.expand_dims(np.arange(10), axis=-1)
    expected_outputs = np.expand_dims(np.arange(10, 20), axis=-1)

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    # Predictions are floats coming out of a least-squares solver, so compare
    # with a tolerance instead of bit-for-bit equality. The explicit shape
    # check keeps the strictness of np.array_equal regarding shapes, since
    # np.allclose would otherwise broadcast.
    assert outputs.shape == expected_outputs.shape
    assert np.allclose(outputs, expected_outputs)
Example #4
0
def test_sklearn_wrapper_fit_transform_with_transform():
    """Check the output shape of a wrapped ``PCA`` after ``fit_transform``.

    The wrapped estimator is a transformer (PCA with 2 components), so the
    outputs are expected to have one row per sample and 2 columns.
    """
    n_samples, n_features, n_components = 10, 10, 2
    wrapper = SKLearnWrapper(PCA(n_components=n_components))
    data_inputs, expected_outputs = _create_data_source(
        (n_samples, n_features))

    _, outputs = wrapper.fit_transform(data_inputs, expected_outputs)

    assert outputs.shape == (n_samples, n_components)
Example #5
0
def test_sklearn_wrapper_transform_partial_fit_with_predict():
    """Train a wrapped ``SGDRegressor`` incrementally and check its predictions.

    The regressor is wrapped with ``use_partial_fit=True`` and placed after a
    ``DataShuffler``. The pipeline is fitted 2000 times on the same ten
    samples, and every prediction must then be within 0.1 of its target.
    """
    regressor = SKLearnWrapper(SGDRegressor(), use_partial_fit=True)
    pipeline = Pipeline([DataShuffler(), regressor])
    data_inputs = np.expand_dims(np.array(list(range(10))), axis=-1)
    expected_outputs = np.expand_dims(np.array(list(range(10, 20))), axis=-1)

    n_fits = 2000
    for _ in range(n_fits):
        pipeline = pipeline.fit(data_inputs, expected_outputs)
    outputs = regressor.transform(data_inputs)

    assert all(
        np.isclose(expected, predicted, atol=0.1)
        for expected, predicted in zip(expected_outputs, outputs)
    )
def test_deep_learning_pipeline_with_random_search():
    """Random-search a ``DeepLearningPipeline`` and check the best model's MSE.

    The best model is switched out of train mode, its metrics are disabled,
    and its mean squared error on the training inputs must be below 2.
    """
    # Given
    data_inputs, expected_outputs = create_2d_data()

    deep_learning_pipeline = DeepLearningPipeline(
        SKLearnWrapper(linear_model.LinearRegression()),
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
        validation_size=0.15,
    )
    random_search = RandomSearch(deep_learning_pipeline, n_iter=N_ITER)

    # When
    random_search, _ = random_search.fit_transform(
        data_inputs, expected_outputs)
    best_model = random_search.get_best_model()
    best_model.set_train(False)
    best_model.apply('disable_metrics')

    # Then
    outputs = best_model.transform(data_inputs)

    squared_errors = (outputs - expected_outputs) ** 2
    mse = squared_errors.mean()
    assert mse < 2
Example #7
0
def test_sklearn_wrapper_transform_partial_fit_classifier():
    """Train a wrapped ``SGDClassifier`` incrementally and check its outputs.

    The labels are ``x0 + 2 * x1 + 1`` for each input row. The known classes
    are passed to the wrapper through ``partial_fit_kwargs``. After 2000 fits
    the predictions must be a flat array of 10 values, all drawn from the
    declared classes.
    """
    data_inputs = np.array([
        [0, 1], [0, 0], [3, -2], [-1, 1], [-2, 1],
        [2, 0], [2, -1], [4, -2], [-3, 1], [-1, 0],
    ])
    first_feature = data_inputs[:, 0]
    second_feature = data_inputs[:, 1]
    expected_outputs = first_feature + 2 * second_feature + 1
    classes = np.array([0, 1, 2, 3])

    classifier = SKLearnWrapper(
        SGDClassifier(),
        use_partial_fit=True,
        partial_fit_kwargs={'classes': classes},
    )
    pipeline = Pipeline([DataShuffler(), classifier])

    for _ in range(2000):
        pipeline = pipeline.fit(data_inputs, expected_outputs)
    outputs = classifier.transform(data_inputs)

    assert outputs.shape == (10, )
    assert set(outputs).issubset(set(classes))
Example #8
0
def test_deep_learning_pipeline():
    """Fit a ``DeepLearningPipeline`` and check the metrics it recorded.

    The metrics are collected with ``apply('get_metrics')``. The test checks
    the number of recorded epoch and batch MSE values, and that the last
    train MSE is lower than the last validation MSE at both levels.
    """
    # Given
    data_inputs, expected_outputs = create_2d_data()

    p = DeepLearningPipeline(
        SKLearnWrapper(linear_model.LinearRegression()),
        validation_size=VALIDATION_SIZE,
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
    )

    # When
    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    metrics = p.apply('get_metrics')

    # Then
    epoch_metrics_key = (
        'DeepLearningPipeline__EpochRepeater__validation_split_wrapper'
        '__epoch_metrics')
    batch_metrics_key = (
        epoch_metrics_key
        + '__TrainShuffled__MiniBatchSequentialPipeline__batch_metrics')

    # Batch-level values come from the batch_metrics step and epoch-level
    # values from the epoch_metrics step, for both train and validation.
    # The train lookups previously used each other's key.
    batch_mse_train = metrics[batch_metrics_key]['train']['mse']
    epoch_mse_train = metrics[epoch_metrics_key]['train']['mse']

    batch_mse_validation = metrics[batch_metrics_key]['validation']['mse']
    epoch_mse_validation = metrics[epoch_metrics_key]['validation']['mse']

    assert len(epoch_mse_train) == N_EPOCHS
    assert len(epoch_mse_validation) == N_EPOCHS

    expected_len_batch_mse = math.ceil(
        (len(data_inputs) / BATCH_SIZE) * (1 - VALIDATION_SIZE)) * N_EPOCHS

    assert len(batch_mse_train) == expected_len_batch_mse
    assert len(batch_mse_validation) == expected_len_batch_mse

    last_batch_mse_validation = batch_mse_validation[-1]
    last_batch_mse_train = batch_mse_train[-1]

    last_epoch_mse_train = epoch_mse_train[-1]
    last_epoch_mse_validation = epoch_mse_validation[-1]

    assert last_batch_mse_train < last_batch_mse_validation
    assert last_epoch_mse_train < last_epoch_mse_validation
Example #9
0
def test_automl_sklearn_model_with_base_estimator(tmpdir):
    """Run the shared AutoML loop on an estimator that wraps another estimator.

    A ``BaggingRegressor`` built on a ``GradientBoostingRegressor`` is wrapped
    with a hyperparameter space over ``n_estimators`` and ``max_features``,
    then handed to ``_test_within_auto_ml_loop``.
    """
    bagging_space = HyperparameterSpace({
        "n_estimators": RandInt(10, 100),
        "max_features": Uniform(0.6, 1.0),
    })
    bagged_regressor = BaggingRegressor(
        GradientBoostingRegressor(), random_state=5, n_jobs=-1)

    _test_within_auto_ml_loop(
        tmpdir, SKLearnWrapper(bagged_regressor, bagging_space))
Example #10
0
def test_pipeline_tosklearn():
    """Check hyperparameter propagation across nested neuraxle/sklearn pipelines.

    A neuraxle ``Pipeline`` contains an ``SKLearnWrapper`` around a nested
    sklearn pipeline, whose steps are neuraxle steps converted with
    ``tosklearn()``. The test sets ``learning_rate`` three ways and asserts
    the value is visible on the innermost ``'z'`` step each time:

    1. through ``set_hyperparams`` on the neuraxle pipeline,
    2. through ``set_params`` with a flat key, after converting the whole
       pipeline with ``tosklearn()`` and nesting it in an sklearn pipeline,
    3. through ``set_params`` with a nested dict flattened by
       ``nested_dict_to_flat``.
    """
    import sklearn.pipeline
    # Keep a handle on the original step so its hyperparams can be inspected
    # after it has been converted and buried inside the nested pipelines.
    the_step = SomeStep()
    step_to_check = the_step.tosklearn()

    p = Pipeline([
        ("a", SomeStep()),
        ("b", SKLearnWrapper(sklearn.pipeline.Pipeline([
            ("a", sklearn.pipeline.Pipeline([
                ('z', step_to_check)
            ])),
            ("b", SomeStep().tosklearn()),
            ("c", SomeStep().tosklearn())
        ]), return_all_sklearn_default_params_on_get=True)),
        ("c", SomeStep())
    ])

    # assert False
    # Set hyperparams of the wrapped sklearn pipeline using its flat
    # double-underscore parameter names, nested under the wrapper step "b".
    p.set_hyperparams({
        "b": {
            "a__z__learning_rate": 7,
            "b__learning_rate": 9
        }
    })
    # The value must have reached the original neuraxle step.
    assert the_step.get_hyperparams()["learning_rate"] == 7

    # Convert the whole neuraxle pipeline and nest it under the name 'sk'.
    p = p.tosklearn()
    p = sklearn.pipeline.Pipeline([('sk', p)])

    p.set_params(**{"sk__b__a__z__learning_rate": 11})

    # Walk down step by step to the innermost 'z' step and read its params.
    sk_ = p.named_steps["sk"]
    b_ = sk_.p["b"]
    predictor = b_.wrapped_sklearn_predictor
    a_ = predictor.named_steps["a"]
    z_ = a_["z"]
    assert z_.get_params()["learning_rate"] == 11

    # Same update, expressed as a nested dict that is flattened first.
    p.set_params(**nested_dict_to_flat({
        "sk__b": {
            "a__z__learning_rate": 12,
            "b__learning_rate": 9
        }
    }))
    # p.set_params(**{"sk__b__a__z__learning_rate": 12})
    assert p.named_steps["sk"].p["b"].wrapped_sklearn_predictor.named_steps["a"]["z"].get_params()[
               "learning_rate"] == 12
    print(the_step.get_hyperparams())
Example #11
0
def test_sklearn_wrapper_update_hyperparams():
    """Check that ``update_hyperparams`` only overrides the given keys.

    After setting ``n_components`` and ``svd_solver`` and then updating only
    ``n_components``, the wrapped PCA must expose the new ``n_components``
    and the previously set ``svd_solver``.
    """
    wrapper = SKLearnWrapper(PCA())
    initial_hyperparams = HyperparameterSamples({
        'n_components': 2,
        'svd_solver': 'full',
    })
    wrapper.set_hyperparams(initial_hyperparams)

    wrapper.update_hyperparams(HyperparameterSamples({'n_components': 4}))

    assert wrapper.wrapped_sklearn_predictor.n_components == 4
    assert wrapper.wrapped_sklearn_predictor.svd_solver == 'full'
Example #12
0
from sklearn.utils import shuffle

from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyShapePrinter
from neuraxle.steps.sklearn import SKLearnWrapper, RidgeModelStacking
from neuraxle.union import AddFeatures

# Load the Boston housing data, shuffle it with a fixed seed, and keep the
# last 25% as a test split (no second shuffle in train_test_split).
boston = load_boston()
X, y = shuffle(boston.data, boston.target, random_state=13)
X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=False,
)

# Pipeline: PCA and FastICA features are added by AddFeatures, then four
# models are stacked by RidgeModelStacking. Shape printers are placed
# before, between and after these two stages.
p = Pipeline([
    NumpyShapePrinter(),
    AddFeatures([
        SKLearnWrapper(PCA(n_components=2)),
        SKLearnWrapper(FastICA(n_components=2)),
    ]),
    NumpyShapePrinter(),
    RidgeModelStacking([
        SKLearnWrapper(GradientBoostingRegressor()),
        SKLearnWrapper(GradientBoostingRegressor(n_estimators=500)),
        SKLearnWrapper(GradientBoostingRegressor(max_depth=5)),
        SKLearnWrapper(KMeans()),
    ]),
    NumpyShapePrinter(),
])

print("Fitting on train:")
p = p.fit(X_train, y_train)
print("")
Example #13
0
def test_deep_learning_pipeline():
    """Fit a ``DeepLearningPipeline`` on the Boston data and check its metrics.

    The wrapped pipeline adds PCA and FastICA features, then stacks a
    gradient boosting regressor and a k-means model judged by a ridge
    regressor. The test checks the number of recorded epoch and batch MSE
    values, that the last train MSE is below the last validation MSE, and
    that the last train MSE values are below 1.
    """
    # Given
    boston = load_boston()
    shuffled_inputs, shuffled_targets = shuffle(
        boston.data, boston.target, random_state=13)
    expected_outputs = shuffled_targets.astype(np.float32)
    data_inputs = shuffled_inputs.astype(np.float32)

    feature_steps = [
        SKLearnWrapper(
            PCA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)})),
        SKLearnWrapper(
            FastICA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)})),
    ]
    stacked_models = [
        SKLearnWrapper(
            GradientBoostingRegressor(),
            HyperparameterSpace({
                "n_estimators": RandInt(50, 600),
                "max_depth": RandInt(1, 10),
                "learning_rate": LogUniform(0.07, 0.7),
            })),
        SKLearnWrapper(
            KMeans(n_clusters=7),
            HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
    ]
    judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean(),
        }))

    pipeline = Pipeline([
        AddFeatures(feature_steps),
        ModelStacking(stacked_models, joiner=NumpyTranspose(), judge=judge),
    ])

    deep_learning_pipeline = DeepLearningPipeline(
        pipeline,
        validation_size=VALIDATION_SIZE,
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
    )

    # When
    deep_learning_pipeline, _ = deep_learning_pipeline.fit_transform(
        data_inputs, expected_outputs)

    # Then
    batch_mse_train = deep_learning_pipeline.get_batch_metric_train('mse')
    epoch_mse_train = deep_learning_pipeline.get_epoch_metric_train('mse')

    batch_mse_validation = (
        deep_learning_pipeline.get_batch_metric_validation('mse'))
    epoch_mse_validation = (
        deep_learning_pipeline.get_epoch_metric_validation('mse'))

    assert len(epoch_mse_train) == N_EPOCHS
    assert len(epoch_mse_validation) == N_EPOCHS

    # Number of batches over the whole data set, before the train/validation
    # split is applied.
    batches_in_data = len(data_inputs) / BATCH_SIZE
    expected_len_batch_mse_train = (
        math.ceil(batches_in_data * (1 - VALIDATION_SIZE)) * N_EPOCHS)
    expected_len_batch_mse_validation = (
        math.ceil(batches_in_data * VALIDATION_SIZE) * N_EPOCHS)

    assert len(batch_mse_train) == expected_len_batch_mse_train
    assert len(batch_mse_validation) == expected_len_batch_mse_validation

    final_batch_validation = batch_mse_validation[-1]
    final_batch_train = batch_mse_train[-1]

    final_epoch_train = epoch_mse_train[-1]
    final_epoch_validation = epoch_mse_validation[-1]

    assert final_batch_train < final_batch_validation
    assert final_epoch_train < final_epoch_validation
    assert final_batch_train < 1
    assert final_epoch_train < 1
Example #14
0
def main(tmpdir):
    """Run an AutoML search on the Boston housing data and print R2 scores.

    The pipeline adds PCA and FastICA features, then stacks a gradient
    boosting regressor and a k-means model judged by a ridge regressor.
    The best model found by the AutoML loop predicts on the train and test
    splits, and the R2 score of each is printed.

    :param tmpdir: folder used for the AutoML cache and the in-memory
        hyperparams repository; it is converted with ``str()``.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be
    # set within the classes at their definition if using custom classes, or they could be defined after declaring
    # the pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300), "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        refit_trial=True,
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        cache_folder_when_no_handle=str(tmpdir),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
        hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    )

    random_search = auto_ml.fit(X_train, y_train)
    p = random_search.get_best_model()
    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score takes (y_true, y_pred) and is not symmetric, so the ground
    # truth must come first.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
Example #15
0
def test_sklearn_wrapper_with_an_invalid_step():
    """Wrapping an ``Identity`` step in ``SKLearnWrapper`` must raise ``ValueError``."""
    with pytest.raises(ValueError):
        invalid_step = Identity()
        SKLearnWrapper(invalid_step)
Example #16
0
def main():
    """Let an AutoML loop choose and tune one classifier, then print test accuracy.

    Five candidate classifiers are defined with their hyperparameter spaces
    and offered to the loop through ``ChooseOneStepOf``. Trials are stored
    with ``HyperparamsJSONRepository`` in a local ``cache`` folder, which is
    removed before returning, including when the loop raises.
    """
    cache_folder = 'cache'

    # Define classification models, and hyperparams.
    # See also HyperparameterSpace documentation : https://www.neuraxle.org/stable/api/neuraxle.hyperparams.space.html#neuraxle.hyperparams.space.HyperparameterSpace

    decision_tree_classifier = SKLearnWrapper(
        DecisionTreeClassifier(),
        HyperparameterSpace({
            'criterion': Choice(['gini', 'entropy']),
            'splitter': Choice(['best', 'random']),
            'min_samples_leaf': RandInt(2, 5),
            'min_samples_split': RandInt(2, 4)
        }))

    extra_tree_classifier = SKLearnWrapper(
        ExtraTreeClassifier(),
        HyperparameterSpace({
            'criterion': Choice(['gini', 'entropy']),
            'splitter': Choice(['best', 'random']),
            'min_samples_leaf': RandInt(2, 5),
            'min_samples_split': RandInt(2, 4)
        }))

    ridge_classifier = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(
            RidgeClassifier(),
            HyperparameterSpace({
                'alpha': Choice([0.0, 1.0, 10.0, 100.0]),
                'fit_intercept': Boolean(),
                'normalize': Boolean()
            }))
    ]).set_name('RidgeClassifier')

    logistic_regression = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(
            LogisticRegression(),
            HyperparameterSpace({
                'C': LogUniform(0.01, 10.0),
                'fit_intercept': Boolean(),
                'penalty': Choice(['none', 'l2']),
                'max_iter': RandInt(20, 200)
            }))
    ]).set_name('LogisticRegression')

    random_forest_classifier = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(
            RandomForestClassifier(),
            HyperparameterSpace({
                'n_estimators': RandInt(50, 600),
                'criterion': Choice(['gini', 'entropy']),
                'min_samples_leaf': RandInt(2, 5),
                'min_samples_split': RandInt(2, 4),
                'bootstrap': Boolean()
            }))
    ]).set_name('RandomForestClassifier')

    # Define a classification pipeline that lets the AutoML loop choose one of the classifier.
    # See also ChooseOneStepOf documentation : https://www.neuraxle.org/stable/api/neuraxle.steps.flow.html#neuraxle.steps.flow.ChooseOneStepOf

    pipeline = Pipeline([
        ChooseOneStepOf([
            decision_tree_classifier, extra_tree_classifier, ridge_classifier,
            logistic_regression, random_forest_classifier
        ])
    ])

    # Everything that may touch the cache folder runs inside the try block,
    # so the folder is cleaned up even when continue_loop_on_error=False lets
    # a trial error propagate.
    try:
        # Create the AutoML loop object.
        # See also AutoML documentation : https://www.neuraxle.org/stable/api/neuraxle.metaopt.auto_ml.html#neuraxle.metaopt.auto_ml.AutoML

        auto_ml = AutoML(
            pipeline=pipeline,
            hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
            validation_splitter=ValidationSplitter(test_size=0.20),
            scoring_callback=ScoringCallback(accuracy_score,
                                             higher_score_is_better=True),
            n_trials=7,
            epochs=1,
            hyperparams_repository=HyperparamsJSONRepository(
                cache_folder=cache_folder),
            refit_trial=True,
            continue_loop_on_error=False)

        # Load data, and launch AutoML loop !

        X_train, y_train, X_test, y_test = generate_classification_data()
        auto_ml = auto_ml.fit(X_train, y_train)

        # Get the model from the best trial, and make predictions using predict.
        # See also predict documentation : https://www.neuraxle.org/stable/api/neuraxle.base.html#neuraxle.base.BaseStep.predict

        best_pipeline = auto_ml.get_best_model()
        y_pred = best_pipeline.predict(X_test)

        accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
        print("Test accuracy score:", accuracy)
    finally:
        # ignore_errors: the folder may not exist yet if the failure happened
        # early, and a cleanup error must not mask the original exception.
        shutil.rmtree(cache_folder, ignore_errors=True)
Example #17
0
def test_automl_sklearn(tmpdir):
    """Run the shared AutoML loop on a wrapped ``GradientBoostingRegressor``."""
    _test_within_auto_ml_loop(
        tmpdir, SKLearnWrapper(GradientBoostingRegressor()))
Example #18
0
def test_sklearn_wrapper_set_hyperparams():
    """Check that ``set_hyperparams`` is forwarded to the wrapped PCA."""
    wrapper = SKLearnWrapper(PCA())
    hyperparams = HyperparameterSamples({'n_components': 2})

    wrapper.set_hyperparams(hyperparams)

    assert wrapper.wrapped_sklearn_predictor.n_components == 2
from neuraxle.union import AddFeatures, ModelStacking

boston = load_boston()
X, y = shuffle(boston.data, boston.target, random_state=13)
X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    shuffle=False)

# Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be set
# within the classes at their definition if using custom classes, or they could be defined after declaring the
# pipeline using a flat dict or a nested dict.
p = Pipeline([
    AddFeatures([
        SKLearnWrapper(PCA(n_components=2),
                       HyperparameterSpace({"n_components": RandInt(1, 3)})),
        SKLearnWrapper(FastICA(n_components=2),
                       HyperparameterSpace({"n_components": RandInt(1, 3)})),
    ]),
    ModelStacking(
        [
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600),
                    "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })),
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
Example #20
0
def main():
    """Meta-fit a stacking pipeline on the Boston housing data and print R2 scores.

    The pipeline adds PCA and FastICA features, then stacks a gradient
    boosting regressor and a k-means model judged by a ridge regressor.
    Hyperparameters are searched with ``RandomSearch`` using 10-fold cross
    validation scored by R2. The resulting pipeline predicts on the train
    and test splits, and the R2 score of each is printed.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be
    # set within the classes at their definition if using custom classes, or they could be defined after declaring
    # the pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])
    print("Meta-fitting on train:")
    p = p.meta_fit(X_train,
                   y_train,
                   metastep=RandomSearch(
                       n_iter=10,
                       higher_score_is_better=True,
                       validation_technique=KFoldCrossValidationWrapper(
                           scoring_function=r2_score, k_fold=10)))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidation(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score takes (y_true, y_pred) and is not symmetric, so the ground
    # truth must come first.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)