예제 #1
0
def test_hyperparam_space():
    """Check that hyperparameters stay nested by step name inside a pipeline.

    A pipeline is built from an ``AddFeatures`` union of two ``SomeStep``
    instances and a ``ModelStacking`` of four ``SomeStep`` instances. The
    pipeline's own hyperparameter space is fed back in through
    ``set_hyperparams`` and the result of ``get_hyperparams`` is inspected.

    The assertions expect repeated ``SomeStep`` instances inside one
    container to be keyed as ``SomeStep``, ``SomeStep1``, ``SomeStep2``, ...
    Every stacked step is checked both for presence and for its own
    parameter, so a missing or misnamed step fails with an assertion rather
    than a ``KeyError``.
    """
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)}))
        ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
        )
    ])

    # Round-trip: read the space from the pipeline and set it back as hyperparams.
    rvsed = p.get_hyperparams_space()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()

    # Feature union: two steps, both exposing "n_components".
    assert "AddFeatures" in hyperparams.keys()
    assert "SomeStep" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert "SomeStep1" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]

    # Model stacking: four steps; the first two expose "n_estimators",
    # the last two expose "max_depth".
    assert "ModelStacking" in hyperparams.keys()
    assert "SomeStep" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep"]
    assert "SomeStep1" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep1"]
    assert "SomeStep2" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep2"]
    assert "SomeStep3" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep3"]
예제 #2
0
def test_hyperparam_space():
    """Verify nested and flat hyperparameter keys of a two-stage pipeline.

    The pipeline has an ``AddFeatures`` union (two steps) followed by a
    ``ModelStacking`` (four steps plus a joiner and a judge). After feeding
    the pipeline's hyperparameter space back through ``set_hyperparams``,
    each expected ``container -> step -> parameter`` path must be present in
    the nested hyperparams, and its ``container__step__parameter`` form must
    be present in the keys of ``to_flat_dict()``.
    """

    def make_step(param_name, distribution):
        # One SomeStep whose space holds a single named distribution.
        return SomeStep(
            hyperparams_space=HyperparameterSpace({param_name: distribution}))

    feature_union = AddFeatures([
        make_step("n_components", RandInt(1, 5)),
        make_step("n_components", RandInt(1, 5)),
    ])
    stacking = ModelStacking(
        [
            make_step("n_estimators", RandInt(1, 1000)),
            make_step("n_estimators", RandInt(1, 1000)),
            make_step("max_depth", RandInt(1, 100)),
            make_step("max_depth", RandInt(1, 100)),
        ],
        joiner=NumpyTranspose(),
        judge=make_step("alpha", LogUniform(0.1, 10.0)),
    )
    p = Pipeline([feature_union, stacking])

    p.set_hyperparams(p.get_hyperparams_space())

    hyperparams = p.get_hyperparams()
    flat_hyperparams_keys = hyperparams.to_flat_dict().keys()

    # (container, step, parameter) paths that must exist.
    expected_paths = (
        ("AddFeatures", "SomeStep", "n_components"),
        ("AddFeatures", "SomeStep1", "n_components"),
        ("ModelStacking", "SomeStep", "n_estimators"),
        ("ModelStacking", "SomeStep1", "n_estimators"),
        ("ModelStacking", "SomeStep2", "max_depth"),
        ("ModelStacking", "SomeStep3", "max_depth"),
    )

    # Nested view: every level of each path is reachable.
    for container, step, param in expected_paths:
        assert container in hyperparams
        assert step in hyperparams[container]
        assert param in hyperparams[container][step]

    # Flat view: the same paths joined with a double underscore.
    for path in expected_paths:
        assert "__".join(path) in flat_hyperparams_keys
def main():
    """Fit a feature-union + ridge-stacking pipeline on the Boston data and score it.

    The dataset from ``load_boston()`` is shuffled with a fixed seed, cast to
    float32 and split 75/25 without further shuffling. The pipeline adds PCA
    and FastICA features, then stacks three gradient-boosting regressors and
    a KMeans step under ``RidgeModelStacking``. ``NumpyShapePrinter`` steps
    sit between the stages.

    Returns:
        tuple: ``(y_train_predicted, y_test_predicted, score_train,
        score_test)`` where the scores are R2 values on the train and test
        splits.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        shuffle=False)

    p = Pipeline([
        NumpyShapePrinter(),
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        NumpyShapePrinter(),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            GradientBoostingRegressor(n_estimators=500),
            GradientBoostingRegressor(max_depth=5),
            KMeans(),
        ]),
        NumpyShapePrinter(),
    ])

    print("Fitting on train:")
    p = p.fit(X_train, y_train)
    print("")
    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")
    print("Evaluating transformed train:")
    # r2_score expects (y_true, y_pred); the metric is not symmetric, so the
    # ground truth must come first.
    score_train = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_train)
    print("")
    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)

    # Sanity checks on the example's outputs: one prediction per row of each
    # split, and plain float scores.
    assert y_train_predicted.shape == (379, )
    assert y_test_predicted.shape == (127, )
    assert isinstance(score_train, float)
    assert isinstance(score_test, float)

    return y_train_predicted, y_test_predicted, score_train, score_test
예제 #4
0
def train_neuraxle(X_train, X_test, y_train, y_test, mtype, common_name_model,
                   problemtype, classes, default_featurenames, transform_model,
                   settings, model_session):
    """Train a Neuraxle regression pipeline and pickle it to the working directory.

    Only ``X_train``, ``y_train``, ``mtype`` and ``common_name_model`` are
    read by this function; the remaining parameters are accepted but unused
    here.

    Args:
        X_train: Training inputs passed to ``Pipeline.fit``.
        X_test: Unused.
        y_train: Training targets passed to ``Pipeline.fit``.
        y_test: Unused.
        mtype: ``'regression'``/``'r'`` trains and saves a model;
            ``'classification'``/``'c'`` only prints a notice. Any other
            value does nothing.
        common_name_model: Base name; the model file is
            ``common_name_model + '.pickle'``.
        problemtype, classes, default_featurenames, transform_model,
        settings, model_session: Unused.

    Returns:
        tuple: ``(model_name, model_dir, files)`` where ``model_dir`` is the
        current working directory and ``files`` lists the model file only if
        one was actually written.
    """
    model_name = common_name_model + '.pickle'
    files = []

    if mtype in ['classification', 'c']:
        print('neuraxle currently does not support classsification...')

    elif mtype in ['regression', 'r']:

        p = Pipeline([
            NumpyShapePrinter(),
            AddFeatures([
                PCA(n_components=2),
                FastICA(n_components=2),
            ]),
            NumpyShapePrinter(),
            RidgeModelStacking([
                GradientBoostingRegressor(),
                GradientBoostingRegressor(n_estimators=500),
                GradientBoostingRegressor(max_depth=5),
                KMeans(),
            ]),
            NumpyShapePrinter(),
        ])

        # Fit the pipeline.
        # NOTE(review): an earlier comment described X_train as
        # (batch, different_lengths, n_feature_columns) and y_train as
        # (batch, different_lengths); nothing in this function confirms
        # that shape -- verify against the caller.
        pipeline = p.fit(X_train, y_train)

        # Export the fitted pipeline; the context manager closes the file
        # even if pickling fails part-way.
        print('saving model - %s' % (model_name))
        with open(model_name, 'wb') as f:
            pickle.dump(pipeline, f)

        files.append(model_name)

    model_dir = os.getcwd()

    return model_name, model_dir, files
예제 #5
0
def test_tape_callback():
    """Check the order in which callback steps are recorded during fit_transform.

    Callback steps named "1", "2", "3" run in sequence, then "a" and "b"
    inside an ``AddFeatures`` union, then "4". The tape's recorded names must
    match that order exactly.
    """
    tape = TapeCallbackFunction()

    def recording_step(name):
        # A callback step that reports `name` to the shared tape.
        return TransformCallbackStep(tape.callback, [name])

    steps = [Identity()]
    steps.extend(recording_step(name) for name in ("1", "2", "3"))
    steps.append(AddFeatures([recording_step("a"), recording_step("b")]))
    steps.append(recording_step("4"))
    steps.append(Identity())

    pipeline = Pipeline(steps)
    pipeline.fit_transform(np.ones((1, 1)))

    assert tape.get_name_tape() == ["1", "2", "3", "a", "b", "4"]
# Load the dataset returned by load_boston() and shuffle features and targets
# together with a fixed seed so the run is repeatable.
boston = load_boston()
X, y = shuffle(boston.data, boston.target, random_state=13)
# Cast the features to float32.
X = X.astype(np.float32)
# Hold out 25% of the rows as the test split. shuffle=False keeps the order
# produced by the seeded shuffle above.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    shuffle=False)

# Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be set
# within the classes at their definition if using custom classes, or they could be defined after declaring the
# pipeline using a flat dict or a nested dict.
p = Pipeline([
    AddFeatures([
        SKLearnWrapper(PCA(n_components=2),
                       HyperparameterSpace({"n_components": RandInt(1, 3)})),
        SKLearnWrapper(FastICA(n_components=2),
                       HyperparameterSpace({"n_components": RandInt(1, 3)})),
    ]),
    ModelStacking(
        [
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600),
                    "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })),
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600),
예제 #7
0
from sklearn.utils import shuffle

from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyShapePrinter
from neuraxle.steps.sklearn import SKLearnWrapper, RidgeModelStacking
from neuraxle.union import AddFeatures

# Load the dataset returned by load_boston() and shuffle features and targets
# together with a fixed seed so the run is repeatable.
boston = load_boston()
X, y = shuffle(boston.data, boston.target, random_state=13)
# Cast the features to float32.
X = X.astype(np.float32)
# Hold out 25% of the rows as the test split; shuffle=False keeps the order
# produced by the seeded shuffle above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

# Pipeline: add PCA and FastICA features, then stack three gradient-boosting
# regressors and a KMeans step under RidgeModelStacking. Every scikit-learn
# estimator is wrapped in SKLearnWrapper, and NumpyShapePrinter steps sit
# between the stages.
p = Pipeline([
    NumpyShapePrinter(),
    AddFeatures([
        SKLearnWrapper(PCA(n_components=2)),
        SKLearnWrapper(FastICA(n_components=2)),
    ]),
    NumpyShapePrinter(),
    RidgeModelStacking([
        SKLearnWrapper(GradientBoostingRegressor()),
        SKLearnWrapper(GradientBoostingRegressor(n_estimators=500)),
        SKLearnWrapper(GradientBoostingRegressor(max_depth=5)),
        SKLearnWrapper(KMeans()),
    ]),
    NumpyShapePrinter(),
])

# fit() returns the pipeline, which is rebound to `p`.
print("Fitting on train:")
p = p.fit(X_train, y_train)
print("")
예제 #8
0
def main():
    """Random-search the hyperparameters of a stacking pipeline on the Boston data.

    The dataset from ``load_boston()`` is shuffled with a fixed seed, cast to
    float32 and split 75/25. Each wrapped scikit-learn estimator carries its
    own ``HyperparameterSpace``. ``meta_fit`` runs a ``RandomSearch`` of 10
    iterations validated by 10-fold cross-validation scored with
    ``r2_score``. The resulting pipeline predicts on both splits and the R2
    scores are printed.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be
    # set within the classes at their definition if using custom classes, or they could be defined after declaring
    # the pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])
    print("Meta-fitting on train:")
    p = p.meta_fit(X_train,
                   y_train,
                   metastep=RandomSearch(
                       n_iter=10,
                       higher_score_is_better=True,
                       validation_technique=KFoldCrossValidationWrapper(
                           scoring_function=r2_score, k_fold=10)))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidation(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    print("Evaluating transformed train:")
    # r2_score expects (y_true, y_pred); the metric is not symmetric, so the
    # ground truth must come first.
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
예제 #9
0
def main():
    """Train a stacking pipeline on the Boston data and wrap it in a Flask REST app.

    The dataset from ``load_boston()`` is shuffled with a fixed seed, cast to
    float32 and split 75/25. A pipeline of PCA/FastICA features followed by
    ``RidgeModelStacking`` is fitted, R2 scores for both splits are printed,
    and the fitted pipeline is exposed through ``FlaskRestApiWrapper`` with a
    custom JSON decoder and encoder. The app is built but not started.

    Returns:
        The object returned by ``FlaskRestApiWrapper(...).get_app()``, which
        is asserted to be a ``Flask`` instance.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        shuffle=False)

    pipeline = Pipeline([
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            KMeans(),
        ]),
    ])

    print("Fitting on train:")
    pipeline = pipeline.fit(X_train, y_train)
    print("")
    print("Transforming train and test:")
    y_train_predicted = pipeline.transform(X_train)
    y_test_predicted = pipeline.transform(X_test)
    print("")
    print("Evaluating transformed train:")
    # r2_score expects (y_true, y_pred); the metric is not symmetric, so the
    # ground truth must come first.
    score = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score)
    print("")
    print("Evaluating transformed test:")
    score = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score)
    print("Deploying the application by routing data to the transform method:")

    class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
        """This is a custom JSON decoder class that precedes the pipeline's transformation."""
        def decode(self, data_inputs):
            """
            Transform a JSON list object into an np.array object.

            :param data_inputs: json object
            :return: np array for data inputs
            """
            return np.array(data_inputs)

    class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
        """This is a custom JSON response encoder class for converting the pipeline's transformation outputs."""
        def encode(self, data_inputs) -> dict:
            """
            Convert predictions to a dict for creating a JSON Response object.

            :param data_inputs: the pipeline's outputs (any iterable of predictions)
            :return: a dict with a single 'predictions' key holding the outputs as a list
            """
            return {'predictions': list(data_inputs)}

    app = FlaskRestApiWrapper(
        json_decoder=CustomJSONDecoderFor2DArray(),
        wrapped=pipeline,
        json_encoder=CustomJSONEncoderOfOutputs()).get_app()

    print("Finally, run the app by uncommenting this next line of code:")

    # app.run(debug=False, port=5000)

    print("You can now call your pipeline over HTTP with a (JSON) REST API.")

    # test_predictictions = requests.post(
    #     url='http://127.0.0.1:5000/',
    #     json=X_test.tolist()
    # )
    # print(test_predictictions)
    # print(test_predictictions.content)

    assert isinstance(app, Flask)

    return app
예제 #10
0
def main(tmpdir):
    """Tune a stacking pipeline on the Boston data with ``AutoML`` and score the best model.

    The dataset from ``load_boston()`` is shuffled with a fixed seed, cast to
    float32 and split 75/25. ``AutoML`` runs 10 trials with a 20% validation
    split, scored by mean squared error (lower is better). The best model is
    then used to predict on both splits and the R2 scores are printed.

    Args:
        tmpdir: Directory used (via ``str(tmpdir)``) as the AutoML cache
            folder and as the in-memory hyperparams repository's cache folder.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be
    # set within the classes at their definition if using custom classes, or they could be defined after declaring
    # the pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300), "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        refit_trial=True,
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        cache_folder_when_no_handle=str(tmpdir),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
        hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    )

    random_search = auto_ml.fit(X_train, y_train)
    p = random_search.get_best_model()
    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    print("Evaluating transformed train:")
    # r2_score expects (y_true, y_pred); the metric is not symmetric, so the
    # ground truth must come first.
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
예제 #11
0
def test_deep_learning_pipeline():
    """Check the metric bookkeeping of ``DeepLearningPipeline`` on the Boston data.

    A feature-union + model-stacking pipeline is wrapped in a
    ``DeepLearningPipeline`` configured from the module-level
    ``VALIDATION_SIZE``, ``BATCH_SIZE`` and ``N_EPOCHS``. After
    ``fit_transform`` the test asserts that:

    * one epoch-level 'mse' value exists per epoch for train and validation;
    * the number of batch-level 'mse' values matches the expected batch count;
    * the last train 'mse' is lower than the last validation 'mse' and below 1,
      at both batch and epoch level.
    """
    # Given
    dataset = load_boston()
    data_inputs, expected_outputs = shuffle(
        dataset.data, dataset.target, random_state=13)
    expected_outputs = expected_outputs.astype(np.float32)
    data_inputs = data_inputs.astype(np.float32)

    feature_union = AddFeatures([
        SKLearnWrapper(
            PCA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)})),
        SKLearnWrapper(
            FastICA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)})),
    ])
    stacking = ModelStacking(
        [
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600),
                    "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })),
            SKLearnWrapper(
                KMeans(n_clusters=7),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
        ],
        joiner=NumpyTranspose(),
        judge=SKLearnWrapper(
            Ridge(),
            HyperparameterSpace({
                "alpha": LogUniform(0.7, 1.4),
                "fit_intercept": Boolean()
            })),
    )
    wrapped_pipeline = Pipeline([feature_union, stacking])

    deep_pipeline = DeepLearningPipeline(
        wrapped_pipeline,
        validation_size=VALIDATION_SIZE,
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
    )

    # When
    fitted, _outputs = deep_pipeline.fit_transform(data_inputs, expected_outputs)

    # Then
    train_batch_mse = fitted.get_batch_metric_train('mse')
    train_epoch_mse = fitted.get_epoch_metric_train('mse')
    valid_batch_mse = fitted.get_batch_metric_validation('mse')
    valid_epoch_mse = fitted.get_epoch_metric_validation('mse')

    # One epoch-level value per epoch.
    assert len(train_epoch_mse) == N_EPOCHS
    assert len(valid_epoch_mse) == N_EPOCHS

    # Expected number of batch-level values: batches per epoch (rounded up)
    # for each share of the data, times the number of epochs.
    n_train_batches = math.ceil(
        (len(data_inputs) / BATCH_SIZE) * (1 - VALIDATION_SIZE)) * N_EPOCHS
    n_valid_batches = math.ceil(
        (len(data_inputs) / BATCH_SIZE) * VALIDATION_SIZE) * N_EPOCHS

    assert len(train_batch_mse) == n_train_batches
    assert len(valid_batch_mse) == n_valid_batches

    # Final values: train error below validation error, and below 1.
    assert train_batch_mse[-1] < valid_batch_mse[-1]
    assert train_epoch_mse[-1] < valid_epoch_mse[-1]
    assert train_batch_mse[-1] < 1
    assert train_epoch_mse[-1] < 1