def test_hyperparam_space():
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking([
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"max_depth": RandInt(1, 100)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"max_depth": RandInt(1, 100)}))
        ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace(
                {"alpha": LogUniform(0.1, 10.0)})))
    ])

    # Sample a concrete value from each distribution in the space, then set
    # the sampled values back on the pipeline as hyperparameters.
    rvsed = p.get_hyperparams_space().rvs()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()
    flat_hyperparams_keys = hyperparams.to_flat_dict().keys()

    # Duplicate step names are disambiguated with numeric suffixes
    # (SomeStep, SomeStep1, SomeStep2, ...).
    assert "AddFeatures" in hyperparams
    assert "SomeStep" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert "SomeStep1" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]
    assert "ModelStacking" in hyperparams
    assert "SomeStep" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep"]
    assert "SomeStep1" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep1"]
    assert "SomeStep2" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep2"]
    assert "SomeStep3" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep3"]
    assert "AddFeatures__SomeStep1__n_components" in flat_hyperparams_keys
    assert "AddFeatures__SomeStep__n_components" in flat_hyperparams_keys
    assert "ModelStacking__SomeStep__n_estimators" in flat_hyperparams_keys
    assert "ModelStacking__SomeStep1__n_estimators" in flat_hyperparams_keys
    assert "ModelStacking__SomeStep2__max_depth" in flat_hyperparams_keys
    assert "ModelStacking__SomeStep3__max_depth" in flat_hyperparams_keys
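# The nested and flat hyperparameter views asserted above are two encodings of
# the same structure: nesting levels are joined with "__" in the flat keys.
# A minimal sketch of that mapping, assuming HyperparameterSamples is
# importable from neuraxle.hyperparams.space and that its constructor accepts
# a nested dict:
from neuraxle.hyperparams.space import HyperparameterSamples

nested = HyperparameterSamples({"AddFeatures": {"SomeStep": {"n_components": 3}}})
assert "AddFeatures__SomeStep__n_components" in nested.to_flat_dict()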
def main():
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    p = Pipeline([
        NumpyShapePrinter(),
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        NumpyShapePrinter(),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            GradientBoostingRegressor(n_estimators=500),
            GradientBoostingRegressor(max_depth=5),
            KMeans(),
        ]),
        NumpyShapePrinter(),
    ])

    print("Fitting on train:")
    p = p.fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")

    print("Evaluating transformed train:")
    # r2_score expects the ground truth first: r2_score(y_true, y_pred).
    score_train = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_train)
    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)

    assert y_train_predicted.shape == (379,)
    assert y_test_predicted.shape == (127,)
    assert isinstance(score_train, float)
    assert isinstance(score_test, float)

    return y_train_predicted, y_test_predicted, score_train, score_test
import os
import pickle


def train_neuraxle(X_train, X_test, y_train, y_test, mtype, common_name_model, problemtype, classes, default_featurenames, transform_model, settings, model_session):

    # get train and test data
    model_name = common_name_model + '.pickle'
    files = list()

    if mtype in ['classification', 'c']:
        print('neuraxle currently does not support classification...')

    elif mtype in ['regression', 'r']:
        p = Pipeline([
            NumpyShapePrinter(),
            AddFeatures([
                PCA(n_components=2),
                FastICA(n_components=2),
            ]),
            NumpyShapePrinter(),
            RidgeModelStacking([
                GradientBoostingRegressor(),
                GradientBoostingRegressor(n_estimators=500),
                GradientBoostingRegressor(max_depth=5),
                KMeans(),
            ]),
            NumpyShapePrinter(),
        ])

        # Fitting and evaluating the pipeline.
        # X_train data shape: (batch, different_lengths, n_feature_columns)
        # y_train data shape: (batch, different_lengths)
        pipeline = p.fit(X_train, y_train)

        # Export the fitted pipeline as a pickle file.
        print('saving model - %s' % (model_name))
        with open(model_name, 'wb') as f:
            pickle.dump(pipeline, f)
        files.append(model_name)

    model_dir = os.getcwd()

    return model_name, model_dir, files
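# Loading the exported model back for inference is the mirror of the dump
# above. A minimal sketch; the file name and X_test are placeholders standing
# in for the saved model_name and some held-out data:
import pickle

with open('common_name_model.pickle', 'rb') as f:
    loaded_pipeline = pickle.load(f)
predictions = loaded_pipeline.predict(X_test)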
def test_tape_callback():
    # AddFeatures feeds the same input to each of its branches, so "a" and "b"
    # are recorded between "3" and "4".
    expected_tape = ["1", "2", "3", "a", "b", "4"]
    tape = TapeCallbackFunction()

    p = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        TransformCallbackStep(tape.callback, ["3"]),
        AddFeatures([
            TransformCallbackStep(tape.callback, ["a"]),
            TransformCallbackStep(tape.callback, ["b"]),
        ]),
        TransformCallbackStep(tape.callback, ["4"]),
        Identity()
    ])

    p.fit_transform(np.ones((1, 1)))

    assert tape.get_name_tape() == expected_tape
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyShapePrinter
from neuraxle.steps.sklearn import SKLearnWrapper, RidgeModelStacking
from neuraxle.union import AddFeatures

boston = load_boston()
X, y = shuffle(boston.data, boston.target, random_state=13)
X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

p = Pipeline([
    NumpyShapePrinter(),
    AddFeatures([
        # SKLearnWrapper makes scikit-learn estimators usable as Neuraxle steps.
        SKLearnWrapper(PCA(n_components=2)),
        SKLearnWrapper(FastICA(n_components=2)),
    ]),
    NumpyShapePrinter(),
    RidgeModelStacking([
        SKLearnWrapper(GradientBoostingRegressor()),
        SKLearnWrapper(GradientBoostingRegressor(n_estimators=500)),
        SKLearnWrapper(GradientBoostingRegressor(max_depth=5)),
        SKLearnWrapper(KMeans()),
    ]),
    NumpyShapePrinter(),
])

print("Fitting on train:")
p = p.fit(X_train, y_train)
print("")
def main():
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could also be
    # set within the classes at their definition if using custom classes, or defined after declaring the pipeline
    # using a flat dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])

    print("Meta-fitting on train:")
    p = p.meta_fit(X_train, y_train,
                   metastep=RandomSearch(
                       n_iter=10,
                       higher_score_is_better=True,
                       validation_technique=KFoldCrossValidationWrapper(
                           scoring_function=r2_score, k_fold=10)))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")

    print("Evaluating transformed train:")
    # r2_score expects the ground truth first: r2_score(y_true, y_pred).
    score_train = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_train)
    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
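# As the comment in main() notes, the space could instead be attached after the
# pipeline is declared. A minimal sketch using a flat dict: the "__"-separated
# key follows the step names Neuraxle assigns, so the exact key below is
# illustrative and would need to match your pipeline's actual step names:
p.set_hyperparams_space(HyperparameterSpace({
    "AddFeatures__SKLearnWrapper_PCA__n_components": RandInt(1, 3),
}))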
def main():
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    pipeline = Pipeline([
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            KMeans(),
        ]),
    ])

    print("Fitting on train:")
    pipeline = pipeline.fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = pipeline.transform(X_train)
    y_test_predicted = pipeline.transform(X_test)
    print("")

    print("Evaluating transformed train:")
    # r2_score expects the ground truth first: r2_score(y_true, y_pred).
    score = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score)
    print("")

    print("Evaluating transformed test:")
    score = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score)

    print("Deploying the application by routing data to the transform method:")

    class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
        """This is a custom JSON decoder class that precedes the pipeline's transformation."""

        def decode(self, data_inputs):
            """
            Transform a JSON list object into an np.array object.

            :param data_inputs: json object
            :return: np array for data inputs
            """
            return np.array(data_inputs)

    class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
        """This is a custom JSON response encoder class for converting the pipeline's transformation outputs."""

        def encode(self, data_inputs) -> dict:
            """
            Convert predictions to a dict for creating a JSON Response object.

            :param data_inputs: transformed data outputs of the pipeline
            :return: a dict of predictions
            """
            return {'predictions': list(data_inputs)}

    app = FlaskRestApiWrapper(
        json_decoder=CustomJSONDecoderFor2DArray(),
        wrapped=pipeline,
        json_encoder=CustomJSONEncoderOfOutputs()).get_app()

    print("Finally, run the app by uncommenting this next line of code:")
    # app.run(debug=False, port=5000)
    print("You can now call your pipeline over HTTP with a (JSON) REST API.")

    # test_predictions = requests.post(
    #     url='http://127.0.0.1:5000/',
    #     json=X_test.tolist()
    # )
    # print(test_predictions)
    # print(test_predictions.content)

    assert isinstance(app, Flask)

    return app
def main(tmpdir):
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could also be
    # set within the classes at their definition if using custom classes, or defined after declaring the pipeline
    # using a flat dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300),
                    "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        refit_trial=True,
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        cache_folder_when_no_handle=str(tmpdir),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
        hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    )

    random_search = auto_ml.fit(X_train, y_train)
    p = random_search.get_best_model()
    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")

    print("Evaluating transformed train:")
    # r2_score expects the ground truth first: r2_score(y_true, y_pred).
    score_train = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_train)
    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
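# To inspect which hyperparameters the AutoML search settled on, the flat view
# of the best pipeline's hyperparameters can be printed. A minimal sketch that
# would go right after the get_best_model() call in main() above, reusing the
# get_hyperparams()/to_flat_dict() accessors seen in the earlier test:
best_hyperparams = p.get_hyperparams().to_flat_dict()
for name, value in best_hyperparams.items():
    print(name, "=", value)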
def test_deep_learning_pipeline():
    # Given
    # (VALIDATION_SIZE, BATCH_SIZE and N_EPOCHS are module-level constants
    # defined elsewhere in the test file.)
    boston = load_boston()
    data_inputs, expected_outputs = shuffle(boston.data, boston.target, random_state=13)
    expected_outputs = expected_outputs.astype(np.float32)
    data_inputs = data_inputs.astype(np.float32)

    pipeline = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(n_clusters=7),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])

    p = DeepLearningPipeline(
        pipeline,
        validation_size=VALIDATION_SIZE,
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
    )

    # When
    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    # Then
    batch_mse_train = p.get_batch_metric_train('mse')
    epoch_mse_train = p.get_epoch_metric_train('mse')
    batch_mse_validation = p.get_batch_metric_validation('mse')
    epoch_mse_validation = p.get_epoch_metric_validation('mse')

    assert len(epoch_mse_train) == N_EPOCHS
    assert len(epoch_mse_validation) == N_EPOCHS

    # One batch metric is recorded per mini-batch, per epoch, so the expected
    # counts scale with the train/validation split sizes.
    expected_len_batch_mse_train = math.ceil(
        (len(data_inputs) / BATCH_SIZE) * (1 - VALIDATION_SIZE)) * N_EPOCHS
    expected_len_batch_mse_validation = math.ceil(
        (len(data_inputs) / BATCH_SIZE) * VALIDATION_SIZE) * N_EPOCHS
    assert len(batch_mse_train) == expected_len_batch_mse_train
    assert len(batch_mse_validation) == expected_len_batch_mse_validation

    last_batch_mse_validation = batch_mse_validation[-1]
    last_batch_mse_train = batch_mse_train[-1]
    last_epoch_mse_train = epoch_mse_train[-1]
    last_epoch_mse_validation = epoch_mse_validation[-1]

    assert last_batch_mse_train < last_batch_mse_validation
    assert last_epoch_mse_train < last_epoch_mse_validation
    assert last_batch_mse_train < 1
    assert last_epoch_mse_train < 1