def __init__(self, brothers):
    """
    Initialise the stacking step with a Ridge regressor wrapped for sklearn.

    The wrapped Ridge exposes ``alpha`` (log-uniform in [0.1, 10.0]) and
    ``fit_intercept`` (boolean) as its hyperparameter space, and the
    sub-model outputs are joined with ``NumpyTranspose``.

    :param brothers: forwarded unchanged as the first argument of
        ``ModelStacking.__init__`` (presumably the stacked sub-models --
        confirm against ``ModelStacking``)
    """
    ridge = Ridge()
    ridge_space = HyperparameterSpace({
        "alpha": LogUniform(0.1, 10.0),
        "fit_intercept": Boolean(),
    })
    wrapped_ridge = SKLearnWrapper(ridge, ridge_space)

    ModelStacking.__init__(
        self,
        brothers,
        wrapped_ridge,
        joiner=NumpyTranspose(),
    )
def test_hyperparam_space():
    """
    Check that hyperparameters set on a nested pipeline are retrievable per step.

    The pipeline holds an ``AddFeatures`` with two ``SomeStep`` children and a
    ``ModelStacking`` with four ``SomeStep`` children. Same-named siblings are
    expected under the keys ``SomeStep``, ``SomeStep1``, ``SomeStep2``, ...
    in declaration order, and every child is checked for the hyperparameter
    it was declared with.
    """
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
        ]),
        ModelStacking(
            [
                SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
                SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
                SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
                SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
            ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)})),
        ),
    ])

    # NOTE(review): the space itself (not a sample drawn from it) is fed back
    # as hyperparams; this only exercises the nested key structure.
    rvsed = p.get_hyperparams_space()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()

    # AddFeatures: both children carry "n_components".
    assert "AddFeatures" in hyperparams
    assert "SomeStep" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert "SomeStep1" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]

    # ModelStacking: every child is checked for presence *and* for its own
    # hyperparameter, so a lookup never relies on an unchecked key.
    assert "ModelStacking" in hyperparams
    assert "SomeStep" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep"]
    assert "SomeStep1" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep1"]
    assert "SomeStep2" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep2"]
    assert "SomeStep3" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep3"]
def test_model_stacking_fit_transform():
    """
    Fit-transform a one-step pipeline holding a ModelStacking and check the output shape.

    A gradient-boosting regressor and a KMeans are stacked, joined with
    ``NumpyTranspose`` and judged by a Ridge. Inputs of shape (379, 13) and
    targets of shape (379, 1) come from ``_create_data``; the outputs must
    have the targets' shape.
    """
    boosted_trees = SKLearnWrapper(
        GradientBoostingRegressor(),
        HyperparameterSpace({
            "n_estimators": RandInt(50, 600),
            "max_depth": RandInt(1, 10),
            "learning_rate": LogUniform(0.07, 0.7)
        })
    )
    clustering = SKLearnWrapper(
        KMeans(),
        HyperparameterSpace({"n_clusters": RandInt(5, 10)})
    )
    joiner = NumpyTranspose()
    ridge_judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean()
        })
    )
    stacking = ModelStacking(
        [boosted_trees, clustering],
        joiner=joiner,
        judge=ridge_judge,
    )
    model_stacking = Pipeline([stacking])

    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    fitted = model_stacking.fit_transform(data_inputs, expected_outputs)
    model_stacking, outputs = fitted

    assert outputs.shape == expected_outputs_shape
def test_hyperparam_space():
    """
    Check nested and flattened hyperparameter keys of a two-stage pipeline.

    The pipeline is an ``AddFeatures`` (two ``SomeStep`` children) followed
    by a ``ModelStacking`` (four ``SomeStep`` children plus a judge). The
    hyperparameters read back must be addressable both as nested mappings
    and through ``to_flat_dict()`` keys joined with ``__``.
    """

    def _some_step(param_name, distribution):
        # One SomeStep whose space holds a single named distribution.
        return SomeStep(hyperparams_space=HyperparameterSpace(
            {param_name: distribution}))

    feature_union = AddFeatures([
        _some_step("n_components", RandInt(1, 5)),
        _some_step("n_components", RandInt(1, 5)),
    ])
    stacking = ModelStacking(
        [
            _some_step("n_estimators", RandInt(1, 1000)),
            _some_step("n_estimators", RandInt(1, 1000)),
            _some_step("max_depth", RandInt(1, 100)),
            _some_step("max_depth", RandInt(1, 100)),
        ],
        joiner=NumpyTranspose(),
        judge=_some_step("alpha", LogUniform(0.1, 10.0)),
    )
    p = Pipeline([feature_union, stacking])

    space = p.get_hyperparams_space()
    p.set_hyperparams(space)

    hyperparams = p.get_hyperparams()
    flat_hyperparams_keys = hyperparams.to_flat_dict().keys()

    # Nested view: AddFeatures children.
    assert 'AddFeatures' in hyperparams
    for child in ('SomeStep', 'SomeStep1'):
        assert child in hyperparams["AddFeatures"]
        assert "n_components" in hyperparams["AddFeatures"][child]

    # Nested view: ModelStacking children, in declaration order.
    assert 'ModelStacking' in hyperparams
    stacked_children = (
        ('SomeStep', 'n_estimators'),
        ('SomeStep1', 'n_estimators'),
        ('SomeStep2', 'max_depth'),
        ('SomeStep3', 'max_depth'),
    )
    for child, param_name in stacked_children:
        assert child in hyperparams["ModelStacking"]
        assert param_name in hyperparams["ModelStacking"][child]

    # Flat view: the same entries addressed with "__"-joined keys.
    expected_flat_keys = (
        'AddFeatures__SomeStep1__n_components',
        'AddFeatures__SomeStep__n_components',
        'ModelStacking__SomeStep__n_estimators',
        'ModelStacking__SomeStep1__n_estimators',
        'ModelStacking__SomeStep2__max_depth',
        'ModelStacking__SomeStep3__max_depth',
    )
    for flat_key in expected_flat_keys:
        assert flat_key in flat_hyperparams_keys
# Three identically-configured gradient-boosting regressors plus one KMeans,
# joined with NumpyTranspose and judged by a Ridge regressor. Each wrapper is
# built from fresh estimator and space objects.
ModelStacking(
    [
        SKLearnWrapper(
            GradientBoostingRegressor(),
            HyperparameterSpace({
                "n_estimators": RandInt(50, 600),
                "max_depth": RandInt(1, 10),
                "learning_rate": LogUniform(0.07, 0.7)
            })
        )
        for _ in range(3)
    ] + [
        SKLearnWrapper(
            KMeans(),
            HyperparameterSpace({"n_clusters": RandInt(5, 10)})
        ),
    ],
    joiner=NumpyTranspose(),
    judge=SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean()
        })
    ),
)
def main():
    """
    Meta-fit a feature-union + model-stacking pipeline and print its R2 scores.

    The dataset returned by ``load_boston`` is shuffled with a fixed seed,
    cast to float32 and split 75/25 without reshuffling. The pipeline's
    hyperparameters are then searched with ``RandomSearch`` (10 iterations,
    10-fold cross-validation scored with ``r2_score``), and the resulting
    pipeline is evaluated on both the train and the test split.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })
                ),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})
                ),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })
            ),
        )
    ])

    print("Meta-fitting on train:")
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(
            scoring_function=r2_score, k_fold=10)
    ))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidation(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score is not symmetric: its signature is (y_true, y_pred), so the
    # ground truth must come first for the reported score to be the R2.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
def main(tmpdir):
    """
    Search hyperparameters of a stacked pipeline with AutoML and print R2 scores.

    The dataset returned by ``load_boston`` is shuffled with a fixed seed,
    cast to float32 and split 75/25 without reshuffling. ``AutoML`` runs
    10 trials scored with mean squared error (lower is better), and the best
    model it returns is evaluated on both the train and the test split.

    :param tmpdir: location passed (as ``str(tmpdir)``) both as the AutoML
        cache folder and as the hyperparams repository cache folder
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300),
                    "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        refit_trial=True,
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        cache_folder_when_no_handle=str(tmpdir),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
        hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    )

    random_search = auto_ml.fit(X_train, y_train)
    p = random_search.get_best_model()

    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score is not symmetric: its signature is (y_true, y_pred), so the
    # ground truth must come first for the reported score to be the R2.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
def test_deep_learning_pipeline():
    """
    Train a stacked pipeline through DeepLearningPipeline and check its MSE bookkeeping.

    Verifies that one epoch metric is recorded per epoch, that the number of
    batch metrics matches the train/validation split sizes, and that the
    final train MSE is below both the final validation MSE and 1.
    """
    # Given
    dataset = load_boston()
    data_inputs, expected_outputs = shuffle(dataset.data, dataset.target, random_state=13)
    expected_outputs = expected_outputs.astype(np.float32)
    data_inputs = data_inputs.astype(np.float32)

    feature_union = AddFeatures([
        SKLearnWrapper(
            PCA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)})
        ),
        SKLearnWrapper(
            FastICA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)})
        ),
    ])
    stacking = ModelStacking(
        [
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600),
                    "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(n_clusters=7),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
        joiner=NumpyTranspose(),
        judge=SKLearnWrapper(
            Ridge(),
            HyperparameterSpace({
                "alpha": LogUniform(0.7, 1.4),
                "fit_intercept": Boolean()
            })
        ),
    )
    wrapped_pipeline = Pipeline([feature_union, stacking])

    trainer = DeepLearningPipeline(
        wrapped_pipeline,
        validation_size=VALIDATION_SIZE,
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
    )

    # When
    trainer, _ = trainer.fit_transform(data_inputs, expected_outputs)

    # Then
    train_batch_mse = trainer.get_batch_metric_train('mse')
    train_epoch_mse = trainer.get_epoch_metric_train('mse')
    validation_batch_mse = trainer.get_batch_metric_validation('mse')
    validation_epoch_mse = trainer.get_epoch_metric_validation('mse')

    # One epoch-level metric per epoch, for each split.
    assert len(train_epoch_mse) == N_EPOCHS
    assert len(validation_epoch_mse) == N_EPOCHS

    # Batch metrics: batches per epoch, scaled by each split's share of the
    # data and rounded up, times the number of epochs.
    batches_per_epoch = len(data_inputs) / BATCH_SIZE
    expected_train_batches = math.ceil(
        batches_per_epoch * (1 - VALIDATION_SIZE)) * N_EPOCHS
    expected_validation_batches = math.ceil(
        batches_per_epoch * VALIDATION_SIZE) * N_EPOCHS

    assert len(train_batch_mse) == expected_train_batches
    assert len(validation_batch_mse) == expected_validation_batches

    final_validation_batch_mse = validation_batch_mse[-1]
    final_train_batch_mse = train_batch_mse[-1]
    final_train_epoch_mse = train_epoch_mse[-1]
    final_validation_epoch_mse = validation_epoch_mse[-1]

    assert final_train_batch_mse < final_validation_batch_mse
    assert final_train_epoch_mse < final_validation_epoch_mse
    assert final_train_batch_mse < 1
    assert final_train_epoch_mse < 1