def create_test_case_invalid_step_choosen():
    """Build a test case whose hyperparams enable a step that does not exist.

    The pipeline only contains the steps ``'a'`` and ``'b'``, while the
    hyperparams set the ``enabled`` flag of a step named ``'c'``.

    :return: the configured NeuraxleTestCase
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()

    step_a = TransformCallbackStep(a_callback, transform_function=lambda di: di * 2)
    step_b = TransformCallbackStep(b_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    # 'c' is deliberately not one of the steps declared above.
    hyperparams = {
        'ChooseOneOrManyStepsOf__c__enabled': True,
        'ChooseOneOrManyStepsOf__b__enabled': False,
    }
    hyperparams_space = {
        'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
        'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
    }

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, b_callback],
        expected_callbacks_data=[DATA_INPUTS, DATA_INPUTS],
        hyperparams=hyperparams,
        hyperparams_space=hyperparams_space,
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18]),
    )
def create_test_case_fit_multiple_steps_choosen():
    """Build a fit test case in which both steps 'a' and 'b' are enabled.

    Each step receives two tape callbacks (``a``/``c`` for step ``'a'`` and
    ``b``/``d`` for step ``'b'``) and doubles its data inputs on transform.

    :return: the configured NeuraxleTestCase
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    step_a = FitTransformCallbackStep(a_callback, c_callback, transform_function=lambda di: di * 2)
    step_b = FitTransformCallbackStep(b_callback, d_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    # Expected tape content, listed in the same order as `callbacks` below.
    expected_callbacks_data = [
        [],
        (DATA_INPUTS, EXPECTED_OUTPUTS),
        [],
        (DATA_INPUTS, EXPECTED_OUTPUTS),
    ]
    hyperparams = {
        'ChooseOneOrManyStepsOf__a__enabled': True,
        'ChooseOneOrManyStepsOf__b__enabled': True,
    }
    hyperparams_space = {
        'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
        'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
    }

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=expected_callbacks_data,
        hyperparams=hyperparams,
        hyperparams_space=hyperparams_space,
        expected_processed_outputs=np.array(
            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
        ),
    )
def test_model_stacking_fit_transform():
    """Check that fit_transform on a ModelStacking pipeline yields outputs of the expected shape."""
    gradient_boosting = SKLearnWrapper(
        GradientBoostingRegressor(),
        HyperparameterSpace({
            "n_estimators": RandInt(50, 600),
            "max_depth": RandInt(1, 10),
            "learning_rate": LogUniform(0.07, 0.7),
        }),
    )
    k_means = SKLearnWrapper(
        KMeans(),
        HyperparameterSpace({"n_clusters": RandInt(5, 10)}),
    )
    ridge_judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean(),
        }),
    )
    model_stacking = Pipeline([
        ModelStacking(
            [gradient_boosting, k_means],
            joiner=NumpyTranspose(),
            judge=ridge_judge,
        )
    ])

    data_inputs_shape = (379, 13)
    expected_outputs_shape = (379, 1)
    # Inputs are created before the expected outputs, as in the original call order.
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking, outputs = model_stacking.fit_transform(data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
def __init__(self, brothers):
    """Initialise the stacking step with a Ridge judge and a NumpyTranspose joiner.

    :param brothers: forwarded unchanged as the first argument of the parent constructor
    """
    ridge_judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.1, 10.0),
            "fit_intercept": Boolean(),
        }),
    )
    super().__init__(brothers, ridge_judge, joiner=NumpyTranspose())
def __init__(
        self,
        wrapped: BaseTransformer,
        enabled: bool = True,
        nullified_return_value=None,
        cache_folder_when_no_handle=None,
        use_hyperparameter_space=True,
        nullify_hyperparams=True
):
    """Wrap a step and register its ``enabled`` hyperparameter.

    :param wrapped: the step to wrap
    :param enabled: initial value stored under OPTIONAL_ENABLED_HYPERPARAM
    :param nullified_return_value: stored on the instance; an empty list is used when None
    :param cache_folder_when_no_handle: forwarded to ForceHandleOnlyMixin.__init__
    :param use_hyperparameter_space: when falsy, an empty dict is used as the hyperparameter space
    :param nullify_hyperparams: stored on the instance as-is
    """
    if use_hyperparameter_space:
        hyperparameter_space = HyperparameterSpace({OPTIONAL_ENABLED_HYPERPARAM: Boolean()})
    else:
        hyperparameter_space = {}

    initial_hyperparams = HyperparameterSamples({OPTIONAL_ENABLED_HYPERPARAM: enabled})
    MetaStep.__init__(
        self,
        hyperparams=initial_hyperparams,
        hyperparams_space=hyperparameter_space,
        wrapped=wrapped
    )
    ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)

    # A fresh list is created per instance when no value was supplied.
    self.nullified_return_value = [] if nullified_return_value is None else nullified_return_value
    self.nullify_hyperparams = nullify_hyperparams
from neuraxle.base import MetaStepMixin, BaseStep, NonFittableMixin, NonTransformableMixin
from neuraxle.hyperparams.distributions import RandInt, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples
from neuraxle.steps.loop import StepClonerForEachDataInput
from testing.test_pipeline import SomeStep

# Hyperparameter names.
SOME_STEP_HP_KEY = 'somestep_hyperparam'
SOME_STEP_HP = "SomeStep__somestep_hyperparam"
META_STEP_HP = 'metastep_hyperparam'

# Hyperparameter values.
SOME_STEP_HP_VALUE = 2
META_STEP_HP_VALUE = 1

# Integer distributions.
RAND_INT_SOME_STEP = RandInt(-10, 0)
RAND_INT_STEP_CLONER = RandInt(0, 10)

# A one-entry space and a matching one-entry sample sharing the same key.
HYPE_SPACE = HyperparameterSpace({
    "a__test": Boolean(),
})
HYPE_SAMPLE = HyperparameterSamples({
    "a__test": True,
})


class SomeMetaStepMixin(NonTransformableMixin, NonFittableMixin, MetaStepMixin, BaseStep):
    """Meta step assembled purely from mixins; it adds no behavior of its own."""


class SomeStepInverseTransform(SomeStep):
    """SomeStep variant whose overridden methods return fixed marker strings."""

    def fit_transform(self, data_inputs, expected_outputs=None):
        """Ignore the arguments and return ``(self, 'fit_transform')``."""
        return self, 'fit_transform'

    def inverse_transform(self, processed_outputs):
        """Ignore the argument and return ``'inverse_transform'``."""
        return 'inverse_transform'
SKLearnWrapper( GradientBoostingRegressor(), HyperparameterSpace({ "n_estimators": RandInt(50, 600), "max_depth": RandInt(1, 10), "learning_rate": LogUniform(0.07, 0.7) })), SKLearnWrapper( KMeans(), HyperparameterSpace({"n_clusters": RandInt(5, 10)})), ], joiner=NumpyTranspose(), judge=SKLearnWrapper( Ridge(), HyperparameterSpace({ "alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean() })), ) ]) print("Meta-fitting on train:") p = p.meta_fit(X_train, y_train, metastep=RandomSearch(n_iter=10, higher_score_is_better=True, validation_technique=KFoldCrossValidation( scoring_function=r2_score, k_fold=10))) # Here is an alternative way to do it, more "pipeliney": # p = RandomSearch( # n_iter=15,
def main():
    """Meta-fit a feature-union + model-stacking pipeline on Boston housing and print R2 scores.

    The data is shuffled with a fixed seed, split 75/25 into train and test,
    a random search with k-fold cross validation picks the hyperparameters,
    and the R2 score of the resulting pipeline is printed for both splits.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })
                ),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})
                ),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })
            ),
        )
    ])

    print("Meta-fitting on train:")
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=10)
    ))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidation(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score expects (y_true, y_pred); the score is not symmetric in its arguments.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
def main():
    """Run an AutoML loop that picks one of five classifiers, then print the test accuracy.

    The best model found by the loop is used to predict on the test split, and
    the ``cache`` folder used by the hyperparams repository is removed at the end.
    """
    # Define classification models, and hyperparams.
    # See also HyperparameterSpace documentation : https://www.neuraxle.org/stable/api/neuraxle.hyperparams.space.html#neuraxle.hyperparams.space.HyperparameterSpace

    decision_tree_space = HyperparameterSpace({
        'criterion': Choice(['gini', 'entropy']),
        'splitter': Choice(['best', 'random']),
        'min_samples_leaf': RandInt(2, 5),
        'min_samples_split': RandInt(2, 4),
    })
    decision_tree_classifier = SKLearnWrapper(DecisionTreeClassifier(), decision_tree_space)

    extra_tree_space = HyperparameterSpace({
        'criterion': Choice(['gini', 'entropy']),
        'splitter': Choice(['best', 'random']),
        'min_samples_leaf': RandInt(2, 5),
        'min_samples_split': RandInt(2, 4),
    })
    extra_tree_classifier = SKLearnWrapper(ExtraTreeClassifier(), extra_tree_space)

    ridge_space = HyperparameterSpace({
        'alpha': Choice([0.0, 1.0, 10.0, 100.0]),
        'fit_intercept': Boolean(),
        'normalize': Boolean(),
    })
    ridge_classifier = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(RidgeClassifier(), ridge_space),
    ]).set_name('RidgeClassifier')

    logistic_regression_space = HyperparameterSpace({
        'C': LogUniform(0.01, 10.0),
        'fit_intercept': Boolean(),
        'penalty': Choice(['none', 'l2']),
        'max_iter': RandInt(20, 200),
    })
    logistic_regression = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(LogisticRegression(), logistic_regression_space),
    ]).set_name('LogisticRegression')

    random_forest_space = HyperparameterSpace({
        'n_estimators': RandInt(50, 600),
        'criterion': Choice(['gini', 'entropy']),
        'min_samples_leaf': RandInt(2, 5),
        'min_samples_split': RandInt(2, 4),
        'bootstrap': Boolean(),
    })
    random_forest_classifier = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(RandomForestClassifier(), random_forest_space),
    ]).set_name('RandomForestClassifier')

    # Define a classification pipeline that lets the AutoML loop choose one of the classifier.
    # See also ChooseOneStepOf documentation : https://www.neuraxle.org/stable/api/neuraxle.steps.flow.html#neuraxle.steps.flow.ChooseOneStepOf
    candidate_classifiers = [
        decision_tree_classifier,
        extra_tree_classifier,
        ridge_classifier,
        logistic_regression,
        random_forest_classifier,
    ]
    pipeline = Pipeline([ChooseOneStepOf(candidate_classifiers)])

    # Create the AutoML loop object.
    # See also AutoML documentation : https://www.neuraxle.org/stable/api/neuraxle.metaopt.auto_ml.html#neuraxle.metaopt.auto_ml.AutoML
    auto_ml = AutoML(
        pipeline=pipeline,
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(test_size=0.20),
        scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),
        n_trials=7,
        epochs=1,
        hyperparams_repository=HyperparamsJSONRepository(cache_folder='cache'),
        refit_trial=True,
        continue_loop_on_error=False,
    )

    # Load data, and launch AutoML loop !
    X_train, y_train, X_test, y_test = generate_classification_data()
    auto_ml = auto_ml.fit(X_train, y_train)

    # Get the model from the best trial, and make predictions using predict.
    # See also predict documentation : https://www.neuraxle.org/stable/api/neuraxle.base.html#neuraxle.base.BaseStep.predict
    best_pipeline = auto_ml.get_best_model()
    y_pred = best_pipeline.predict(X_test)

    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("Test accuracy score:", accuracy)

    # Remove the repository's cache folder created above.
    shutil.rmtree('cache')
HYPERPARAMETERS_SPACE = HyperparameterSpace({ 'learning_rate': LogUniform(0.0001, 0.1), 'l2_weight_reg': LogUniform(0.0001, 0.1), 'momentum': LogUniform(0.01, 1.0), 'hidden_size': Quantized(LogUniform(16, 512)), 'num_layers': RandInt(1, 4), 'num_lstm_layers': RandInt(1, 2), 'use_xavier_init': Boolean(), 'use_max_pool_else_avg_pool': Boolean(), 'dropout_drop_proba': LogUniform(0.3, 0.7) }) HYPERPARAMETERS = HyperparameterSamples({ 'learning_rate': 0.1, 'l2_weight_reg': 0.001, 'hidden_size': 32, 'num_layers': 3, 'num_lstm_layers': 1, 'use_xavier_init': True, 'use_max_pool_else_avg_pool': True, 'dropout_drop_proba': 0.5,
from neuraxle.hyperparams.distributions import RandInt, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples
from neuraxle.steps.loop import StepClonerForEachDataInput
from testing.test_pipeline import SomeStep

# Module-level hyperparameter names, values and distributions.
SOME_STEP_HP_KEY = 'somestep_hyperparam'
RAND_INT_SOME_STEP = RandInt(-10, 0)
RAND_INT_STEP_CLONER = RandInt(0, 10)

META_STEP_HP = 'metastep_hyperparam'
SOME_STEP_HP = "SomeStep__somestep_hyperparam"
META_STEP_HP_VALUE = 1
SOME_STEP_HP_VALUE = 2

# A one-entry space and a one-entry sample sharing the same "a__test" key.
HYPE_SPACE = HyperparameterSpace({
    "a__test": Boolean()
})

HYPE_SAMPLE = HyperparameterSamples({
    "a__test": True
})


# NOTE(review): the mixin base classes and BaseStep are not imported in the visible
# part of this excerpt — presumably imported above it; confirm.
class SomeMetaStepMixin(NonTransformableMixin, NonFittableMixin, MetaStepMixin, BaseStep):
    # Assembled purely from its bases; the body adds nothing.
    pass


class SomeStepInverseTransform(SomeStep):
    def fit_transform(self, data_inputs, expected_outputs=None):
        # Ignores both arguments and returns the step together with a fixed marker string.
        return self, 'fit_transform'
def main(tmpdir):
    """Run an AutoML search over a stacking pipeline on Boston housing and print R2 scores.

    The data is shuffled with a fixed seed and split 75/25 into train and test.
    The AutoML loop scores trials with mean squared error (lower is better),
    and the best model is then evaluated with the R2 score on both splits.

    :param tmpdir: folder passed (as a string) to AutoML and to the hyperparams repository as cache folder
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.

    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300),
                    "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        refit_trial=True,
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        cache_folder_when_no_handle=str(tmpdir),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
        hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    )

    random_search = auto_ml.fit(X_train, y_train)
    p = random_search.get_best_model()

    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score expects (y_true, y_pred); the score is not symmetric in its arguments.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
def test_deep_learning_pipeline():
    """Fit a DeepLearningPipeline around a stacking pipeline and check the recorded MSE metrics.

    Verifies the number of recorded batch/epoch metrics for the train and
    validation splits, that the last train metrics are below the last
    validation metrics, and that the last train metrics are below 1.
    """
    # Given
    boston = load_boston()
    data_inputs, expected_outputs = shuffle(boston.data, boston.target, random_state=13)
    expected_outputs = expected_outputs.astype(np.float32)
    data_inputs = data_inputs.astype(np.float32)

    feature_union = AddFeatures([
        SKLearnWrapper(
            PCA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)}),
        ),
        SKLearnWrapper(
            FastICA(n_components=2),
            HyperparameterSpace({"n_components": RandInt(1, 3)}),
        ),
    ])
    stacked_models = ModelStacking(
        [
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600),
                    "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7),
                }),
            ),
            SKLearnWrapper(
                KMeans(n_clusters=7),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)}),
            ),
        ],
        joiner=NumpyTranspose(),
        judge=SKLearnWrapper(
            Ridge(),
            HyperparameterSpace({
                "alpha": LogUniform(0.7, 1.4),
                "fit_intercept": Boolean(),
            }),
        ),
    )
    pipeline = Pipeline([feature_union, stacked_models])

    p = DeepLearningPipeline(
        pipeline,
        validation_size=VALIDATION_SIZE,
        batch_size=BATCH_SIZE,
        batch_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        shuffle_in_each_epoch_at_train=True,
        n_epochs=N_EPOCHS,
        epochs_metrics={'mse': to_numpy_metric_wrapper(mean_squared_error)},
        scoring_function=to_numpy_metric_wrapper(mean_squared_error),
    )

    # When
    p, _outputs = p.fit_transform(data_inputs, expected_outputs)

    # Then
    batch_mse_train = p.get_batch_metric_train('mse')
    epoch_mse_train = p.get_epoch_metric_train('mse')
    batch_mse_validation = p.get_batch_metric_validation('mse')
    epoch_mse_validation = p.get_epoch_metric_validation('mse')

    assert len(epoch_mse_train) == N_EPOCHS
    assert len(epoch_mse_validation) == N_EPOCHS

    # One batch metric is expected per batch of each split, for every epoch.
    batches_in_dataset = len(data_inputs) / BATCH_SIZE
    expected_len_batch_mse_train = math.ceil(batches_in_dataset * (1 - VALIDATION_SIZE)) * N_EPOCHS
    expected_len_batch_mse_validation = math.ceil(batches_in_dataset * VALIDATION_SIZE) * N_EPOCHS

    assert len(batch_mse_train) == expected_len_batch_mse_train
    assert len(batch_mse_validation) == expected_len_batch_mse_validation

    last_batch_mse_validation = batch_mse_validation[-1]
    last_batch_mse_train = batch_mse_train[-1]
    last_epoch_mse_train = epoch_mse_train[-1]
    last_epoch_mse_validation = epoch_mse_validation[-1]

    assert last_batch_mse_train < last_batch_mse_validation
    assert last_epoch_mse_train < last_epoch_mse_validation
    assert last_batch_mse_train < 1
    assert last_epoch_mse_train < 1