def choose_one_step_single_step_chosen_transform():
    """Build a test case where ChooseOneStepOf is forced (via hyperparams) to route
    through step 'a', which doubles each data input.

    Only the first tape is expected to record ``DATA_INPUTS``; the three other
    tapes must stay empty because step 'b' is never executed.
    """
    tape_a = TapeCallbackFunction()
    tape_b = TapeCallbackFunction()
    tape_c = TapeCallbackFunction()
    tape_d = TapeCallbackFunction()

    step_a = ('a', FitTransformCallbackStep(tape_a, tape_c, transform_function=lambda di: di * 2))
    step_b = ('b', FitTransformCallbackStep(tape_b, tape_d, transform_function=lambda di: di * 2))

    return NeuraxleTestCase(
        pipeline=Pipeline([
            ChooseOneStepOf([step_a, step_b]),
        ]),
        callbacks=[tape_a, tape_c, tape_b, tape_d],
        expected_callbacks_data=[DATA_INPUTS, [], [], []],
        # NOTE(review): the key names the parent class 'ChooseOneOrManyStepsOf' while the
        # step is a ChooseOneStepOf — confirm this matches the step's resolved name.
        hyperparams={
            'ChooseOneOrManyStepsOf__choice': 'a'
        },
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18]),
    )
def test_choose_one_step_of_invalid_chosen_step():
    """Selecting a 'choice' that names no existing step must raise a ValueError."""
    with pytest.raises(ValueError):
        # 'c' is not among the declared steps ('a', 'b'), so setting it is invalid.
        chooser = ChooseOneStepOf([
            ('a', Identity()),
            ('b', Identity()),
        ])
        Pipeline([chooser.set_hyperparams({'choice': 'c'})])
def test_choose_one_step_of_update_hyperparams():
    """update_hyperparams({'choice': 'b'}) must reroute transform() to the other branch."""
    tape_a, tape_b = TapeCallbackFunction(), TapeCallbackFunction()
    tape_c, tape_d = TapeCallbackFunction(), TapeCallbackFunction()

    # Both branches deliberately share the same step name "step_1".
    chooser = ChooseOneStepOf([
        ('a', FitTransformCallbackStep(
            tape_a, tape_c, transform_function=lambda di: di * 2).set_name("step_1")),
        ('b', FitTransformCallbackStep(
            tape_b, tape_d, transform_function=lambda di: di * 2).set_name("step_1")),
    ])
    p = Pipeline([chooser])

    p.transform(DATA_INPUTS)

    # Initially only branch 'a' receives the data.
    assert len(tape_a.data) == 1
    assert all(tape_a.data[0] == DATA_INPUTS)
    assert len(tape_b.data) == 0
    assert len(tape_c.data) == 0
    assert len(tape_d.data) == 0

    chooser.update_hyperparams({'choice': 'b'})
    p.transform(DATA_INPUTS)

    # After the update, branch 'b' records the inputs; 'a' keeps its single record.
    assert len(tape_a.data) == 1
    assert all(tape_a.data[0] == DATA_INPUTS)
    assert len(tape_b.data) == 1
    assert all(tape_b.data[0] == DATA_INPUTS)
    assert len(tape_c.data) == 0
    assert len(tape_d.data) == 0
def test_choose_one_step_of_set_hyperparams(method_name, args, kwargs):
    """Parametrized: any hyperparam-setting method (named by ``method_name``, invoked
    with ``args``/``kwargs``) must switch the routed branch from 'a' to 'b'.
    """
    tape_a, tape_b = TapeCallbackFunction(), TapeCallbackFunction()
    tape_c, tape_d = TapeCallbackFunction(), TapeCallbackFunction()

    # Both branches deliberately share the same step name "step_1".
    chooser = ChooseOneStepOf([
        ('a', FitTransformCallbackStep(
            tape_a, tape_c, transform_function=lambda di: di * 2).set_name("step_1")),
        ('b', FitTransformCallbackStep(
            tape_b, tape_d, transform_function=lambda di: di * 2).set_name("step_1")),
    ])
    p = Pipeline([chooser])

    p.transform(DATA_INPUTS)

    # Initially only branch 'a' receives the data.
    assert len(tape_a.data) == 1
    assert all(tape_a.data[0] == DATA_INPUTS)
    assert len(tape_b.data) == 0
    assert len(tape_c.data) == 0
    assert len(tape_d.data) == 0

    # Apply the parametrized hyperparam-setting method under test.
    getattr(chooser, method_name)(*args, **kwargs)
    p.transform(DATA_INPUTS)

    # After the call, branch 'b' records the inputs; 'a' keeps its single record.
    assert len(tape_a.data) == 1
    assert all(tape_a.data[0] == DATA_INPUTS)
    assert len(tape_b.data) == 1
    assert all(tape_b.data[0] == DATA_INPUTS)
    assert len(tape_c.data) == 0
    assert len(tape_d.data) == 0
def main():
    """Run an AutoML random search that lets the loop choose among five sklearn
    classifiers, then refit the best trial and report test accuracy.
    """
    # See also HyperparameterSpace documentation :
    # https://www.neuraxle.org/stable/api/neuraxle.hyperparams.space.html#neuraxle.hyperparams.space.HyperparameterSpace
    def _tree_space():
        # Fresh space instance per model (both tree variants share the same ranges).
        return HyperparameterSpace({
            'criterion': Choice(['gini', 'entropy']),
            'splitter': Choice(['best', 'random']),
            'min_samples_leaf': RandInt(2, 5),
            'min_samples_split': RandInt(2, 4),
        })

    def _raveled(model, space, name):
        # Wrap a classifier that needs 1-D targets: ravel the expected outputs first.
        return Pipeline([
            OutputTransformerWrapper(NumpyRavel()),
            SKLearnWrapper(model, space),
        ]).set_name(name)

    decision_tree_classifier = SKLearnWrapper(DecisionTreeClassifier(), _tree_space())
    extra_tree_classifier = SKLearnWrapper(ExtraTreeClassifier(), _tree_space())

    ridge_classifier = _raveled(
        RidgeClassifier(),
        HyperparameterSpace({
            'alpha': Choice([0.0, 1.0, 10.0, 100.0]),
            'fit_intercept': Boolean(),
            'normalize': Boolean(),
        }),
        'RidgeClassifier')

    logistic_regression = _raveled(
        LogisticRegression(),
        HyperparameterSpace({
            'C': LogUniform(0.01, 10.0),
            'fit_intercept': Boolean(),
            'penalty': Choice(['none', 'l2']),
            'max_iter': RandInt(20, 200),
        }),
        'LogisticRegression')

    random_forest_classifier = _raveled(
        RandomForestClassifier(),
        HyperparameterSpace({
            'n_estimators': RandInt(50, 600),
            'criterion': Choice(['gini', 'entropy']),
            'min_samples_leaf': RandInt(2, 5),
            'min_samples_split': RandInt(2, 4),
            'bootstrap': Boolean(),
        }),
        'RandomForestClassifier')

    # Define a classification pipeline that lets the AutoML loop choose one of the classifier.
    # See also ChooseOneStepOf documentation :
    # https://www.neuraxle.org/stable/api/neuraxle.steps.flow.html#neuraxle.steps.flow.ChooseOneStepOf
    pipeline = Pipeline([
        ChooseOneStepOf([
            decision_tree_classifier,
            extra_tree_classifier,
            ridge_classifier,
            logistic_regression,
            random_forest_classifier,
        ]),
    ])

    # Create the AutoML loop object.
    # See also AutoML documentation :
    # https://www.neuraxle.org/stable/api/neuraxle.metaopt.auto_ml.html#neuraxle.metaopt.auto_ml.AutoML
    auto_ml = AutoML(
        pipeline=pipeline,
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(test_size=0.20),
        scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),
        n_trials=7,
        epochs=1,
        hyperparams_repository=HyperparamsJSONRepository(cache_folder='cache'),
        refit_trial=True,
        continue_loop_on_error=False)

    # Load data, and launch AutoML loop !
    X_train, y_train, X_test, y_test = generate_classification_data()
    auto_ml = auto_ml.fit(X_train, y_train)

    # Get the model from the best trial, and make predictions using predict.
    # See also predict documentation :
    # https://www.neuraxle.org/stable/api/neuraxle.base.html#neuraxle.base.BaseStep.predict
    best_pipeline = auto_ml.get_best_model()
    y_pred = best_pipeline.predict(X_test)

    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("Test accuracy score:", accuracy)

    shutil.rmtree('cache')