def test_automl_sklearn_model_with_base_estimator(tmpdir):
    """Exercise the AutoML loop helper with a wrapped scikit-learn ensemble.

    A ``BaggingRegressor`` whose base estimator is a
    ``GradientBoostingRegressor`` is wrapped in an ``SKLearnWrapper`` together
    with a search space over ``n_estimators`` and ``max_features``; the wrapped
    step is then passed to ``_test_within_auto_ml_loop``.

    :param tmpdir: temporary directory forwarded as-is to the AutoML loop helper
    """
    base_estimator = GradientBoostingRegressor()
    ensemble = BaggingRegressor(base_estimator, random_state=5, n_jobs=-1)

    search_space = HyperparameterSpace({
        "n_estimators": RandInt(10, 100),
        "max_features": Uniform(0.6, 1.0),
    })
    wrapped_ensemble = SKLearnWrapper(
        ensemble,
        search_space,
        # return_all_sklearn_default_params_on_get=True
    )

    _test_within_auto_ml_loop(tmpdir, wrapped_ensemble)
def main():
    """Run a random-search AutoML loop for a TensorFlow 2 classifier on the wine CSV.

    The function reads ``data/winequality-white.csv`` (``;``-separated), treats
    the last column as the label and every other column as a feature, builds a
    pipeline around a ``Tensorflow2ModelStep`` and fits an ``AutoML`` instance
    on it (200 trials, up to 75 epochs each, 30% validation split, early
    stopping after 3 epochs without improvement).

    The model step is configured through ``create_model``, ``create_loss`` and
    ``create_optimizer``, which are defined outside this function.
    """

    def accuracy(data_inputs, expected_outputs):
        """Return the fraction of rows whose argmax agrees in both arrays.

        Both arguments are converted with ``np.array`` and compared on
        ``argmax(axis=1)``, so they are presumably one-hot / per-class score
        matrices of the same shape -- confirm against the scoring callback.
        """
        predicted_classes = np.argmax(np.array(data_inputs), axis=1)
        expected_classes = np.argmax(np.array(expected_outputs), axis=1)
        return np.mean(predicted_classes == expected_classes)

    # load the dataset
    df = read_csv('data/winequality-white.csv', sep=';')
    data_inputs = df.values
    # Shift the label (last column) down by one.
    data_inputs[:, -1] = data_inputs[:, -1] - 1
    # Every column except the last one is a feature.
    n_features = data_inputs.shape[1] - 1
    n_classes = 10

    p = Pipeline([
        TrainOnlyWrapper(DataShuffler()),
        # Split each row: feature columns become the data inputs (cast through
        # ToNumpy(np.float32)), the label column becomes the expected output.
        # The indexes are derived from n_features so they cannot drift from
        # the 'input_dim' given to the model below.
        ColumnTransformerInputOutput(
            input_columns=[(
                list(range(n_features)),
                ToNumpy(np.float32)
            )],
            output_columns=[(n_features, Identity())]
        ),
        OutputTransformerWrapper(PlotDistribution(column=-1)),
        MiniBatchSequentialPipeline([
            Tensorflow2ModelStep(
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer
            ).set_hyperparams(HyperparameterSamples({
                'n_dense_layers': 2,
                'input_dim': n_features,
                'optimizer': 'adam',
                'activation': 'relu',
                'kernel_initializer': 'he_uniform',
                'learning_rate': 0.01,
                'hidden_dim': 20,
                # Keep the default in sync with the search space and with the
                # one-hot encoding below (previously a stale literal 3).
                'n_classes': n_classes
            })).set_hyperparams_space(HyperparameterSpace({
                'n_dense_layers': RandInt(2, 4),
                'hidden_dim_layer_multiplier': Uniform(0.30, 1),
                'input_dim': FixedHyperparameter(n_features),
                'optimizer': Choice([
                    OPTIMIZERS.ADAM.value,
                    OPTIMIZERS.SGD.value,
                    OPTIMIZERS.ADAGRAD.value
                ]),
                'activation': Choice([
                    ACTIVATIONS.RELU.value,
                    ACTIVATIONS.TANH.value,
                    ACTIVATIONS.SIGMOID.value,
                    ACTIVATIONS.ELU.value,
                ]),
                'kernel_initializer': Choice([
                    KERNEL_INITIALIZERS.GLOROT_NORMAL.value,
                    KERNEL_INITIALIZERS.GLOROT_UNIFORM.value,
                    KERNEL_INITIALIZERS.HE_UNIFORM.value
                ]),
                'learning_rate': LogUniform(0.005, 0.01),
                'hidden_dim': RandInt(3, 80),
                'n_classes': FixedHyperparameter(n_classes)
            }))
        ], batch_size=33),
        # One-hot encode the expected outputs over n_classes columns so that
        # `accuracy` can compare them with argmax.
        OutputTransformerWrapper(Pipeline([
            ExpandDim(),
            OneHotEncoder(nb_columns=n_classes, name='classes')
        ]))
    ])

    auto_ml = AutoML(
        pipeline=p,
        hyperparams_repository=InMemoryHyperparamsRepository(
            cache_folder='trials'),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(test_size=0.30),
        scoring_callback=ScoringCallback(accuracy, higher_score_is_better=True),
        callbacks=[
            # NOTE(review): the metric function name below is spelled exactly
            # as it is referenced here; its definition is outside this block.
            MetricCallback(
                name='classification_report_imbalanced_metric',
                metric_function=classificaiton_report_imbalanced_metric,
                higher_score_is_better=True),
            MetricCallback(name='f1',
                           metric_function=f1_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='recall',
                           metric_function=recall_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='precision',
                           metric_function=precision_score_weighted,
                           higher_score_is_better=True),
            EarlyStoppingCallback(max_epochs_without_improvement=3)
        ],
        n_trials=200,
        refit_trial=True,
        epochs=75)

    # Labels are extracted from data_inputs by ColumnTransformerInputOutput,
    # so no separate expected_outputs argument is passed.
    auto_ml = auto_ml.fit(data_inputs=data_inputs)
HyperparameterSpace({ 'add': Choice(choice_list=[0, 1.5, 2, 3.5, 4, 5, 6]), })), AddN(0.).set_hyperparams_space( HyperparameterSpace({ 'add': Choice(choice_list=[0, 1.5, 2, 3.5, 4, 5, 6]), })) ])), (3.5, Pipeline([ FitTransformCallbackStep().set_name('callback'), AddN(0.).set_hyperparams_space( HyperparameterSpace({ 'add': Quantized(hd=Uniform(0, 10)), })), AddN(0.).set_hyperparams_space( HyperparameterSpace({ 'add': Quantized(hd=Uniform(0, 10)), })) ])), (3.5, Pipeline([ FitTransformCallbackStep().set_name('callback'), AddN(0.).set_hyperparams_space( HyperparameterSpace({ 'add': LogUniform(min_included=1.0, max_included=5.0), })), AddN(0.).set_hyperparams_space(