Example 1
def test_model_stacking_fit_transform():
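    # Stack a boosted-trees regressor and a k-means clusterer as parallel
    # "brothers"; a Ridge judge regresses on their transposed outputs.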
    model_stacking = Pipeline([
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])
    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking, outputs = model_stacking.fit_transform(
        data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
Example 2
def test_hyperparams_space_round_robin(to_nested_dict_func_name,
                                       to_flat_func_name):
    orig_space = copy.deepcopy(HYPE_SPACE)
    print(orig_space.keys())

    nested = HyperparameterSpace(
        getattr(orig_space, to_nested_dict_func_name)())
    print(nested)
    flattened = HyperparameterSpace(getattr(nested, to_flat_func_name)())

    print(flattened.keys())
    assert flattened.to_flat_as_dict_primitive(
    ) == orig_space.to_flat_as_dict_primitive()
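
For reference, a minimal stand-in for the HYPE_SPACE fixture used above could look like this (hypothetical keys and distributions; the real fixture lives in the test module):

from neuraxle.hyperparams.distributions import Boolean, RandInt
from neuraxle.hyperparams.space import HyperparameterSpace

# Hypothetical fixture: flat keys use the "parent__child" naming convention
# that to_nested_dict() / to_flat() round-trip without loss.
HYPE_SPACE = HyperparameterSpace({
    'a__learning_rate': RandInt(1, 3),
    'a__b__c': RandInt(0, 10),
    'b__use_bias': Boolean(),
})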
Example 3
def test_meta_step_mixin_update_hyperparams_space_should_update_wrapped_step_hyperparams():
    p = SomeMetaStepMixin(SomeStep())
    p.set_hyperparams_space(HyperparameterSpace({
        META_STEP_HP: RAND_INT_META_STEP,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    }))

    updated_some_step_hp_space = RandInt(0, 100)
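    # update_hyperparams_space should merge this entry into the existing
    # space, leaving META_STEP_HP intact.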
    p.update_hyperparams_space(HyperparameterSpace({
        SOME_STEP_HP: updated_some_step_hp_space
    }))

    assert p.hyperparams_space[META_STEP_HP] == RAND_INT_META_STEP
    assert p.wrapped.get_hyperparams_space()['somestep_hyperparam'] == updated_some_step_hp_space
Example 4
def test_step_cloner_update_hyperparams_space_should_update_wrapped_step_hyperparams():
    p = StepClonerForEachDataInput(SomeStep())
    p.set_hyperparams_space(HyperparameterSpace({
        META_STEP_HP: RAND_INT_META_STEP,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    }))

    updated_some_step_hp_space = RandInt(0, 400)
    p.update_hyperparams_space(HyperparameterSpace({
        SOME_STEP_HP: updated_some_step_hp_space
    }))

    assert isinstance(p.hyperparams, HyperparameterSamples)
    assert p.hyperparams_space[META_STEP_HP] == RAND_INT_META_STEP
    assert p.wrapped.get_hyperparams_space()[SOME_STEP_HP_KEY] == updated_some_step_hp_space
Example 5
def test_can_update_scipy_distribution():
    p = Identity().set_hyperparams_space(HyperparameterSpace({
        'rand_int_neuraxle': RandInt(2, 5)  # neuraxle
    }))

    p.update_hyperparams_space(HyperparameterSpace({
        'rand_int_scipy': randint(low=2, high=5),  # scipy
        'gamma_scipy': gamma(0.2),  # scipy
    }))

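    # Raw scipy.stats distributions get wrapped so they expose Neuraxle's
    # distribution API (e.g. rvs()).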
    assert isinstance(p.get_hyperparams_space()['rand_int_scipy'], ScipyDiscreteDistributionWrapper)
    assert isinstance(p.get_hyperparams_space()['gamma_scipy'], ScipyContinuousDistributionWrapper)
    randint_sample = p.get_hyperparams_space()['rand_int_scipy'].rvs()
    gamma_sample = p.get_hyperparams_space()['gamma_scipy'].rvs()
    assert 5 >= randint_sample >= 2
    assert isinstance(gamma_sample, float)
Example 6
def test_dict_to_flat_hyperparams_with_hyperparameter_space(
        expected_flat: dict, dic: dict):
    flat = HyperparameterSpace(dic).to_flat_as_dict_primitive()

    pprint(dict(flat))
    pprint(expected_flat)
    assert dict(flat) == dict(expected_flat)
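
A concrete (hypothetical) parametrization for this test pairs a nested dict with its flat, double-underscore-separated form:

dic = {'step_a': {'learning_rate': 0.1}, 'step_b': {'n_layers': 2}}
expected_flat = {'step_a__learning_rate': 0.1, 'step_b__n_layers': 2}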
Example 7
def test_automl_early_stopping_callback(tmpdir):
    # TODO: fix this unit test
    # Given
    hp_repository = InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    n_epochs = 60
    auto_ml = AutoML(
        pipeline=Pipeline([
            FitTransformCallbackStep().set_name('callback'),
            MultiplyByN(2).set_hyperparams_space(
                HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
            NumpyReshape(new_shape=(-1, 1)),
            linear_model.LinearRegression()
        ]),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(0.20),
        scoring_callback=ScoringCallback(mean_squared_error,
                                         higher_score_is_better=False),
        callbacks=[
            MetricCallback('mse',
                           metric_function=mean_squared_error,
                           higher_score_is_better=False),
        ],
        n_trials=1,
        refit_trial=True,
        epochs=n_epochs,
        hyperparams_repository=hp_repository)

    # When
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 2
    auto_ml = auto_ml.fit(data_inputs=data_inputs,
                          expected_outputs=expected_outputs)

    # Then
    p = auto_ml.get_best_model()
Example 8
    def _create_posterior(self, flat_hyperparameter_space: HyperparameterSpace,
                          trials: Trials) -> HyperparameterSpace:
        # Create a list of all hyperparams and their trials.

        # Loop through all hyperparams
        posterior_distributions: HyperparameterSpace = HyperparameterSpace()
        for (hyperparam_key,
             hyperparam_distribution) in flat_hyperparameter_space.items():

            # Get trial hyperparams
            trial_hyperparams: List[HyperparameterSamples] = [
                trial.hyperparams.to_flat_as_dict_primitive()[hyperparam_key]
                for trial in trials
            ]

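            # Discrete hyperparams get a re-weighted categorical posterior;
            # continuous ones get a Gaussian mixture fitted on past trial values.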
            if hyperparam_distribution.is_discrete():
                posterior_distribution = self._reweights_categorical(
                    discrete_distribution=hyperparam_distribution,
                    trial_hyperparameters=trial_hyperparams)
            else:
                posterior_distribution = self._create_gaussian_mixture(
                    continuous_distribution=hyperparam_distribution,
                    trial_hyperparameters=trial_hyperparams)

            posterior_distributions.update(
                {hyperparam_key: posterior_distribution})

        return posterior_distributions
Example 9
def test_logger():
    file_path = "test.log"

    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')
    context = ExecutionContext(logger=logger)
    pipeline = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        LoggingStep()
    ])

    # When
    data_container = DataContainer(
        data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
    pipeline.handle_fit(data_container, context)

    # Then
    assert os.path.exists(file_path)
    with open(file_path) as f:
        log_content = f.read()

    # Teardown
    file_handler.close()
    os.remove(file_path)
Example 10
def test_automl_sequential_wrapper(tmpdir):
    # Given
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    hyperparameter_space = HyperparameterSpace({
        'multiplication_1__multiply_by':
        RandInt(1, 3),
        'multiplication_2__multiply_by':
        RandInt(1, 3),
        'multiplication_3__multiply_by':
        RandInt(1, 3),
    })

    pipeline = Pipeline(
        [('multiplication_1', MultiplyByN()),
         ('multiplication_2', MultiplyByN()),
         ('multiplication_3', MultiplyByN())],
        cache_folder=tmpdir).set_hyperparams_space(hyperparameter_space)

    auto_ml = RandomSearch(
        KFoldCrossValidationWrapper().set_step(pipeline),
        hyperparams_repository=HyperparamsJSONRepository(tmpdir),
        n_iter=10)

    # When
    auto_ml: AutoMLSequentialWrapper = auto_ml.fit(data_inputs,
                                                   expected_outputs)
    best_model: Pipeline = auto_ml.get_best_model()
    predicted_outputs = best_model.transform(data_inputs)

    # Then
    actual_mse = ((predicted_outputs - expected_outputs)**2).mean()
    assert actual_mse < 20000
Example 11
def test_trainer_train():
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 4
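    # MultiplyByN(2) accounts for a factor of 2; LinearRegression learns
    # the remaining factor.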
    p = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        linear_model.LinearRegression()
    ])

    trainer: Trainer = Trainer(
        epochs=10,
        scoring_callback=ScoringCallback(mean_squared_error,
                                         higher_score_is_better=False),
        validation_splitter=ValidationSplitter(test_size=0.20))

    repo_trial: Trial = trainer.train(pipeline=p,
                                      data_inputs=data_inputs,
                                      expected_outputs=expected_outputs)

    trained_pipeline = repo_trial.get_trained_pipeline(split_number=0)

    outputs = trained_pipeline.transform(data_inputs)
    mse = mean_squared_error(expected_outputs, outputs)

    assert mse < 1
Example 12
def test_automl_should_shallow_copy_data_before_each_epoch():
    # see issue #332 https://github.com/Neuraxio/Neuraxle/issues/332
    data_inputs = np.random.randint(0, 100, (100, 3))
    expected_outputs = np.random.randint(0, 3, 100)

    from sklearn.preprocessing import StandardScaler
    p = Pipeline([
        SKLearnWrapper(StandardScaler()),
        SKLearnWrapper(LinearSVC(),
                       HyperparameterSpace({'C': RandInt(0, 10000)})),
    ])

    auto_ml = AutoML(p,
                     validation_splitter=ValidationSplitter(0.20),
                     refit_trial=True,
                     n_trials=10,
                     epochs=10,
                     cache_folder_when_no_handle='cache',
                     scoring_callback=ScoringCallback(
                         mean_squared_error, higher_score_is_better=False),
                     callbacks=[
                         MetricCallback('mse',
                                        metric_function=mean_squared_error,
                                        higher_score_is_better=False)
                     ],
                     hyperparams_repository=InMemoryHyperparamsRepository(
                         cache_folder='cache'),
                     continue_loop_on_error=False)

    random_search = auto_ml.fit(data_inputs, expected_outputs)

    best_model = random_search.get_best_model()

    assert isinstance(best_model, Pipeline)
Example 13
def main():
    p = Pipeline([
        IdentityWithRvs().set_hyperparams_space(
            HyperparameterSpace({'a': randint(low=2, high=5)})),
        IdentityWithRvs().set_hyperparams_space(
            HyperparameterSpace({'b': randint(low=100, high=400)}))
    ])

    samples: HyperparameterSamples = p.apply(rvs)
    print('p.apply(rvs) ==>')
    print(json.dumps(samples, indent=4))

    # or equivalently:

    samples: HyperparameterSamples = p.apply('_rvs')
    print('p.apply(\'_rvs\') ==>')
    print(json.dumps(samples, indent=4))
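    # Note: apply(rvs) and apply('_rvs') are equivalent; the string form
    # looks the method up by name on each step.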
Example 14
def test_hyperparam_space():
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)}))
        ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
        )
    ])

    rvsed = p.get_hyperparams_space().rvs()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()

    assert "AddFeatures" in hyperparams.keys()
    assert "SomeStep" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert "SomeStep1" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]
    assert "SomeStep" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep"]
    assert "SomeStep1" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep2"]
Example 15
    def __init__(self, brothers):
        super().__init__(brothers,
                         SKLearnWrapper(
                             Ridge(),
                             HyperparameterSpace({
                                 "alpha": LogUniform(0.1, 10.0),
                                 "fit_intercept": Boolean()
                             })),
                         joiner=NumpyTranspose())
Example 16
def test_meta_step_mixin_should_set_hyperparams_space():
    p = SomeMetaStepMixin(SomeStep())
    p.set_hyperparams_space(HyperparameterSpace({
        META_STEP_HP: RAND_INT_META_STEP,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    }))

    assert p.hyperparams_space[META_STEP_HP] == RAND_INT_META_STEP
    assert p.get_step().hyperparams_space[SOME_STEP_HP_KEY] == RAND_INT_SOME_STEP
Example 17
def test_choose_one_or_many_step_of_transform_should_choose_step(
        test_case: NeuraxleTestCase):
    p = test_case.pipeline
    p.set_hyperparams_space(HyperparameterSpace(test_case.hyperparams_space))
    p.set_hyperparams(test_case.hyperparams)

    outputs = p.transform(DATA_INPUTS)

    assert np.array_equal(outputs, test_case.expected_processed_outputs)
    assert_callback_data_is_as_expected(test_case)
Example 18
def test_step_cloner_should_set_steps_hyperparams_space():
    p = StepClonerForEachDataInput(SomeStep())

    p.set_hyperparams_space(HyperparameterSpace({
        META_STEP_HP: RAND_INT_STEP_CLONER,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    }))

    assert isinstance(p.get_step().hyperparams_space, HyperparameterSpace)
    assert p.get_step().hyperparams_space[SOME_STEP_HP_KEY] == RAND_INT_SOME_STEP
Example 19
def test_step_cloner_should_get_hyperparams_space():
    p = StepClonerForEachDataInput(SomeStep())
    p.set_hyperparams_space(HyperparameterSpace({
        META_STEP_HP: RAND_INT_STEP_CLONER,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    }))

    hyperparams_space = p.get_hyperparams_space()

    assert hyperparams_space[META_STEP_HP] == RAND_INT_STEP_CLONER
    assert hyperparams_space[SOME_STEP_HP] == RAND_INT_SOME_STEP
Example 20
    def get_hyperparams_space(self, flat=False):
        # Collect each child step's space under its own name, then merge
        # this step's own space on top.
        all_hyperparams = HyperparameterSpace()
        for step_name, step in self.steps_as_tuple:
            hspace = step.get_hyperparams_space(flat=flat)
            all_hyperparams.update({step_name: hspace})
        all_hyperparams.update(super().get_hyperparams_space())
        if flat:
            all_hyperparams = all_hyperparams.to_flat()
        else:
            all_hyperparams = all_hyperparams.to_nested_dict()
        return all_hyperparams
Example 21
def create_model_step():
    return TensorflowV1ModelStep(
        create_graph=create_graph,
        create_loss=create_loss,
        create_optimizer=create_optimizer,
        has_expected_outputs=True
    ).set_hyperparams(
        HyperparameterSamples({'learning_rate': 0.01})
    ).set_hyperparams_space(
        HyperparameterSpace({'learning_rate': LogUniform(0.0001, 0.01)})
    )
Example 22
    def set_hyperparams_space(self, hyperparams_space: Union[HyperparameterSpace, OrderedDict, dict]) -> BaseStep:
        hyperparams_space: HyperparameterSpace = HyperparameterSpace(hyperparams_space).to_nested_dict()

        remainders = dict()
        for name, hparams in hyperparams_space.items():
            if name in self.steps.keys():
                self.steps[name].set_hyperparams_space(hparams)
            else:
                remainders[name] = hparams
        # Keep leftover (non-child) entries as this step's own space.
        self.hyperparams_space = HyperparameterSpace(remainders)

        return self
Example 23
    def __init__(self, steps, hyperparams=None):
        FeatureUnion.__init__(self, steps, joiner=SelectNonEmptyDataInputs())

        self._make_all_steps_optional()

        if hyperparams is None:
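            # The last key is the joiner, which is not a selectable choice.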
            choices = list(self.keys())[:-1]
            self.set_hyperparams(HyperparameterSamples({
                CHOICE_HYPERPARAM: choices[0]
            }))
            self.set_hyperparams_space(HyperparameterSpace({
                CHOICE_HYPERPARAM: Choice(choices)
            }))
Example 24
def test_automl_sklearn_model_with_base_estimator(tmpdir):
    grad_boost = GradientBoostingRegressor()
    bagged_regressor = BaggingRegressor(grad_boost, random_state=5, n_jobs=-1)

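    # Only the bagging ensemble's own n_estimators and max_features are
    # tuned; the inner GradientBoostingRegressor keeps its defaults.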
    wrapped_bagged_regressor = SKLearnWrapper(
        bagged_regressor,
        HyperparameterSpace({
            "n_estimators": RandInt(10, 100),
            "max_features": Uniform(0.6, 1.0)
        }),
        #  return_all_sklearn_default_params_on_get=True
    )
    _test_within_auto_ml_loop(tmpdir, wrapped_bagged_regressor)
Example 25
def test_hyperparam_space():
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking([
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"max_depth": RandInt(1, 100)})),
            SomeStep(hyperparams_space=HyperparameterSpace(
                {"max_depth": RandInt(1, 100)}))
        ],
                      joiner=NumpyTranspose(),
                      judge=SomeStep(hyperparams_space=HyperparameterSpace(
                          {"alpha": LogUniform(0.1, 10.0)})))
    ])

    rvsed = p.get_hyperparams_space().rvs()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()
    flat_hyperparams_keys = hyperparams.to_flat_dict().keys()

    assert 'AddFeatures' in hyperparams
    assert 'SomeStep' in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert 'SomeStep1' in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]

    assert 'ModelStacking' in hyperparams
    assert 'SomeStep' in hyperparams["ModelStacking"]
    assert 'n_estimators' in hyperparams["ModelStacking"]["SomeStep"]
    assert 'SomeStep1' in hyperparams["ModelStacking"]
    assert 'n_estimators' in hyperparams["ModelStacking"]["SomeStep1"]
    assert 'SomeStep2' in hyperparams["ModelStacking"]
    assert 'max_depth' in hyperparams["ModelStacking"]["SomeStep2"]
    assert 'SomeStep3' in hyperparams["ModelStacking"]
    assert 'max_depth' in hyperparams["ModelStacking"]["SomeStep3"]

    assert 'AddFeatures__SomeStep1__n_components' in flat_hyperparams_keys
    assert 'AddFeatures__SomeStep__n_components' in flat_hyperparams_keys
    assert 'ModelStacking__SomeStep__n_estimators' in flat_hyperparams_keys
    assert 'ModelStacking__SomeStep1__n_estimators' in flat_hyperparams_keys
    assert 'ModelStacking__SomeStep2__max_depth' in flat_hyperparams_keys
    assert 'ModelStacking__SomeStep3__max_depth' in flat_hyperparams_keys
Example 26
    def test_logger_automl(self, tmpdir):
        # Given
        context = ExecutionContext()
        self.tmpdir = str(tmpdir)
        hp_repository = HyperparamsJSONRepository(cache_folder=self.tmpdir)
        n_epochs = 2
        n_trials = 4
        auto_ml = AutoML(
            pipeline=Pipeline([
                MultiplyByN(2).set_hyperparams_space(
                    HyperparameterSpace(
                        {'multiply_by': FixedHyperparameter(2)})),
                NumpyReshape(new_shape=(-1, 1)),
                LoggingStep()
            ]),
            hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(
            ),
            validation_splitter=ValidationSplitter(0.20),
            scoring_callback=ScoringCallback(mean_squared_error,
                                             higher_score_is_better=False),
            n_trials=n_trials,
            refit_trial=True,
            epochs=n_epochs,
            hyperparams_repository=hp_repository,
            continue_loop_on_error=False)

        # When
        data_container = DataContainer(
            data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            expected_outputs=np.array([10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]))
        auto_ml.handle_fit(data_container, context)

        # Then
        file_paths = [
            os.path.join(hp_repository.cache_folder, f"trial_{i}.log")
            for i in range(n_trials)
        ]
        assert len(file_paths) == n_trials

        for f in file_paths:
            assert os.path.exists(f)

        for file_path in file_paths:
            with open(file_path, 'r') as f:
                log = f.readlines()
                assert len(log) == 36
Example 27
    def __init__(self,
                 hyperparams: HyperparameterSamples = None,
                 hyperparams_space: HyperparameterSpace = None,
                 name: str = None):
        if hyperparams is None:
            hyperparams = dict()
        if hyperparams_space is None:
            hyperparams_space = dict()
        if name is None:
            name = self.__class__.__name__

        self.hyperparams: HyperparameterSamples = HyperparameterSamples(
            hyperparams)
        self.hyperparams_space: HyperparameterSpace = HyperparameterSpace(
            hyperparams_space)
        self.name: str = name

        self.pending_mutate: ('BaseStep', str, str) = (None, None, None)
Example 28
def test_automl_savebestmodel_callback(tmpdir):
    # Given
    hp_repository = HyperparamsJSONRepository(cache_folder=str(tmpdir))
    validation_splitter = ValidationSplitter(0.20)
    auto_ml = AutoML(
        pipeline=Pipeline([
            MultiplyByN(2).set_hyperparams_space(HyperparameterSpace({
                'multiply_by': FixedHyperparameter(2)
            })),
            NumpyReshape(new_shape=(-1, 1)),
            linear_model.LinearRegression()
        ]),
        validation_splitter=validation_splitter,
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[
            BestModelCheckpoint()
        ],
        n_trials=1,
        epochs=10,
        refit_trial=False,
        print_func=print,
        hyperparams_repository=hp_repository,
        continue_loop_on_error=False
    )

    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 4

    # When
    auto_ml.fit(data_inputs=data_inputs, expected_outputs=expected_outputs)

    # Then
    trials: Trials = hp_repository.load_all_trials()
    best_trial = trials.get_best_trial()
    best_trial_score = best_trial.get_validation_score()
    best_trial.cache_folder = hp_repository.cache_folder
    best_model = best_trial.get_model('best')
    _, _, valid_inputs, valid_outputs = ValidationSplitter(0.20).split(data_inputs, expected_outputs)
    predicted_output = best_model.predict(valid_inputs)
    score = mean_squared_error(valid_outputs, predicted_output)

    assert best_trial_score == score
Example 29
    def __init__(self, wrapped: BaseTransformer, enabled: bool = True, nullified_return_value=None,
                 cache_folder_when_no_handle=None, use_hyperparameter_space=True, nullify_hyperparams=True):
        hyperparameter_space = HyperparameterSpace({
            OPTIONAL_ENABLED_HYPERPARAM: Boolean()
        }) if use_hyperparameter_space else {}

        MetaStep.__init__(
            self,
            hyperparams=HyperparameterSamples({
                OPTIONAL_ENABLED_HYPERPARAM: enabled
            }),
            hyperparams_space=hyperparameter_space,
            wrapped=wrapped
        )
        ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)

        if nullified_return_value is None:
            nullified_return_value = []
        self.nullified_return_value = nullified_return_value
        self.nullify_hyperparams = nullify_hyperparams
Example 30
def test_automl_sequential_wrapper_with_validation_split_wrapper(tmpdir):
    # Setting seed for reproducibility
    np.random.seed(75)
    # Given
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    hyperparameter_space = HyperparameterSpace({
        'multiplication_1__multiply_by':
        RandInt(1, 3),
        'multiplication_2__multiply_by':
        RandInt(1, 3),
        'multiplication_3__multiply_by':
        RandInt(1, 3),
    })

    pipeline = Pipeline(
        [('multiplication_1', MultiplyByN()),
         ('multiplication_2', MultiplyByN()),
         ('multiplication_3', MultiplyByN())],
        cache_folder=tmpdir).set_hyperparams_space(hyperparameter_space)

    random_search = RandomSearch(
        ValidationSplitWrapper(pipeline,
                               test_size=0.2,
                               scoring_function=mean_squared_error,
                               run_validation_split_in_test_mode=False),
        hyperparams_repository=HyperparamsJSONRepository(tmpdir),
        higher_score_is_better=False,
        n_iter=100)

    # When
    mse_before = ((data_inputs - expected_outputs)**2).mean()
    random_search: AutoMLSequentialWrapper = random_search.fit(
        data_inputs, expected_outputs)
    best_model: Pipeline = random_search.get_best_model()
    predicted_outputs = best_model.transform(data_inputs)

    # Then
    actual_mse = ((predicted_outputs - expected_outputs)**2).mean()
    assert actual_mse < mse_before