def test_when_hyperparams_and_saved_same_pipeline_should_load_checkpoint_pickle(tmpdir: LocalPath):
    # Given
    tape = TapeCallbackFunction()

    # When
    pipeline_save = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=DefaultCheckpoint(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1})
    )
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    pipeline_load = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=DefaultCheckpoint(),
        tape=tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1})
    )
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(data_inputs, expected_outputs)

    # Then
    actual_tape = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == EXPECTED_TAPE_AFTER_CHECKPOINT
def test_when_hyperparams_and_saved_no_pipeline_should_not_load_checkpoint_pickle(tmpdir: LocalPath):
    # Given
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()

    # When
    pipeline_save = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=Identity(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        different=True,
        save_pipeline=False
    )
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    pipeline_load = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=pickle_checkpoint_step,
        tape=tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1})
    )
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(data_inputs, expected_outputs)

    # Then
    actual_tape = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]
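# Note: `create_pipeline`, `data_inputs`, `expected_outputs`, and
# `EXPECTED_TAPE_AFTER_CHECKPOINT` are module-level fixtures not shown in this
# section. EXPECTED_TAPE_AFTER_CHECKPOINT is presumably ["2", "3"], since a
# step that precedes a resumed checkpoint gets skipped. Below is a minimal
# sketch of what `create_pipeline` could look like, built on Neuraxle's
# ResumablePipeline and TransformCallbackStep; the step layout is an
# assumption for illustration, not the project's actual fixture, and the
# `different`/`save_pipeline` flags (which vary the saved pipeline in the
# tests above) are accepted but not modeled here:
def create_pipeline(tmpdir, pickle_checkpoint_step, tape, hyperparameters=None,
                    different=False, save_pipeline=True):
    pipeline = ResumablePipeline([
        ('a', TransformCallbackStep(callback_function=tape.callback, more_arguments=["1"])),
        ('pickle_checkpoint', pickle_checkpoint_step),
        ('b', TransformCallbackStep(callback_function=tape.callback, more_arguments=["2"])),
        ('c', TransformCallbackStep(callback_function=tape.callback, more_arguments=["3"]))
    ], cache_folder=tmpdir)
    if hyperparameters is not None:
        pipeline.set_hyperparams(hyperparameters)
    return pipeline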
def test_recursive_dict_to_nested_dict():
    dict_values = {'hp': 1, 'stepa__hp': 2, 'stepa__stepb__hp': 3}

    r = HyperparameterSamples(**dict_values)
    r = r.to_nested_dict()

    expected_dict_values = {'hp': 1, 'stepa': {'hp': 2, 'stepb': {'hp': 3}}}
    assert r == HyperparameterSamples(**expected_dict_values)
def test_hyperparams_to_flat():
    dict_values = {'hp': 1, 'stepa': {'hp': 2, 'stepb': {'hp': 3}}}

    r = HyperparameterSamples(**dict_values)
    r = r.to_flat()

    expected_dict_values = {'hp': 1, 'stepa__hp': 2, 'stepa__stepb__hp': 3}
    assert r == HyperparameterSamples(**expected_dict_values)
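# The flat form above uses a double-underscore convention: a key like
# 'stepa__stepb__hp' addresses hyperparameter 'hp' on child step 'stepb'
# nested inside step 'stepa'. A small round-trip sketch using only the API
# exercised by the two tests above (values are arbitrary):
def _flat_nested_round_trip_example():
    flat = HyperparameterSamples({'stepa__stepb__hp': 3})
    nested = HyperparameterSamples({'stepa': {'stepb': {'hp': 3}}})
    assert flat.to_nested_dict() == nested  # nesting groups keys on '__'
    assert nested.to_flat() == flat         # flattening is the inverse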
def test_has_children_mixin_apply_should_return_recursive_dict_to_direct_children():
    p = Pipeline([
        ('a', Identity().set_hyperparams(HyperparameterSamples({'hp': 0}))),
        ('b', Identity().set_hyperparams(HyperparameterSamples({'hp': 1})))
    ])

    results = p.apply('_get_hyperparams', ra=None)

    assert results.to_flat_as_dict_primitive()['a__hp'] == 0
    assert results.to_flat_as_dict_primitive()['b__hp'] == 1
def test_sklearn_wrapper_update_hyperparams():
    p = SKLearnWrapper(PCA())
    p.set_hyperparams(HyperparameterSamples({
        'n_components': 2,
        'svd_solver': 'full'
    }))

    p.update_hyperparams(HyperparameterSamples({
        'n_components': 4
    }))

    assert p.wrapped_sklearn_predictor.n_components == 4
    assert p.wrapped_sklearn_predictor.svd_solver == 'full'
def test_meta_step_mixin_update_hyperparams_should_update_wrapped_step_hyperparams():
    p = SomeMetaStepMixin(SomeStep())
    p.set_hyperparams(HyperparameterSamples({
        META_STEP_HP: META_STEP_HP_VALUE,
        SOME_STEP_HP: SOME_STEP_HP_VALUE
    }))

    p.update_hyperparams(HyperparameterSamples({
        SOME_STEP_HP: SOME_STEP_HP_VALUE + 1
    }))

    assert p.hyperparams[META_STEP_HP] == META_STEP_HP_VALUE
    assert p.get_step().get_hyperparams()['somestep_hyperparam'] == SOME_STEP_HP_VALUE + 1
def test_step_cloner_update_hyperparams_should_update_wrapped_step_hyperparams():
    p = StepClonerForEachDataInput(SomeStep())
    p.set_hyperparams(HyperparameterSamples({
        META_STEP_HP: META_STEP_HP_VALUE,
        SOME_STEP_HP: SOME_STEP_HP_VALUE
    }))

    p.update_hyperparams(HyperparameterSamples({
        SOME_STEP_HP: SOME_STEP_HP_VALUE + 1,
    }))

    assert isinstance(p.hyperparams, HyperparameterSamples)
    assert p.hyperparams[META_STEP_HP] == META_STEP_HP_VALUE
    assert p.get_step().get_hyperparams()[SOME_STEP_HP_KEY] == SOME_STEP_HP_VALUE + 1
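# Both tests above rely on update_hyperparams merging new samples into the
# existing ones, while set_hyperparams replaces them wholesale. A minimal
# sketch of the difference, assuming Identity inherits the generic
# BaseStep.update_hyperparams (illustrative values):
def _update_vs_set_example():
    step = Identity().set_hyperparams(HyperparameterSamples({'a': 1, 'b': 2}))
    step.update_hyperparams(HyperparameterSamples({'b': 3}))
    assert step.get_hyperparams()['a'] == 1  # preserved by the merge
    assert step.get_hyperparams()['b'] == 3  # overwritten by the update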
def test_hyperparams_repository_should_load_all_trials(tmpdir):
    tmpdir = os.path.join(tmpdir, "__json__")
    os.mkdir(tmpdir)
    hyperparams_json_repository = HyperparamsJSONRepository(tmpdir)
    n_trials = 3
    for i in range(n_trials):
        hyperparams = HyperparameterSamples({'learning_rate': 0.01 + i * 0.01})
        hyperparams_json_repository.save_score_for_success_trial(hyperparams, i)

    trials = hyperparams_json_repository.load_all_trials()

    assert len(trials) == n_trials
    for i in range(n_trials):
        assert trials[i].hyperparams == HyperparameterSamples(
            {'learning_rate': 0.01 + i * 0.01}).to_flat_as_dict_primitive(), (i, str(trials))
def test_has_children_mixin_apply_should_return_recursive_dict_to_recursive_children():
    p = Pipeline([
        Pipeline([
            ('c', Identity().set_hyperparams(HyperparameterSamples({'hp': 3}))),
            ('d', Identity().set_hyperparams(HyperparameterSamples({'hp': 4})))
        ]).set_hyperparams(HyperparameterSamples({'hp': 2})),
    ])

    results = p.apply('_get_hyperparams', ra=None)

    assert results['Pipeline__hp'] == 2
    assert results['Pipeline__c__hp'] == 3
    assert results['Pipeline__d__hp'] == 4
def test_when_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()
    pipeline = create_pipeline(tmpdir, pickle_checkpoint_step, tape,
                               HyperparameterSamples({"a__learning_rate": 1}))

    pipeline, actual_data_inputs = pipeline.fit_transform(data_inputs, expected_outputs)

    actual_tape = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]
    assert os.path.exists(os.path.join(
        tmpdir, 'ResumablePipeline', 'pickle_checkpoint', 'di',
        '44f9d6dd8b6ccae571ca04525c3eaffa.pickle'))
    assert os.path.exists(os.path.join(
        tmpdir, 'ResumablePipeline', 'pickle_checkpoint', 'di',
        '898a67b2f5eeae6393ca4b3162ba8e3d.pickle'))
    assert os.path.exists(os.path.join(
        tmpdir, 'ResumablePipeline', 'pickle_checkpoint', 'eo',
        '44f9d6dd8b6ccae571ca04525c3eaffa.pickle'))
    assert os.path.exists(os.path.join(
        tmpdir, 'ResumablePipeline', 'pickle_checkpoint', 'eo',
        '898a67b2f5eeae6393ca4b3162ba8e3d.pickle'))
def test_failure_trial_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name='mse')
    with trial:
        trial_split = given_failed_trial_split(trial)

    trial_json = trial.to_json()

    assert trial_json['status'] == TRIAL_STATUS.FAILED.value
    assert trial_json['error'] == str(trial_split.error)
    assert trial_json['error_traceback'] == EXPECTED_ERROR_TRACEBACK
    assert trial_json['main_metric_name'] == trial.main_metric_name
    assert then_failed_validation_split_json_is_valid(
        trial_json['validation_splits'][0], trial_split=trial_split)
    start_time = datetime.datetime.strptime(trial_json['start_time'], TRIAL_DATETIME_STR_FORMAT)
    end_time = datetime.datetime.strptime(
        trial_json['end_time'], TRIAL_DATETIME_STR_FORMAT) + datetime.timedelta(hours=1)
    assert start_time < end_time
def _get_hyperparams(self):
    if self.return_all_sklearn_default_params_on_get:
        hp = self.wrapped_sklearn_predictor.get_params()
        self._delete_base_estimator_from_dict(hp)
        return HyperparameterSamples(hp)
    else:
        return BaseStep._get_hyperparams(self)
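# Usage sketch for the getter above (assumption: SKLearnWrapper exposes the
# flag at construction time, as the attribute name suggests):
# p = SKLearnWrapper(PCA(), return_all_sklearn_default_params_on_get=True)
# p.get_hyperparams()  # would then include sklearn's own defaults, not just
#                      # the hyperparams explicitly set on the step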
def create_callback_step(tape_step_name, hyperparams):
    step = (tape_step_name, TransformCallbackStepWithMockHasher(
        callback_function=TapeCallbackFunction().callback,
        more_arguments=[tape_step_name],
        hyperparams=HyperparameterSamples(hyperparams)
    ))
    return step
def test_success_trial_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name='mse')
    with trial:
        given_success_trial_validation_split(trial)

    trial_json = trial.to_json()

    assert trial_json['status'] == TRIAL_STATUS.SUCCESS.value
    assert trial_json['error'] is None
    assert trial_json['error_traceback'] is None
    assert trial_json['main_metric_name'] == trial.main_metric_name
    assert then_success_trial_split_json_is_valid(trial_json['validation_splits'][0])
    start_time = datetime.datetime.strptime(trial_json['start_time'], TRIAL_DATETIME_STR_FORMAT)
    end_time = datetime.datetime.strptime(
        trial_json['end_time'], TRIAL_DATETIME_STR_FORMAT) + datetime.timedelta(hours=1)
    assert start_time < end_time
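# `given_success_trial_validation_split` and `given_failed_trial_split` are
# fixtures defined elsewhere in the test module. A rough sketch of the
# success-path fixture, using only Trial/TrialSplit calls already exercised
# in this file plus an assumed set_success(); the metric name and scores are
# illustrative:
def given_success_trial_validation_split_sketch(trial):
    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name='mse', score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name='mse', score=0.5, higher_score_is_better=False)
        trial_split.set_success()  # assumed API for marking the split successful
    return trial_split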
def setup(self):
    self.hp = HyperparameterSamples({'a': 2})
    self.repo = InMemoryHyperparamsRepository()
    self.trial = Trial(
        trial_number=0,
        save_trial_function=self.repo.save_trial,
        hyperparams=self.hp,
        main_metric_name=MAIN_METRIC_NAME
    )
def test_trial_split_is_new_best_score_should_return_true_with_a_new_best_score_after_multiple_scores():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp,
                  main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)

        assert trial_split.is_new_best_score()
def from_json(update_trial_function: Callable, trial_json: Dict, cache_folder: str = None) -> 'Trial':
    trial: Trial = Trial(
        trial_number=trial_json["trial_number"],
        main_metric_name=trial_json['main_metric_name'],
        status=TRIAL_STATUS(trial_json['status']),
        hyperparams=HyperparameterSamples(trial_json['hyperparams']),
        save_trial_function=update_trial_function,
        error=trial_json['error'],
        error_traceback=trial_json['error_traceback'],
        start_time=datetime.datetime.strptime(trial_json['start_time'], TRIAL_DATETIME_STR_FORMAT),
        end_time=datetime.datetime.strptime(trial_json['end_time'], TRIAL_DATETIME_STR_FORMAT),
        cache_folder=cache_folder,
        logger=None
    )
    trial.validation_splits = [
        TrialSplit.from_json(trial=trial, trial_split_json=validation_split_json)
        for validation_split_json in trial_json['validation_splits']
    ]
    return trial
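# Round-trip sketch for the deserializer above (assumes from_json is exposed
# as a static constructor on Trial and that to_json() emits the same keys
# that from_json reads):
def _trial_round_trip_example(repo, trial):
    trial_json = trial.to_json()
    restored = Trial.from_json(repo.save_trial, trial_json)
    assert restored.hyperparams == HyperparameterSamples(trial_json['hyperparams'])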
def __init__(self, add=1):
    NonFittableMixin.__init__(self)
    BaseStep.__init__(
        self,
        hyperparams=HyperparameterSamples({
            'add': add
        })
    )
def __init__(self, multiply_by=1):
    NonFittableMixin.__init__(self)
    BaseStep.__init__(
        self,
        hyperparams=HyperparameterSamples({
            'multiply_by': multiply_by
        })
    )
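# The two constructors above register their argument as a hyperparameter so
# it can be tuned later; a matching transform would read the live value back
# at call time, e.g. (sketch for the multiply step):
# def transform(self, data_inputs):
#     return data_inputs * self.hyperparams['multiply_by']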
def test_hyperparams_to_nested_dict():
    dict_values = {'hp': 1, 'stepa__hp': 2, 'stepa__stepb__hp': 3}

    r = HyperparameterSamples(**dict_values)
    r = r.to_nested_dict()

    expected_dict_values = {'hp': 1, 'stepa': {'hp': 2, 'stepb': {'hp': 3}}}
    assert r.to_nested_dict_as_dict_primitive() == expected_dict_values
def test_apply_on_pipeline_with_meta_step_and_positional_argument_should_call_method_on_each_step():
    pipeline = Pipeline([OutputTransformerWrapper(MultiplyByN(1)), MultiplyByN(1)])

    pipeline.apply('set_hyperparams', hyperparams=HyperparameterSamples({'multiply_by': 2}))

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    assert pipeline['OutputTransformerWrapper'].wrapped.get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN'].get_hyperparams()['multiply_by'] == 2
def test_optional_should_disable_wrapped_step_when_disabled():
    p = Optional(MultiplyByN(2), nullified_return_value=[]).set_hyperparams(
        HyperparameterSamples({'enabled': False}))
    data_inputs = np.array(list(range(10)))

    outputs = p.transform(data_inputs)

    assert outputs == []
def test_apply_on_pipeline_with_positional_argument_should_call_method_on_each_step():
    pipeline = Pipeline([MultiplyByN(1), MultiplyByN(1)])

    pipeline.apply('set_hyperparams', hyperparams=HyperparameterSamples({'multiply_by': 2}))

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN'].get_hyperparams()['multiply_by'] == 2
    assert pipeline['MultiplyByN1'].get_hyperparams()['multiply_by'] == 2
def test_success_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)
    with trial:
        trial_split = given_success_trial_validation_split(trial)

    trial_json = trial_split.to_json()

    then_success_trial_split_json_is_valid(trial_json)
def __init__(self, steps, hyperparams=None):
    FeatureUnion.__init__(self, steps)
    self._make_all_steps_optional()

    if hyperparams is None:
        self.set_hyperparams(HyperparameterSamples({
            CHOICE_HYPERPARAM: list(self.keys())[0]
        }))
def __init__(self, steps, hyperparams=None):
    FeatureUnion.__init__(self, steps)

    if hyperparams is None:
        self.set_hyperparams(HyperparameterSamples({}))
    else:
        self.set_hyperparams(hyperparams)

    self._make_all_steps_optional()
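# Both constructors above wire a choice-style step: every child is made
# optional, and (in the first variant) CHOICE_HYPERPARAM defaults to the
# first step's name. Selecting a branch later would go through the same
# hyperparameter (sketch; `ChooseOneStepOf` and the step names are
# illustrative):
# p = ChooseOneStepOf([step_a, step_b])
# p.set_hyperparams(HyperparameterSamples({CHOICE_HYPERPARAM: 'step_b'}))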
def get_best_hyperparams(self) -> HyperparameterSamples:
    """
    Get the best hyperparams from all of the saved trials.

    :return: best hyperparams.
    """
    trials = self.load_all_trials(status=TRIAL_STATUS.SUCCESS)
    best_hyperparams = HyperparameterSamples(trials.get_best_hyperparams())
    return best_hyperparams
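# Usage sketch for the method above (assumes an AutoML run has already
# populated the repository folder with successful trials; the helper name is
# illustrative):
def _apply_best_hyperparams_example(pipeline, cache_folder):
    repo = HyperparamsJSONRepository(cache_folder)
    best = repo.get_best_hyperparams()  # flat samples from the best SUCCESS trial
    return pipeline.set_hyperparams(best)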
def test_failure_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')
    with trial:
        trial_split = given_failed_trial_split(trial)

    trial_json = trial_split.to_json()

    then_failed_validation_split_json_is_valid(trial_json, trial_split)
def test_hyperparams_repository_should_load_all_trials(tmpdir):
    hyperparams_json_repository = HyperparamsJSONRepository(tmpdir)
    for i in range(2):
        hyperparams = HyperparameterSamples({'learning_rate': 0.01 + i * 0.01})
        hyperparams_json_repository.save_score_for_success_trial(hyperparams, i)

    trials = hyperparams_json_repository.load_all_trials()

    assert len(trials) == 2
    assert trials[0].hyperparams == HyperparameterSamples({
        'learning_rate': 0.01 + 0 * 0.01
    }).to_flat_as_dict_primitive()
    assert trials[1].hyperparams == HyperparameterSamples({
        'learning_rate': 0.01 + 1 * 0.01
    }).to_flat_as_dict_primitive()