def test_success_trial_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name='mse')

    with trial:
        given_success_trial_validation_split(trial)

    trial_json = trial.to_json()

    assert trial_json['status'] == TRIAL_STATUS.SUCCESS.value
    assert trial_json['error'] is None
    assert trial_json['error_traceback'] is None
    assert trial_json['main_metric_name'] == trial.main_metric_name
    assert then_success_trial_split_json_is_valid(trial_json['validation_splits'][0])

    start_time = datetime.datetime.strptime(trial_json['start_time'], TRIAL_DATETIME_STR_FORMAT)
    end_time = datetime.datetime.strptime(
        trial_json['end_time'], TRIAL_DATETIME_STR_FORMAT) + datetime.timedelta(hours=1)
    assert start_time < end_time
def test_failure_trial_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name='mse')

    with trial:
        trial_split = given_failed_trial_split(trial)

    trial_json = trial.to_json()

    assert trial_json['status'] == TRIAL_STATUS.FAILED.value
    assert trial_json['error'] == str(trial_split.error)
    assert trial_json['error_traceback'] == EXPECTED_ERROR_TRACEBACK
    assert trial_json['main_metric_name'] == trial.main_metric_name
    assert then_failed_validation_split_json_is_valid(
        trial_json['validation_splits'][0], trial_split=trial_split)

    start_time = datetime.datetime.strptime(trial_json['start_time'], TRIAL_DATETIME_STR_FORMAT)
    end_time = datetime.datetime.strptime(
        trial_json['end_time'], TRIAL_DATETIME_STR_FORMAT) + datetime.timedelta(hours=1)
    assert start_time < end_time
def test_trial_split_is_new_best_score_should_return_true_with_a_new_best_score_after_multiple_scores():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)

        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)

        assert trial_split.is_new_best_score()
def save_score_for_success_trial(self, hyperparams: HyperparameterSamples, score: float):
    self.trials.append(Trial(hyperparams, score, TRIAL_STATUS.SUCCESS))

    if self.print_success_trial:
        self.print_func('score: {}'.format(score))
        self.print_func(
            'hyperparams:\n{}'.format(json.dumps(hyperparams.to_nested_dict(), sort_keys=True, indent=4)))
def setup(self):
    self.hp = HyperparameterSamples({'a': 2})
    self.repo = InMemoryHyperparamsRepository()
    self.trial = Trial(
        trial_number=0,
        save_trial_function=self.repo.save_trial,
        hyperparams=self.hp,
        main_metric_name=MAIN_METRIC_NAME)
def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None) -> Trial:
    """
    Train the pipeline using the validation splitter.
    Track training and validation metrics for each epoch.

    Note: this method is a shortcut for `execute_trial` with less boilerplate.
    Refer to `execute_trial` for full flexibility.

    :param pipeline: pipeline to train on
    :param data_inputs: data inputs
    :param expected_outputs: expected outputs to fit on
    :return: executed trial
    """
    validation_splits: List[Tuple[DataContainer, DataContainer]] = \
        self.validation_split_function.split_data_container(
            DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs)
        )

    repo_trial: Trial = Trial(
        pipeline=pipeline,
        hyperparams=pipeline.get_hyperparams(),
        main_metric_name=self.get_main_metric_name()
    )

    self.execute_trial(
        pipeline=pipeline,
        trial_number=1,
        repo_trial=repo_trial,
        context=ExecutionContext(),
        validation_splits=validation_splits,
        n_trial=1,
        delete_pipeline_on_completion=False
    )

    return repo_trial
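# Hedged usage sketch (an assumption, not library code): how the `train` shortcut above might be
# called. `trainer` stands in for whatever object defines `train`, and `X` / `y` are hypothetical
# data arrays; only `train` and `get_validation_score` are taken from the snippets in this section.
def example_train_usage(trainer, pipeline: BaseStep, X, y):
    # Run the shortcut: splits the data, builds a Trial, and executes it on the splits.
    repo_trial: Trial = trainer.train(pipeline=pipeline, data_inputs=X, expected_outputs=y)
    # The returned Trial aggregates the validation metric over its splits.
    return repo_trial.get_validation_score()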
def test_success_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial:
        trial_split = given_success_trial_validation_split(trial)
        trial_json = trial_split.to_json()

    then_success_trial_split_json_is_valid(trial_json)
def test_failure_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')

    with trial:
        trial_split = given_failed_trial_split(trial)
        trial_json = trial_split.to_json()

    then_failed_validation_split_json_is_valid(trial_json, trial_split)
def test_trials_get_best_hyperparams_should_return_hyperparams_of_best_trial():
    # Given
    hp_trial_1 = HyperparameterSamples({'a': 2})
    trial_1 = Trial(hyperparams=hp_trial_1, main_metric_name=MAIN_METRIC_NAME)
    with trial_1:
        given_success_trial_validation_split(trial_1, best_score=0.2)

    hp_trial_2 = HyperparameterSamples({'b': 3})
    trial_2 = Trial(hyperparams=hp_trial_2, main_metric_name=MAIN_METRIC_NAME)
    with trial_2:
        given_success_trial_validation_split(trial_2, best_score=0.1)

    trials = Trials(trials=[trial_1, trial_2])

    # When
    best_hyperparams = trials.get_best_hyperparams()

    # Then
    assert best_hyperparams == hp_trial_2
def test_success_trial_get_validation_score():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')
    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.3
def test_trial_should_create_new_split():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.set_success()

    assert isinstance(trial_split.start_time, datetime.datetime)
    assert isinstance(trial_split.end_time, datetime.datetime)
    assert trial_split.start_time < trial_split.end_time
    assert trial.validation_splits[0] == trial_split
def test_success_trial_multiple_splits_should_average_the_scores():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')
    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)
        given_success_trial_validation_split(trial, best_score=0.1)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.2
def new_trial(self, auto_ml_container: 'AutoMLContainer') -> 'Trial':
    """
    Create a new trial with the next best hyperparams.

    :param auto_ml_container: auto ml data container
    :return: trial
    """
    hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(auto_ml_container)
    self.print_func('new trial:\n{}'.format(json.dumps(hyperparams.to_nested_dict(), sort_keys=True, indent=4)))

    return Trial(hyperparams=hyperparams, main_metric_name=auto_ml_container.main_scoring_metric_name)
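# Hedged usage sketch (an assumption): how a repository's `new_trial` above might be driven from
# an AutoML loop. `repo` is a hypothetical instance of the class defining `new_trial`, and
# `auto_ml_container` is assumed to carry `main_scoring_metric_name` as referenced above; the
# `with trial:` context-manager pattern mirrors the tests in this section.
def example_new_trial_usage(repo, auto_ml_container: 'AutoMLContainer'):
    trial = repo.new_trial(auto_ml_container)
    with trial:
        # Fit and evaluate the candidate hyperparams here (omitted in this sketch).
        pass
    return trial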
def test_success_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial:
        trial_split = given_success_trial_validation_split(trial)
        trial_json = trial_split.to_json()

    then_success_trial_split_json_is_valid(trial_json)
def test_trial_with_failed_split_should_only_average_successful_splits():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')
    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)
        given_success_trial_validation_split(trial, best_score=0.1)
        given_failed_trial_split(trial)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.2
def test_failure_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name='mse')

    with trial:
        trial_split = given_failed_trial_split(trial)
        trial_json = trial_split.to_json()

    then_failed_validation_split_json_is_valid(trial_json, trial_split)
def new_trial(self, auto_ml_container: 'AutoMLContainer'):
    """
    Create a new hyperparams trial json file.

    :param auto_ml_container: auto ml container
    :return: trial
    """
    hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(auto_ml_container)
    trial = Trial(
        hyperparams,
        cache_folder=self.cache_folder,
        main_metric_name=auto_ml_container.main_scoring_metric_name)
    self._create_trial_json(trial=trial)

    return trial
def test_success_trial_get_validation_score():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name='mse')
    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.3
def test_trial_split_is_new_best_score_should_return_true_with_one_score():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        assert trial_split.is_new_best_score()
def new_trial(self, auto_ml_container: 'AutoMLContainer') -> 'Trial':
    hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(auto_ml_container)
    logger = self._create_logger_for_trial(auto_ml_container.trial_number)
    logger.info('\nnew trial: {}'.format(json.dumps(hyperparams.to_nested_dict(), sort_keys=True, indent=4)))

    return Trial(
        cache_folder=self.cache_folder,
        save_trial_function=self.save_trial,
        logger=logger,
        hyperparams=hyperparams,
        main_metric_name=auto_ml_container.main_scoring_metric_name)
def new_trial(self, auto_ml_container: 'AutoMLContainer') -> Trial:
    """
    Create a new trial with the next best hyperparams.

    :param auto_ml_container: auto ml data container
    :return: trial
    """
    hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(auto_ml_container)
    trial = Trial(
        trial_number=auto_ml_container.trial_number,
        hyperparams=hyperparams,
        save_trial_function=self.save_trial,
        cache_folder=self.cache_folder,
        main_metric_name=auto_ml_container.main_scoring_metric_name)

    return trial
def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None,
          context: ExecutionContext = None, trial_number=0) -> Trial:
    """
    Train the pipeline using the validation splitter.
    Track training and validation metrics for each epoch.

    Note: this method is a shortcut for `execute_trial` with less boilerplate.
    Refer to `execute_trial` for full flexibility.

    :param pipeline: pipeline to train on
    :param data_inputs: data inputs
    :param expected_outputs: expected outputs to fit on
    :param context: execution context
    :param trial_number: trial number
    :return: executed trial
    """
    # TODO: change the argument order so that context isn't an optional argument.
    assert context is not None

    validation_splits: List[Tuple[DataContainer, DataContainer]] = \
        self.validation_split_function.split_data_container(
            DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs),
            context=context)

    repo_trial: Trial = Trial(
        pipeline=pipeline,
        logger=context.logger,
        hyperparams=pipeline.get_hyperparams(),
        main_metric_name=self.get_main_metric_name(),
        save_trial_function=self.hyperparams_repository.save_trial,
        trial_number=trial_number)

    self.execute_trial(
        pipeline=pipeline,
        repo_trial=repo_trial,
        context=context,
        validation_splits=validation_splits,
        n_trial=1,
        delete_pipeline_on_completion=False)

    return repo_trial
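# Hedged usage sketch (an assumption): unlike the earlier `train` variant, the one above asserts
# that an ExecutionContext is passed, so a caller would construct one explicitly. `trainer`, `X`,
# and `y` are hypothetical placeholders; the keyword names match the signature above.
def example_train_with_context(trainer, pipeline: BaseStep, X, y):
    context = ExecutionContext()  # built explicitly because `context` must not be None
    return trainer.train(pipeline=pipeline, data_inputs=X, expected_outputs=y,
                         context=context, trial_number=0)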
def test_trials_get_best_hyperparams_should_return_hyperparams_of_best_trial(self):
    # Given
    trial_1 = self.trial
    with trial_1:
        self._given_success_trial_validation_split(trial_1, best_score=0.2)

    hp_trial_2 = HyperparameterSamples({'b': 3})
    trial_2 = Trial(
        trial_number=1,
        save_trial_function=self.repo.save_trial,
        hyperparams=hp_trial_2,
        main_metric_name=MAIN_METRIC_NAME)
    with trial_2:
        self._given_success_trial_validation_split(trial_2, best_score=0.1)

    trials = Trials(trials=[trial_1, trial_2])

    # When
    best_hyperparams = trials.get_best_hyperparams()

    # Then
    assert best_hyperparams == hp_trial_2
def from_json(trial_json) -> 'Trial':
    return Trial(
        hyperparams=trial_json['hyperparams'],
        score=trial_json['score'],
        status=trial_json['status']
    )