Example 1
def test_success_trial_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial,
                  hyperparams=hp,
                  main_metric_name='mse')

    with trial:
        given_success_trial_validation_split(trial)

    trial_json = trial.to_json()

    assert trial_json['status'] == TRIAL_STATUS.SUCCESS.value
    assert trial_json['error'] is None
    assert trial_json['error_traceback'] is None
    assert trial_json['main_metric_name'] == trial.main_metric_name
    assert then_success_trial_split_json_is_valid(
        trial_json['validation_splits'][0])

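    # The hour of padding below is presumably there to keep the start < end
    # assertion from flaking when both timestamps land on the same clock tick.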
    start_time = datetime.datetime.strptime(trial_json['start_time'],
                                            TRIAL_DATETIME_STR_FORMAT)
    end_time = datetime.datetime.strptime(
        trial_json['end_time'],
        TRIAL_DATETIME_STR_FORMAT) + datetime.timedelta(hours=1)

    assert start_time < end_time
Example 2
def test_failure_trial_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial,
                  hyperparams=hp,
                  main_metric_name='mse')

    with trial:
        trial_split = given_failed_trial_split(trial)

    trial_json = trial.to_json()

    assert trial_json['status'] == TRIAL_STATUS.FAILED.value
    assert trial_json['error'] == str(trial_split.error)
    assert trial_json['error_traceback'] == EXPECTED_ERROR_TRACEBACK
    assert trial_json['main_metric_name'] == trial.main_metric_name
    assert then_failed_validation_split_json_is_valid(
        trial_json['validation_splits'][0], trial_split=trial_split)

    start_time = datetime.datetime.strptime(trial_json['start_time'],
                                            TRIAL_DATETIME_STR_FORMAT)
    end_time = datetime.datetime.strptime(
        trial_json['end_time'],
        TRIAL_DATETIME_STR_FORMAT) + datetime.timedelta(hours=1)

    assert start_time < end_time
Example 3
def test_trial_split_is_new_best_score_should_return_true_with_a_new_best_score_after_multiple_scores(
):
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial,
                  hyperparams=hp,
                  main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME,
                                             score=0.5,
                                             higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME,
                                                  score=0.5,
                                                  higher_score_is_better=False)

        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME,
                                             score=0.7,
                                             higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME,
                                                  score=0.7,
                                                  higher_score_is_better=False)

        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME,
                                             score=0.4,
                                             higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME,
                                                  score=0.4,
                                                  higher_score_is_better=False)

    assert trial_split.is_new_best_score()
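Because every metric is added with `higher_score_is_better=False`, lower is better here: the final validation score of 0.4 beats the earlier best of 0.5, so `is_new_best_score()` returns `True`.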
Example 4
    def save_score_for_success_trial(self, hyperparams: HyperparameterSamples, score: float):
        self.trials.append(Trial(hyperparams, score, TRIAL_STATUS.SUCCESS))

        if self.print_success_trial:
            self.print_func('score: {}'.format(score))
            self.print_func(
                'hyperparams:\n{}'.format(json.dumps(hyperparams.to_nested_dict(), sort_keys=True, indent=4)))
Example 5
    def setup(self):
        self.hp = HyperparameterSamples({'a': 2})
        self.repo = InMemoryHyperparamsRepository()
        self.trial = Trial(trial_number=0,
                           save_trial_function=self.repo.save_trial,
                           hyperparams=self.hp,
                           main_metric_name=MAIN_METRIC_NAME)
Example 6
    def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None) -> Trial:
        """
        Train pipeline using the validation splitter.
        Track training and validation metrics for each epoch.
        Note: the present method is just a shortcut to using the `execute_trial` method with less boilerplate code needed.
        Refer to `execute_trial` for full flexibility.

        :param pipeline: pipeline to train on
        :param data_inputs: data inputs
        :param expected_outputs: expected outputs to fit on
        :return: executed trial
        """
        validation_splits: List[Tuple[DataContainer, DataContainer]] = self.validation_split_function.split_data_container(
            DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs)
        )

        repo_trial: Trial = Trial(
            pipeline=pipeline,
            hyperparams=pipeline.get_hyperparams(),
            main_metric_name=self.get_main_metric_name()
        )

        self.execute_trial(
            pipeline=pipeline,
            trial_number=1,
            repo_trial=repo_trial,
            context=ExecutionContext(),
            validation_splits=validation_splits,
            n_trial=1,
            delete_pipeline_on_completion=False
        )

        return repo_trial
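A minimal usage sketch for the `train` shortcut above, under stated assumptions: `trainer` stands in for an instance of the (unnamed) class that defines this method, the toy arrays are made up, and the `Identity` import path may vary across Neuraxle versions.

import numpy as np
from neuraxle.base import Identity  # assumption: the import path differs by Neuraxle version

data_inputs = np.array([1, 2, 3, 4])
expected_outputs = np.array([2, 4, 6, 8])

# `trainer` is a hypothetical instance of the class defining `train` above.
repo_trial = trainer.train(pipeline=Identity(),
                           data_inputs=data_inputs,
                           expected_outputs=expected_outputs)
print(repo_trial.get_validation_score())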
Example 7
def test_success_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial:
        trial_split = given_success_trial_validation_split(trial)
        trial_json = trial_split.to_json()

    then_success_trial_split_json_is_valid(trial_json)
Example 8
def test_failure_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')
    with trial:
        trial_split = given_failed_trial_split(trial)

    trial_json = trial_split.to_json()

    then_failed_validation_split_json_is_valid(trial_json, trial_split)
Example 9
def test_trials_get_best_hyperparams_should_return_hyperparams_of_best_trial():
    # Given
    hp_trial_1 = HyperparameterSamples({'a': 2})
    trial_1 = Trial(hyperparams=hp_trial_1, main_metric_name=MAIN_METRIC_NAME)
    with trial_1:
        given_success_trial_validation_split(trial_1, best_score=0.2)

    hp_trial_2 = HyperparameterSamples({'b': 3})
    trial_2 = Trial(hyperparams=hp_trial_2, main_metric_name=MAIN_METRIC_NAME)
    with trial_2:
        given_success_trial_validation_split(trial_2, best_score=0.1)

    trials = Trials(trials=[trial_1, trial_2])

    # When
    best_hyperparams = trials.get_best_hyperparams()

    # Then
    assert best_hyperparams == hp_trial_2
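Given the lower-is-better metrics used throughout these examples, trial_2's best score of 0.1 beats trial_1's 0.2, so `get_best_hyperparams()` returns `hp_trial_2`.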
Example 10
def test_success_trial_get_validation_score():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')

    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.3
Example 11
def test_trial_should_create_new_split():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.set_success()

    assert isinstance(trial_split.start_time, datetime.datetime)
    assert isinstance(trial_split.end_time, datetime.datetime)
    assert trial_split.start_time < trial_split.end_time
    assert trial.validation_splits[0] == trial_split
Example 12
def test_success_trial_multiple_splits_should_average_the_scores():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')

    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)
        given_success_trial_validation_split(trial, best_score=0.1)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.2
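The trial's validation score is the average of its splits' best scores: (0.3 + 0.1) / 2 = 0.2.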
Example 13
    def new_trial(self, auto_ml_container: 'AutoMLContainer') -> 'Trial':
        """
        Create a new trial with the best next hyperparams.

        :param auto_ml_container: auto ml data container
        :return: trial
        """
        hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(auto_ml_container)
        self.print_func('new trial:\n{}'.format(json.dumps(hyperparams.to_nested_dict(), sort_keys=True, indent=4)))

        return Trial(hyperparams=hyperparams, main_metric_name=auto_ml_container.main_scoring_metric_name)
Example 14
def test_success_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial,
                  hyperparams=hp,
                  main_metric_name=MAIN_METRIC_NAME)

    with trial:
        trial_split = given_success_trial_validation_split(trial)
        trial_json = trial_split.to_json()

    then_success_trial_split_json_is_valid(trial_json)
Example 15
def test_trial_with_failed_split_should_only_average_successful_splits():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name='mse')

    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)
        given_success_trial_validation_split(trial, best_score=0.1)
        given_failed_trial_split(trial)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.2
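As the test name says, the failed split is excluded, so the average over the two successful splits is again (0.3 + 0.1) / 2 = 0.2.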
Example 16
def test_failure_trial_split_to_json():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial,
                  hyperparams=hp,
                  main_metric_name='mse')
    with trial:
        trial_split = given_failed_trial_split(trial)

    trial_json = trial_split.to_json()

    then_failed_validation_split_json_is_valid(trial_json, trial_split)
Example 17
    def new_trial(self, auto_ml_container: 'AutoMLContainer'):
        """
        Create a new hyperparams trial JSON file.

        :param auto_ml_container: auto ml container
        :return:
        """
        hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(auto_ml_container)
        trial = Trial(hyperparams, cache_folder=self.cache_folder,
                      main_metric_name=auto_ml_container.main_scoring_metric_name)
        self._create_trial_json(trial=trial)

        return trial
Example 18
def test_success_trial_get_validation_score():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial,
                  hyperparams=hp,
                  main_metric_name='mse')

    with trial:
        given_success_trial_validation_split(trial, best_score=0.3)

    validation_score = trial.get_validation_score()

    assert validation_score == 0.3
Example 19
def test_trial_split_is_new_best_score_should_return_true_with_one_score():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(name=MAIN_METRIC_NAME,
                                             score=0.5,
                                             higher_score_is_better=False)
        trial_split.add_metric_results_validation(name=MAIN_METRIC_NAME,
                                                  score=0.5,
                                                  higher_score_is_better=False)

    assert trial_split.is_new_best_score()
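With only a single score recorded, it is trivially the best one seen so far, so `is_new_best_score()` returns `True`.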
Example 20
    def new_trial(self, auto_ml_container: 'AutoMLContainer') -> 'Trial':
        hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(
            auto_ml_container)
        logger = self._create_logger_for_trial(auto_ml_container.trial_number)
        logger.info('\nnew trial: {}'.format(
            json.dumps(hyperparams.to_nested_dict(), sort_keys=True,
                       indent=4)))

        return Trial(
            cache_folder=self.cache_folder,
            save_trial_function=self.save_trial,
            logger=logger,
            hyperparams=hyperparams,
            main_metric_name=auto_ml_container.main_scoring_metric_name)
Example 21
    def new_trial(self, auto_ml_container: 'AutoMLContainer') -> Trial:
        """
        Create a new trial with the best next hyperparams.

        :param auto_ml_container: auto ml data container
        :return: trial
        """
        hyperparams = self.hyperparameter_selection_strategy.find_next_best_hyperparams(
            auto_ml_container)

        trial = Trial(
            trial_number=auto_ml_container.trial_number,
            hyperparams=hyperparams,
            save_trial_function=self.save_trial,
            cache_folder=self.cache_folder,
            main_metric_name=auto_ml_container.main_scoring_metric_name)
        return trial
Example 22
    def train(self,
              pipeline: BaseStep,
              data_inputs,
              expected_outputs=None,
              context: ExecutionContext = None,
              trial_number=0) -> Trial:
        """
        Train pipeline using the validation splitter.
        Track training and validation metrics for each epoch.
        Note: the present method is just a shortcut to using the `execute_trial` method with less boilerplate code needed.
        Refer to `execute_trial` for full flexibility.

        :param pipeline: pipeline to train on
        :param data_inputs: data inputs
        :param expected_outputs: expected outputs to fit on
        :return: executed trial

        """
        # TODO: change order of arguments so that context isn't an optional argument
        assert context is not None

        validation_splits: List[Tuple[
            DataContainer,
            DataContainer]] = self.validation_split_function.split_data_container(
                DataContainer(data_inputs=data_inputs,
                              expected_outputs=expected_outputs),
                context=context)

        repo_trial: Trial = Trial(
            pipeline=pipeline,
            logger=context.logger,
            hyperparams=pipeline.get_hyperparams(),
            main_metric_name=self.get_main_metric_name(),
            save_trial_function=self.hyperparams_repository.save_trial,
            trial_number=trial_number)

        self.execute_trial(pipeline=pipeline,
                           repo_trial=repo_trial,
                           context=context,
                           validation_splits=validation_splits,
                           n_trial=1,
                           delete_pipeline_on_completion=False)

        return repo_trial
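Compared to Example 6, this variant requires an explicit `ExecutionContext`, threads it through data splitting and the trial's logger, and wires the repository's `save_trial` function into the `Trial`.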
Example 23
    def test_trials_get_best_hyperparams_should_return_hyperparams_of_best_trial(
            self):
        # Given
        trial_1 = self.trial
        with trial_1:
            self._given_success_trial_validation_split(trial_1, best_score=0.2)

        hp_trial_2 = HyperparameterSamples({'b': 3})
        trial_2 = Trial(trial_number=1,
                        save_trial_function=self.repo.save_trial,
                        hyperparams=hp_trial_2,
                        main_metric_name=MAIN_METRIC_NAME)
        with trial_2:
            self._given_success_trial_validation_split(trial_2, best_score=0.1)

        trials = Trials(trials=[trial_1, trial_2])

        # When
        best_hyperparams = trials.get_best_hyperparams()

        # Then
        assert best_hyperparams == hp_trial_2
Example 24
    @staticmethod
    def from_json(trial_json) -> 'Trial':
        return Trial(
            hyperparams=trial_json['hyperparams'],
            score=trial_json['score'],
            status=trial_json['status']
        )
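Note that this `from_json` matches the lightweight positional `Trial(hyperparams, score, status)` constructor seen in Example 4, not the keyword-argument constructor used by the test examples above.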