Esempio n. 1
0
    def _hp_search(self, input_df, validation_df, metric, recipe, mc,
                   resources_per_trial, remote_dir):
        """Run a hyper-parameter search and build a pipeline from its result.

        A feature transformer and a model factory are created, the search
        space is obtained from ``recipe`` using the transformer's feature
        list, and a ``RayTuneSearchEngine`` is compiled and run with them.

        :param input_df: training data, passed positionally to
            ``compile`` of the search engine.
        :param validation_df: validation data forwarded to the engine.
        :param metric: metric forwarded to the engine.
        :param recipe: object providing ``search_space(feature_list)``.
        :param mc: forwarded unchanged to the engine's ``compile``.
        :param resources_per_trial: bound into the model factory and given
            to the search engine.
        :param remote_dir: forwarded to the engine and to
            ``self._make_pipeline``.
        :return: the value returned by ``self._make_pipeline``.
        """
        transformer = self.create_feature_transformer()
        features = transformer.get_feature_list()

        # Model factory with the per-trial resources already bound.
        build_model = partial(self.create_model,
                              resources_per_trial=resources_per_trial)

        space = recipe.search_space(features)

        engine_options = {
            'logs_dir': self.logs_dir,
            'resources_per_trial': resources_per_trial,
            'name': self.name,
            'remote_dir': remote_dir,
        }
        engine = RayTuneSearchEngine(**engine_options)

        compile_options = {
            'model_create_func': build_model,
            'search_space': space,
            'recipe': recipe,
            'feature_transformers': transformer,
            'validation_df': validation_df,
            'metric': metric,
            'mc': mc,
        }
        engine.compile(input_df, **compile_options)
        search_result = engine.run()

        return self._make_pipeline(search_result,
                                   feature_transformers=transformer,
                                   model_create_fn=build_model,
                                   remote_dir=remote_dir)
Esempio n. 2
0
    def _hp_search(self,
                   input_df,
                   validation_df,
                   metric,
                   recipe,
                   mc,
                   resources_per_trial,
                   remote_dir):
        """Run a hyper-parameter search and build a pipeline from its result.

        :param input_df: training data, passed to the engine as ``data``.
        :param validation_df: passed to the engine as ``validation_data``.
        :param metric: metric forwarded to the engine.
        :param recipe: handed to ``self._detach_recipe`` before the search
            settings are read from ``self``.
        :param mc: forwarded unchanged to the engine's ``compile``.
        :param resources_per_trial: used for the model factory and given to
            the search engine.
        :param remote_dir: forwarded to the engine and to
            ``self._make_pipeline``.
        :return: the value returned by ``self._make_pipeline``.
        """
        transformer = self.create_feature_transformer()
        build_model = self.make_model_fn(resources_per_trial)

        # NOTE(review): presumably this populates the self.* search settings
        # read below (search_space, num_samples, ...) - confirm.
        self._detach_recipe(recipe)

        engine_options = {
            'logs_dir': self.logs_dir,
            'resources_per_trial': resources_per_trial,
            'name': self.name,
            'remote_dir': remote_dir,
        }
        engine = RayTuneSearchEngine(**engine_options)

        compile_options = {
            'data': input_df,
            'model_create_func': build_model,
            'validation_data': validation_df,
            'search_space': self.search_space,
            'n_sampling': self.num_samples,
            'epochs': self.epochs,
            'metric_threshold': self.metric_threshold,
            'search_alg': self.search_alg,
            'search_alg_params': self.search_alg_params,
            'scheduler': self.scheduler,
            'scheduler_params': self.scheduler_params,
            'feature_transformers': transformer,
            'metric': metric,
            'mc': mc,
        }
        engine.compile(**compile_options)
        search_result = engine.run()

        # A fresh model instance is created only after the search finished.
        return self._make_pipeline(search_result,
                                   feature_transformers=transformer,
                                   model=build_model(),
                                   remote_dir=remote_dir)
    def _hp_search(self, input_df, validation_df, metric, recipe, mc,
                   resources_per_trial, remote_dir):
        """Run a hyper-parameter search and build a pipeline from its result.

        :param input_df: training data, passed to the engine under the
            ``'df'`` key of ``data``.
        :param validation_df: validation data, passed under ``'val_df'``.
        :param metric: metric forwarded to the engine.
        :param recipe: object providing ``search_space(feature_list)``; also
            forwarded to the engine.
        :param mc: forwarded unchanged to the engine's ``compile``.
        :param resources_per_trial: used for the model factory and given to
            the search engine.
        :param remote_dir: forwarded to the engine and to
            ``self._make_pipeline``.
        :return: the value returned by ``self._make_pipeline``.
        """
        ft = self.create_feature_transformer()
        # Best effort: when the transformer cannot supply a feature list the
        # recipe is asked for its search space with ``None`` instead.
        # ``Exception`` (rather than a bare ``except``) keeps that fallback
        # while letting KeyboardInterrupt / SystemExit propagate.
        try:
            feature_list = ft.get_feature_list()
        except Exception:
            feature_list = None

        model_fn = self.make_model_fn(resources_per_trial)

        # prepare parameters for search engine
        search_space = recipe.search_space(feature_list)

        searcher = RayTuneSearchEngine(
            logs_dir=self.logs_dir,
            resources_per_trial=resources_per_trial,
            name=self.name,
            remote_dir=remote_dir,
        )
        searcher.compile(
            data={
                'df': input_df,
                'val_df': validation_df
            },
            model_create_func=model_fn,
            search_space=search_space,
            recipe=recipe,
            search_alg=self.search_alg,
            search_alg_params=self.search_alg_params,
            scheduler=self.scheduler,
            scheduler_params=self.scheduler_params,
            feature_transformers=ft,
            metric=metric,
            mc=mc,
        )
        analysis = searcher.run()

        # A fresh model instance is created only after the search finished.
        pipeline = self._make_pipeline(analysis,
                                       feature_transformers=ft,
                                       model=model_fn(),
                                       remote_dir=remote_dir)
        return pipeline
    def test_searcher_metric(self):
        """Check metric reporting, trial ordering and metric-based stopping.

        Three searches are run on the same numpy data, each verified by the
        same assertions in ``check_metric``:

        * ``'mse'`` in min mode, expected to end after one iteration;
        * ``'r2'`` in max mode, expected to end after one iteration;
        * ``'mae'`` in min mode, expected to run for 20 iterations.
        """
        train_x, train_y, val_x, val_y = get_np_input()
        data = (train_x, train_y)
        val_data = (val_x, val_y)

        def check_metric(metric, mode, stop_threshold, expected_iterations):
            """Run one search on ``metric`` and assert on its results.

            :param metric: name of the metric to optimise and report.
            :param mode: ``"min"`` or ``"max"``, the direction of ``metric``.
            :param stop_threshold: value handed to ``create_stop``.
            :param expected_iterations: expected
                ``iterations_since_restore`` of the first trial.
            """
            searcher = prepare_searcher(
                data=data,
                validation_data=val_data,
                name='test_searcher_metric_name',
                metric=metric,
                search_space=create_simple_search_space(),
                stop=create_stop(stop_threshold))
            analysis = searcher.run()
            trials = analysis.trials

            sorted_trials = RayTuneSearchEngine._get_sorted_trials(
                trials, metric=metric, mode=mode)
            sorted_results = [t.last_result[metric] for t in sorted_trials]
            neighbours = list(zip(sorted_results, sorted_results[1:]))

            # the metric name is reported in the trial results
            assert metric in trials[0].last_result.keys()

            # _get_sorted_trials orders best-first: increasing values for
            # "min" mode, decreasing values for "max" mode
            if mode == "min":
                assert all(a <= b for a, b in neighbours)
            else:
                assert all(a >= b for a, b in neighbours)

            # _get_best_result returns the first entry of that ordering
            assert RayTuneSearchEngine._get_best_result(
                trials, metric=metric,
                mode=mode)[metric] == sorted_results[0]

            # the last reported value is never better than 'best_<metric>'
            best_key = 'best_' + metric
            checked = trials[:len(sorted_results)]
            if mode == "min":
                assert all(t.last_result[metric] >= t.last_result[best_key]
                           for t in checked)
            else:
                assert all(t.last_result[metric] <= t.last_result[best_key]
                           for t in checked)

            # number of iterations the first trial ran before it ended
            assert (trials[0].last_result['iterations_since_restore'] ==
                    expected_iterations)

        # min mode metric, stopped at once
        # NOTE(review): presumably any mse already satisfies an infinite
        # threshold - confirm against create_stop.
        check_metric('mse', "min", float('inf'), 1)

        # max mode metric, stopped at once
        check_metric('r2', "max", 0, 1)

        # min mode metric that is never stopped by the metric threshold
        # NOTE(review): 20 is presumably the iteration cap configured by
        # create_stop - confirm.
        check_metric('mae', "min", 0, 20)