Example #1
0
 def fit_ensemble(self,
                  task_id=None,
                  hdl_id=None,
                  trials_fetcher="GetBestK",
                  trials_fetcher_params=frozendict(k=10),
                  ensemble_type="stack",
                  ensemble_params=frozendict(),
                  return_Xy_test=False):
     """Build an ensemble estimator from the trials selected by a fetcher.

     Parameters
     ----------
     task_id:
         Task identifier. When ``None``, ``self.resource_manager.task_id``
         is used instead.
     hdl_id:
         Forwarded unchanged (possibly ``None``) to the trials fetcher, to
         ``TrainedDataFetcher`` and to
         ``resource_manager.get_ensemble_needed_info``.
     trials_fetcher:
         Name of an attribute of the ``autoflow.ensemble.trials_fetcher``
         module; it is called to build the fetcher object.
     trials_fetcher_params:
         Mapping of extra keyword arguments for the fetcher constructor.
     ensemble_type:
         Together with ``ml_task.role`` it forms the module path
         ``autoflow.ensemble.<ensemble_type>.<role>`` that is imported to
         locate the ensemble estimator class.
     ensemble_params:
         Mapping of keyword arguments for the ensemble estimator
         constructor.
     return_Xy_test:
         When truthy, ``Xy_test`` (as returned by the resource manager) is
         returned alongside the estimator.

     Returns
     -------
     The ensemble estimator after ``fit_trained_data`` was called on it, or
     the tuple ``(ensemble_estimator, Xy_test)`` when ``return_Xy_test`` is
     truthy. The estimator is also stored on ``self.ensemble_estimator``.

     Raises
     ------
     ValueError
         If no task id is available, or if ``trials_fetcher`` does not name
         an attribute of ``autoflow.ensemble.trials_fetcher``.
     """
     # Explicit checks instead of ``assert``: assertions are removed when
     # Python runs with -O, which would let a None task_id slip through.
     if task_id is None:
         task_id = getattr(self.resource_manager, "task_id", None)
         if task_id is None:
             raise ValueError(
                 "task_id is None and self.resource_manager does not "
                 "provide one.")
     # NOTE: unlike task_id, hdl_id is not defaulted from the resource
     # manager; it is passed through exactly as given.

     # Imported lazily and under an alias so the ``trials_fetcher`` argument
     # (a class name) is not shadowed by the module of the same name.
     from autoflow.ensemble import trials_fetcher as trials_fetcher_module
     if not hasattr(trials_fetcher_module, trials_fetcher):
         raise ValueError(
             f"Unknown trials fetcher {trials_fetcher!r}: not found in "
             f"autoflow.ensemble.trials_fetcher.")
     fetcher_cls = getattr(trials_fetcher_module, trials_fetcher)
     fetcher: TrialsFetcher = fetcher_cls(
         resource_manager=self.resource_manager,
         task_id=task_id,
         hdl_id=hdl_id,
         **trials_fetcher_params)
     trial_ids = fetcher.fetch()

     # Load what the selected trials stored: estimators, the indexes of the
     # ground-truth rows they were evaluated on, and their predictions.
     estimator_list, y_true_indexes_list, y_preds_list = TrainedDataFetcher(
         task_id, hdl_id, trial_ids, self.resource_manager).fetch()
     ml_task, Xy_train, Xy_test = self.resource_manager.get_ensemble_needed_info(
         task_id, hdl_id)
     y_true = Xy_train[1]

     # Resolve the estimator class dynamically from the ensemble type and
     # the role of the ML task.
     ensemble_module_name = f"autoflow.ensemble.{ensemble_type}.{ml_task.role}"
     ensemble_module = import_module(ensemble_module_name)
     ensemble_class_name = get_class_name_of_module(ensemble_module_name)
     ensemble_class = getattr(ensemble_module, ensemble_class_name)
     ensemble_estimator: EnsembleEstimator = ensemble_class(**ensemble_params)
     ensemble_estimator.fit_trained_data(estimator_list,
                                         y_true_indexes_list, y_preds_list,
                                         y_true)
     self.ensemble_estimator = ensemble_estimator
     if return_Xy_test:
         return self.ensemble_estimator, Xy_test
     return self.ensemble_estimator
Example #2
0
 def get_forbid_hit_in_models_by_rely(self, models, rely_model="boost_model"):
     """Split ``models`` by whether their component class sets ``rely_model``.

     For every name in ``models`` the module
     ``autoflow.pipeline.components.<self.ml_task.mainTask>.<name>`` is
     imported and the class named by ``get_class_name_of_module`` is looked
     up in it. The attribute called ``rely_model`` is then read from that
     class, treating a missing attribute as ``False``.

     Returns
     -------
     tuple of two lists ``(forbid_in_value, hit)``: the names whose class
     attribute is falsy or absent, followed by the names whose class
     attribute is truthy. Input order is preserved within each list.
     """
     rejected, accepted = [], []
     for name in models:
         dotted_path = f"autoflow.pipeline.components.{self.ml_task.mainTask}.{name}"
         class_name = get_class_name_of_module(dotted_path)
         component_cls = getattr(import_module(dotted_path), class_name)
         # Pick the destination list from the truthiness of the flag.
         bucket = accepted if getattr(component_cls, rely_model, False) else rejected
         bucket.append(name)
     return rejected, accepted
Example #3
0
 def fit_ensemble(self,
                  task_id=None,
                  hdl_id=None,
                  trials_fetcher_cls="GetBestK",
                  trials_fetcher_params=frozendict(k=10),
                  ensemble_type="stack",
                  ensemble_params=frozendict(),
                  fit_ensemble_alone=True):
     """Build an ensemble estimator from the trials selected by a fetcher.

     Parameters
     ----------
     task_id:
         Task identifier. When ``None``, ``self.resource_manager.task_id``
         is used. The resolved value is written to ``self.task_id`` and
         ``self.resource_manager.task_id``.
     hdl_id:
         When not ``None`` it is written to ``self.hdl_id`` and
         ``self.resource_manager.hdl_id``. It is forwarded as given to the
         trials fetcher and to ``TrainedDataFetcher``.
     trials_fetcher_cls:
         Name of an attribute of the ``autoflow.ensemble.trials_fetcher``
         module; it is called to build the fetcher object.
     trials_fetcher_params:
         Mapping of extra keyword arguments for the fetcher constructor.
     ensemble_type:
         Together with ``ml_task.role`` it forms the module path
         ``autoflow.ensemble.<ensemble_type>.<role>`` that is imported to
         locate the ensemble estimator class.
     ensemble_params:
         Mapping of keyword arguments for the ensemble estimator
         constructor.
     fit_ensemble_alone:
         When truthy, the logger is set up, an ``ExperimentType.ENSEMBLE``
         experiment record is inserted before fitting, and afterwards
         ``self.estimator`` is set and
         ``resource_manager.finish_experiment`` is called.

     Returns
     -------
     The ensemble estimator, which is also stored on
     ``self.ensemble_estimator``. With exactly one fetched estimator entry
     it is a ``VoteClassifier`` (when ``ml_task.mainTask`` is
     ``"classification"``) or a ``MeanRegressor`` built from that entry, and
     ``fit_trained_data`` is not called.

     Raises
     ------
     ValueError
         If no task id is available, if ``trials_fetcher_cls`` does not name
         an attribute of ``autoflow.ensemble.trials_fetcher``, or if no
         estimators were fetched.
     """
     # FIXME: ensemble_params may contain values that cannot be serialized.
     trials_fetcher_params = dict(trials_fetcher_params)
     ensemble_params = dict(ensemble_params)
     # Must run before any other local is bound, because it inspects
     # locals(). Presumably it keeps only this method's own parameters;
     # confirm against get_valid_params_in_kwargs.
     kwargs = get_valid_params_in_kwargs(self.fit_ensemble, locals())

     # Explicit checks instead of ``assert``: assertions are removed when
     # Python runs with -O, which would let a None task_id slip through.
     if task_id is None:
         task_id = getattr(self.resource_manager, "task_id", None)
         if task_id is None:
             raise ValueError(
                 "task_id is None and self.resource_manager does not "
                 "provide one.")
     self.task_id = task_id
     self.resource_manager.task_id = task_id
     if hdl_id is not None:
         self.hdl_id = hdl_id
         self.resource_manager.hdl_id = hdl_id

     if fit_ensemble_alone:
         setup_logger(self.log_path, self.log_config)
         experiment_config = {"fit_ensemble_params": kwargs}
         self.resource_manager.insert_experiment_record(
             ExperimentType.ENSEMBLE, experiment_config, {})
         self.experiment_id = self.resource_manager.experiment_id

     from autoflow.ensemble import trials_fetcher
     if not hasattr(trials_fetcher, trials_fetcher_cls):
         raise ValueError(
             f"Unknown trials fetcher {trials_fetcher_cls!r}: not found in "
             f"autoflow.ensemble.trials_fetcher.")
     # Keep the resolved class in its own local instead of rebinding the
     # ``trials_fetcher_cls`` argument from a name to a class.
     fetcher_class = getattr(trials_fetcher, trials_fetcher_cls)
     trials_fetcher_inst: TrialsFetcher = fetcher_class(
         resource_manager=self.resource_manager,
         task_id=task_id,
         hdl_id=hdl_id,
         **trials_fetcher_params)
     trial_ids = trials_fetcher_inst.fetch()
     estimator_list, y_true_indexes_list, y_preds_list = TrainedDataFetcher(
         task_id, hdl_id, trial_ids, self.resource_manager).fetch()
     # TODO: only validation-set data is fetched here, not test-set data.
     # To be extended.
     ml_task, y_true = self.resource_manager.get_ensemble_needed_info(
         task_id)

     n_estimators = len(estimator_list)
     if n_estimators == 0:
         raise ValueError("Length of estimator_list must >=1. ")
     if n_estimators == 1:
         # Nothing to combine: wrap the single entry without fitting.
         self.logger.info(
             "Length of estimator_list == 1, don't do ensemble.")
         if ml_task.mainTask == "classification":
             ensemble_estimator = VoteClassifier(estimator_list[0])
         else:
             ensemble_estimator = MeanRegressor(estimator_list[0])
     else:
         # Resolve the estimator class dynamically from the ensemble type
         # and the role of the ML task.
         ensemble_module_name = f"autoflow.ensemble.{ensemble_type}.{ml_task.role}"
         ensemble_module = import_module(ensemble_module_name)
         ensemble_class_name = get_class_name_of_module(ensemble_module_name)
         ensemble_class = getattr(ensemble_module, ensemble_class_name)
         # ensemble_estimator : EnsembleEstimator
         ensemble_estimator = ensemble_class(**ensemble_params)
         ensemble_estimator.fit_trained_data(estimator_list,
                                             y_true_indexes_list,
                                             y_preds_list, y_true)
     self.ensemble_estimator = ensemble_estimator
     if fit_ensemble_alone:
         self.estimator = self.ensemble_estimator
         self.resource_manager.finish_experiment(self.log_path, self)
     return self.ensemble_estimator