def result_command(self):
    """Aggregate AutoML results (the 'result' command keyword).

    Loads results, additional results, params, other params and errors
    from the base folder, merges them on ``job_id``, and writes two
    Excel files (``result.xlsx`` and ``result_error.xlsx``) into the
    base folder.

    Returns
    -------
    (df_merged_result, df_merged_error) : tuple of pandas.DataFrame
        The merged result table and the merged error table.
    """
    self.data_persister = FolderDataPersister(base_folder=self.base_folder)
    self.result_reader = AutoMlResultReader(self.data_persister)

    # Load every table produced by the automl run.
    df_results = self.result_reader.load_all_results()
    df_additional_results = self.result_reader.load_additional_results()
    df_params = self.result_reader.load_all_params()
    df_errors = self.result_reader.load_all_errors()
    df_params_other = self.result_reader.load_all_other_params()

    # Merge params + results (+ other params, + additional results when present).
    df_merged_result = pd.merge(df_params, df_results, how="inner", on="job_id")
    df_merged_result = pd.merge(df_merged_result, df_params_other, how="inner", on="job_id")
    if df_additional_results.shape[0] > 0:
        df_merged_result = pd.merge(df_merged_result, df_additional_results, how="inner", on="job_id")

    # Errors are merged against params only: failed jobs have no results row.
    df_merged_error = pd.merge(df_params, df_errors, how="inner", on="job_id")

    # Save both tables; a failure to write one file must not prevent the other.
    for df, suffix in ((df_merged_result, "/result.xlsx"), (df_merged_error, "/result_error.xlsx")):
        excel_path = self.base_folder + suffix
        try:
            df.to_excel(excel_path, index=False)
            # BUG FIX: the original used '"file %s saved" % self.base_folder + suffix',
            # where '%' binds tighter than '+', so the printed path was wrong
            # (e.g. "file /base saved/result.xlsx"). Format the full path instead.
            print("file %s saved" % excel_path)
        except OSError:
            print("I couldn't save excel file")

    return df_merged_result, df_merged_error
def reload(self):
    """Reload dfX, y, groups, auto_ml_config and job_config from the base folder.

    Re-creates the data persister, then reads each pickled attribute
    back from disk and re-attaches it to this instance.
    """
    self.data_persister = FolderDataPersister(base_folder=self.base_folder)
    # Read order matches the original: job_config, auto_ml_config, dfX, y, groups.
    for attribute in ("job_config", "auto_ml_config", "dfX", "y", "groups"):
        value = self.data_persister.read(key=attribute, write_type=SavingType.pickle)
        setattr(self, attribute, value)
def stop_command(self):
    """Stop the automl processes (the 'stop' command keyword).

    Writes an empty 'stop' file into both the controller and the
    runner worker folders; each worker watches for that file and
    shuts itself down.
    """
    self.data_persister = FolderDataPersister(base_folder=self.base_folder)
    for worker_folder in ("mljobmanager_workers", "mljobrunner_workers"):
        self.data_persister.write("", key="stop", path=worker_folder, write_type=SavingType.txt)
def initialize(self):
    """Lazily initialize data, auto_ml_config, job_config and the data persister.

    Each piece is created only if it is still ``None``, so the method
    is safe to call on a partially-configured instance. Finishes by
    applying the optional ``set_configs`` user hook.
    """
    # -- data and target -------------------------------------------------
    if self.dfX is None or self.y is None:
        # loader() may return (dfX, y) or (dfX, y, groups)
        self.dfX, self.y, *maybe_groups = self.loader()
        self.groups = maybe_groups[0] if maybe_groups else None

    # -- auto-ml configuration -------------------------------------------
    if self.auto_ml_config is None:
        self.auto_ml_config = AutoMlConfig(dfX=self.dfX, y=self.y, groups=self.groups, name=self.name)
        self.auto_ml_config.guess_everything()

    # -- job configuration -----------------------------------------------
    if self.job_config is None:
        self.job_config = JobConfig()
        self.job_config.guess_cv(auto_ml_config=self.auto_ml_config, n_splits=10)
        self.job_config.guess_scoring(auto_ml_config=self.auto_ml_config)

    # -- data persister ---------------------------------------------------
    if self.data_persister is None:
        self.data_persister = FolderDataPersister(base_folder=self.base_folder)

    # -- user-supplied configuration hook ---------------------------------
    if self.set_configs is not None:
        self.set_configs(self)
def test_create_everything_sequentially(num_only, tmpdir):
    """End-to-end smoke test: build every automl component by hand
    (persister, configs, reader, guider, controller, runner), then run
    one controller iteration and one runner iteration and check the
    runner picks up exactly the job the controller created.

    Parameters (pytest fixtures):
        num_only: passed through to ``loader`` — presumably selects
            numeric-only data; TODO confirm against the fixture.
        tmpdir: pytest-provided temporary folder used as the
            persistence base folder.
    """
    # DataPersister
    data_persister = FolderDataPersister(base_folder=tmpdir)

    # Data
    dfX, y = loader(num_only)

    # Auto Ml Config
    auto_ml_config = AutoMlConfig(dfX, y)
    auto_ml_config.guess_everything()
    assert auto_ml_config

    # Job Config
    job_config = JobConfig()
    job_config.guess_scoring(auto_ml_config)
    job_config.guess_cv(auto_ml_config)
    assert job_config

    # Result Reader
    result_reader = AutoMlResultReader(data_persister)
    assert result_reader  # just verify the object was created

    # Auto ml guider
    auto_ml_guider = AutoMlModelGuider(
        result_reader=result_reader, job_config=job_config, metric_transformation="default", avg_metric=True
    )
    assert auto_ml_guider

    # Job Controller
    job_controller = MlJobManager(
        auto_ml_config=auto_ml_config,
        job_config=job_config,
        auto_ml_guider=auto_ml_guider,
        data_persister=data_persister,
        seed=None,
    )
    assert job_controller

    # Job Runner
    job_runner = MlJobRunner(
        dfX=dfX,
        y=y,
        groups=None,
        auto_ml_config=auto_ml_config,
        job_config=job_config,
        data_persister=data_persister,
        seed=None,
    )
    assert job_runner

    ### Do one iteration of the job_controller
    # NOTE: the break is placed after the SECOND yield on purpose — the
    # generator must run past its first yield so the first job is fully
    # committed before we stop iterating.
    for i, (temp_job_id, temp_job_param) in enumerate(job_controller.iterate()):
        if i > 0:
            break  # I need to complete a loop, so I need the break to be AFTER second yield
        job_id = temp_job_id
        job_param = temp_job_param
    assert isinstance(job_id, str)
    assert isinstance(job_param, dict)

    ### retriveve job by worker
    # Only the first yielded job is needed; break immediately.
    for worker_job_id, worker_job_param in job_runner.iterate():
        break
    assert isinstance(worker_job_id, str)
    assert isinstance(worker_job_param, dict)

    # The runner must have picked up exactly the job the controller created.
    assert worker_job_id == job_id
    assert worker_job_param == job_param