Example #1
    def result_command(self):
        """ this command is to launch aggregat of result.
        It can be executed using the 'result' keyword in the command argument.
        
        It will:
            * load result, params and error
            * merge them
            * save everything into two excel files
            
        """
        self.data_persister = FolderDataPersister(base_folder=self.base_folder)
        self.result_reader = AutoMlResultReader(self.data_persister)

        df_results = self.result_reader.load_all_results()
        df_additional_results = self.result_reader.load_additional_results()
        df_params = self.result_reader.load_all_params()
        df_errors = self.result_reader.load_all_errors()

        df_params_other = self.result_reader.load_all_other_params()

        df_merged_result = pd.merge(df_params,
                                    df_results,
                                    how="inner",
                                    on="job_id")
        df_merged_result = pd.merge(df_merged_result,
                                    df_params_other,
                                    how="inner",
                                    on="job_id")
        if df_additional_results.shape[0] > 0:
            df_merged_result = pd.merge(df_merged_result,
                                        df_additional_results,
                                        how="inner",
                                        on="job_id")

        df_merged_error = pd.merge(df_params,
                                   df_errors,
                                   how="inner",
                                   on="job_id")

        #        df_merged_result2 = pd.merge( df_params_other, df_results, how = "inner",on = "job_id")
        #        df_merged_result2 = df_merged_result2.sort_values(by="job_creation_time")

        try:
            df_merged_result.to_excel(self.base_folder + "/result.xlsx",
                                      index=False)
            print("file %s saved" % self.base_folder + "/result.xlsx")
        except OSError:
            print("I couldn't save excel file")

        try:
            df_merged_error.to_excel(self.base_folder + "/result_error.xlsx",
                                     index=False)
            print("file %s saved" % self.base_folder + "/result_error.xlsx")
        except OSError:
            print("I couldn't save excel file")

        return df_merged_result, df_merged_error
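A minimal usage sketch of this command. Assumption: these methods live on aikit's MlMachineLauncher class (the class itself is not shown in these snippets) and base_folder already contains the output of a finished automl run.

from aikit.ml_machine import MlMachineLauncher  # assumed location of the launcher class

# Hedged sketch: aggregate a finished run into result.xlsx and result_error.xlsx.
# The constructor arguments are assumptions based on the attributes used above.
launcher = MlMachineLauncher(base_folder="/path/to/automl_run", name="my_run")
df_result, df_error = launcher.result_command()
print(df_result.shape, df_error.shape)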
Example #2
    def reload(self):
        """ method to reload dfX, y, auto_ml_config and job_config """
        self.data_persister = FolderDataPersister(base_folder=self.base_folder)

        self.job_config = self.data_persister.read(key="job_config", write_type=SavingType.pickle)

        self.auto_ml_config = self.data_persister.read(key="auto_ml_config", write_type=SavingType.pickle)
        self.dfX = self.data_persister.read(key="dfX", write_type=SavingType.pickle)
        self.y = self.data_persister.read(key="y", write_type=SavingType.pickle)
        self.groups = self.data_persister.read(key="groups", write_type=SavingType.pickle)
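A sketch of reloading a persisted run back into memory, under the same MlMachineLauncher assumption as above:

from aikit.ml_machine import MlMachineLauncher  # assumed, as above

launcher = MlMachineLauncher(base_folder="/path/to/automl_run", name="my_run")
launcher.reload()  # repopulates dfX, y, groups, auto_ml_config and job_config
print(launcher.dfX.shape, launcher.y.shape)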
Example #3
    def stop_command(self):
        """ this command is to stop the automl processes.
        It can be executed using the 'stop' keyword in the command argument.
        
        It will:
            * create stop file in controller/worker folder to stop them
        
        """

        self.data_persister = FolderDataPersister(base_folder=self.base_folder)
        self.data_persister.write("", key="stop", path="mljobmanager_workers", write_type=SavingType.txt)
        self.data_persister.write("", key="stop", path="mljobrunner_workers", write_type=SavingType.txt)
Example #4
    def initialize(self):
        """ method to initialize auto_ml_config and job_config """

        ##################################
        ### ** load data and target ** ###
        ##################################
        if self.dfX is None or self.y is None:
            temp = self.loader()
            if len(temp) == 2:
                self.dfX, self.y = temp
                self.groups = None
            else:
                self.dfX, self.y, self.groups = temp

        ###########################################
        ### ** create database configuration ** ###
        ###########################################
        if self.auto_ml_config is None:
            self.auto_ml_config = AutoMlConfig(dfX=self.dfX,
                                               y=self.y,
                                               groups=self.groups,
                                               name=self.name)
            self.auto_ml_config.guess_everything()

        ######################################
        ### ** create job configuration ** ###
        ######################################
        if self.job_config is None:
            self.job_config = JobConfig()
            self.job_config.guess_cv(auto_ml_config=self.auto_ml_config,
                                     n_splits=10)
            self.job_config.guess_scoring(auto_ml_config=self.auto_ml_config)

        ###################################
        ### ** create data persister ** ###
        ###################################
        if self.data_persister is None:
            self.data_persister = FolderDataPersister(
                base_folder=self.base_folder)

        #################################
        ### ** apply custom config ** ###
        #################################
        if self.set_configs is not None:
            self.set_configs(self)
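A sketch of the loader / set_configs contract that initialize relies on: the loader returns either (dfX, y) or (dfX, y, groups), and set_configs receives the launcher after both configurations have been guessed so they can be adjusted. The class name, constructor arguments and file name are assumptions; the contract itself comes from the code above.

import pandas as pd

from aikit.ml_machine import MlMachineLauncher  # assumed, as above


def loader():
    # placeholder data source: must return (dfX, y) or (dfX, y, groups)
    df = pd.read_csv("train.csv")
    y = df.pop("target")
    return df, y


def set_configs(launcher):
    # hook to inspect or adjust the guessed configurations before the run
    print(launcher.auto_ml_config)
    print(launcher.job_config)


launcher = MlMachineLauncher(base_folder="/path/to/automl_run",
                             name="my_run",
                             loader=loader,
                             set_configs=set_configs)
launcher.initialize()  # loads the data, guesses both configs, creates the data persister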
Example #5
def test_create_everything_sequentially(num_only, tmpdir):

    # DataPersister
    data_persister = FolderDataPersister(base_folder=tmpdir)

    # Data
    dfX, y = loader(num_only)

    # Auto Ml Config
    auto_ml_config = AutoMlConfig(dfX, y)
    auto_ml_config.guess_everything()
    assert auto_ml_config

    # Job Config
    job_config = JobConfig()
    job_config.guess_scoring(auto_ml_config)
    job_config.guess_cv(auto_ml_config)
    assert job_config

    # Result Reader
    result_reader = AutoMlResultReader(data_persister)
    assert result_reader  # just verify the object was created

    # Auto ml guider
    auto_ml_guider = AutoMlModelGuider(result_reader=result_reader,
                                       job_config=job_config,
                                       metric_transformation="default",
                                       avg_metric=True)
    assert auto_ml_guider

    # Job Controller
    job_controller = MlJobManager(
        auto_ml_config=auto_ml_config,
        job_config=job_config,
        auto_ml_guider=auto_ml_guider,
        data_persister=data_persister,
        seed=None,
    )

    assert job_controller

    # Job Runner
    job_runner = MlJobRunner(
        dfX=dfX,
        y=y,
        groups=None,
        auto_ml_config=auto_ml_config,
        job_config=job_config,
        data_persister=data_persister,
        seed=None,
    )

    assert job_runner

    ### Do one iteration of the job_controller

    for i, (temp_job_id,
            temp_job_param) in enumerate(job_controller.iterate()):

        if i > 0:
            break  # break only after the second yield so the controller completes one full loop

        job_id = temp_job_id
        job_param = temp_job_param
        assert isinstance(job_id, str)
        assert isinstance(job_param, dict)

    ### retrieve the job on the worker side
    for worker_job_id, worker_job_param in job_runner.iterate():
        break

    assert isinstance(worker_job_id, str)
    assert isinstance(worker_job_param, dict)

    assert worker_job_id == job_id
    assert worker_job_param == job_param