Example No. 1
    def save(self, path: str = None, verbose=True) -> str:
        """
        Saves the model to disk.

        Parameters
        ----------
        path : str, default None
            Path to the saved model, minus the file name.
            This should generally be a directory path ending with a '/' character (or the OS-appropriate path separator).
            If None, self.path is used.
            The final model file is typically saved to path + self.model_file_name.
        verbose : bool, default True
            Whether to log the location of the saved file.

        Returns
        -------
        path : str
            Path to the saved model, minus the file name.
            Use this value to load the model from disk via cls.load(path), where cls is the class of the model object (e.g., model = RFModel.load(path)).
        """
        if path is None:
            path = self.path
        file_path = path + self.model_file_name
        save_pkl.save(path=file_path, object=self, verbose=verbose)
        return path
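Throughout these examples, save_pkl.save(path=..., object=..., verbose=...) is a thin pickle-based helper. A minimal stand-in with the same call shape (an illustrative sketch, not AutoGluon's actual implementation) might look like this:

import os
import pickle


def save(path: str, object, verbose: bool = True) -> None:
    """Minimal stand-in for the save_pkl.save helper used in these examples.

    The parameter is named `object` only to mirror the object= keyword used at the call sites above.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)  # ensure the target directory exists
    with open(path, "wb") as f:
        pickle.dump(object, f)
    if verbose:
        print(f"Saved object to {path}")


def load(path: str):
    """Counterpart loader, roughly what cls.load(path) does at the pickle level."""
    with open(path, "rb") as f:
        return pickle.load(f)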
Example No. 2
    def save_info(self) -> dict:
        info = self.get_info()

        save_pkl.save(path=self.path + self.model_info_name, object=info)
        json_path = self.path + self.model_info_json_name
        save_json.save(path=json_path, obj=info)
        return info
Example No. 3
def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = output_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)

        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"), df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)

        if 'models' in artifacts:
            utils.zip_path(models_dir,
                           os.path.join(models_dir, "models.zip"))

        def delete(path, isdir):
            if isdir:
                shutil.rmtree(path, ignore_errors=True)
            elif os.path.splitext(path)[1] == '.pkl':
                os.remove(path)
        utils.walk_apply(models_dir, delete, max_depth=0)

    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
Example No. 4
    def save(self,
             path=None,
             verbose=True,
             save_oof=True,
             save_children=False) -> str:
        if path is None:
            path = self.path

        if save_children:
            model_names = []
            for child in self.models:
                child = self.load_child(child)
                child.set_contexts(path + child.name + os.path.sep)
                child.save(verbose=False)
                model_names.append(child.name)
            self.models = model_names

        if save_oof and self._oof_pred_proba is not None:
            save_pkl.save(
                path=path + 'utils' + os.path.sep + self._oof_filename,
                object={
                    '_oof_pred_proba': self._oof_pred_proba,
                    '_oof_pred_model_repeats': self._oof_pred_model_repeats,
                },
            )
            self._oof_pred_proba = None
            self._oof_pred_model_repeats = None

        return super().save(path=path, verbose=verbose)
Example No. 5
 def save(self):
     trainer = None
     if self.trainer is not None:
         if not self.is_trainer_present:
             self.trainer.save()
             trainer = self.trainer
             self.trainer = None
     save_pkl.save(path=self.save_path, object=self)
     self.trainer = trainer
Example No. 6
 def save(self) -> None:
     """Save this predictor to file in directory specified by this Predictor's ``path``.
     Note that :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit` already saves the predictor object automatically
     (we do not recommend modifying the Predictor object yourself as it tracks many trained models).
     """
     tmp_learner = self._learner
     self._learner = None
     save_pkl.save(path=tmp_learner.path + self.predictor_file_name, object=self)
     self._learner = tmp_learner
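Examples 5 to 8 (and 13) share the same save pattern: temporarily detach a heavy attribute (the learner, trainer, or child models) so it is not pickled together with the object, then restore it afterwards. A generic sketch of that pattern, with purely illustrative names:

import pickle


class ToyPredictor:
    """Illustrates the detach-then-restore save pattern; not actual AutoGluon code."""

    def __init__(self, learner):
        self._learner = learner  # heavy object we do not want inside this pickle

    def save(self, path: str) -> None:
        tmp_learner = self._learner  # keep a reference so it can be restored
        self._learner = None  # detach before pickling to keep the file small
        try:
            with open(path, "wb") as f:
                pickle.dump(self, f)
        finally:
            self._learner = tmp_learner  # restore the in-memory object either way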
Example No. 7
    def save(self) -> None:
        models = self.models
        self.models = {}

        save_pkl.save(path=self.path_pkl, object=self)
        for model in models.values():  # iterate the dict saved above; self.models was just cleared
            model.save()

        self.models = models
Example No. 8
 def save(self):
     tmp_learner = self._learner
     tmp_trainer = self._trainer
     super().save()
     self._learner = None
     self._trainer = None
     save_pkl.save(path=tmp_learner.path + self.predictor_file_name,
                   object=self)
     self._learner = tmp_learner
     self._trainer = tmp_trainer
Example No. 9
 def _callback(env):
     if not eval_result:
         _init(env)
     for data_name, eval_name, result, _ in env.evaluation_result_list:
         eval_result[data_name][eval_name].append(result)
     if (interval > 0) and ((env.iteration - offset) % interval == 0) and (env.iteration != 0):
         # min_error = min(eval_result['valid_set']['multi_error'])
         # print('iter:', env.iteration, 'min_error:', min_error)
         save_pkl.save(path=path, object=eval_result)
Example No. 10
 def save(self, file_prefix=""):
     """ Additional naming changes will be appended to end of file_prefix (must contain full absolute path) """
     dataobj_file = file_prefix + self.DATAOBJ_SUFFIX
     datalist_file = file_prefix + self.DATAVALUES_SUFFIX
     data_list = self.dataset._data
     self.dataset = None  # Avoid pickling these
     self.dataloader = None
     save_pkl.save(path=dataobj_file, object=self)
     mx.nd.save(datalist_file, data_list)
     logger.debug("TabularNN Dataset saved to files: \n %s \n %s" %
                  (dataobj_file, datalist_file))
Example No. 11
 def save(self) -> None:
     # todo: remove / revise low_memory logic
     models = self.models
     if self.low_memory:
         self.models = {}
     try:
         save_pkl.save(path=self.path_pkl, object=self)
     except:  # noqa
         self.models = {}
         save_pkl.save(path=self.path_pkl, object=self)
     if not self.models:
         self.models = models
Example No. 12
    def hyperparameter_tune(self, X_train, y_train, X_val, y_val, scheduler_options, **kwargs):
        # verbosity = kwargs.get('verbosity', 2)
        time_start = time.time()
        logger.log(15, "Starting generic AbstractModel hyperparameter tuning for %s model..." % self.name)
        self._set_default_searchspace()
        params_copy = self.params.copy()
        directory = self.path  # also create model directory if it doesn't exist
        # TODO: This will break on S3. Use tabular/utils/savers for datasets, add new function
        scheduler_func, scheduler_options = scheduler_options  # Unpack tuple
        if scheduler_func is None or scheduler_options is None:
            raise ValueError("scheduler_func and scheduler_options cannot be None for hyperparameter tuning")
        params_copy['num_threads'] = scheduler_options['resource'].get('num_cpus', None)
        params_copy['num_gpus'] = scheduler_options['resource'].get('num_gpus', None)
        dataset_train_filename = 'dataset_train.p'
        train_path = directory + dataset_train_filename
        save_pkl.save(path=train_path, object=(X_train, y_train))

        dataset_val_filename = 'dataset_val.p'
        val_path = directory + dataset_val_filename
        save_pkl.save(path=val_path, object=(X_val, y_val))

        if not any(isinstance(params_copy[hyperparam], Space) for hyperparam in params_copy):
            logger.warning("Attempting to do hyperparameter optimization without any search space (all hyperparameters are already fixed values)")
        else:
            logger.log(15, "Hyperparameter search space for %s model: " % self.name)
            for hyperparam in params_copy:
                if isinstance(params_copy[hyperparam], Space):
                    logger.log(15, f"{hyperparam}:   {params_copy[hyperparam]}")

        util_args = dict(
            dataset_train_filename=dataset_train_filename,
            dataset_val_filename=dataset_val_filename,
            directory=directory,
            model=self,
            time_start=time_start,
            time_limit=scheduler_options['time_out'],
        )

        model_trial.register_args(util_args=util_args, **params_copy)
        scheduler: FIFOScheduler = scheduler_func(model_trial, **scheduler_options)
        if ('dist_ip_addrs' in scheduler_options) and (len(scheduler_options['dist_ip_addrs']) > 0):
            # This is multi-machine setting, so need to copy dataset to workers:
            logger.log(15, "Uploading data to remote workers...")
            scheduler.upload_files([train_path, val_path])  # TODO: currently does not work.
            directory = self.path  # TODO: need to change to path to working directory used on every remote machine
            model_trial.update(directory=directory)
            logger.log(15, "uploaded")

        scheduler.run()
        scheduler.join_jobs()

        return self._get_hpo_results(scheduler=scheduler, scheduler_options=scheduler_options, time_start=time_start)
Example No. 13
    def save(self, path: str = None, **kwargs) -> str:
        if path is None:
            path = self.path
        path = Path(path)
        path.mkdir(exist_ok=True)

        predictor = self.gts_predictor
        self.gts_predictor = None

        with disable_root_logger():
            if predictor:
                Path.mkdir(path / self.gluonts_model_path, exist_ok=True)
                predictor.serialize(path / self.gluonts_model_path)

        save_pkl.save(path=str(path / self.model_file_name), object=self)
        self.gts_predictor = predictor

        return str(path)
Example No. 14
def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        if 'leaderboard' in artifacts:
            leaderboard_dir = output_subdir("leaderboard", config)
            save_pd.save(path=os.path.join(leaderboard_dir, "leaderboard.csv"), df=leaderboard)

        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)

        if 'models' in artifacts:
            shutil.rmtree(os.path.join(predictor.path, "utils"), ignore_errors=True)
            models_dir = output_subdir("models", config)
            utils.zip_path(predictor.path, os.path.join(models_dir, "models.zip"))

    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)
Example No. 15
    def save_info(self, include_model_info=False):
        info = self.get_info(include_model_info=include_model_info)

        save_pkl.save(path=self.path + self.learner_info_name, object=info)
        save_json.save(path=self.path + self.learner_info_json_name, obj=info)
        return info
Example No. 16
 def save_model_base(self, model_base):
     save_pkl.save(path=self.path + 'utils' + os.path.sep + 'model_template.pkl', object=model_base)
Example No. 17
 def save_val_data(self,
                   data: TimeSeriesDataFrame,
                   verbose: bool = True) -> None:
     path = self.path_data + "val.pkl"
     save_pkl.save(path=path, object=data, verbose=verbose)
Example No. 18
    def hyperparameter_tune(self, X_train, y_train, X_val, y_val, scheduler_options, **kwargs):
        time_start = time.time()
        logger.log(15, "Beginning hyperparameter tuning for Gradient Boosting Model...")
        self._set_default_searchspace()
        params_copy = self.params.copy()
        if isinstance(params_copy['min_data_in_leaf'], Int):
            upper_minleaf = params_copy['min_data_in_leaf'].upper
            if upper_minleaf > X_train.shape[0]:  # TODO: this min_data_in_leaf adjustment based on sample size may not be necessary
                upper_minleaf = max(1, int(X_train.shape[0] / 5.0))
                lower_minleaf = params_copy['min_data_in_leaf'].lower
                if lower_minleaf > upper_minleaf:
                    lower_minleaf = max(1, int(upper_minleaf / 3.0))
                params_copy['min_data_in_leaf'] = Int(lower=lower_minleaf, upper=upper_minleaf)

        directory = self.path  # also create model directory if it doesn't exist
        # TODO: This will break on S3! Use tabular/utils/savers for datasets, add new function
        os.makedirs(directory, exist_ok=True)
        scheduler_func, scheduler_options = scheduler_options  # Unpack tuple
        if scheduler_func is None or scheduler_options is None:
            raise ValueError("scheduler_func and scheduler_options cannot be None for hyperparameter tuning")
        num_threads = scheduler_options['resource'].get('num_cpus', -1)
        params_copy['num_threads'] = num_threads
        # num_gpus = scheduler_options['resource']['num_gpus'] # TODO: unused

        dataset_train, dataset_val = self.generate_datasets(X_train=X_train, y_train=y_train, params=params_copy, X_val=X_val, y_val=y_val)
        dataset_train_filename = "dataset_train.bin"
        train_file = self.path + dataset_train_filename
        if os.path.exists(train_file):  # clean up old files first
            os.remove(train_file)
        dataset_train.save_binary(train_file)
        dataset_val_filename = "dataset_val.bin"  # names without directory info
        val_file = self.path + dataset_val_filename
        if os.path.exists(val_file):  # clean up old files first
            os.remove(val_file)
        dataset_val.save_binary(val_file)
        dataset_val_pkl_filename = 'dataset_val.pkl'
        val_pkl_path = directory + dataset_val_pkl_filename
        save_pkl.save(path=val_pkl_path, object=(X_val, y_val))

        if not np.any([isinstance(params_copy[hyperparam], Space) for hyperparam in params_copy]):
            logger.warning("Attempting to do hyperparameter optimization without any search space (all hyperparameters are already fixed values)")
        else:
            logger.log(15, "Hyperparameter search space for Gradient Boosting Model: ")
            for hyperparam in params_copy:
                if isinstance(params_copy[hyperparam], Space):
                    logger.log(15, f'{hyperparam}:   {params_copy[hyperparam]}')

        util_args = dict(
            dataset_train_filename=dataset_train_filename,
            dataset_val_filename=dataset_val_filename,
            dataset_val_pkl_filename=dataset_val_pkl_filename,
            directory=directory,
            model=self,
            time_start=time_start,
            time_limit=scheduler_options['time_out']
        )
        lgb_trial.register_args(util_args=util_args, **params_copy)
        scheduler = scheduler_func(lgb_trial, **scheduler_options)
        if ('dist_ip_addrs' in scheduler_options) and (len(scheduler_options['dist_ip_addrs']) > 0):
            # This is multi-machine setting, so need to copy dataset to workers:
            logger.log(15, "Uploading data to remote workers...")
            scheduler.upload_files([train_file, val_file, val_pkl_path])  # TODO: currently does not work.
            directory = self.path  # TODO: need to change to path to working directory used on every remote machine
            lgb_trial.update(directory=directory)
            logger.log(15, "uploaded")

        scheduler.run()
        scheduler.join_jobs()

        return self._get_hpo_results(scheduler=scheduler, scheduler_options=scheduler_options, time_start=time_start)
Example No. 19
 def save(self, path: str):
     save_pkl.save(path=path, object=self)
Example No. 20
 def _callback(env):
     if ((env.iteration - offset) % interval == 0) and (env.iteration != 0):
         save_pkl.save(path=path, object=env.model)
         save_pointer.save(path=latest_model_checkpoint, content_path=path)
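In this last example, save_pointer.save(path=..., content_path=...) records where the most recent checkpoint lives so a later run can locate it. A stand-in inferred from the call site (an assumption, not the library's verbatim code) would be:

def save(path: str, content_path: str) -> None:
    """Write the location of the latest checkpoint into a small pointer file."""
    with open(path, "w") as f:
        f.write(content_path)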