Example #1
 def write_to_disk(path: Path):
     # Get or create checkpoint dir.
     path.parent.mkdir(parents=True, exist_ok=True)
     # Write checkpoint to disk.
     with path.open("wb") as f:
         cloudpickle.dump(checkpoint, f)
         logger.debug(f"Checkpoint successfully written to: " f"{path}")
Example #2
 def _to_directory(self, path: str) -> None:
     if self._data_dict or self._obj_ref:
         # This is an object ref or dict
         data_dict = self.to_dict()
         if _FS_CHECKPOINT_KEY in data_dict:
             # This used to be a true fs checkpoint, so restore
             _unpack(data_dict[_FS_CHECKPOINT_KEY], path)
         else:
             # This is a dict checkpoint. Dump data into checkpoint.pkl
             checkpoint_data_path = os.path.join(
                 path, _DICT_CHECKPOINT_FILE_NAME)
             with open(checkpoint_data_path, "wb") as f:
                 pickle.dump(data_dict, f)
     else:
         # This is either a local fs, remote node fs, or external fs
         local_path = self._local_path
         external_path = _get_external_path(self._uri)
         if local_path:
             if local_path != path:
                 # If this exists on the local path, just copy over
                 if path and os.path.exists(path):
                     shutil.rmtree(path)
                 shutil.copytree(local_path, path)
         elif external_path:
             # If this exists on external storage (e.g. cloud), download
             download_from_uri(uri=external_path,
                               local_path=path,
                               filelock=False)
         else:
             raise RuntimeError(
                 f"No valid location found for checkpoint {self}: {self._uri}"
             )
Example #3
    def process_checkpoint(checkpoint, parent_dir, trainable_state):
        saved_as_dict = False
        if isinstance(checkpoint, string_types):
            if not checkpoint.startswith(parent_dir):
                raise ValueError(
                    "The returned checkpoint path must be within the "
                    "given checkpoint dir {}: {}".format(
                        parent_dir, checkpoint))
            checkpoint_path = checkpoint
            if os.path.isdir(checkpoint_path):
                # Add trailing slash to prevent tune metadata from
                # being written outside the directory.
                checkpoint_path = os.path.join(checkpoint_path, "")
        elif isinstance(checkpoint, dict):
            saved_as_dict = True
            checkpoint_path = os.path.join(parent_dir, "checkpoint")
            with open(checkpoint_path, "wb") as f:
                pickle.dump(checkpoint, f)
        else:
            raise ValueError("Returned unexpected type {}. "
                             "Expected str or dict.".format(type(checkpoint)))

        with open(checkpoint_path + ".tune_metadata", "wb") as f:
            trainable_state["saved_as_dict"] = saved_as_dict
            pickle.dump(trainable_state, f)
        return checkpoint_path
Example #4
 def _postprocess_checkpoint(self, checkpoint_path: str):
     preprocessor = self._merged_config.get("preprocessor", None)
     if not checkpoint_path or preprocessor is None:
         return
     with open(os.path.join(checkpoint_path, PREPROCESSOR_KEY),
               "wb") as f:
         cpickle.dump(preprocessor, f)
Example #5
    def __init__(
        self,
        restore_path: Optional[str] = None,
        trainable: Optional[Union[str, Callable, Type[Trainable],
                                  BaseTrainer]] = None,
        param_space: Optional[Dict[str, Any]] = None,
        tune_config: Optional[TuneConfig] = None,
        run_config: Optional[RunConfig] = None,
        _tuner_kwargs: Optional[Dict] = None,
    ):
        # Restored from Tuner checkpoint.
        if restore_path:
            trainable_ckpt = os.path.join(restore_path, _TRAINABLE_PKL)
            with open(trainable_ckpt, "rb") as fp:
                trainable = pickle.load(fp)

            tuner_ckpt = os.path.join(restore_path, _TUNER_PKL)
            with open(tuner_ckpt, "rb") as fp:
                tuner = pickle.load(fp)
                self.__dict__.update(tuner.__dict__)

            self._is_restored = True
            self._trainable = trainable
            self._experiment_checkpoint_dir = restore_path
            return

        # Start fresh.
        if not trainable:
            raise TuneError("You need to provide a trainable to tune.")

        # If no run config was passed to Tuner directly, use the one from the Trainer,
        # if available
        if not run_config and isinstance(trainable, BaseTrainer):
            run_config = trainable.run_config

        self._is_restored = False
        self._trainable = trainable
        self._tune_config = tune_config or TuneConfig()
        self._run_config = run_config or RunConfig()
        self._tuner_kwargs = copy.deepcopy(_tuner_kwargs) or {}
        self._experiment_checkpoint_dir = self._setup_create_experiment_checkpoint_dir(
            self._run_config)

        # Not used for restored Tuner.
        self._param_space = param_space or {}

        # This needs to happen before `tune.run()` kicks in.
        # Currently, Tune does not exit gracefully when run in Ray client
        # mode: if a crash happens, it exits immediately, without allowing
        # the tuner and trainable to be checkpointed. Thus this has to
        # happen before tune.run() so that we have something to restore from.
        tuner_ckpt = os.path.join(self._experiment_checkpoint_dir, _TUNER_PKL)
        with open(tuner_ckpt, "wb") as fp:
            pickle.dump(self, fp)

        trainable_ckpt = os.path.join(self._experiment_checkpoint_dir,
                                      _TRAINABLE_PKL)
        with open(trainable_ckpt, "wb") as fp:
            pickle.dump(self._trainable, fp)
Example #6
def save_preprocessor_to_dir(
    preprocessor: "Preprocessor",
    parent_dir: Union[os.PathLike, str],
) -> None:
    """Save preprocessor to file. Returns path saved to."""
    parent_dir = Path(parent_dir)
    with open(parent_dir.joinpath(PREPROCESSOR_KEY), "wb") as f:
        cpickle.dump(preprocessor, f)
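A hedged usage sketch for this helper: save a picklable preprocessor and read it back under the same key (assumes `PREPROCESSOR_KEY` is the module constant used above and `cpickle` is `ray.cloudpickle`):

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmpdir:
    save_preprocessor_to_dir(preprocessor, tmpdir)
    with open(Path(tmpdir) / PREPROCESSOR_KEY, "rb") as f:
        restored_preprocessor = cpickle.load(f)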
Example #7
 def update_config(self, config):
     self.config = config
     config_out = os.path.join(self.logdir, "params.json")
     with open(config_out, "w") as f:
         json.dump(self.config, f, cls=_SafeFallbackEncoder)
     config_pkl = os.path.join(self.logdir, "params.pkl")
     with open(config_pkl, "wb") as f:
         cloudpickle.dump(self.config, f)
Example #8
 def update_config(self, config: Dict):
     self.config = config
     config_out = os.path.join(self.logdir, EXPR_PARAM_FILE)
     with open(config_out, "w") as f:
         json.dump(self.config, f, indent=2, sort_keys=True, cls=SafeFallbackEncoder)
     config_pkl = os.path.join(self.logdir, EXPR_PARAM_PICKLE_FILE)
     with open(config_pkl, "wb") as f:
         cloudpickle.dump(self.config, f)
Example #9
 def save(self, checkpoint_path: str):
     if self._random_state_seed is not None:
         numpy_random_state = np.random.get_state()
     else:
         numpy_random_state = None
     save_object = self.__dict__
     save_object["_random_state_seed_to_set"] = numpy_random_state
     with open(checkpoint_path, "wb") as f:
         cloudpickle.dump(save_object, f)
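The matching load would reverse these steps. A hypothetical sketch (the method name and attribute handling are assumptions, not taken from the source):

 def restore(self, checkpoint_path: str):
     with open(checkpoint_path, "rb") as f:
         save_object = cloudpickle.load(f)
     # Pop the stored numpy random state before updating attributes.
     numpy_random_state = save_object.pop("_random_state_seed_to_set", None)
     self.__dict__.update(save_object)
     if numpy_random_state is not None:
         np.random.set_state(numpy_random_state)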
Example #10
            def save_checkpoint(self, tmp_checkpoint_dir: str = ""):
                checkpoint_path = super().save_checkpoint()
                parent_dir = TrainableUtil.find_checkpoint_dir(checkpoint_path)

                preprocessor = self._merged_config.get("preprocessor", None)
                if parent_dir and preprocessor:
                    with open(os.path.join(parent_dir, PREPROCESSOR_KEY),
                              "wb") as f:
                        cpickle.dump(preprocessor, f)
                return checkpoint_path
Example #11
 def write_checkpoint(self, checkpoint: Dict):
     self.add_tune_checkpoint_id(checkpoint)
     # If inside a Tune Trainable, then checkpoint with Tune.
     with tune.checkpoint_dir(step=self._latest_checkpoint_id) as checkpoint_dir:
         path = Path(checkpoint_dir)
         # Use a standard file name so that we know which file to load
         # the checkpoint from.
         file_path = path.joinpath(TUNE_CHECKPOINT_FILE_NAME)
         with file_path.open("wb") as f:
             cloudpickle.dump(checkpoint, f)
Example #12
def _atomic_save(state: Dict, checkpoint_dir: str, file_name: str):
    """Atomically saves the object to the checkpoint directory

    This is automatically used by tune.run during a Tune job.
    """
    tmp_search_ckpt_path = os.path.join(checkpoint_dir,
                                        ".tmp_search_generator_ckpt")
    with open(tmp_search_ckpt_path, "wb") as f:
        cloudpickle.dump(state, f)

    os.rename(tmp_search_ckpt_path, os.path.join(checkpoint_dir, file_name))
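One caveat with this variant: `os.rename` raises on Windows when the destination already exists, whereas `os.replace` (used in Example #27 below) overwrites atomically on all platforms. A portable version would end with:

    os.replace(tmp_search_ckpt_path, os.path.join(checkpoint_dir, file_name))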
Example #13
def test_predict_no_preprocessor():
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, MODEL_KEY), "wb") as f:
            cpickle.dump(model, f)

        checkpoint = Checkpoint.from_directory(tmpdir)
        predictor = SklearnPredictor.from_checkpoint(checkpoint)

    data_batch = np.array([[1, 2], [3, 4], [5, 6]])
    predictions = predictor.predict(data_batch)

    assert len(predictions) == 3
Example #14
 def __exit__(self, type, value, traceback):
     if self._shelf:
         # Close the shelf file, and store the number of episodes for easy access.
         self._shelf["num_episodes"] = self._num_episodes
         self._shelf.close()
     elif self._outfile and not self._use_shelve:
         # Dump everything as one big pickle, closing the file when done:
         with open(self._outfile, "wb") as f:
             cloudpickle.dump(self._rollouts, f)
     if self._update_file:
         # Remove the temp progress file:
         self._get_tmp_progress_filename().unlink()
         self._update_file = None
Example #15
 def update_config(self, config):
     self.config = config
     config_out = os.path.join(self.logdir, "params.json")
     with open(config_out, "w") as f:
         json.dump(self.config,
                   f,
                   indent=2,
                   sort_keys=True,
                   cls=tune_logger._SafeFallbackEncoder)
     config_pkl = os.path.join(self.logdir, "params.pkl")
     with open(config_pkl, "wb") as f:
         cloudpickle.dump(self.config, f)
Example #16
 def write_checkpoint(self, checkpoint: Dict):
     # Store the checkpoint_id in the file so that the Tune trial can be
     # resumed after failure or cancellation.
     checkpoint[TUNE_CHECKPOINT_ID] = self._latest_checkpoint_id
     # If inside a Tune Trainable, then checkpoint with Tune.
     with tune.checkpoint_dir(step=self._latest_checkpoint_id) as \
             checkpoint_dir:
         path = Path(checkpoint_dir)
         # Use a standard file name so that we know which file to load
         # the checkpoint from.
         file_path = path.joinpath(TUNE_CHECKPOINT_FILE_NAME)
         with file_path.open("wb") as f:
             cloudpickle.dump(checkpoint, f)
Example #17
 def _init(self):
     config_out = os.path.join(self.logdir, "params.json")
     with open(config_out, "w") as f:
         json.dump(self.config,
                   f,
                   indent=2,
                   sort_keys=True,
                   cls=_SafeFallbackEncoder)
     config_pkl = os.path.join(self.logdir, "params.pkl")
     with open(config_pkl, "wb") as f:
         cloudpickle.dump(self.config, f)
     local_file = os.path.join(self.logdir, "result.json")
     self.local_out = open(local_file, "a")
Example #18
    def write_checkpoint(self, checkpoint: Dict):
        """Writes checkpoint to disk."""
        if self._checkpoint_strategy.num_to_keep == 0:
            # Checkpoints should not be persisted to disk.
            return

        # TODO(matt): Implement additional checkpoint strategy functionality.
        # Get or create checkpoint dir.
        self.latest_checkpoint_dir.mkdir(parents=True, exist_ok=True)
        # Write checkpoint to disk.
        with self.latest_checkpoint_path.open("wb") as f:
            cloudpickle.dump(checkpoint, f)
            logger.debug(f"Checkpoint successfully written to: "
                         f"{self.latest_checkpoint_path}")
Example #19
 def _init(self):
     config_out = os.path.join(self.logdir, "params.json")
     with open(config_out, "w") as f:
         json.dump(
             self.config,
             f,
             indent=2,
             sort_keys=True,
             cls=_SafeFallbackEncoder)
     config_pkl = os.path.join(self.logdir, "params.pkl")
     with open(config_pkl, "wb") as f:
         cloudpickle.dump(self.config, f)
     local_file = os.path.join(self.logdir, "result.json")
     self.local_out = open(local_file, "a")
Example #20
def test_batch_prediction_with_set_cpus(ray_start_4_cpus):
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, MODEL_KEY), "wb") as f:
            cpickle.dump(model, f)

        checkpoint = Checkpoint.from_directory(tmpdir)

        batch_predictor = BatchPredictor.from_checkpoint(
            checkpoint, SklearnPredictor)

        test_dataset = ray.data.from_pandas(
            pd.DataFrame(dummy_data, columns=["A", "B"]))
        batch_predictor.predict(test_dataset,
                                num_cpus_per_worker=2,
                                num_estimator_cpus=2)
Example #21
 def write_error_log(self,
                     exc: Optional[Union[TuneError, RayTaskError]] = None):
     if exc and self.logdir:
         self.num_failures += 1
         self.error_file = os.path.join(self.logdir, "error.txt")
         if isinstance(exc, RayTaskError):
             # Piping through the actual error to result grid.
             self.pickled_error_file = os.path.join(self.logdir,
                                                    "error.pkl")
             with open(self.pickled_error_file, "wb") as f:
                 cloudpickle.dump(exc, f)
         with open(self.error_file, "a+") as f:
             f.write("Failure # {} (occurred at {})\n".format(
                 self.num_failures, date_str()))
             f.write(str(exc) + "\n")
     self.invalidate_json_state()
Example #22
    def to_directory(self, path: Optional[str] = None) -> str:
        """Write checkpoint data to directory.

        Args:
            path (str): Target directory to restore data in.

        Returns:
            str: Directory containing checkpoint data.
        """
        path = path if path is not None else _temporary_checkpoint_dir()

        os.makedirs(path, exist_ok=True)
        # Drop marker
        open(os.path.join(path, ".is_checkpoint"), "a").close()

        if self._data_dict or self._obj_ref:
            # This is an object ref or dict
            data_dict = self.to_dict()

            if _FS_CHECKPOINT_KEY in data_dict:
                # This used to be a true fs checkpoint, so restore
                _unpack(data_dict[_FS_CHECKPOINT_KEY], path)
            else:
                # This is a dict checkpoint. Dump data into checkpoint.pkl
                checkpoint_data_path = os.path.join(
                    path, _DICT_CHECKPOINT_FILE_NAME)
                with open(checkpoint_data_path, "wb") as f:
                    pickle.dump(data_dict, f)
        else:
            # This is either a local fs, remote node fs, or external fs
            local_path = self._local_path
            external_path = _get_external_path(self._uri)
            if local_path:
                if local_path != path:
                    # If this exists on the local path, just copy over
                    if path and os.path.exists(path):
                        shutil.rmtree(path)
                    shutil.copytree(local_path, path)
            elif external_path:
                # If this exists on external storage (e.g. cloud), download
                download_from_bucket(bucket=external_path, local_path=path)
            else:
                raise RuntimeError(
                    f"No valid location found for checkpoint {self}: {self._uri}"
                )

        return path
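A brief usage sketch for this method, assuming a dict-backed `Checkpoint` as in Ray AIR (values are illustrative):

from ray.air.checkpoint import Checkpoint

checkpoint = Checkpoint.from_dict({"model_weights": [1, 2, 3]})
# Drops the .is_checkpoint marker and dumps the dict into the directory.
path = checkpoint.to_directory()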
Example #23
            def save_checkpoint(self, checkpoint_dir: str):
                checkpoint_path = super(AIRRLTrainer,
                                        self).save_checkpoint(checkpoint_dir)

                trainer_class_path = os.path.join(checkpoint_dir,
                                                  RL_TRAINER_CLASS_FILE)
                with open(trainer_class_path, "wb") as fp:
                    cpickle.dump(self.__class__, fp)

                config_path = os.path.join(checkpoint_dir, RL_CONFIG_FILE)
                with open(config_path, "wb") as fp:
                    cpickle.dump(self.config, fp)

                if preprocessor:
                    save_preprocessor_to_dir(preprocessor, checkpoint_dir)

                return checkpoint_path
Example #24
    def _to_directory(self, path: str) -> None:
        if self._data_dict or self._obj_ref:
            # This is an object ref or dict
            data_dict = self.to_dict()
            if _FS_CHECKPOINT_KEY in data_dict:
                for key in data_dict.keys():
                    if key == _FS_CHECKPOINT_KEY:
                        continue
                    metadata_path = os.path.join(
                        path, f"{key}{_METADATA_CHECKPOINT_SUFFIX}")
                    with open(metadata_path, "wb") as f:
                        pickle.dump(data_dict[key], f)
                # This used to be a true fs checkpoint, so restore
                _unpack(data_dict[_FS_CHECKPOINT_KEY], path)
            else:
                # This is a dict checkpoint.
                # First, restore any additional files
                additional_files = data_dict.pop(
                    _DICT_CHECKPOINT_ADDITIONAL_FILE_KEY, {})
                for file, content in additional_files.items():
                    _unpack(stream=content, path=os.path.join(path, file))

                # Then dump data into checkpoint.pkl
                checkpoint_data_path = os.path.join(
                    path, _DICT_CHECKPOINT_FILE_NAME)
                with open(checkpoint_data_path, "wb") as f:
                    pickle.dump(data_dict, f)
        else:
            # This is either a local fs, remote node fs, or external fs
            local_path = self._local_path
            external_path = _get_external_path(self._uri)
            if local_path:
                if local_path != path:
                    # If this exists on the local path, just copy over
                    if path and os.path.exists(path):
                        shutil.rmtree(path)
                    shutil.copytree(local_path, path)
            elif external_path:
                # If this exists on external storage (e.g. cloud), download
                download_from_uri(uri=external_path,
                                  local_path=path,
                                  filelock=False)
            else:
                raise RuntimeError(
                    f"No valid location found for checkpoint {self}: {self._uri}"
                )
Example #25
    def _save(self, checkpoint_dir):
        """Creates a checkpoint in ``checkpoint_dir``, creating a pickle file.

        Args:
            checkpoint_dir (str): file path to store pickle checkpoint.

        Returns:
            path (str): file path to the pickled checkpoint file.

        """
        path = os.path.join(checkpoint_dir, "checkpoint")
        try:
            with open(path, "wb") as f:
                cpickle.dump(self.estimator_list, f)
        except Exception:
            warnings.warn("Unable to save estimator.", category=RuntimeWarning)
        return path
Example #26
def test_init():
    preprocessor = DummyPreprocessor()
    preprocessor.attr = 1
    predictor = SklearnPredictor(estimator=model, preprocessor=preprocessor)

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, MODEL_KEY), "wb") as f:
            cpickle.dump(model, f)
        save_preprocessor_to_dir(preprocessor, tmpdir)

        checkpoint = Checkpoint.from_directory(tmpdir)
        checkpoint_predictor = SklearnPredictor.from_checkpoint(checkpoint)

    assert np.allclose(
        checkpoint_predictor.estimator.feature_importances_,
        predictor.estimator.feature_importances_,
    )
    assert checkpoint_predictor.preprocessor.attr == predictor.preprocessor.attr
Example #27
def atomic_save(state: Dict, checkpoint_dir: str, file_name: str,
                tmp_file_name: str):
    """Atomically saves the state object to the checkpoint directory.

    This is automatically used by tune.run during a Tune job.

    Args:
        state (dict): Object state to be serialized.
        checkpoint_dir (str): Directory location for the checkpoint.
        file_name (str): Final name of file.
        tmp_file_name (str): Temporary name of file.
    """
    import ray.cloudpickle as cloudpickle
    tmp_search_ckpt_path = os.path.join(checkpoint_dir, tmp_file_name)
    with open(tmp_search_ckpt_path, "wb") as f:
        cloudpickle.dump(state, f)

    os.replace(tmp_search_ckpt_path, os.path.join(checkpoint_dir, file_name))
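For instance, a searcher could persist its state like this (the file names and state dict are illustrative):

atomic_save(
    state={"trial_id": 7},
    checkpoint_dir="/tmp/my_experiment",
    file_name="searcher-state.pkl",
    tmp_file_name=".tmp_searcher_ckpt",
)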
Example #28
    def process_checkpoint(
        checkpoint: Union[Dict, str], parent_dir: str, trainable_state: Dict
    ) -> str:
        """Creates checkpoint file structure and writes metadata
        under `parent_dir`.

        The file structure could either look like:
        - checkpoint_00000 (returned path)
        -- .is_checkpoint
        -- .tune_metadata
        -- xxx.pkl (or whatever user specifies in their Trainable)
        Or,
        - checkpoint_00000
        -- .is_checkpoint
        -- checkpoint (returned path)
        -- checkpoint.tune_metadata
        """
        saved_as_dict = False
        if isinstance(checkpoint, string_types):
            if not checkpoint.startswith(parent_dir):
                raise ValueError(
                    "The returned checkpoint path must be within the "
                    "given checkpoint dir {}: {}".format(parent_dir, checkpoint)
                )
            checkpoint_path = checkpoint
            if os.path.isdir(checkpoint_path):
                # Add trailing slash to prevent tune metadata from
                # being written outside the directory.
                checkpoint_path = os.path.join(checkpoint_path, "")
        elif isinstance(checkpoint, dict):
            saved_as_dict = True
            checkpoint_path = os.path.join(parent_dir, "checkpoint")
            with open(checkpoint_path, "wb") as f:
                pickle.dump(checkpoint, f)
        else:
            raise ValueError(
                "Returned unexpected type {}. "
                "Expected str or dict.".format(type(checkpoint))
            )

        with open(checkpoint_path + ".tune_metadata", "wb") as f:
            trainable_state["saved_as_dict"] = saved_as_dict
            pickle.dump(trainable_state, f)
        return checkpoint_path
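A usage sketch for the dict branch, assuming this is exposed as a static helper (values are illustrative; `trainable_state` is whatever metadata the Trainable carries):

path = process_checkpoint(
    checkpoint={"weights": [0.1, 0.2]},
    parent_dir="/tmp/checkpoint_00000",
    trainable_state={"iteration": 5},
)
# path == "/tmp/checkpoint_00000/checkpoint", with a sibling
# "checkpoint.tune_metadata" file containing the pickled state.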
Example #29
    def from_estimator(
        cls,
        estimator: BaseEstimator,
        *,
        path: os.PathLike,
        preprocessor: Optional["Preprocessor"] = None,
    ) -> "SklearnCheckpoint":
        """Create a :py:class:`~ray.air.checkpoint.Checkpoint` that stores an sklearn
        ``Estimator``.

        Args:
            estimator: The ``Estimator`` to store in the checkpoint.
            path: The directory where the checkpoint will be stored.
            preprocessor: A fitted preprocessor to be applied before inference.

        Returns:
            An :py:class:`SklearnCheckpoint` containing the specified ``Estimator``.

        Examples:
            >>> from ray.train.sklearn import SklearnCheckpoint
            >>> from sklearn.ensemble import RandomForestClassifier
            >>>
            >>> estimator = RandomForestClassifier()
            >>> checkpoint = SklearnCheckpoint.from_estimator(estimator, path=".")

            You can use a :py:class:`SklearnCheckpoint` to create an
            :py:class:`~ray.train.sklearn.SklearnPredictor` and perform inference.

            >>> from ray.train.sklearn import SklearnPredictor
            >>>
            >>> predictor = SklearnPredictor.from_checkpoint(checkpoint)
        """
        with open(os.path.join(path, MODEL_KEY), "wb") as f:
            cpickle.dump(estimator, f)

        if preprocessor:
            save_preprocessor_to_dir(preprocessor, path)

        checkpoint = cls.from_directory(path)

        return checkpoint
Example #30
    def _save(self, checkpoint_dir):
        """Creates a checkpoint in ``checkpoint_dir``, creating a pickle file.

        Args:
            checkpoint_dir (str): file path to store pickle checkpoint.

        Returns:
            path (str): file path to the pickled checkpoint file.

        """
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "wb") as f:
            try:
                cpickle.dump(self.estimator, f)
                self.pickled = True
            except PicklingError:
                self.pickled = False
                warnings.warn("{} could not be pickled. "
                              "Restoring estimators may run into issues."
                              .format(self.estimator))
        return path
Example #31
def to_air_checkpoint(
    path: str,
    estimator: BaseEstimator,
    preprocessor: Optional["Preprocessor"] = None,
) -> Checkpoint:
    """Convert a pretrained model to AIR checkpoint for serve or inference.

    Args:
        path: The directory path where model and preprocessor steps are stored to.
        estimator: A pretrained model.
        preprocessor: A fitted preprocessor. The preprocessing logic will
            be applied to serve/inference.
    Returns:
        A Ray Air checkpoint.
    """
    with open(os.path.join(path, MODEL_KEY), "wb") as f:
        cpickle.dump(estimator, f)

    if preprocessor:
        save_preprocessor_to_dir(preprocessor, path)

    checkpoint = Checkpoint.from_directory(path)

    return checkpoint
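A hedged end-to-end sketch (assumes a fitted sklearn estimator and the module-level names used above):

import tempfile

from sklearn.ensemble import RandomForestClassifier

estimator = RandomForestClassifier().fit([[0, 1], [1, 0]], [0, 1])
with tempfile.TemporaryDirectory() as tmpdir:
    checkpoint = to_air_checkpoint(tmpdir, estimator)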