Example #1
0
    def to_directory(self, path: Optional[str] = None) -> str:
        """Materialize the checkpoint data as a directory on local disk.

        Args:
            path: Directory to write the checkpoint data into. When ``None``,
                a temporary directory obtained from
                ``self._get_temporary_checkpoint_dir()`` is used instead.

        Returns:
            str: Normalized path of the directory holding the checkpoint data.

        Raises:
            RuntimeError: If the directory lock was held by someone else and,
                once that lock was released, the directory does not exist.
        """
        use_temporary_dir = path is None
        if use_temporary_dir:
            path = self._get_temporary_checkpoint_dir()
        path = os.path.normpath(path)
        lock_path = f"{path}.lock"

        # A deletion lock is only requested for directories we picked
        # ourselves, never for a caller-supplied path.
        _make_dir(path, acquire_del_lock=use_temporary_dir)

        try:
            # With a timeout of 0 the lock is attempted exactly once; if it
            # cannot be acquired, a TimeoutError is raised right away.
            with TempFileLock(lock_path, timeout=0):
                self._to_directory(path)
        except TimeoutError:
            # The directory is locked by someone else: wait for the lock to
            # be released, but do not write anything ourselves.
            with TempFileLock(lock_path, timeout=-1):
                pass
            if not os.path.exists(path):
                raise RuntimeError(
                    f"Checkpoint directory {path} does not exist, "
                    "even though it should have been created by "
                    "another process. Please raise an issue on GitHub: "
                    "https://github.com/ray-project/ray/issues")

        return path
Example #2
0
def _copy_dir(source_dir: str, target_dir: str, *, _retry: bool = True) -> None:
    """Replace ``target_dir`` with a copy of ``source_dir`` under a file lock.

    Args:
        source_dir: Directory tree to copy from.
        target_dir: Destination directory. Any existing content is removed
            with ``_delete_path_unsafe`` before copying.
        _retry: Internal flag. When True, one more attempt is made if the
            destination is missing after waiting for a lock held elsewhere.

    Raises:
        RuntimeError: If the destination is still missing after waiting for
            the lock and no retry is left.
    """
    target_dir = os.path.normpath(target_dir)
    lock_path = f"{target_dir}.lock"
    try:
        # With a timeout of 0 the lock is attempted exactly once; if it
        # cannot be acquired, a TimeoutError is raised right away.
        with TempFileLock(lock_path, timeout=0):
            _delete_path_unsafe(target_dir)
            shutil.copytree(source_dir, target_dir)
    except TimeoutError:
        # Someone else holds the lock: wait for it, but do nothing ourselves.
        with TempFileLock(lock_path):
            pass

        if os.path.exists(target_dir):
            # The lock holder left a directory in place; nothing more to do.
            return

        # The directory may have been locked because it was being deleted,
        # in which case it has to be recreated.
        if not _retry:
            raise RuntimeError(
                f"Target directory {target_dir} does not exist "
                "and couldn't be recreated. "
                "Please raise an issue on GitHub: "
                "https://github.com/ray-project/ray/issues"
            )
        _copy_dir(source_dir, target_dir, _retry=False)
Example #3
0
def _unpack_dir(stream: io.BytesIO, target_dir: str, *, _retry: bool = True) -> None:
    """Extract a tar archive held in ``stream`` into ``target_dir``.

    The stream is rewound to its start before reading, and extraction runs
    while holding the ``<target_dir>.lock`` file lock.

    Args:
        stream: In-memory buffer containing the tar archive.
        target_dir: Directory to extract the archive into.
        _retry: Internal flag. When True, one more attempt is made if the
            destination is missing after waiting for a lock held elsewhere.

    Raises:
        RuntimeError: If the destination is still missing after waiting for
            the lock and no retry is left.
    """
    stream.seek(0)
    target_dir = os.path.normpath(target_dir)
    lock_path = f"{target_dir}.lock"
    try:
        # With a timeout of 0 the lock is attempted exactly once; if it
        # cannot be acquired, a TimeoutError is raised right away.
        with TempFileLock(lock_path, timeout=0), tarfile.open(
            fileobj=stream
        ) as tar:
            tar.extractall(target_dir)
    except TimeoutError:
        # Someone else holds the lock: wait for it, but do nothing ourselves.
        with TempFileLock(lock_path):
            pass

        if os.path.exists(target_dir):
            # The lock holder left a directory in place; nothing more to do.
            return

        # The directory may have been locked because it was being deleted,
        # in which case it has to be recreated.
        if not _retry:
            raise RuntimeError(
                f"Target directory {target_dir} does not exist "
                "and couldn't be recreated. "
                "Please raise an issue on GitHub: "
                "https://github.com/ray-project/ray/issues"
            )
        _unpack_dir(stream, target_dir, _retry=False)
Example #4
0
def download_from_uri(uri: str, local_path: str, filelock: bool = True):
    """Copy the files behind ``uri`` to ``local_path`` via ``pyarrow.fs``.

    Args:
        uri: Source URI; it is resolved to a filesystem and a path with
            ``get_fs_and_path``.
        local_path: Local destination passed to ``pyarrow.fs.copy_files``.
        filelock: When True, the copy runs while holding the
            ``<normalized local_path>.lock`` file lock.

    Raises:
        ValueError: If no filesystem could be resolved for ``uri``.
    """
    _assert_pyarrow_installed()

    fs, bucket_path = get_fs_and_path(uri)
    if not fs:
        raise ValueError(
            f"Could not download from URI: "
            f"URI `{uri}` is not a valid or supported cloud target. "
            f"Hint: {fs_hint(uri)}")

    if not filelock:
        # Caller opted out of locking: copy directly.
        pyarrow.fs.copy_files(bucket_path, local_path, source_filesystem=fs)
        return

    lock_path = f"{os.path.normpath(local_path)}.lock"
    with TempFileLock(lock_path):
        pyarrow.fs.copy_files(bucket_path, local_path, source_filesystem=fs)
Example #5
0
    def as_directory(self) -> Iterator[str]:
        """Return checkpoint directory path in a context.

        This function makes checkpoint data available as a directory while avoiding
        unnecessary copies and left-over temporary data.

        If the checkpoint is already a directory checkpoint, it will return
        the existing path. If it is not, it will create a temporary directory,
        which will be deleted after the context is exited.

        If the checkpoint has been created from an object reference, the directory name
        will be constant and equal to the object reference ID. This allows for multiple
        processes to use the same files for improved performance. The directory
        will be deleted after exiting the context only if no other processes are using
        it.
        In any other case, a new temporary directory will be created with each call
        to ``as_directory``.

        Users should treat the returned checkpoint directory as read-only and avoid
        changing any data within it, as it might get deleted when exiting the context.

        Cleanup of a temporary directory also runs when the code using the
        directory raises an exception, so no deletion lock file is left behind
        in that case.

        Example:

            with checkpoint.as_directory() as checkpoint_dir:
                # Do some read-only processing of files within checkpoint_dir
                pass

            # At this point, if a temporary directory was created, it will have
            # been deleted.

        Yields:
            str: Path of the directory containing the checkpoint data.
        """
        if self._local_path:
            # Already backed by a local directory: hand it out as-is and
            # never delete it.
            yield self._local_path
            return

        temp_dir = self.to_directory()
        del_lock_path = _get_del_lock_path(temp_dir)
        try:
            yield temp_dir
        finally:
            # Always release our deletion lock, even if the consumer of the
            # directory raised; otherwise the stale lock file would prevent
            # the directory from ever being removed.
            try:
                os.remove(del_lock_path)
            except Exception:
                logger.warning(
                    f"Could not remove {del_lock_path} deletion file lock. "
                    f"Traceback:\n{traceback.format_exc()}")

            # In the edge case (process crash before del lock file is removed),
            # we do not remove the directory at all.
            # Since it's in /tmp, this is not that big of a deal.
            # Only delete the directory if no deletion lock files remain.
            temp_dir_base_name = Path(temp_dir).name
            remaining_del_locks = Path(temp_dir).parent.glob(
                _get_del_lock_path(temp_dir_base_name, "*"))
            if not any(remaining_del_locks):
                try:
                    # Timeout 0 means there will be only one attempt to acquire
                    # the file lock. If it cannot be acquired, a TimeoutError
                    # will be thrown.
                    with TempFileLock(f"{temp_dir}.lock", timeout=0):
                        shutil.rmtree(temp_dir, ignore_errors=True)
                except TimeoutError:
                    # Deliberate best effort: the directory lock is held
                    # elsewhere, so leave the directory alone.
                    pass
Example #6
0
def _delete_path(target_path: str) -> bool:
    """Delete a path (file or directory) while holding its file lock.

    Args:
        target_path: Path to delete. It is normalized first, and the lock
            file used is ``<normalized path>.lock``.

    Returns:
        bool: The result of ``_delete_path_unsafe`` for the normalized path.
    """
    target_path = os.path.normpath(target_path)
    path_lock = TempFileLock(f"{target_path}.lock")
    with path_lock:
        deleted = _delete_path_unsafe(target_path)
        return deleted