예제 #1
0
    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)

        if not path.startswith(self.target_protocol):
            store_path = self.target_protocol + "://" + path
        else:
            store_path = path
        path = self.fs._strip_protocol(store_path)
        if "r" not in mode:
            return self.fs._open(path, mode=mode, **kwargs)
        detail, fn = self._check_file(store_path)
        if detail:
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                logger.debug("Opening local copy of %s" % path)
                return open(fn, mode)
            else:
                raise ValueError(
                    "Attempt to open partially cached file %s"
                    "as a wholly cached file" % path
                )
        else:
            hash = hash_name(path, self.same_names)
            fn = os.path.join(self.storage[-1], hash)
            blocks = True
            detail = {
                "fn": hash,
                "blocks": blocks,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self.cached_files[-1][store_path] = detail
            logger.debug("Copying %s to local cache" % path)
        kwargs["mode"] = mode

        # call target filesystems open
        # TODO: why not just use fs.get ??
        f = self.fs._open(path, **kwargs)
        if self.compression:
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
        with open(fn, "wb") as f2:
            if isinstance(f, AbstractBufferedFile):
                # want no type of caching if just downloading whole thing
                f.cache = BaseCache(0, f.cache.fetcher, f.size)
            if getattr(f, "blocksize", 0) and f.size:
                # opportunity to parallelise here
                data = True
                while data:
                    data = f.read(f.blocksize)
                    f2.write(data)
            else:
                # this only applies to HTTP, should instead use streaming
                f2.write(f.read())
        self.save_cache()
        return self._open(path, mode)
예제 #2
0
    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)

        if "r" not in mode:
            return LocalTempFile(self, path, mode=mode)
        fn = self._check_file(path)
        if fn:
            return open(fn, mode)

        sha = self.hash_name(path, self.same_names)
        fn = os.path.join(self.storage[-1], sha)
        logger.debug("Copying %s to local cache" % path)
        kwargs["mode"] = mode

        self._mkcache()
        if self.compression:
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (infer_compression(path)
                        if self.compression == "infer" else self.compression)
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get(path, fn)
        return self._open(path, mode)
예제 #3
0
def test_cache_getitem_raises():
    """Check that ``BaseCache`` indexing rejects unsupported keys.

    An integer key is expected to raise ``TypeError`` with a message
    matching "int"; a slice with a step is expected to raise ``ValueError``
    with a message matching "contiguous".
    """
    total_size = len(string.ascii_letters)
    cache = BaseCache(4, letters_fetcher, total_size)

    # A bare integer index.
    with pytest.raises(TypeError, match="int"):
        cache[5]

    # A slice with a step, spelled out explicitly.
    strided = slice(None, None, 4)
    with pytest.raises(ValueError, match="contiguous"):
        cache[strided]
예제 #4
0
    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)

        if not path.startswith(self.target_protocol):
            store_path = self.target_protocol + "://" + path
        else:
            store_path = path
        path = self.fs._strip_protocol(store_path)
        if "r" not in mode:
            return self.fs._open(path, mode=mode, **kwargs)
        fn = self._check_file(path)
        if fn:
            return open(fn, mode)

        sha = hashlib.sha256(path.encode()).hexdigest()
        fn = os.path.join(self.storage[-1], sha)
        logger.debug("Copying %s to local cache" % path)
        kwargs["mode"] = mode

        with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
            if isinstance(f, AbstractBufferedFile):
                # want no type of caching if just downloading whole thing
                f.cache = BaseCache(0, f.cache.fetcher, f.size)
            if getattr(f, "blocksize", 0) and f.size:
                # opportunity to parallelise here
                data = True
                while data:
                    data = f.read(f.blocksize)
                    f2.write(data)
            else:
                # this only applies to HTTP, should instead use streaming
                f2.write(f.read())
        return self._open(path, mode)
예제 #5
0
    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        if "r" not in mode:
            return self.fs._open(path, mode=mode, **kwargs)
        detail = self._check_file(path)
        if detail:
            detail, fn = detail
            _, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                logger.debug("Opening local copy of %s" % path)

                # In order to support downstream filesystems to be able to
                # infer the compression from the original filename, like
                # the `TarFileSystem`, let's extend the `io.BufferedReader`
                # fileobject protocol by adding a dedicated attribute
                # `original`.
                f = open(fn, mode)
                f.original = detail.get("original")
                return f
            else:
                raise ValueError(
                    "Attempt to open partially cached file %s"
                    "as a wholly cached file" % path
                )
        else:
            fn = self._make_local_details(path)
        kwargs["mode"] = mode

        # call target filesystems open
        self._mkcache()
        if self.compression:
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (
                    infer_compression(path)
                    if self.compression == "infer"
                    else self.compression
                )
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2 ** 20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get(path, fn)
        self.save_cache()
        return self._open(path, mode)
예제 #6
0
    def _open(self, path, mode="rb", **kwargs):
        path = self._strip_protocol(path)
        if "r" not in mode:
            return self.fs._open(path, mode=mode, **kwargs)
        detail = self._check_file(path)
        if detail:
            detail, fn = detail
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                logger.debug("Opening local copy of %s" % path)
                return open(fn, mode)
            else:
                raise ValueError("Attempt to open partially cached file %s"
                                 "as a wholly cached file" % path)
        else:
            hash = self.hash_name(path, self.same_names)
            fn = os.path.join(self.storage[-1], hash)
            detail = {
                "fn": hash,
                "blocks": True,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self.cached_files[-1][path] = detail
            logger.debug("Copying %s to local cache" % path)
        kwargs["mode"] = mode

        # call target filesystems open
        self._mkcache()
        if self.compression:
            with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                if isinstance(f, AbstractBufferedFile):
                    # want no type of caching if just downloading whole thing
                    f.cache = BaseCache(0, f.cache.fetcher, f.size)
                comp = (infer_compression(path)
                        if self.compression == "infer" else self.compression)
                f = compr[comp](f, mode="rb")
                data = True
                while data:
                    block = getattr(f, "blocksize", 5 * 2**20)
                    data = f.read(block)
                    f2.write(data)
        else:
            self.fs.get(path, fn)
        self.save_cache()
        return self._open(path, mode)