def _open(self, path, mode="rb", **kwargs):
    path = self._strip_protocol(path)
    if not path.startswith(self.target_protocol):
        store_path = self.target_protocol + "://" + path
    else:
        store_path = path
    path = self.fs._strip_protocol(store_path)
    if "r" not in mode:
        return self.fs._open(path, mode=mode, **kwargs)
    detail, fn = self._check_file(store_path)
    if detail:
        hash, blocks = detail["fn"], detail["blocks"]
        if blocks is True:
            logger.debug("Opening local copy of %s" % path)
            return open(fn, mode)
        else:
            raise ValueError(
                "Attempt to open partially cached file %s"
                " as a wholly cached file" % path
            )
    else:
        hash = hash_name(path, self.same_names)
        fn = os.path.join(self.storage[-1], hash)
        blocks = True
        detail = {
            "fn": hash,
            "blocks": blocks,
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self.cached_files[-1][store_path] = detail
        logger.debug("Copying %s to local cache" % path)
    kwargs["mode"] = mode

    # call the target filesystem's open
    # TODO: why not just use fs.get ??
    f = self.fs._open(path, **kwargs)
    if self.compression:
        comp = (
            infer_compression(path)
            if self.compression == "infer"
            else self.compression
        )
        f = compr[comp](f, mode="rb")
    with open(fn, "wb") as f2:
        if isinstance(f, AbstractBufferedFile):
            # want no type of caching if just downloading whole thing
            f.cache = BaseCache(0, f.cache.fetcher, f.size)
        if getattr(f, "blocksize", 0) and f.size:
            # opportunity to parallelise here
            data = True
            while data:
                data = f.read(f.blocksize)
                f2.write(data)
        else:
            # this only applies to HTTP, should instead use streaming
            f2.write(f.read())
    self.save_cache()
    # the file is now cached locally; recurse to open the local copy
    return self._open(path, mode)
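# A minimal usage sketch for the whole-file caching filesystem above, assuming
# fsspec registers it under the "filecache" protocol and that layering it over
# the local "file" filesystem is an acceptable stand-in for a remote store.
import os
import tempfile

import fsspec

src_dir = tempfile.mkdtemp()
src = os.path.join(src_dir, "example.bin")
with open(src, "wb") as f:
    f.write(b"hello cached world")

# The first read-mode open copies the whole file into cache_storage; later
# opens return the local copy directly via the cached-files metadata.
fs = fsspec.filesystem(
    "filecache", target_protocol="file", cache_storage=tempfile.mkdtemp()
)
with fs.open(src, "rb") as f:
    print(f.read())
with fs.open(src, "rb") as f:  # served from the local cache this time
    print(f.read())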
def _open(self, path, mode="rb", **kwargs):
    path = self._strip_protocol(path)
    if "r" not in mode:
        return LocalTempFile(self, path, mode=mode)
    fn = self._check_file(path)
    if fn:
        return open(fn, mode)

    sha = self.hash_name(path, self.same_names)
    fn = os.path.join(self.storage[-1], sha)
    logger.debug("Copying %s to local cache" % path)
    kwargs["mode"] = mode
    self._mkcache()
    if self.compression:
        with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
            if isinstance(f, AbstractBufferedFile):
                # want no type of caching if just downloading whole thing
                f.cache = BaseCache(0, f.cache.fetcher, f.size)
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
            data = True
            while data:
                block = getattr(f, "blocksize", 5 * 2**20)
                data = f.read(block)
                f2.write(data)
    else:
        self.fs.get(path, fn)
    return self._open(path, mode)
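# A hedged sketch of the write path above: with "simplecache" (assuming that is
# the registered protocol for this class), a non-read mode returns a
# LocalTempFile that is uploaded to the target filesystem when it is closed.
import os
import tempfile

import fsspec

target_dir = tempfile.mkdtemp()
dest = os.path.join(target_dir, "out.bin")
fs = fsspec.filesystem(
    "simplecache", target_protocol="file", cache_storage=tempfile.mkdtemp()
)
with fs.open(dest, "wb") as f:  # buffered in a local temporary file
    f.write(b"written through the cache")
with open(dest, "rb") as f:  # the close above pushed the bytes to the target
    print(f.read())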
def test_cache_getitem_raises():
    cacher = BaseCache(4, letters_fetcher, len(string.ascii_letters))
    with pytest.raises(TypeError, match="int"):
        cacher[5]
    with pytest.raises(ValueError, match="contiguous"):
        cacher[::4]
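# The test above relies on a letters_fetcher helper from the surrounding test
# module; a minimal fetcher consistent with how it is used here (byte ranges
# over the ASCII letters) might look like this -- an assumption, not the
# actual fixture.
import string


def letters_fetcher(start, end):
    # return the requested slice of the "remote" data as bytes
    return string.ascii_letters[start:end].encode()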
def _open(self, path, mode="rb", **kwargs):
    path = self._strip_protocol(path)
    if not path.startswith(self.target_protocol):
        store_path = self.target_protocol + "://" + path
    else:
        store_path = path
    path = self.fs._strip_protocol(store_path)
    if "r" not in mode:
        return self.fs._open(path, mode=mode, **kwargs)
    fn = self._check_file(path)
    if fn:
        return open(fn, mode)

    sha = hashlib.sha256(path.encode()).hexdigest()
    fn = os.path.join(self.storage[-1], sha)
    logger.debug("Copying %s to local cache" % path)
    kwargs["mode"] = mode
    with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
        if isinstance(f, AbstractBufferedFile):
            # want no type of caching if just downloading whole thing
            f.cache = BaseCache(0, f.cache.fetcher, f.size)
        if getattr(f, "blocksize", 0) and f.size:
            # opportunity to parallelise here
            data = True
            while data:
                data = f.read(f.blocksize)
                f2.write(data)
        else:
            # this only applies to HTTP, should instead use streaming
            f2.write(f.read())
    return self._open(path, mode)
def _open(self, path, mode="rb", **kwargs):
    path = self._strip_protocol(path)
    if "r" not in mode:
        return self.fs._open(path, mode=mode, **kwargs)
    detail = self._check_file(path)
    if detail:
        detail, fn = detail
        _, blocks = detail["fn"], detail["blocks"]
        if blocks is True:
            logger.debug("Opening local copy of %s" % path)

            # So that downstream filesystems such as `TarFileSystem` can infer
            # the compression from the original filename, extend the
            # `io.BufferedReader` file object with a dedicated `original`
            # attribute.
            f = open(fn, mode)
            f.original = detail.get("original")
            return f
        else:
            raise ValueError(
                "Attempt to open partially cached file %s"
                " as a wholly cached file" % path
            )
    else:
        fn = self._make_local_details(path)
    kwargs["mode"] = mode

    # call the target filesystem's open
    self._mkcache()
    if self.compression:
        with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
            if isinstance(f, AbstractBufferedFile):
                # want no type of caching if just downloading whole thing
                f.cache = BaseCache(0, f.cache.fetcher, f.size)
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
            data = True
            while data:
                block = getattr(f, "blocksize", 5 * 2**20)
                data = f.read(block)
                f2.write(data)
    else:
        self.fs.get(path, fn)
    self.save_cache()
    return self._open(path, mode)
def _open(self, path, mode="rb", **kwargs):
    path = self._strip_protocol(path)
    if "r" not in mode:
        return self.fs._open(path, mode=mode, **kwargs)
    detail = self._check_file(path)
    if detail:
        detail, fn = detail
        hash, blocks = detail["fn"], detail["blocks"]
        if blocks is True:
            logger.debug("Opening local copy of %s" % path)
            return open(fn, mode)
        else:
            raise ValueError(
                "Attempt to open partially cached file %s"
                " as a wholly cached file" % path
            )
    else:
        hash = self.hash_name(path, self.same_names)
        fn = os.path.join(self.storage[-1], hash)
        detail = {
            "fn": hash,
            "blocks": True,
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self.cached_files[-1][path] = detail
        logger.debug("Copying %s to local cache" % path)
    kwargs["mode"] = mode

    # call the target filesystem's open
    self._mkcache()
    if self.compression:
        with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
            if isinstance(f, AbstractBufferedFile):
                # want no type of caching if just downloading whole thing
                f.cache = BaseCache(0, f.cache.fetcher, f.size)
            comp = (
                infer_compression(path)
                if self.compression == "infer"
                else self.compression
            )
            f = compr[comp](f, mode="rb")
            data = True
            while data:
                block = getattr(f, "blocksize", 5 * 2**20)
                data = f.read(block)
                f2.write(data)
    else:
        self.fs.get(path, fn)
    self.save_cache()
    return self._open(path, mode)