def _open(self, path, mode="rb", **kwargs):
    """Wrap the target ``_open``.

    If the whole file exists in the cache, just open it locally and
    return that. Otherwise, open the file on the target FS, and make it
    have a mmap cache pointing to the location which we determine, in
    our cache. The ``blocks`` instance is shared, so as the mmap cache
    instance updates, so does the entry in our ``cached_files``
    attribute. We monkey-patch this file, so that when it closes, we
    call ``close_and_update`` to save the state of the blocks.
    """
    path = self._strip_protocol(path)
    if not path.startswith(self.protocol):
        path = self.protocol + "://" + path
    # Only binary reads are cached; anything else goes straight through.
    if mode != "rb":
        return self.fs._open(path, mode=mode, **kwargs)
    detail, fn = self._check_file(path)
    if detail:
        # File is (at least partially) in cache.
        blocks = detail["blocks"]
        if blocks is True:
            # Stored file is complete: serve the local copy directly.
            logger.debug("Opening local copy of %s", path)
            return open(fn, "rb")
        # TODO: action where partial file exists in read-only cache
        logger.debug("Opening partially cached copy of %s", path)
    else:
        # First sighting of this path: create a fresh entry in the
        # writable (last) cache location.  NOTE: renamed from ``hash``
        # to avoid shadowing the builtin.
        sha = hashlib.sha256(path.encode()).hexdigest()
        fn = os.path.join(self.storage[-1], sha)
        blocks = set()
        detail = {
            "fn": sha,
            "blocks": blocks,
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self.cached_files[-1][path] = detail
        logger.debug("Creating local sparse file for %s", path)
    # Disable the target file's own read cache; the MMapCache attached
    # below replaces it.
    kwargs["cache_type"] = "none"
    kwargs["mode"] = mode
    # call target filesystems open
    f = self.fs._open(path, **kwargs)
    if "blocksize" in detail:
        if detail["blocksize"] != f.blocksize:
            raise ValueError(
                "Cached file must be reopened with same block"
                "size as original (old: %i, new %i)"
                "" % (detail["blocksize"], f.blocksize)
            )
    else:
        detail["blocksize"] = f.blocksize
    # Shared ``blocks`` set keeps our cache metadata in sync as the
    # mmap cache fetches ranges.
    f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
    close = f.close
    f.close = lambda: self.close_and_update(f, close)
    return f
def _open(self, path, mode='rb', **kwargs):
    """Wrap the target ``_open``.

    If the whole file exists in the cache, just open it locally and
    return that. Otherwise, open the file on the target FS, and make it
    have a mmap cache pointing to the location which we determine, in
    our cache. The ``blocks`` instance is shared, so as the mmap cache
    instance updates, so does the entry in our ``cached_files``
    attribute. We monkey-patch this file, so that when it closes, we
    call ``close_and_update`` to save the state of the blocks.
    """
    # Only binary reads are cached; anything else goes straight through.
    if mode != 'rb':
        return self.fs._open(path, mode=mode, **kwargs)
    if path in self.cached_files:
        # Known path: reuse the recorded cache entry.
        detail = self.cached_files[path]
        sha, blocks = detail['fn'], detail['blocks']
        fn = os.path.join(self.storage, sha)
        if blocks is True:
            # Stored file is complete: serve the local copy directly.
            return open(fn, 'rb')
    else:
        # First sighting of this path: allocate a cache file name and
        # an empty block set.  NOTE: renamed from ``hash`` to avoid
        # shadowing the builtin.
        sha = hashlib.sha256(path.encode()).hexdigest()
        fn = os.path.join(self.storage, sha)
        blocks = set()
        detail = {'fn': sha, 'blocks': blocks}
        self.cached_files[path] = detail
    # Disable the target file's own read cache; the MMapCache attached
    # below replaces it.
    kwargs['cache_type'] = 'none'
    kwargs['mode'] = mode
    # call target filesystems open
    f = self.fs._open(path, **kwargs)
    # (Removed a leftover ``print(detail)`` debug statement that dumped
    # cache metadata to stdout on every open.)
    if 'blocksize' in detail:
        if detail['blocksize'] != f.blocksize:
            raise ValueError('Cached file must be reopened with same block'
                             'size as original (old: %i, new %i)'
                             '' % (detail['blocksize'], f.blocksize))
    else:
        detail['blocksize'] = f.blocksize
    # Shared ``blocks`` set keeps our cache metadata in sync as the
    # mmap cache fetches ranges.
    f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
    close = f.close
    f.close = lambda: self.close_and_update(f, close)
    return f
def _open(self, path, mode="rb", block_size=None, autocommit=True, cache_options=None, **kwargs):
    """Wrap the target _open

    If the whole file exists in the cache, just open it locally and
    return that. Otherwise, open the file on the target FS, and make it
    have a mmap cache pointing to the location which we determine, in
    our cache. The ``blocks`` instance is shared, so as the mmap cache
    instance updates, so does the entry in our ``cached_files``
    attribute. We monkey-patch this file, so that when it closes, we
    call ``close_and_update`` to save the state of the blocks.

    Parameters are passed through to the target filesystem's ``_open``;
    only read modes (``"r" in mode``) participate in caching.
    """
    # Normalise the path both for this (caching) layer and for the
    # wrapped target filesystem.
    path = self._strip_protocol(path)
    path = self.fs._strip_protocol(path)
    # Writes bypass the cache entirely.
    if "r" not in mode:
        return self.fs._open(path, mode=mode, block_size=block_size, autocommit=autocommit, cache_options=cache_options, **kwargs)
    detail = self._check_file(path)
    if detail:
        # file is in cache
        detail, fn = detail
        hash, blocks = detail["fn"], detail["blocks"]
        if blocks is True:
            # stored file is complete
            logger.debug("Opening local copy of %s" % path)
            return open(fn, mode)
        # TODO: action where partial file exists in read-only cache
        logger.debug("Opening partially cached copy of %s" % path)
    else:
        # New entry: name the sparse cache file (presumably via a
        # configurable hashing scheme — see ``hash_name``) in the
        # writable (last) cache location.
        hash = self.hash_name(path, self.same_names)
        fn = os.path.join(self.storage[-1], hash)
        blocks = set()
        detail = {
            "fn": hash,
            "blocks": blocks,
            "time": time.time(),
            "uid": self.fs.ukey(path),
        }
        self.cached_files[-1][path] = detail
        logger.debug("Creating local sparse file for %s" % path)
    # call target filesystems open
    # Ensure the cache directory exists, then disable the target file's
    # own read cache (``cache_type=None``); the MMapCache attached below
    # replaces it.
    self._mkcache()
    f = self.fs._open(path, mode=mode, block_size=block_size, autocommit=autocommit, cache_options=cache_options, cache_type=None, **kwargs)
    if self.compression:
        # Wrap the raw file in a decompressor; ``"infer"`` derives the
        # codec from the path's extension.
        comp = (infer_compression(path) if self.compression == "infer" else self.compression)
        f = compr[comp](f, mode="rb")
    # A cached file must keep the blocksize it was first written with,
    # since the block set indexes ranges of that size.
    if "blocksize" in detail:
        if detail["blocksize"] != f.blocksize:
            raise ValueError("Cached file must be reopened with same block"
                             "size as original (old: %i, new %i)"
                             "" % (detail["blocksize"], f.blocksize))
    else:
        detail["blocksize"] = f.blocksize
    # Shared ``blocks`` set keeps our cache metadata in sync as the
    # mmap cache fetches ranges.
    f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
    close = f.close
    # On close, persist the block state via ``close_and_update``.
    f.close = lambda: self.close_and_update(f, close)
    self.save_cache()
    return f