Example #1
    def _open(self, path, mode="rb", **kwargs):
        """Wrap the target _open

        If the whole file exists in the cache, just open it locally and
        return that.

        Otherwise, open the file on the target FS and attach an mmap
        cache pointing at a location we choose inside our cache storage.
        The ``blocks`` instance is shared, so when the mmap cache updates,
        the entry in our ``cached_files`` attribute updates with it.
        We monkey-patch this file, so that when it closes, we call
        ``close_and_update`` to save the state of the blocks.
        """
        path = self._strip_protocol(path)
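        # ensure the path carries a protocol prefix before it is handed to
        # the wrapped filesystem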
        if not path.startswith(self.protocol):
            path = self.protocol + "://" + path
        if mode != "rb":
            return self.fs._open(path, mode=mode, **kwargs)
        detail, fn = self._check_file(path)
        if detail:
            # file is in cache
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                # stored file is complete
                logger.debug("Opening local copy of %s" % path)
                return open(fn, "rb")
            # TODO: action where partial file exists in read-only cache
            logger.debug("Opening partially cached copy of %s" % path)
        else:
            hash = hashlib.sha256(path.encode()).hexdigest()
            fn = os.path.join(self.storage[-1], hash)
            blocks = set()
            detail = {
                "fn": hash,
                "blocks": blocks,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self.cached_files[-1][path] = detail
            logger.debug("Creating local sparse file for %s" % path)
        kwargs["cache_type"] = "none"
        kwargs["mode"] = mode

        # call the target filesystem's open
        f = self.fs._open(path, **kwargs)
        if "blocksize" in detail:
            if detail["blocksize"] != f.blocksize:
                raise ValueError(
                    "Cached file must be reopened with the same block "
                    "size as the original (old: %i, new: %i)"
                    % (detail["blocksize"], f.blocksize)
                )
        else:
            detail["blocksize"] = f.blocksize
        f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
        close = f.close
        f.close = lambda: self.close_and_update(f, close)
        return f
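This first example appears to be the block-caching ``_open`` from fsspec's caching filesystem. A minimal usage sketch of how it is normally reached (assuming a recent fsspec where this class is registered under the ``blockcache`` protocol; the URL and cache directory are illustrative):

    import fsspec

    # "blockcache" wraps a target filesystem and caches fetched blocks in
    # local sparse files; every read-mode open() goes through _open above.
    fs = fsspec.filesystem(
        "blockcache",
        target_protocol="https",
        cache_storage="/tmp/fsspec-cache",
    )
    with fs.open("https://example.com/data.bin", mode="rb") as f:
        head = f.read(1024)  # fetched through the attached MMapCache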
Example #2
    def _open(self, path, mode='rb', **kwargs):
        """Wrap the target _open

        If the whole file exists in the cache, just open it locally and
        return that.

        Otherwise, open the file on the target FS and attach an mmap
        cache pointing at a location we choose inside our cache storage.
        The ``blocks`` instance is shared, so when the mmap cache updates,
        the entry in our ``cached_files`` attribute updates with it.
        We monkey-patch this file, so that when it closes, we call
        ``close_and_update`` to save the state of the blocks.
        """
        if mode != 'rb':
            return self.fs._open(path, mode=mode, **kwargs)
        if path in self.cached_files:
            detail = self.cached_files[path]
            hash, blocks = detail['fn'], detail['blocks']
            fn = os.path.join(self.storage, hash)
            if blocks is True:
                return open(fn, 'rb')
        else:
            hash = hashlib.sha256(path.encode()).hexdigest()
            fn = os.path.join(self.storage, hash)
            blocks = set()
            detail = {'fn': hash, 'blocks': blocks}
            self.cached_files[path] = detail
        kwargs['cache_type'] = 'none'
        kwargs['mode'] = mode

        # call the target filesystem's open
        f = self.fs._open(path, **kwargs)
        if 'blocksize' in detail:
            if detail['blocksize'] != f.blocksize:
            raise ValueError('Cached file must be reopened with the same '
                             'block size as the original (old: %i, new: %i)'
                             % (detail['blocksize'], f.blocksize))
        else:
            detail['blocksize'] = f.blocksize
        f.cache = MMapCache(f.blocksize, f._fetch_range, f.size,
                            fn, blocks)
        close = f.close
        f.close = lambda: self.close_and_update(f, close)
        return f
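The final three lines of each example are the monkey-patch the docstring describes: the bound ``close`` is captured in a local, then ``f.close`` is rebound so block state can be saved before the underlying close runs. A self-contained sketch of that pattern, using hypothetical ``Recorder`` and ``FakeFile`` stand-ins rather than fsspec objects:

    class Recorder:
        def close_and_update(self, f, original_close):
            # stand-in for persisting the shared ``blocks`` set to disk
            print("saving cache state")
            original_close()

    class FakeFile:
        def close(self):
            print("underlying close")

    rec = Recorder()
    f = FakeFile()
    close = f.close                                   # capture bound method
    f.close = lambda: rec.close_and_update(f, close)  # rebind with the hook
    f.close()  # prints "saving cache state", then "underlying close"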
Example #3
    def _open(self,
              path,
              mode="rb",
              block_size=None,
              autocommit=True,
              cache_options=None,
              **kwargs):
        """Wrap the target _open

        If the whole file exists in the cache, just open it locally and
        return that.

        Otherwise, open the file on the target FS and attach an mmap
        cache pointing at a location we choose inside our cache storage.
        The ``blocks`` instance is shared, so when the mmap cache updates,
        the entry in our ``cached_files`` attribute updates with it.
        We monkey-patch this file, so that when it closes, we call
        ``close_and_update`` to save the state of the blocks.
        """
        # strip the caching layer's protocol, then the target filesystem's
        path = self._strip_protocol(path)
        path = self.fs._strip_protocol(path)
        if "r" not in mode:
            return self.fs._open(path,
                                 mode=mode,
                                 block_size=block_size,
                                 autocommit=autocommit,
                                 cache_options=cache_options,
                                 **kwargs)
        detail = self._check_file(path)
        if detail:
            # file is in cache
            detail, fn = detail
            hash, blocks = detail["fn"], detail["blocks"]
            if blocks is True:
                # stored file is complete
                logger.debug("Opening local copy of %s" % path)
                return open(fn, mode)
            # TODO: action where partial file exists in read-only cache
            logger.debug("Opening partially cached copy of %s" % path)
        else:
            hash = self.hash_name(path, self.same_names)
            fn = os.path.join(self.storage[-1], hash)
            blocks = set()
            detail = {
                "fn": hash,
                "blocks": blocks,
                "time": time.time(),
                "uid": self.fs.ukey(path),
            }
            self.cached_files[-1][path] = detail
            logger.debug("Creating local sparse file for %s" % path)

        # make sure the cache directory exists, then call the target filesystem's open
        self._mkcache()
        f = self.fs._open(path,
                          mode=mode,
                          block_size=block_size,
                          autocommit=autocommit,
                          cache_options=cache_options,
                          cache_type=None,
                          **kwargs)
        if self.compression:
            # wrap in a decompressor; infer the codec from the path if asked
            comp = (infer_compression(path)
                    if self.compression == "infer" else self.compression)
            f = compr[comp](f, mode="rb")
        if "blocksize" in detail:
            if detail["blocksize"] != f.blocksize:
                raise ValueError("Cached file must be reopened with same block"
                                 "size as original (old: %i, new %i)"
                                 "" % (detail["blocksize"], f.blocksize))
        else:
            detail["blocksize"] = f.blocksize
        f.cache = MMapCache(f.blocksize, f._fetch_range, f.size, fn, blocks)
        close = f.close
        f.close = lambda: self.close_and_update(f, close)
        self.save_cache()
        return f
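All three variants name the local cache file after a hash of the remote path (``hashlib.sha256`` in Examples #1 and #2, the configurable ``hash_name`` in Example #3). A minimal sketch of the sha256 scheme, with an illustrative storage directory and remote path:

    import hashlib
    import os

    def local_cache_path(storage_dir, remote_path):
        # the hex digest of the remote path becomes the sparse file's name
        digest = hashlib.sha256(remote_path.encode()).hexdigest()
        return os.path.join(storage_dir, digest)

    print(local_cache_path("/tmp/fsspec-cache", "s3://bucket/key.parquet"))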