Exemple #1
0
    def test_save_state(self):
        file_size = len(self.cache_data)
        max_file_size = 2*file_size
        sim_data = array.array('c', self.cache_data + "\x00"*(max_file_size-file_size))

        # Do random I/O on a file
        tmpf = CryptFile(self.file_name, key=b"a"*32, mode='w+b')
        f = BlockCachedFile(tmpf, file_size, block_size=7)
        file_size = self._do_random_rw(f, sim_data, file_size, max_file_size, count=17)

        # Save state
        state_file = CryptFile(self.file_name + '.state', key=b"b"*32, mode='w+b')
        f.save_state(state_file)
        state_file.close()
        f.close()

        # Restore state
        state_file = CryptFile(self.file_name + '.state', key=b"b"*32, mode='rb')
        tmpf = CryptFile(self.file_name, key=b"a"*32, mode='r+b')
        f = BlockCachedFile.restore_state(tmpf, state_file)
        state_file.close()

        # More random I/O
        for k in range(3):
            file_size = self._do_random_rw(f, sim_data, file_size, max_file_size, count=15)
        f.close()
    def test_save_state(self):
        file_size = len(self.cache_data)
        max_file_size = 2 * file_size
        sim_data = array.array(
            'c', self.cache_data + "\x00" * (max_file_size - file_size))

        # Do random I/O on a file
        tmpf = CryptFile(self.file_name, key=b"a" * 32, mode='w+b')
        f = BlockCachedFile(tmpf, file_size, block_size=7)
        file_size = self._do_random_rw(f,
                                       sim_data,
                                       file_size,
                                       max_file_size,
                                       count=17)

        # Save state
        state_file = CryptFile(self.file_name + '.state',
                               key=b"b" * 32,
                               mode='w+b')
        f.save_state(state_file)
        state_file.close()
        f.close()

        # Restore state
        state_file = CryptFile(self.file_name + '.state',
                               key=b"b" * 32,
                               mode='rb')
        tmpf = CryptFile(self.file_name, key=b"a" * 32, mode='r+b')
        f = BlockCachedFile.restore_state(tmpf, state_file)
        state_file.close()

        # More random I/O
        for k in range(3):
            file_size = self._do_random_rw(f,
                                           sim_data,
                                           file_size,
                                           max_file_size,
                                           count=15)
        f.close()
Exemple #3
0
class CachedFileInode(object):
    """
    Logical file on-disk. There should be only a single CachedFileInode
    instance is per each logical file.
    """
    def __init__(self, cachedb, upath, io, filecap, persistent=False):
        self.upath = upath
        self.closed = False
        self.refcnt = 0
        self.persistent = persistent
        self.invalidated = False

        # Use per-file keys for different files, for safer fallback
        # in the extremely unlikely event of SHA512 hash collisions
        filename, key = cachedb.get_filename_and_key(upath)
        filename_state, key_state = cachedb.get_filename_and_key(
            upath, b'state')
        filename_data, key_data = cachedb.get_filename_and_key(upath, b'data')

        self.lock = threading.RLock()
        self.dirty = False
        self.f = None
        self.f_state = None
        self.f_data = None

        self.stream_f = None
        self.stream_offset = 0
        self.stream_data = []

        open_complete = False

        try:
            if filecap is None:
                # Create new file
                raise ValueError()

            # Reuse cached metadata
            self.f = CryptFile(filename, key=key, mode='r+b')
            self.info = json_zlib_load(self.f)

            if persistent:
                # Reuse cached data
                self.f_state = CryptFile(filename_state,
                                         key=key_state,
                                         mode='r+b')
                self.f_data = CryptFile(filename_data,
                                        key=key_data,
                                        mode='r+b')
                self.block_cache = BlockCachedFile.restore_state(
                    self.f_data, self.f_state)
                open_complete = True
        except (IOError, OSError, ValueError):
            open_complete = False
            if self.f is not None:
                self.f.close()
                self.f = None
            if self.f_state is not None:
                self.f_state.close()
            if self.f_data is not None:
                self.f_data.close()

        if not open_complete:
            if self.f is None:
                self.f = CryptFile(filename, key=key, mode='w+b')
                try:
                    if filecap is not None:
                        self._load_info(filecap, io, iscap=True)
                    else:
                        self.info = ['file', {u'size': 0}]
                        self.dirty = True
                except IOError as err:
                    os.unlink(filename)
                    self.f.close()
                    raise

            # Create a data file
            self.f_data = CryptFile(filename_data, key=key_data, mode='w+b')

            # Block cache on top of data file
            self.block_cache = BlockCachedFile(self.f_data,
                                               self.info[1][u'size'])

            # Block data state file
            self.f_state = CryptFile(filename_state, key=key_state, mode='w+b')

        os.utime(self.f.path, None)
        os.utime(self.f_data.path, None)
        os.utime(self.f_state.path, None)

    def _load_info(self, upath, io, iscap=False):
        try:
            self.info = io.get_info(upath, iscap=iscap)
        except (HTTPError, IOError, ValueError) as err:
            if isinstance(err, HTTPError) and err.code == 404:
                raise IOError(errno.ENOENT, "no such file")
            raise IOError(errno.EREMOTEIO, "failed to retrieve information")
        self._save_info()

    def _save_info(self):
        self.f.truncate(0)
        self.f.seek(0)
        if u'retrieved' not in self.info[1]:
            self.info[1][u'retrieved'] = time.time()
        json_zlib_dump(self.info, self.f)

    def is_fresh(self, lifetime):
        if u'retrieved' not in self.info[1]:
            return True
        return (self.info[1][u'retrieved'] + lifetime >= time.time())

    def incref(self):
        with self.lock:
            self.refcnt += 1

    def decref(self):
        with self.lock:
            self.refcnt -= 1
            if self.refcnt <= 0:
                self.close()

    def close(self):
        with self.lock:
            if not self.closed:
                if self.stream_f is not None:
                    self.stream_f.close()
                    self.stream_f = None
                    self.stream_data = []
                self.f_state.seek(0)
                self.f_state.truncate(0)
                self.block_cache.save_state(self.f_state)
                self.f_state.close()
                self.block_cache.close()
                self.f.close()

                if not self.persistent and self.upath is not None and not self.invalidated:
                    os.unlink(self.f_state.path)
                    os.unlink(self.f_data.path)
            self.closed = True

    def _do_rw(self, io, offset, length_or_data, write=False, no_result=False):
        if write:
            data = length_or_data
            length = len(data)
        else:
            length = length_or_data

        self.lock.acquire()
        try:
            preempted = False
            while True:
                if write:
                    pos = self.block_cache.pre_write(offset, length)
                else:
                    pos = self.block_cache.pre_read(offset, length)

                if pos is None:
                    # cache ready
                    if no_result:
                        return None
                    elif write:
                        return self.block_cache.write(offset, data)
                    else:
                        return self.block_cache.read(offset, length)
                else:
                    # cache not ready -- fill it up
                    c_offset, c_length = pos

                    if self.stream_f is not None and (
                            self.stream_offset > c_offset
                            or c_offset >= self.stream_offset + 3 * 131072):
                        if not preempted:
                            # Try to yield to a different in-flight cache operation, in case there
                            # is one waiting for the lock
                            preempted = True
                            self.lock.release()
                            time.sleep(0)
                            self.lock.acquire()
                            continue

                        self.stream_f.close()
                        self.stream_f = None
                        self.stream_data = []

                    if self.stream_f is None:
                        self.stream_f = io.get_content(self.info[1][u'ro_uri'],
                                                       c_offset,
                                                       iscap=True)
                        self.stream_offset = c_offset
                        self.stream_data = []

                    read_offset = self.stream_offset
                    read_bytes = sum(len(x) for x in self.stream_data)
                    while read_offset + read_bytes < c_offset + c_length:
                        block = self.stream_f.read(131072)
                        if not block:
                            self.stream_f.close()
                            self.stream_f = None
                            self.stream_data = []
                            break

                        self.stream_data.append(block)
                        read_bytes += len(block)
                        self.stream_offset, self.stream_data = self.block_cache.receive_cached_data(
                            self.stream_offset, self.stream_data)

        except (HTTPError, IOError) as err:
            if self.stream_f is not None:
                self.stream_f.close()
            self.stream_f = None
            raise IOError(errno.EREMOTEIO, "I/O error: %s" % (str(err), ))
        finally:
            self.lock.release()

    def get_size(self):
        return self.block_cache.get_size()

    def get_attr(self):
        return dict(type='file', size=self.get_size())

    def read(self, io, offset, length):
        return self._do_rw(io, offset, length, write=False)

    def write(self, io, offset, data):
        """
        Write data to file. If *offset* is None, it means append.
        """
        with self.lock:
            if len(data) > 0:
                self.dirty = True
                if offset is None:
                    offset = self.get_size()
                self._do_rw(io, offset, data, write=True)

    def truncate(self, size):
        with self.lock:
            if size != self.block_cache.get_size():
                self.dirty = True
            self.block_cache.truncate(size)

    def _buffer_whole_file(self, io):
        self._do_rw(io,
                    0,
                    self.block_cache.get_size(),
                    write=False,
                    no_result=True)

    def upload(self, io, parent_cap=None):
        with self.lock:
            # Buffer all data
            self._buffer_whole_file(io)

            # Upload the whole file
            class Fwrapper(object):
                def __init__(self, block_cache):
                    self.block_cache = block_cache
                    self.size = block_cache.get_size()
                    self.f = self.block_cache.get_file()
                    self.f.seek(0)

                def __len__(self):
                    return self.size

                def read(self, size):
                    return self.f.read(size)

            if parent_cap is None:
                upath = self.upath
                iscap = False
            else:
                upath = parent_cap + u"/" + ubasename(self.upath)
                iscap = True

            fw = Fwrapper(self.block_cache)
            try:
                filecap = io.put_file(upath, fw, iscap=iscap)
            except (HTTPError, IOError) as err:
                raise IOError(errno.EFAULT, "I/O error: %s" % (str(err), ))

            self.info[1][u'ro_uri'] = filecap
            self.info[1][u'size'] = self.get_size()
            self._save_info()

            self.dirty = False

            return filecap

    def unlink(self):
        with self.lock:
            if self.upath is not None and not self.invalidated:
                os.unlink(self.f.path)
                os.unlink(self.f_state.path)
                os.unlink(self.f_data.path)
            self.upath = None
Exemple #4
0
class CachedFileInode(object):
    """
    Logical file on-disk. There should be only a single CachedFileInode
    instance is per each logical file.
    """

    def __init__(self, cachedb, upath, io, filecap, persistent=False):
        self.upath = upath
        self.closed = False
        self.refcnt = 0
        self.persistent = persistent
        self.invalidated = False

        # Use per-file keys for different files, for safer fallback
        # in the extremely unlikely event of SHA512 hash collisions
        filename, key = cachedb.get_filename_and_key(upath)
        filename_state, key_state = cachedb.get_filename_and_key(upath, b'state')
        filename_data, key_data = cachedb.get_filename_and_key(upath, b'data')

        self.lock = threading.RLock()
        self.dirty = False
        self.f = None
        self.f_state = None
        self.f_data = None

        self.stream_f = None
        self.stream_offset = 0
        self.stream_data = []

        open_complete = False

        try:
            if filecap is None:
                # Create new file
                raise ValueError()

            # Reuse cached metadata
            self.f = CryptFile(filename, key=key, mode='r+b')
            self.info = json_zlib_load(self.f)

            if persistent:
                # Reuse cached data
                self.f_state = CryptFile(filename_state, key=key_state, mode='r+b')
                self.f_data = CryptFile(filename_data, key=key_data, mode='r+b')
                self.block_cache = BlockCachedFile.restore_state(self.f_data, self.f_state)
                open_complete = True
        except (IOError, OSError, ValueError):
            open_complete = False
            if self.f is not None:
                self.f.close()
                self.f = None
            if self.f_state is not None:
                self.f_state.close()
            if self.f_data is not None:
                self.f_data.close()

        if not open_complete:
            if self.f is None:
                self.f = CryptFile(filename, key=key, mode='w+b')
                try:
                    if filecap is not None:
                        self._load_info(filecap, io, iscap=True)
                    else:
                        self.info = ['file', {u'size': 0}]
                        self.dirty = True
                except IOError as err:
                    os.unlink(filename)
                    self.f.close()
                    raise

            # Create a data file
            self.f_data = CryptFile(filename_data, key=key_data, mode='w+b')

            # Block cache on top of data file
            self.block_cache = BlockCachedFile(self.f_data, self.info[1][u'size'])

            # Block data state file
            self.f_state = CryptFile(filename_state, key=key_state, mode='w+b')

        os.utime(self.f.path, None)
        os.utime(self.f_data.path, None)
        os.utime(self.f_state.path, None)

    def _load_info(self, upath, io, iscap=False):
        try:
            self.info = io.get_info(upath, iscap=iscap)
        except (HTTPError, IOError, ValueError) as err:
            if isinstance(err, HTTPError) and err.code == 404:
                raise IOError(errno.ENOENT, "no such file")
            raise IOError(errno.EREMOTEIO, "failed to retrieve information")
        self._save_info()

    def _save_info(self):
        self.f.truncate(0)
        self.f.seek(0)
        if u'retrieved' not in self.info[1]:
            self.info[1][u'retrieved'] = time.time()
        json_zlib_dump(self.info, self.f)

    def is_fresh(self, lifetime):
        if u'retrieved' not in self.info[1]:
            return True
        return (self.info[1][u'retrieved'] + lifetime >= time.time())

    def incref(self):
        with self.lock:
            self.refcnt += 1

    def decref(self):
        with self.lock:
            self.refcnt -= 1
            if self.refcnt <= 0:
                self.close()

    def close(self):
        with self.lock:
            if not self.closed:
                if self.stream_f is not None:
                    self.stream_f.close()
                    self.stream_f = None
                    self.stream_data = []
                self.f_state.seek(0)
                self.f_state.truncate(0)
                self.block_cache.save_state(self.f_state)
                self.f_state.close()
                self.block_cache.close()
                self.f.close()

                if not self.persistent and self.upath is not None and not self.invalidated:
                    os.unlink(self.f_state.path)
                    os.unlink(self.f_data.path)
            self.closed = True

    def _do_rw(self, io, offset, length_or_data, write=False, no_result=False):
        if write:
            data = length_or_data
            length = len(data)
        else:
            length = length_or_data

        self.lock.acquire()
        try:
            preempted = False
            while True:
                if write:
                    pos = self.block_cache.pre_write(offset, length)
                else:
                    pos = self.block_cache.pre_read(offset, length)

                if pos is None:
                    # cache ready
                    if no_result:
                        return None
                    elif write:
                        return self.block_cache.write(offset, data)
                    else:
                        return self.block_cache.read(offset, length)
                else:
                    # cache not ready -- fill it up
                    c_offset, c_length = pos

                    if self.stream_f is not None and (self.stream_offset > c_offset or
                                                      c_offset >= self.stream_offset + 3*131072):
                        if not preempted:
                            # Try to yield to a different in-flight cache operation, in case there
                            # is one waiting for the lock
                            preempted = True
                            self.lock.release()
                            time.sleep(0)
                            self.lock.acquire()
                            continue

                        self.stream_f.close()
                        self.stream_f = None
                        self.stream_data = []

                    if self.stream_f is None:
                        self.stream_f = io.get_content(self.info[1][u'ro_uri'], c_offset, iscap=True)
                        self.stream_offset = c_offset
                        self.stream_data = []

                    read_offset = self.stream_offset
                    read_bytes = sum(len(x) for x in self.stream_data)
                    while read_offset + read_bytes < c_offset + c_length:
                        block = self.stream_f.read(131072)
                        if not block:
                            self.stream_f.close()
                            self.stream_f = None
                            self.stream_data = []
                            break

                        self.stream_data.append(block)
                        read_bytes += len(block)
                        self.stream_offset, self.stream_data = self.block_cache.receive_cached_data(
                            self.stream_offset, self.stream_data)

        except (HTTPError, IOError) as err:
            if self.stream_f is not None:
                self.stream_f.close()
            self.stream_f = None
            raise IOError(errno.EREMOTEIO, "I/O error: %s" % (str(err),))
        finally:
            self.lock.release()

    def get_size(self):
        return self.block_cache.get_size()

    def get_attr(self):
        return dict(type='file', size=self.get_size())

    def read(self, io, offset, length):
        return self._do_rw(io, offset, length, write=False)

    def write(self, io, offset, data):
        """
        Write data to file. If *offset* is None, it means append.
        """
        with self.lock:
            if len(data) > 0:
                self.dirty = True
                if offset is None:
                    offset = self.get_size()
                self._do_rw(io, offset, data, write=True)

    def truncate(self, size):
        with self.lock:
            if size != self.block_cache.get_size():
                self.dirty = True
            self.block_cache.truncate(size)

    def _buffer_whole_file(self, io):
        self._do_rw(io, 0, self.block_cache.get_size(), write=False, no_result=True)

    def upload(self, io, parent_cap=None):
        with self.lock:
            # Buffer all data
            self._buffer_whole_file(io)

            # Upload the whole file
            class Fwrapper(object):
                def __init__(self, block_cache):
                    self.block_cache = block_cache
                    self.size = block_cache.get_size()
                    self.f = self.block_cache.get_file()
                    self.f.seek(0)
                def __len__(self):
                    return self.size
                def read(self, size):
                    return self.f.read(size)

            if parent_cap is None:
                upath = self.upath
                iscap = False
            else:
                upath = parent_cap + u"/" + ubasename(self.upath)
                iscap = True

            fw = Fwrapper(self.block_cache)
            try:
                filecap = io.put_file(upath, fw, iscap=iscap)
            except (HTTPError, IOError) as err:
                raise IOError(errno.EFAULT, "I/O error: %s" % (str(err),))

            self.info[1][u'ro_uri'] = filecap
            self.info[1][u'size'] = self.get_size()
            self._save_info()

            self.dirty = False

            return filecap

    def unlink(self):
        with self.lock:
            if self.upath is not None and not self.invalidated:
                os.unlink(self.f.path)
                os.unlink(self.f_state.path)
                os.unlink(self.f_data.path)
            self.upath = None