def test_save_state(self):
    file_size = len(self.cache_data)
    max_file_size = 2 * file_size
    sim_data = array.array('c', self.cache_data + "\x00" * (max_file_size - file_size))

    # Do random I/O on a file
    tmpf = CryptFile(self.file_name, key=b"a" * 32, mode='w+b')
    f = BlockCachedFile(tmpf, file_size, block_size=7)
    file_size = self._do_random_rw(f, sim_data, file_size, max_file_size, count=17)

    # Save state
    state_file = CryptFile(self.file_name + '.state', key=b"b" * 32, mode='w+b')
    f.save_state(state_file)
    state_file.close()
    f.close()

    # Restore state
    state_file = CryptFile(self.file_name + '.state', key=b"b" * 32, mode='rb')
    tmpf = CryptFile(self.file_name, key=b"a" * 32, mode='r+b')
    f = BlockCachedFile.restore_state(tmpf, state_file)
    state_file.close()

    # More random I/O
    for k in range(3):
        file_size = self._do_random_rw(f, sim_data, file_size, max_file_size, count=15)
    f.close()
def test_write_past_end(self):
    # Check that write-past-end has POSIX semantics
    tmpf = tempfile.TemporaryFile()
    f = BlockCachedFile(tmpf, len(self.cache_data), block_size=7)
    self._do_write(f, len(self.cache_data) + 5, "a" * 3)
    data = self._do_read(f, len(self.cache_data) - 1, 1 + 5 + 3)
    assert_equal(data, self.cache_data[-1] + "\x00" * 5 + "a" * 3)
    f.close()
def test_basics(self):
    tmpf = tempfile.TemporaryFile()
    f = BlockCachedFile(tmpf, len(self.cache_data), block_size=7)
    data = self._do_read(f, 137, 91)
    assert_equal(data, self.cache_data[137:137 + 91])

    self._do_write(f, 131, "a" * 31)
    data = self._do_read(f, 130, 91)
    assert_equal(data[0], self.cache_data[130])
    assert_equal(data[1:32], "a" * 31)
    assert_equal(data[32:], self.cache_data[162:221])
    f.close()
def test_on_top_cryptfile(self):
    tmpf = CryptFile(self.file_name, key=b"a" * 32, mode='w+b')
    f = BlockCachedFile(tmpf, len(self.cache_data), block_size=37)

    self._do_write(f, 0, b"b" * 1237)
    assert_equal(self._do_read(f, 0, 15), b"b" * 15)

    f.truncate(7)
    assert_equal(self._do_read(f, 0, 15), b"b" * 7)

    f.truncate(0)
    assert_equal(self._do_read(f, 0, 15), b"")
    f.close()
def test_random_rw(self):
    file_size = len(self.cache_data)
    max_file_size = 2 * file_size
    random.seed(1234)

    for k in range(3):
        file_size = len(self.cache_data)
        sim_data = array.array('c', self.cache_data + "\x00" * (max_file_size - file_size))
        tmpf = tempfile.TemporaryFile()
        f = BlockCachedFile(tmpf, file_size, block_size=7)
        self._do_random_rw(f, sim_data, file_size, max_file_size, count=5000)
        f.close()
def test_truncate(self):
    # Check that truncate() works as expected
    tmpf = tempfile.TemporaryFile()
    f = BlockCachedFile(tmpf, len(self.cache_data), block_size=7)

    self._do_write(f, 0, b"b" * 1237)
    assert_equal(self._do_read(f, 0, 15), b"b" * 15)
    f.truncate(7)
    assert_equal(self._do_read(f, 0, 15), b"b" * 7)
    f.truncate(0)
    assert_equal(self._do_read(f, 0, 15), b"")

    self._do_write(f, 0, b"b" * 1237)
    assert_equal(self._do_read(f, 1200, 15), b"b" * 15)
    f.truncate(1200 + 7)
    assert_equal(self._do_read(f, 1200, 15), b"b" * 7)
    f.truncate(1200 + 0)
    assert_equal(self._do_read(f, 1200, 15), b"")
    f.truncate(1200 - 20)
    assert_equal(self._do_read(f, 1200, 15), b"")
    f.close()
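# The tests above drive BlockCachedFile through helper methods
# (_do_read, _do_write, _do_random_rw) defined elsewhere in this test
# class and not shown in this excerpt. As a minimal sketch of the
# protocol they must follow -- hypothetical, inferred from the
# pre_read/receive_cached_data usage in CachedFileInode._do_rw below --
# a standalone read helper could look like this (_sketch_do_read and
# backing_data are illustrative names, not part of the test suite):

def _sketch_do_read(f, backing_data, offset, length):
    # Keep satisfying cache misses from the simulated backing data
    # until pre_read() reports that the requested range is cached.
    while True:
        pos = f.pre_read(offset, length)
        if pos is None:
            break
        c_offset, c_length = pos
        f.receive_cached_data(c_offset,
                              [backing_data[c_offset:c_offset + c_length]])
    return f.read(offset, length)

# A write helper would follow the same loop with pre_write()/write(),
# and _do_random_rw presumably issues randomized reads, writes and
# truncations against both the BlockCachedFile and the sim_data array,
# returning the updated simulated file size.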
class CachedFileInode(object):
    """
    Logical file on-disk. There should be only a single CachedFileInode
    instance per logical file.
    """

    def __init__(self, cachedb, upath, io, filecap, persistent=False):
        self.upath = upath
        self.closed = False
        self.refcnt = 0
        self.persistent = persistent
        self.invalidated = False

        # Use per-file keys for different files, for safer fallback
        # in the extremely unlikely event of SHA512 hash collisions
        filename, key = cachedb.get_filename_and_key(upath)
        filename_state, key_state = cachedb.get_filename_and_key(upath, b'state')
        filename_data, key_data = cachedb.get_filename_and_key(upath, b'data')

        self.lock = threading.RLock()
        self.dirty = False
        self.f = None
        self.f_state = None
        self.f_data = None

        self.stream_f = None
        self.stream_offset = 0
        self.stream_data = []

        open_complete = False

        try:
            if filecap is None:
                # Create new file
                raise ValueError()

            # Reuse cached metadata
            self.f = CryptFile(filename, key=key, mode='r+b')
            self.info = json_zlib_load(self.f)

            if persistent:
                # Reuse cached data
                self.f_state = CryptFile(filename_state, key=key_state, mode='r+b')
                self.f_data = CryptFile(filename_data, key=key_data, mode='r+b')
                self.block_cache = BlockCachedFile.restore_state(self.f_data, self.f_state)
                open_complete = True
        except (IOError, OSError, ValueError):
            open_complete = False
            if self.f is not None:
                self.f.close()
                self.f = None
            if self.f_state is not None:
                self.f_state.close()
            if self.f_data is not None:
                self.f_data.close()

        if not open_complete:
            if self.f is None:
                self.f = CryptFile(filename, key=key, mode='w+b')
                try:
                    if filecap is not None:
                        self._load_info(filecap, io, iscap=True)
                    else:
                        self.info = ['file', {u'size': 0}]
                        self.dirty = True
                except IOError:
                    os.unlink(filename)
                    self.f.close()
                    raise

            # Create a data file
            self.f_data = CryptFile(filename_data, key=key_data, mode='w+b')

            # Block cache on top of data file
            self.block_cache = BlockCachedFile(self.f_data, self.info[1][u'size'])

            # Block data state file
            self.f_state = CryptFile(filename_state, key=key_state, mode='w+b')

        os.utime(self.f.path, None)
        os.utime(self.f_data.path, None)
        os.utime(self.f_state.path, None)

    def _load_info(self, upath, io, iscap=False):
        try:
            self.info = io.get_info(upath, iscap=iscap)
        except (HTTPError, IOError, ValueError) as err:
            if isinstance(err, HTTPError) and err.code == 404:
                raise IOError(errno.ENOENT, "no such file")
            raise IOError(errno.EREMOTEIO, "failed to retrieve information")
        self._save_info()

    def _save_info(self):
        self.f.truncate(0)
        self.f.seek(0)
        if u'retrieved' not in self.info[1]:
            self.info[1][u'retrieved'] = time.time()
        json_zlib_dump(self.info, self.f)

    def is_fresh(self, lifetime):
        if u'retrieved' not in self.info[1]:
            return True
        return (self.info[1][u'retrieved'] + lifetime >= time.time())

    def incref(self):
        with self.lock:
            self.refcnt += 1

    def decref(self):
        with self.lock:
            self.refcnt -= 1
            if self.refcnt <= 0:
                self.close()

    def close(self):
        with self.lock:
            if not self.closed:
                if self.stream_f is not None:
                    self.stream_f.close()
                    self.stream_f = None
                    self.stream_data = []

                # Persist the block cache state before closing
                self.f_state.seek(0)
                self.f_state.truncate(0)
                self.block_cache.save_state(self.f_state)
                self.f_state.close()
                self.block_cache.close()
                self.f.close()

                if not self.persistent and self.upath is not None and not self.invalidated:
                    os.unlink(self.f_state.path)
                    os.unlink(self.f_data.path)
            self.closed = True

    def _do_rw(self, io, offset, length_or_data, write=False, no_result=False):
        if write:
            data = length_or_data
            length = len(data)
        else:
            length = length_or_data

        self.lock.acquire()
        try:
            preempted = False
            while True:
                if write:
                    pos = self.block_cache.pre_write(offset, length)
                else:
                    pos = self.block_cache.pre_read(offset, length)

                if pos is None:
                    # cache ready
                    if no_result:
                        return None
                    elif write:
                        return self.block_cache.write(offset, data)
                    else:
                        return self.block_cache.read(offset, length)
                else:
                    # cache not ready -- fill it up
                    c_offset, c_length = pos

                    if self.stream_f is not None and (self.stream_offset > c_offset or
                                                      c_offset >= self.stream_offset + 3 * 131072):
                        if not preempted:
                            # Try to yield to a different in-flight cache
                            # operation, in case there is one waiting for
                            # the lock
                            preempted = True
                            self.lock.release()
                            time.sleep(0)
                            self.lock.acquire()
                            continue
                        self.stream_f.close()
                        self.stream_f = None
                        self.stream_data = []

                    if self.stream_f is None:
                        self.stream_f = io.get_content(self.info[1][u'ro_uri'], c_offset, iscap=True)
                        self.stream_offset = c_offset
                        self.stream_data = []

                    read_offset = self.stream_offset
                    read_bytes = sum(len(x) for x in self.stream_data)
                    while read_offset + read_bytes < c_offset + c_length:
                        block = self.stream_f.read(131072)
                        if not block:
                            self.stream_f.close()
                            self.stream_f = None
                            self.stream_data = []
                            break
                        self.stream_data.append(block)
                        read_bytes += len(block)

                    self.stream_offset, self.stream_data = self.block_cache.receive_cached_data(
                        self.stream_offset, self.stream_data)
        except (HTTPError, IOError) as err:
            if self.stream_f is not None:
                self.stream_f.close()
                self.stream_f = None
            raise IOError(errno.EREMOTEIO, "I/O error: %s" % (str(err),))
        finally:
            self.lock.release()

    def get_size(self):
        return self.block_cache.get_size()

    def get_attr(self):
        return dict(type='file', size=self.get_size())

    def read(self, io, offset, length):
        return self._do_rw(io, offset, length, write=False)

    def write(self, io, offset, data):
        """
        Write data to file. If *offset* is None, it means append.
        """
        with self.lock:
            if len(data) > 0:
                self.dirty = True
                if offset is None:
                    offset = self.get_size()
                self._do_rw(io, offset, data, write=True)

    def truncate(self, size):
        with self.lock:
            if size != self.block_cache.get_size():
                self.dirty = True
            self.block_cache.truncate(size)

    def _buffer_whole_file(self, io):
        self._do_rw(io, 0, self.block_cache.get_size(), write=False, no_result=True)

    def upload(self, io, parent_cap=None):
        with self.lock:
            # Buffer all data
            self._buffer_whole_file(io)

            # Upload the whole file
            class Fwrapper(object):
                def __init__(self, block_cache):
                    self.block_cache = block_cache
                    self.size = block_cache.get_size()
                    self.f = self.block_cache.get_file()
                    self.f.seek(0)

                def __len__(self):
                    return self.size

                def read(self, size):
                    return self.f.read(size)

            if parent_cap is None:
                upath = self.upath
                iscap = False
            else:
                upath = parent_cap + u"/" + ubasename(self.upath)
                iscap = True

            fw = Fwrapper(self.block_cache)
            try:
                filecap = io.put_file(upath, fw, iscap=iscap)
            except (HTTPError, IOError) as err:
                raise IOError(errno.EFAULT, "I/O error: %s" % (str(err),))

            self.info[1][u'ro_uri'] = filecap
            self.info[1][u'size'] = self.get_size()
            self._save_info()

            self.dirty = False
            return filecap

    def unlink(self):
        with self.lock:
            if self.upath is not None and not self.invalidated:
                os.unlink(self.f.path)
                os.unlink(self.f_state.path)
                os.unlink(self.f_data.path)
            self.upath = None
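# Hypothetical usage sketch (not part of the original module): the
# intended lifecycle of CachedFileInode is reference-counted -- each
# opener calls incref(), and the final decref() persists the block-cache
# state and closes the underlying files. The cachedb, upath, io and
# filecap parameters below stand in for the real objects the class is
# constructed with above; _sketch_inode_roundtrip is an illustrative name.

def _sketch_inode_roundtrip(cachedb, upath, io, filecap):
    inode = CachedFileInode(cachedb, upath, io, filecap, persistent=True)
    inode.incref()
    try:
        head = inode.read(io, 0, 4096)            # fills the block cache on demand
        inode.write(io, None, b"appended bytes")  # offset=None means append
        if inode.dirty:
            filecap = inode.upload(io)            # buffers whole file, then uploads
        return head, filecap
    finally:
        inode.decref()  # refcnt drops to 0 -> close() saves state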