def test_truncate(self):
    # Check that truncate() works as expected
    key = b"a" * 32

    f = CryptFile(self.file_name, key=key, mode='w+b', block_size=32)

    f.write(b"b" * 1237)
    f.truncate(15)
    f.seek(0)
    assert_equal(f.read(), b"b" * 15)

    f.truncate(31)
    f.seek(0)
    assert_equal(f.read(), b"b" * 15 + b"\x00" * 16)

    f.truncate(0)
    f.seek(0)
    assert_equal(len(f.read()), 0)

    f.close()
def run():
    f = CryptFile(self.file_name, key=key, mode='r+b', block_size=32)
    f.truncate(0)
    # CryptFile is opened in binary mode, so store and write bytes rather than str
    last_data[0] = str(random.getrandbits(128)).encode('ascii')
    f.write(last_data[0])
    f.close()
class CachedFileInode(object):
    """
    Logical file on-disk. There should be only a single CachedFileInode
    instance per logical file.
    """

    def __init__(self, cachedb, upath, io, filecap, persistent=False):
        self.upath = upath
        self.closed = False
        self.refcnt = 0
        self.persistent = persistent
        self.invalidated = False

        # Use per-file keys for different files, for safer fallback
        # in the extremely unlikely event of SHA512 hash collisions
        filename, key = cachedb.get_filename_and_key(upath)
        filename_state, key_state = cachedb.get_filename_and_key(upath, b'state')
        filename_data, key_data = cachedb.get_filename_and_key(upath, b'data')

        self.lock = threading.RLock()
        self.dirty = False
        self.f = None
        self.f_state = None
        self.f_data = None

        self.stream_f = None
        self.stream_offset = 0
        self.stream_data = []

        open_complete = False

        try:
            if filecap is None:
                # Create new file
                raise ValueError()

            # Reuse cached metadata
            self.f = CryptFile(filename, key=key, mode='r+b')
            self.info = json_zlib_load(self.f)

            if persistent:
                # Reuse cached data
                self.f_state = CryptFile(filename_state, key=key_state, mode='r+b')
                self.f_data = CryptFile(filename_data, key=key_data, mode='r+b')
                self.block_cache = BlockCachedFile.restore_state(self.f_data, self.f_state)
                open_complete = True
        except (IOError, OSError, ValueError):
            open_complete = False
            if self.f is not None:
                self.f.close()
                self.f = None
            if self.f_state is not None:
                self.f_state.close()
            if self.f_data is not None:
                self.f_data.close()

        if not open_complete:
            if self.f is None:
                self.f = CryptFile(filename, key=key, mode='w+b')
                try:
                    if filecap is not None:
                        self._load_info(filecap, io, iscap=True)
                    else:
                        self.info = ['file', {u'size': 0}]
                        self.dirty = True
                except IOError as err:
                    os.unlink(filename)
                    self.f.close()
                    raise

            # Create a data file
            self.f_data = CryptFile(filename_data, key=key_data, mode='w+b')

            # Block cache on top of data file
            self.block_cache = BlockCachedFile(self.f_data, self.info[1][u'size'])

            # Block data state file
            self.f_state = CryptFile(filename_state, key=key_state, mode='w+b')

        os.utime(self.f.path, None)
        os.utime(self.f_data.path, None)
        os.utime(self.f_state.path, None)

    def _load_info(self, upath, io, iscap=False):
        try:
            self.info = io.get_info(upath, iscap=iscap)
        except (HTTPError, IOError, ValueError) as err:
            if isinstance(err, HTTPError) and err.code == 404:
                raise IOError(errno.ENOENT, "no such file")
            raise IOError(errno.EREMOTEIO, "failed to retrieve information")
        self._save_info()

    def _save_info(self):
        self.f.truncate(0)
        self.f.seek(0)
        if u'retrieved' not in self.info[1]:
            self.info[1][u'retrieved'] = time.time()
        json_zlib_dump(self.info, self.f)

    def is_fresh(self, lifetime):
        if u'retrieved' not in self.info[1]:
            return True
        return (self.info[1][u'retrieved'] + lifetime >= time.time())

    def incref(self):
        with self.lock:
            self.refcnt += 1

    def decref(self):
        with self.lock:
            self.refcnt -= 1
            if self.refcnt <= 0:
                self.close()

    def close(self):
        with self.lock:
            if not self.closed:
                if self.stream_f is not None:
                    self.stream_f.close()
                    self.stream_f = None
                    self.stream_data = []
                self.f_state.seek(0)
                self.f_state.truncate(0)
                self.block_cache.save_state(self.f_state)
                self.f_state.close()
                self.block_cache.close()
                self.f.close()

                if not self.persistent and self.upath is not None and not self.invalidated:
                    os.unlink(self.f_state.path)
                    os.unlink(self.f_data.path)

            self.closed = True

    def _do_rw(self, io, offset, length_or_data, write=False, no_result=False):
        if write:
            data = length_or_data
            length = len(data)
        else:
            length = length_or_data

        self.lock.acquire()
        try:
            preempted = False
            while True:
                if write:
                    pos = self.block_cache.pre_write(offset, length)
                else:
                    pos = self.block_cache.pre_read(offset, length)

                if pos is None:
                    # cache ready
                    if no_result:
                        return None
                    elif write:
                        return self.block_cache.write(offset, data)
                    else:
                        return self.block_cache.read(offset, length)
                else:
                    # cache not ready -- fill it up
                    c_offset, c_length = pos

                    if self.stream_f is not None and (self.stream_offset > c_offset or
                                                      c_offset >= self.stream_offset + 3*131072):
                        if not preempted:
                            # Try to yield to a different in-flight cache operation,
                            # in case there is one waiting for the lock
                            preempted = True
                            self.lock.release()
                            time.sleep(0)
                            self.lock.acquire()
                            continue
                        self.stream_f.close()
                        self.stream_f = None
                        self.stream_data = []

                    if self.stream_f is None:
                        self.stream_f = io.get_content(self.info[1][u'ro_uri'], c_offset, iscap=True)
                        self.stream_offset = c_offset
                        self.stream_data = []

                    read_offset = self.stream_offset
                    read_bytes = sum(len(x) for x in self.stream_data)
                    while read_offset + read_bytes < c_offset + c_length:
                        block = self.stream_f.read(131072)
                        if not block:
                            self.stream_f.close()
                            self.stream_f = None
                            self.stream_data = []
                            break

                        self.stream_data.append(block)
                        read_bytes += len(block)

                        self.stream_offset, self.stream_data = self.block_cache.receive_cached_data(
                            self.stream_offset, self.stream_data)
        except (HTTPError, IOError) as err:
            if self.stream_f is not None:
                self.stream_f.close()
                self.stream_f = None
            raise IOError(errno.EREMOTEIO, "I/O error: %s" % (str(err),))
        finally:
            self.lock.release()

    def get_size(self):
        return self.block_cache.get_size()

    def get_attr(self):
        return dict(type='file', size=self.get_size())

    def read(self, io, offset, length):
        return self._do_rw(io, offset, length, write=False)

    def write(self, io, offset, data):
        """
        Write data to file. If *offset* is None, it means append.
        """
        with self.lock:
            if len(data) > 0:
                self.dirty = True
                if offset is None:
                    offset = self.get_size()
                self._do_rw(io, offset, data, write=True)

    def truncate(self, size):
        with self.lock:
            if size != self.block_cache.get_size():
                self.dirty = True
            self.block_cache.truncate(size)

    def _buffer_whole_file(self, io):
        self._do_rw(io, 0, self.block_cache.get_size(), write=False, no_result=True)

    def upload(self, io, parent_cap=None):
        with self.lock:
            # Buffer all data
            self._buffer_whole_file(io)

            # Upload the whole file
            class Fwrapper(object):
                def __init__(self, block_cache):
                    self.block_cache = block_cache
                    self.size = block_cache.get_size()
                    self.f = self.block_cache.get_file()
                    self.f.seek(0)

                def __len__(self):
                    return self.size

                def read(self, size):
                    return self.f.read(size)

            if parent_cap is None:
                upath = self.upath
                iscap = False
            else:
                upath = parent_cap + u"/" + ubasename(self.upath)
                iscap = True

            fw = Fwrapper(self.block_cache)
            try:
                filecap = io.put_file(upath, fw, iscap=iscap)
            except (HTTPError, IOError) as err:
                raise IOError(errno.EFAULT, "I/O error: %s" % (str(err),))

            self.info[1][u'ro_uri'] = filecap
            self.info[1][u'size'] = self.get_size()
            self._save_info()

            self.dirty = False

            return filecap

    def unlink(self):
        with self.lock:
            if self.upath is not None and not self.invalidated:
                os.unlink(self.f.path)
                os.unlink(self.f_state.path)
                os.unlink(self.f_data.path)
            self.upath = None
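
# A minimal usage sketch (not part of the original source): the class docstring
# requires a single CachedFileInode per logical file, and incref()/decref()
# exist to enforce that via reference counting.  The names `cache_db`,
# `tahoe_io`, and `open_inodes` below are hypothetical stand-ins for whatever
# the surrounding filesystem layer provides; only the CachedFileInode methods
# and attributes used here appear in the class above.

open_inodes = {}  # upath -> CachedFileInode, guarded by the caller's own lock

def open_file(upath, filecap):
    inode = open_inodes.get(upath)
    if inode is None:
        # First opener creates the inode; later openers share the same instance
        inode = CachedFileInode(cache_db, upath, tahoe_io, filecap)
        open_inodes[upath] = inode
    inode.incref()
    return inode

def release_file(upath):
    inode = open_inodes[upath]
    inode.decref()  # closes the inode once the reference count drops to zero
    if inode.closed:
        del open_inodes[upath]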