def write_chunk(self, chunk, is_dummy=False):
    """
    Given a new chunk, write it to the storage.

    @type chunk: AbstractChunkWithContents
    @type is_dummy: bool
    """
    # For clarity only
    super(ChunkStorageFS, self).write_chunk(chunk, is_dummy)

    file_path = self.__get_chunk_file_path(chunk.uuid, is_dummy=is_dummy)

    logger.debug('Chunk body %s being written to %s', chunk.uuid, file_path)

    with self.__chunk_op_lock:
        if os.path.exists(file_path):
            logger.warning('Attempting to write the chunk %s, '
                               'but it exists already!', chunk.uuid)
        else:
            bytes = chunk.get_bytes()
            logger.debug('Writing chunk: %r (%i/%i/%i: %s)',
                         chunk, len(bytes), chunk.size(), chunk.phys_size(),
                         chunk.phys_size() == len(bytes))
            with open_wb(file_path) as fh:
                fh.write(bytes)
                fh.flush()
                os.fsync(fh.fileno())
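# The open_wb() helper used above is not shown in this section. It is assumed
# to open a file for binary writing, creating missing parent directories on
# the way. A minimal sketch under that assumption (the real helper in the
# codebase may differ, e.g. in permissions or exclusive-creation semantics):

import os


def open_wb(path):
    """Open path for binary writing, creating parent directories if needed.

    Illustrative sketch only, not the project's actual implementation.
    """
    dir_path = os.path.dirname(path)
    if dir_path and not os.path.isdir(dir_path):
        os.makedirs(dir_path)
    return open(path, 'wb')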
def _test_create_permfile(path, variant=0, repeat=1):
    """
    Create a file with some artificial data, which is NOT removed
    automatically when it becomes unused. It is the caller's duty to remove
    the file when it is no longer needed.

    @param variant: which variant of the text to use.
    @param repeat: how many times to repeat the text.
    """
    assert repeat >= 1
    with open_wb(path) as fh:
        fh.write(get_sample_data(variant) * repeat)
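# Example usage of _test_create_permfile() in a test, assuming that
# get_sample_data() returns a bytestring for the given variant. The path is
# hypothetical; note that the caller must delete the file explicitly:

import os

_tmp_path = '/tmp/permfile_example.bin'  # hypothetical path for illustration
_test_create_permfile(_tmp_path, variant=1, repeat=3)
try:
    assert os.path.getsize(_tmp_path) == len(get_sample_data(1)) * 3
finally:
    os.unlink(_tmp_path)  # the file is NOT removed automatically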
def __restore_single_file(self, file_path, file_, file_blocks, cryptographer):
    """Actual restore procedure for a single file.

    @param file_: the file to restore.
    @type file_: LocalPhysicalFileState

    @param file_path: the real path for the file.
    @type file_path: basestring

    @param file_blocks: the iterable of blocks used to restore the file
        (if needed).
    @type file_blocks: col.Iterable

    @param cryptographer: C{Cryptographer} object to use for decrypting.
    @type cryptographer: Cryptographer

    @raises CannotRestoreFile: if the checksum or fingerprint of the restored
        file is not correct, and that was spotted before corrupting
        the actual file.
    @raises FileCorrupt: if the checksum or fingerprint of the restored file
        is not correct, and that caused the corruption of the file.
    """
    cls = self.__class__

    file_blocks = list(file_blocks)  # for reiterability

    # 1. Do a "dry run" to try to ensure nothing will be broken
    #    during the restore.
    try:
        consume(self.__file_blocks_generator(file_, file_blocks,
                                             cryptographer))
    except Exception as e:
        logger.debug('Cannot restore: %s', traceback.format_exc())
        raise CannotRestoreFile('{:s} at {!r}'.format(file_.uuid, file_path))
    else:
        # 2. Do a real run of restoration.
        # TODO: do not write if the existing file is newer,
        #       but write to a "non-merged copy".
        # In more detail:
        # * if the existing file is newer AND "hot" (i.e. not backed up,
        #   or yet buffered to be backed up), this should be treated
        #   as a name conflict;
        # * if the existing file is newer AND "cold" (i.e. in the cloud
        #   already), just assume that we've received a file that is too old
        #   for some reason, and don't write anything.
        try:
            with open_wb(file_path) as fh:
                for block in self.__file_blocks_generator(file_, file_blocks,
                                                          cryptographer):
                    fh.write(block)
                # Finished the loop over blocks and writing the file.
                fh.flush()
                fh.truncate()  # what if we were rewriting a larger file?
                os.fsync(fh.fileno())
        except Exception as e:
            logger.debug('File corrupt: %s', traceback.format_exc())
            raise FileCorrupt('{:s} at {!r}'.format(file_.uuid, file_path))
        else:
            _new_dt = file_.time_changed
            _new_dt_local = _new_dt.replace(tzinfo=TZ_UTC) \
                                   .astimezone(TZ_LOCAL)
            logger.debug('Converted update time from %s to %s',
                         _new_dt, _new_dt_local)
            # _new_ts must be an integer
            _new_ts = time.mktime(_new_dt_local.timetuple())
            os.utime(file_path, (_new_ts, _new_ts))
            logger.debug("%r file's mtime force-set to %s",
                         file_path, _new_dt)
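# The consume() call used for the dry run above is assumed to be the standard
# itertools "consume" recipe: it exhausts an iterator without keeping its
# items, which is exactly what the dry run needs (decrypt and verify every
# block, then discard the data). A minimal sketch of that recipe:

import collections
from itertools import islice


def consume(iterator, n=None):
    """Advance the iterator n steps ahead; if n is None, consume entirely."""
    if n is None:
        # Feed the whole iterator into a zero-length deque: every item is
        # produced and immediately dropped.
        collections.deque(iterator, maxlen=0)
    else:
        next(islice(iterator, n, n), None)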
def migrate_chunks(self, old_path, new_path):
    """
    Migrate the chunks from their previous path to the new one.

    @note: Only non-dummy chunks are migrated; dummy chunks are removed
        from the old place and not regenerated at the new place;
        please call update_dummy_chunks_size() manually for that.
    """
    assert old_path != new_path, (old_path, new_path)

    # These two variables will be used to report the progress of the task.
    num, of = 0, 0

    @exceptions_logged(logger)
    def timercb():
        """Callback function called on timer firing."""
        if (num, of) != (0, 0):
            logger_status_chunks_op.info(
                'The chunk migration takes too long, completed %i/%i',
                num, of,
                extra={'_type': 'chunks_migration.progress',
                       'num': num,
                       'of': of})

    timer_service = TimerService(1.0, timercb)

    # If the task takes more than 3 seconds, start notifying
    # about the progress.
    _callLater = reactor.callLater  # pylint:disable=E1101,C0103
    # Won't worry about deferToThread here, cause it is very fast.
    long_task_timer = _callLater(3.0, timer_service.startService)

    with self.__chunk_op_lock:
        try:
            # What chunk files are present on the FS,
            # and what are the chunk UUIDs?
            present_chunk_uuids_iter = self.__get_chunk_uuids_on_fs()

            with db.RDB() as rdbw:
                dummy_chunks_in_db = \
                    frozenset(HostQueries.HostChunks
                                         .get_all_dummy_chunk_uuids(
                                              rdbw=rdbw))

            # First, remove all the dummy chunks.
            removed_dummy_chunks = []
            for dummy_chunk_uuid in dummy_chunks_in_db:
                try:
                    assert self.__get_chunk_file_path(dummy_chunk_uuid,
                                                      is_dummy=True,
                                                      dir_path=old_path) \
                           == self.__get_chunk_file_path(dummy_chunk_uuid,
                                                         is_dummy=True)
                    _path = self.__get_chunk_file_path(dummy_chunk_uuid,
                                                       is_dummy=True)
                    if os.path.exists(_path):
                        os.unlink(_path)
                    # If we removed the file successfully, append it
                    # to the list of the chunks which are to be removed
                    # from the DB.
                    removed_dummy_chunks.append(dummy_chunk_uuid)
                except Exception as e:
                    logger.error('Cannot remove dummy chunk %s: %s',
                                 dummy_chunk_uuid, e)
            HostQueries.HostChunks \
                       .delete_dummy_chunks(removed_dummy_chunks)

            # This dictionary maps the chunk UUID
            # to a tuple of the old filename and the new filename.
            #
            # Btw, no need to convert present_chunk_uuids_iter to a set
            # and do the set difference, as it has the same complexity
            # as "for ... if not in".
            uuid_to_filenames = \
                {u: (self.__get_chunk_file_path(u,
                                                is_dummy=False,
                                                dir_path=old_path),
                     self.__get_chunk_file_path(u,
                                                is_dummy=False,
                                                dir_path=new_path))
                     for u in present_chunk_uuids_iter
                     if u not in dummy_chunks_in_db}

            # Now, move the files to the new directory.
            of = len(uuid_to_filenames)
            for u, (old_filename, new_filename) \
                    in uuid_to_filenames.iteritems():
                logger.debug('Moving chunk %s from %s to %s',
                             u, old_filename, new_filename)
                try:
                    with open(old_filename, 'rb') as rfh:
                        with open_wb(new_filename) as wfh:
                            wfh.write(rfh.read())
                except Exception:
                    logger.error('Cannot move chunk %s from %s to %s',
                                 u, old_filename, new_filename)
                else:
                    try:
                        os.unlink(old_filename)
                    except Exception:
                        logger.error('Cannot remove chunk file %s',
                                     old_filename)
                num += 1

        except Exception as e:
            logger_status_chunks_op_error.error(
                'The chunks migration failed: %r', e,
                extra={'_type': 'chunks_migration.error',
                       '_exc': e,
                       '_tb': traceback.format_exc()})
        finally:
            if (not long_task_timer.called and
                    not long_task_timer.cancelled):
                long_task_timer.cancel()
            if timer_service.running:
                timer_service.stopService()
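# The delayed progress-notification pattern used in migrate_chunks() can be
# isolated into a small sketch: a Twisted TimerService fires a progress
# callback once per second, but is only started (via reactor.callLater) if
# the operation is still running after 3 seconds. The names below are
# illustrative, not part of the real module:

from twisted.application.internet import TimerService
from twisted.internet import reactor


def run_with_progress(operation, report_progress):
    """Run operation(); after 3 s, start calling report_progress() every 1 s."""
    timer_service = TimerService(1.0, report_progress)
    long_task_timer = reactor.callLater(3.0, timer_service.startService)
    try:
        operation()
    finally:
        # Cancel the delayed start if it has not fired yet, and stop the
        # periodic reporting if it was already started.
        if not (long_task_timer.called or long_task_timer.cancelled):
            long_task_timer.cancel()
        if timer_service.running:
            timer_service.stopService()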