def test_list_directory_strips_compression_extensions(blockchain_path, compression):
    """A file stored with a compression suffix is listed under its logical name."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    mkdir_and_touch(blockchain_path / f'a/a.txt.{compression}')
    assert list(storage.list_directory()) == ['a.txt']
def test_list_directory_filename_is_not_duplicated_on_name_conflict(blockchain_path):
    """When plain and compressed variants coexist, the name is listed only once."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    for relative_path in ('f/i/l/e/file.txt', 'f/i/l/e/file.txt.gz'):
        mkdir_and_touch(blockchain_path / relative_path)
    assert list(storage.list_directory()) == ['file.txt']
def test_non_optimized_paths_are_not_listed(blockchain_path):
    """A file placed directly in the root (not in its optimized subpath) is ignored."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    mkdir_and_touch(blockchain_path / 'file.txt')
    assert list(storage.list_directory()) == []
def test_can_list_directory_with_sorting(blockchain_path, sort_direction, expected):
    """Listing honors the requested sort direction (parametrized fixture)."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    for relative_path in ('1/1.txt', '1/0/10.txt', '2/2.txt'):
        mkdir_and_touch(blockchain_path / relative_path)
    assert list(storage.list_directory(sort_direction=sort_direction)) == expected
def test_can_load(blockchain_path, base_filename, optimized_file_path):
    """Saved bytes land in the optimized path and round-trip through load()."""
    payload = b'\x08Test'
    fss = PathOptimizedFileSystemStorage(base_path=blockchain_path)
    fss.save(base_filename, payload)

    assert os.path.isfile(optimized_file_path)
    with open(optimized_file_path, 'rb') as stream:
        assert stream.read() == payload
    assert fss.load(base_filename) == payload
def test_list_subdirectory(blockchain_path):
    """Listing with a prefix returns only entries under that subdirectory."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    for relative_path in ('path_1/a/a.txt', 'path_1/b/b.txt', 'path_2/c/c.txt', 'path_2/d/d.txt'):
        mkdir_and_touch(blockchain_path / relative_path)
    assert set(storage.list_directory(prefix='path_1')) == {'path_1/a.txt', 'path_1/b.txt'}
def test_compression(blockchain_path, base_filename, optimized_file_path):
    """A finalized save with a gz compressor writes a .gz file that load() transparently decompresses."""
    payload = b'A' * 10000
    fss = PathOptimizedFileSystemStorage(base_path=blockchain_path, compressors=('gz',))
    fss.save(base_filename, payload, is_final=True)

    gz_path = optimized_file_path + '.gz'
    assert os.path.isfile(gz_path)
    with gzip.open(gz_path, 'rb') as stream:
        assert stream.read() == payload
    assert fss.load(base_filename) == payload
def test_can_list_directory_without_sorting(blockchain_path):
    """With sort_direction=None all files are still listed (order unspecified)."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    for relative_path in ('1/1.txt', '1/0/10.txt', '2/2.txt'):
        mkdir_and_touch(blockchain_path / relative_path)
    assert set(storage.list_directory(sort_direction=None)) == {'1.txt', '10.txt', '2.txt'}
def test_list_optimized_sorting_is_correct(blockchain_path):
    """Sorting is over the logical names even when os.walk yields directories unordered."""
    walk_entries = [
        (str(blockchain_path / 'a'), ('b',), ('a.txt',)),
        (str(blockchain_path / 'a/b'), (), ('ab.txt',)),
        (str(blockchain_path / 'z'), (), ('z.txt',)),
    ]
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    with patch('os.walk', return_value=walk_entries):
        assert list(storage.list_directory()) == ['a.txt', 'ab.txt', 'z.txt']

    with patch('os.walk', return_value=walk_entries):
        assert list(storage.list_directory(sort_direction=-1)) == ['z.txt', 'ab.txt', 'a.txt']
def __init__(
    self,
    *,
    base_directory,
    # Account root files
    account_root_files_subdir=DEFAULT_BLOCKCHAIN_STATES_SUBDIR,
    account_root_files_cache_size=128,
    account_root_files_storage_kwargs=None,
    # Blocks
    blocks_subdir=DEFAULT_BLOCKS_SUBDIR,
    block_chunk_size=DEFAULT_BLOCK_CHUNK_SIZE,
    blocks_cache_size=None,
    blocks_storage_kwargs=None,
    lock_filename='file.lock',
    **kwargs
):
    """Set up file-system storages and LRU caches rooted at `base_directory`.

    Raises:
        ValueError: if `base_directory` is not an absolute path.
    """
    if not os.path.isabs(base_directory):
        raise ValueError('base_directory must be an absolute path')

    # Default the snapshot period to the block chunk size before handing
    # kwargs to the base class (setdefault must run before super().__init__).
    snapshot_period_in_blocks = kwargs.setdefault('snapshot_period_in_blocks', block_chunk_size)
    super().__init__(**kwargs)

    self.block_chunk_size = block_chunk_size
    self.base_directory = base_directory

    self.block_storage = PathOptimizedFileSystemStorage(
        base_path=os.path.join(base_directory, blocks_subdir), **(blocks_storage_kwargs or {})
    )
    self.account_root_files_storage = PathOptimizedFileSystemStorage(
        base_path=os.path.join(base_directory, account_root_files_subdir),
        **(account_root_files_storage_kwargs or {})
    )

    self.account_root_files_cache = LRUCache(account_root_files_cache_size)
    if blocks_cache_size is None:
        # We do not really need to cache more than `snapshot_period_in_blocks` blocks
        # since we use account root file as a base
        blocks_cache_size = snapshot_period_in_blocks * 2
    self.blocks_cache = LRUCache(blocks_cache_size)

    self._file_lock = None
    self.lock_filename = lock_filename
def __init__(
    self,
    *,
    base_directory,
    # Account root files
    account_root_files_subdir=DEFAULT_BLOCKCHAIN_STATES_SUBDIR,
    account_root_files_cache_size=128,
    account_root_files_storage_kwargs=None,
    # Blocks
    blocks_subdir=DEFAULT_BLOCKS_SUBDIR,
    block_chunk_size=DEFAULT_BLOCK_CHUNK_SIZE,
    blocks_cache_size=None,
    blocks_storage_kwargs=None,
    lock_filename='file.lock',
    **kwargs
):
    """Set up file-system storages and delegate cache creation to initialize_caches().

    Raises:
        ValueError: if `base_directory` is not an absolute path.
    """
    if not os.path.isabs(base_directory):
        raise ValueError('base_directory must be an absolute path')

    # Default the snapshot period to the block chunk size before handing
    # kwargs to the base class (setdefault must run before super().__init__).
    kwargs.setdefault('snapshot_period_in_blocks', block_chunk_size)
    super().__init__(**kwargs)

    self.block_chunk_size = block_chunk_size
    self.base_directory = base_directory

    self.block_storage = PathOptimizedFileSystemStorage(
        base_path=os.path.join(base_directory, blocks_subdir), **(blocks_storage_kwargs or {})
    )
    self.blockchain_states_storage = PathOptimizedFileSystemStorage(
        base_path=os.path.join(base_directory, account_root_files_subdir),
        **(account_root_files_storage_kwargs or {})
    )

    # Cache sizes are kept on the instance so initialize_caches() can rebuild
    # the caches later (e.g. on clear()).
    self.account_root_files_cache_size = account_root_files_cache_size
    self.blocks_cache_size = blocks_cache_size
    self.blockchain_states_cache: Optional[LRUCache] = None
    self.blocks_cache: Optional[LRUCache] = None
    self.initialize_caches()

    self._file_lock = None
    self.lock_filename = lock_filename
def test_list_directory(blockchain_directory):
    """Finalized (compressed) and in-progress files are listed together."""
    fss = PathOptimizedFileSystemStorage(os.path.join(blockchain_directory, 'test'), compressors=('gz',))
    fss.save('1434567890.txt', b'A' * 1000, is_final=True)
    fss.save('1134567890.txt', b'test1')
    fss.save('1234567890.txt', b'test2')
    fss.save('1334567890.txt', b'test3')

    expected = {'1134567890.txt', '1234567890.txt', '1334567890.txt', '1434567890.txt'}
    assert set(fss.list_directory()) == expected
def test_can_finalize(blockchain_path, base_filename, optimized_file_path):
    """finalize() clears all write permission bits on the stored file."""
    write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH
    fss = PathOptimizedFileSystemStorage(base_path=blockchain_path)
    fss.save(base_filename, b'\x08Test')
    assert os_stat(optimized_file_path).st_mode & write_bits != 0
    fss.finalize(base_filename)
    assert os_stat(optimized_file_path).st_mode & write_bits == 0
def test_move(blockchain_path):
    """move() relocates both the logical name and the optimized on-disk path."""
    storage = PathOptimizedFileSystemStorage(blockchain_path, max_depth=5)
    storage.save('file1.txt', b'AAA')

    source_path = str(blockchain_path / 'f/i/l/e/1/file1.txt')
    destination_path = str(blockchain_path / 'f/i/l/e/2/file2.txt')
    assert os.path.isfile(source_path)

    storage.move('file1.txt', 'file2.txt')
    assert os.path.isfile(destination_path)
    assert not os.path.isfile(source_path)
    assert storage.load('file2.txt') == b'AAA'
def test_can_append(blockchain_path, base_filename, optimized_file_path):
    """append() adds bytes to the end of an existing stored file."""
    fss = PathOptimizedFileSystemStorage(base_path=blockchain_path)
    fss.save(base_filename, b'\x08Test')
    assert os.path.isfile(optimized_file_path)
    with open(optimized_file_path, 'rb') as stream:
        assert stream.read() == b'\x08Test'

    fss.append(base_filename, b'\x09\x0aAPPEND')
    with open(optimized_file_path, 'rb') as stream:
        assert stream.read() == b'\x08Test\x09\x0aAPPEND'
class FileBlockchain(BlockchainBase):
    """File-system backed blockchain: blocks in chunk files, blockchain states in separate files.

    Layout (under ``base_directory``):
    - blocks are appended to chunk files of ``block_chunk_size`` blocks each,
      serialized with messagepack;
    - blockchain states (account root files) are stored one per file.
    Both storages use PathOptimizedFileSystemStorage; reads go through LRU caches.
    Mutating operations are guarded by a file lock (see `file_lock`).
    """

    def __init__(
        self,
        *,
        base_directory,
        # Account root files
        account_root_files_subdir=DEFAULT_BLOCKCHAIN_STATES_SUBDIR,
        account_root_files_cache_size=128,
        account_root_files_storage_kwargs=None,
        # Blocks
        blocks_subdir=DEFAULT_BLOCKS_SUBDIR,
        block_chunk_size=DEFAULT_BLOCK_CHUNK_SIZE,
        blocks_cache_size=None,
        blocks_storage_kwargs=None,
        lock_filename='file.lock',
        **kwargs
    ):
        """Set up storages and caches.

        Raises:
            ValueError: if `base_directory` is not an absolute path.
        """
        if not os.path.isabs(base_directory):
            raise ValueError('base_directory must be an absolute path')

        # Must run before super().__init__() so the base class sees the default.
        kwargs.setdefault('snapshot_period_in_blocks', block_chunk_size)
        super().__init__(**kwargs)

        self.block_chunk_size = block_chunk_size

        account_root_files_directory = os.path.join(base_directory, account_root_files_subdir)
        block_directory = os.path.join(base_directory, blocks_subdir)

        self.base_directory = base_directory

        self.block_storage = PathOptimizedFileSystemStorage(base_path=block_directory, **(blocks_storage_kwargs or {}))
        self.blockchain_states_storage = PathOptimizedFileSystemStorage(
            base_path=account_root_files_directory, **(account_root_files_storage_kwargs or {})
        )

        # Sizes are kept so initialize_caches() can rebuild the caches (e.g. on clear()).
        self.account_root_files_cache_size = account_root_files_cache_size
        self.blocks_cache_size = blocks_cache_size
        self.blockchain_states_cache: Optional[LRUCache] = None
        self.blocks_cache: Optional[LRUCache] = None
        self.initialize_caches()

        self._file_lock = None
        self.lock_filename = lock_filename

    @property
    def file_lock(self):
        """Lazily created file lock (timeout=0, i.e. non-blocking) under base_directory."""
        file_lock = self._file_lock
        if file_lock is None:
            base_directory = self.base_directory
            os.makedirs(base_directory, exist_ok=True)
            lock_file_path = os.path.join(base_directory, self.lock_filename)
            self._file_lock = file_lock = filelock.FileLock(lock_file_path, timeout=0)
        return file_lock

    @lock_method(lock_attr='file_lock', exception=LOCKED_EXCEPTION)
    def clear(self):
        """Drop caches and wipe both on-disk storages."""
        self.initialize_caches()
        self.block_storage.clear()
        self.blockchain_states_storage.clear()

    def initialize_caches(self):
        """(Re)create the in-memory LRU caches, discarding any cached entries."""
        self.blockchain_states_cache = LRUCache(self.account_root_files_cache_size)
        self.blocks_cache = LRUCache(
            # We do not really need to cache more than `snapshot_period_in_blocks` blocks since
            # we use account root file as a base
            self.snapshot_period_in_blocks * 2 if self.blocks_cache_size is None else self.blocks_cache_size
        )

    # Account root files methods
    @lock_method(lock_attr='file_lock', exception=LOCKED_EXCEPTION)
    def add_blockchain_state(self, blockchain_state: BlockchainState):
        return super().add_blockchain_state(blockchain_state)

    @ensure_locked(lock_attr='file_lock', exception=EXPECTED_LOCK_EXCEPTION)
    def persist_blockchain_state(self, blockchain_state: BlockchainState):
        """Write a blockchain state as a finalized messagepack file named after its last block."""
        storage = self.blockchain_states_storage
        last_block_number = blockchain_state.last_block_number
        filename = make_blockchain_state_filename(last_block_number)
        storage.save(filename, blockchain_state.to_messagepack(), is_final=True)

    def _load_blockchain_states(self, file_path):
        """Load a blockchain state from `file_path`, memoizing in blockchain_states_cache."""
        cache = self.blockchain_states_cache
        account_root_file = cache.get(file_path)
        if account_root_file is None:
            storage = self.blockchain_states_storage
            # States are only ever written with is_final=True (see persist_blockchain_state)
            assert storage.is_finalized(file_path)
            account_root_file = BlockchainState.from_messagepack(storage.load(file_path))
            cache[file_path] = account_root_file

        return account_root_file

    def _yield_blockchain_states(self, direction) -> Generator[BlockchainState, None, None]:
        """Yield stored blockchain states in filename order; direction is 1 or -1."""
        assert direction in (1, -1)

        storage = self.blockchain_states_storage
        for file_path in storage.list_directory(sort_direction=direction):
            yield self._load_blockchain_states(file_path)

    def yield_blockchain_states(self) -> Generator[BlockchainState, None, None]:
        yield from self._yield_blockchain_states(1)

    def yield_blockchain_states_reversed(self) -> Generator[BlockchainState, None, None]:
        yield from self._yield_blockchain_states(-1)

    def get_blockchain_states_count(self) -> int:
        storage = self.blockchain_states_storage
        return ilen(storage.list_directory())

    # Blocks methods
    @lock_method(lock_attr='file_lock', exception=LOCKED_EXCEPTION)
    def add_block(self, block: Block, validate=True):
        return super().add_block(block, validate)

    @ensure_locked(lock_attr='file_lock', exception=EXPECTED_LOCK_EXCEPTION)
    def persist_block(self, block: Block):
        """Append a block to its chunk file, renaming the file so its name always
        reflects the actual [start, end] block range it contains; finalize the
        chunk once the last block of the chunk has been written."""
        storage = self.block_storage
        block_chunk_size = self.block_chunk_size

        block_number = block.message.block_number
        chunk_number, offset = divmod(block_number, block_chunk_size)

        chunk_block_number_start = chunk_number * block_chunk_size

        # The existing chunk file (if any) ends at the previous block; a chunk's
        # very first block has no existing file to rename.
        if chunk_block_number_start == block_number:
            append_end = block_number
        else:
            assert chunk_block_number_start < block_number
            append_end = block_number - 1

        append_filename = make_block_chunk_filename(start=chunk_block_number_start, end=append_end)
        filename = make_block_chunk_filename(start=chunk_block_number_start, end=block_number)

        storage.append(append_filename, block.to_messagepack())

        if append_filename != filename:
            storage.move(append_filename, filename)

        # Last block of the chunk: make the chunk file read-only.
        if offset == block_chunk_size - 1:
            storage.finalize(filename)

    def yield_blocks(self) -> Generator[Block, None, None]:
        yield from self._yield_blocks(1)

    @timeit(verbose_args=True, is_method=True)
    def yield_blocks_reversed(self) -> Generator[Block, None, None]:
        yield from self._yield_blocks(-1)

    def yield_blocks_from(self, block_number: int) -> Generator[Block, None, None]:
        """Yield blocks with numbers >= block_number, in ascending order."""
        for file_path in self._list_block_directory():
            meta = get_block_chunk_file_path_meta(file_path)
            if meta is None:
                logger.warning('File %s has invalid name format', file_path)
                continue

            # Entire chunk is below the requested block number: skip it.
            if meta.end < block_number:
                continue

            yield from self._yield_blocks_from_file_cached(file_path, direction=1, start=max(meta.start, block_number))

    def get_block_by_number(self, block_number: int) -> Optional[Block]:
        """Return the block with the given number, or None if it does not exist."""
        assert self.blocks_cache
        block = self.blocks_cache.get(block_number)
        if block is not None:
            return block

        try:
            return next(self.yield_blocks_from(block_number))
        except StopIteration:
            return None

    def get_block_count(self) -> int:
        """Count blocks by summing chunk ranges from filenames (no file reads)."""
        count = 0
        for file_path in self._list_block_directory():
            meta = get_block_chunk_file_path_meta(file_path)
            if meta is None:
                logger.warning('File %s has invalid name format', file_path)
                continue
            count += meta.end - meta.start + 1

        return count

    @timeit(verbose_args=True, is_method=True)
    def _yield_blocks(self, direction) -> Generator[Block, None, None]:
        """Yield all blocks traversing chunk files in the given direction (1 or -1)."""
        assert direction in (1, -1)
        for file_path in self._list_block_directory(direction):
            yield from self._yield_blocks_from_file_cached(file_path, direction)

    def _yield_blocks_from_file_cached(self, file_path, direction, start=None):
        """Yield blocks from a chunk file, serving a leading run from the cache
        and falling back to reading the file for the rest."""
        assert direction in (1, -1)

        meta = get_block_chunk_file_path_meta(file_path)
        if meta is None:
            logger.warning('File %s has invalid name format', file_path)
            return

        file_start = meta.start
        file_end = meta.end
        if direction == 1:
            next_block_number = cache_start = file_start if start is None else start
            cache_end = file_end
        else:
            cache_start = file_start
            next_block_number = cache_end = file_end if start is None else start

        for block in self._yield_blocks_from_cache(cache_start, cache_end, direction):
            assert next_block_number == block.message.block_number
            next_block_number += direction
            yield block

        # Cache run ended inside the file's range: read the remainder from disk.
        if file_start <= next_block_number <= file_end:
            yield from self._yield_blocks_from_file(file_path, direction, start=next_block_number)

    def _yield_blocks_from_file(self, file_path, direction, start=None):
        """Yield blocks deserialized from a chunk file, populating the cache as we go."""
        assert direction in (1, -1)

        storage = self.block_storage

        unpacker = msgpack.Unpacker()
        unpacker.feed(storage.load(file_path))
        if direction == -1:
            unpacker = always_reversible(unpacker)

        for block_compact_dict in unpacker:
            block = Block.from_compact_dict(block_compact_dict)
            block_number = block.message.block_number
            # TODO(dmu) HIGH: Implement a better skip
            if start is not None:
                if direction == 1 and block_number < start:
                    continue
                elif direction == -1 and block_number > start:
                    continue

            self.blocks_cache[block_number] = block
            yield block

    def _yield_blocks_from_cache(self, start_block_number, end_block_number, direction):
        """Yield consecutively cached blocks in [start, end]; stop at the first miss."""
        assert direction in (1, -1)

        iter_ = range(start_block_number, end_block_number + 1)
        if direction == -1:
            iter_ = always_reversible(iter_)

        for block_number in iter_:
            block = self.blocks_cache.get(block_number)
            if block is None:
                break

            yield block

    def _list_block_directory(self, direction=1):
        """Yield block chunk file paths sorted in the given direction."""
        yield from self.block_storage.list_directory(sort_direction=direction)
def test_can_load_from_optimized_path(blockchain_path):
    """load() translates the logical name into its optimized on-disk path."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    target = 'thenewboston_node.business_logic.storages.file_system.FileSystemStorage.load'
    with patch(target) as load_mock:
        storage.load('parent/file.txt')
    load_mock.assert_called_once_with('parent/f/i/l/e/file.txt')
def test_cannot_list_outer_directory(blockchain_path, compressible_data):
    """Listing a prefix that escapes the storage root ('..') is rejected."""
    storage = PathOptimizedFileSystemStorage(blockchain_path / 'subdir')
    with pytest.raises(ValueError):
        list(storage.list_directory('..'))
def test_can_append_to_optimized_path(blockchain_path):
    """append() translates the logical name and forwards is_final=False."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    target = 'thenewboston_node.business_logic.storages.file_system.FileSystemStorage.append'
    with patch(target) as append_mock:
        storage.append('parent/file.txt', b'test data')
    append_mock.assert_called_once_with('parent/f/i/l/e/file.txt', b'test data', is_final=False)
def test_can_finalize_to_optimized_path(blockchain_path):
    """finalize() resolves the logical name to an absolute optimized path."""
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    target = 'thenewboston_node.business_logic.storages.file_system.FileSystemStorage._finalize'
    with patch(target) as finalize_mock:
        storage.finalize('parent/file.txt')
    finalize_mock.assert_called_once_with(blockchain_path / 'parent/f/i/l/e/file.txt')
def test_list_directory_validate_sort_direction(blockchain_directory, wrong_sort_direction):
    """An unsupported sort_direction value raises ValueError (parametrized fixture)."""
    storage = PathOptimizedFileSystemStorage(blockchain_directory)
    with pytest.raises(ValueError):
        list(storage.list_directory(sort_direction=wrong_sort_direction))