def test_list_optimized_sorting_is_correct(blockchain_path):
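    # os.walk() yields (dirpath, dirnames, filenames) tuples; this fake output drives the sorting checks below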
    return_value = [
        (str(blockchain_path / 'a'), ('b',), ('a.txt',)),
        (str(blockchain_path / 'a/b'), (), ('ab.txt',)),
        (str(blockchain_path / 'z'), (), ('z.txt',)),
    ]
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    with patch('os.walk') as os_walk_mock:
        os_walk_mock.return_value = return_value
        assert list(storage.list_directory()) == ['a.txt', 'ab.txt', 'z.txt']

    with patch('os.walk') as os_walk_mock:
        os_walk_mock.return_value = return_value
        assert list(storage.list_directory(sort_direction=-1)) == ['z.txt', 'ab.txt', 'a.txt']


def test_list_directory_strips_compression_extensions(blockchain_path, compression):
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    mkdir_and_touch(blockchain_path / f'a/a.txt.{compression}')

    listed = list(storage.list_directory())
    assert listed == ['a.txt']


def test_list_directory_filename_is_not_duplicated_on_name_conflict(blockchain_path):
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    mkdir_and_touch(blockchain_path / 'f/i/l/e/file.txt')
    mkdir_and_touch(blockchain_path / 'f/i/l/e/file.txt.gz')

    listed = list(storage.list_directory())
    assert listed == ['file.txt']


def test_non_optimized_paths_are_not_listed(blockchain_path):
    storage = PathOptimizedFileSystemStorage(blockchain_path)
    non_optimized_file_path = 'file.txt'

    mkdir_and_touch(blockchain_path / non_optimized_file_path)

    listed = list(storage.list_directory())
    assert listed == []


def test_can_list_directory_with_sorting(blockchain_path, sort_direction, expected):
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    mkdir_and_touch(blockchain_path / '1/1.txt')
    mkdir_and_touch(blockchain_path / '1/0/10.txt')
    mkdir_and_touch(blockchain_path / '2/2.txt')

    listed = list(storage.list_directory(sort_direction=sort_direction))
    assert listed == expected


def test_list_subdirectory(blockchain_path):
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    mkdir_and_touch(blockchain_path / 'path_1/a/a.txt')
    mkdir_and_touch(blockchain_path / 'path_1/b/b.txt')

    mkdir_and_touch(blockchain_path / 'path_2/c/c.txt')
    mkdir_and_touch(blockchain_path / 'path_2/d/d.txt')

    listed = set(storage.list_directory(prefix='path_1'))
    assert listed == {'path_1/a.txt', 'path_1/b.txt'}


def test_can_list_directory_without_sorting(blockchain_path):
    storage = PathOptimizedFileSystemStorage(blockchain_path)

    mkdir_and_touch(blockchain_path / '1/1.txt')
    mkdir_and_touch(blockchain_path / '1/0/10.txt')
    mkdir_and_touch(blockchain_path / '2/2.txt')

    listed = storage.list_directory(sort_direction=None)
    assert {
        '1.txt',
        '10.txt',
        '2.txt',
    } == set(listed)


def test_list_directory(blockchain_directory):
    base_directory = os.path.join(blockchain_directory, 'test')

    fss = PathOptimizedFileSystemStorage(base_directory, compressors=('gz',))
    fss.save('1434567890.txt', b'A' * 1000, is_final=True)
    fss.save('1134567890.txt', b'test1')
    fss.save('1234567890.txt', b'test2')
    fss.save('1334567890.txt', b'test3')

    assert {
        '1134567890.txt',
        '1234567890.txt',
        '1334567890.txt',
        '1434567890.txt',
    } == set(fss.list_directory())


def test_cannot_list_outer_directory(blockchain_path, compressible_data):
    fss = PathOptimizedFileSystemStorage(blockchain_path / 'subdir')

    with pytest.raises(ValueError):
        list(fss.list_directory('..'))


def test_list_directory_validate_sort_direction(blockchain_directory, wrong_sort_direction):
    storage = PathOptimizedFileSystemStorage(blockchain_directory)
    with pytest.raises(ValueError):
        list(storage.list_directory(sort_direction=wrong_sort_direction))


Code example #11

class FileBlockchain(BlockchainBase):
    def __init__(
            self,
            *,
            base_directory,

            # Account root files
            account_root_files_subdir=DEFAULT_BLOCKCHAIN_STATES_SUBDIR,
            account_root_files_cache_size=128,
            account_root_files_storage_kwargs=None,

            # Blocks
            blocks_subdir=DEFAULT_BLOCKS_SUBDIR,
            block_chunk_size=DEFAULT_BLOCK_CHUNK_SIZE,
            blocks_cache_size=None,
            blocks_storage_kwargs=None,
            lock_filename='file.lock',
            **kwargs):
        if not os.path.isabs(base_directory):
            raise ValueError('base_directory must be an absolute path')

        kwargs.setdefault('snapshot_period_in_blocks', block_chunk_size)
        super().__init__(**kwargs)

        self.block_chunk_size = block_chunk_size

        account_root_files_directory = os.path.join(base_directory,
                                                    account_root_files_subdir)
        block_directory = os.path.join(base_directory, blocks_subdir)
        self.base_directory = base_directory

        self.block_storage = PathOptimizedFileSystemStorage(
            base_path=block_directory, **(blocks_storage_kwargs or {}))
        self.blockchain_states_storage = PathOptimizedFileSystemStorage(
            base_path=account_root_files_directory,
            **(account_root_files_storage_kwargs or {}))

        self.account_root_files_cache_size = account_root_files_cache_size
        self.blocks_cache_size = blocks_cache_size

        self.blockchain_states_cache: Optional[LRUCache] = None
        self.blocks_cache: Optional[LRUCache] = None
        self.initialize_caches()

        self._file_lock = None
        self.lock_filename = lock_filename

    @property
    def file_lock(self):
        file_lock = self._file_lock
        if file_lock is None:
            base_directory = self.base_directory
            os.makedirs(base_directory, exist_ok=True)
            lock_file_path = os.path.join(base_directory, self.lock_filename)
            self._file_lock = file_lock = filelock.FileLock(lock_file_path,
                                                            timeout=0)

        return file_lock

    @lock_method(lock_attr='file_lock', exception=LOCKED_EXCEPTION)
    def clear(self):
        self.initialize_caches()
        self.block_storage.clear()
        self.blockchain_states_storage.clear()

    def initialize_caches(self):
        self.blockchain_states_cache = LRUCache(self.account_root_files_cache_size)
        self.blocks_cache = LRUCache(
            # We do not really need to cache more than `snapshot_period_in_blocks` blocks
            # since we use the account root file as a base
            self.snapshot_period_in_blocks * 2
            if self.blocks_cache_size is None else self.blocks_cache_size)

    # Account root files methods
    @lock_method(lock_attr='file_lock', exception=LOCKED_EXCEPTION)
    def add_blockchain_state(self, blockchain_state: BlockchainState):
        return super().add_blockchain_state(blockchain_state)

    @ensure_locked(lock_attr='file_lock', exception=EXPECTED_LOCK_EXCEPTION)
    def persist_blockchain_state(self, blockchain_state: BlockchainState):
        storage = self.blockchain_states_storage
        last_block_number = blockchain_state.last_block_number

        filename = make_blockchain_state_filename(last_block_number)
        storage.save(filename,
                     blockchain_state.to_messagepack(),
                     is_final=True)

    def _load_blockchain_states(self, file_path):
        cache = self.blockchain_states_cache
        account_root_file = cache.get(file_path)
        if account_root_file is None:
            storage = self.blockchain_states_storage
            assert storage.is_finalized(file_path)
            account_root_file = BlockchainState.from_messagepack(
                storage.load(file_path))
            cache[file_path] = account_root_file

        return account_root_file

    def _yield_blockchain_states(
            self, direction) -> Generator[BlockchainState, None, None]:
        assert direction in (1, -1)

        storage = self.blockchain_states_storage
        for file_path in storage.list_directory(sort_direction=direction):
            yield self._load_blockchain_states(file_path)

    def yield_blockchain_states(
            self) -> Generator[BlockchainState, None, None]:
        yield from self._yield_blockchain_states(1)

    def yield_blockchain_states_reversed(
            self) -> Generator[BlockchainState, None, None]:
        yield from self._yield_blockchain_states(-1)

    def get_blockchain_states_count(self) -> int:
        storage = self.blockchain_states_storage
        return ilen(storage.list_directory())

    # Blocks methods
    @lock_method(lock_attr='file_lock', exception=LOCKED_EXCEPTION)
    def add_block(self, block: Block, validate=True):
        return super().add_block(block, validate)

    @ensure_locked(lock_attr='file_lock', exception=EXPECTED_LOCK_EXCEPTION)
    def persist_block(self, block: Block):
        storage = self.block_storage
        block_chunk_size = self.block_chunk_size

        block_number = block.message.block_number
        chunk_number, offset = divmod(block_number, block_chunk_size)
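        # e.g. with block_chunk_size=100 and block_number=250: chunk_number=2, offset=50,
        # so the block belongs to the chunk that starts at block 200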

        chunk_block_number_start = chunk_number * block_chunk_size

        if chunk_block_number_start == block_number:
            append_end = block_number
        else:
            assert chunk_block_number_start < block_number
            append_end = block_number - 1

        append_filename = make_block_chunk_filename(
            start=chunk_block_number_start, end=append_end)
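        # The block is appended to the chunk under its current name and the file is then renamed,
        # so the chunk filename always reflects the last block it contains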
        filename = make_block_chunk_filename(start=chunk_block_number_start,
                                             end=block_number)

        storage.append(append_filename, block.to_messagepack())

        if append_filename != filename:
            storage.move(append_filename, filename)

        if offset == block_chunk_size - 1:
            storage.finalize(filename)

    def yield_blocks(self) -> Generator[Block, None, None]:
        yield from self._yield_blocks(1)

    @timeit(verbose_args=True, is_method=True)
    def yield_blocks_reversed(self) -> Generator[Block, None, None]:
        yield from self._yield_blocks(-1)

    def yield_blocks_from(self,
                          block_number: int) -> Generator[Block, None, None]:
        for file_path in self._list_block_directory():
            meta = get_block_chunk_file_path_meta(file_path)
            if meta is None:
                logger.warning('File %s has invalid name format', file_path)
                continue

            if meta.end < block_number:
                continue

            yield from self._yield_blocks_from_file_cached(
                file_path, direction=1, start=max(meta.start, block_number))

    def get_block_by_number(self, block_number: int) -> Optional[Block]:
        assert self.blocks_cache
        block = self.blocks_cache.get(block_number)
        if block is not None:
            return block

        try:
            return next(self.yield_blocks_from(block_number))
        except StopIteration:
            return None

    def get_block_count(self) -> int:
        count = 0
        for file_path in self._list_block_directory():
            meta = get_block_chunk_file_path_meta(file_path)
            if meta is None:
                logger.warning('File %s has invalid name format', file_path)
                continue

            count += meta.end - meta.start + 1

        return count

    @timeit(verbose_args=True, is_method=True)
    def _yield_blocks(self, direction) -> Generator[Block, None, None]:
        assert direction in (1, -1)

        for file_path in self._list_block_directory(direction):
            yield from self._yield_blocks_from_file_cached(
                file_path, direction)

    def _yield_blocks_from_file_cached(self, file_path, direction, start=None):
        assert direction in (1, -1)

        meta = get_block_chunk_file_path_meta(file_path)
        if meta is None:
            logger.warning('File %s has invalid name format', file_path)
            return

        file_start = meta.start
        file_end = meta.end
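        # Work out which block numbers can be served from the LRU cache and where reading
        # from the chunk file must resume if the cached run ends early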
        if direction == 1:
            next_block_number = cache_start = file_start if start is None else start
            cache_end = file_end
        else:
            cache_start = file_start
            next_block_number = cache_end = file_end if start is None else start

        for block in self._yield_blocks_from_cache(cache_start, cache_end,
                                                   direction):
            assert next_block_number == block.message.block_number
            next_block_number += direction
            yield block

        if file_start <= next_block_number <= file_end:
            yield from self._yield_blocks_from_file(file_path,
                                                    direction,
                                                    start=next_block_number)

    def _yield_blocks_from_file(self, file_path, direction, start=None):
        assert direction in (1, -1)
        storage = self.block_storage

        unpacker = msgpack.Unpacker()
        unpacker.feed(storage.load(file_path))
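        # always_reversible() buffers the unpacked items so they can be yielded last-to-first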
        if direction == -1:
            unpacker = always_reversible(unpacker)

        for block_compact_dict in unpacker:
            block = Block.from_compact_dict(block_compact_dict)
            block_number = block.message.block_number
            # TODO(dmu) HIGH: Implement a better skip
            if start is not None:
                if direction == 1 and block_number < start:
                    continue
                elif direction == -1 and block_number > start:
                    continue

            self.blocks_cache[block_number] = block
            yield block

    def _yield_blocks_from_cache(self, start_block_number, end_block_number,
                                 direction):
        assert direction in (1, -1)

        iter_ = range(start_block_number, end_block_number + 1)
        if direction == -1:
            iter_ = always_reversible(iter_)

        for block_number in iter_:
            block = self.blocks_cache.get(block_number)
            if block is None:
                break

            yield block

    def _list_block_directory(self, direction=1):
        yield from self.block_storage.list_directory(sort_direction=direction)
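

# A minimal usage sketch (not part of the class above). Assumptions: FileBlockchain and its
# helpers are importable from the surrounding project, BlockchainBase needs no constructor
# arguments beyond those forwarded here, and the base directory path is hypothetical.
if __name__ == '__main__':
    blockchain = FileBlockchain(base_directory='/var/lib/blockchain')  # must be an absolute path
    print('Blocks stored:', blockchain.get_block_count())
    for block in blockchain.yield_blocks():
        print(block.message.block_number)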