async def test_load_block_from_remote(alice_file_transactions, foo_txt):
    """Read a file whose blocks were uploaded, then cleared from local storage.

    Two blocks (10 and 5 bytes) are uploaded through the remote loader and
    removed from the local clean-block storage; a 14-byte read at offset 0
    must still return the expected bytes.
    """
    transactions = alice_file_transactions
    remote = transactions.remote_loader
    storage = transactions.local_storage

    # Prepare the backend
    await remote.create_realm(remote.workspace_id)

    # Describe a 15-byte file made of one block slot holding two block chunks
    first_payload = b"a" * 10
    second_payload = b"b" * 5
    first = Chunk.new(0, 10).evolve_as_block(first_payload)
    second = Chunk.new(10, 15).evolve_as_block(second_payload)
    manifest = await foo_txt.get_manifest()
    manifest = manifest.evolve(blocks=((first, second),), size=15)
    await foo_txt.set_manifest(manifest)

    fd = foo_txt.open()

    # Upload both blocks, then drop them from the local storage
    await remote.upload_block(first.access, first_payload)
    await remote.upload_block(second.access, second_payload)
    await storage.clear_clean_block(first.access.id)
    await storage.clear_clean_block(second.access.id)

    # Reading 14 bytes spans the whole first block and 4 bytes of the second
    result = await transactions.fd_read(fd, 14, 0)
    assert result == first_payload + second_payload[:4]
async def test_chunk_clearing(alice_workspace_storage, cache_only, clear_manifest):
    """Check when chunks listed in ``removed_ids`` disappear from the storage.

    A manifest referencing two chunks is replaced by a version without them.
    With ``cache_only`` set, the chunks must remain readable until the
    manifest is flushed (or cleared); otherwise they must be gone right away.
    In both cases they must be gone once the manifest has been flushed.
    """
    aws = alice_workspace_storage
    manifest = create_manifest(aws.device, LocalFileManifest)
    data1 = b"abc"
    chunk1 = Chunk.new(0, 3)
    data2 = b"def"
    chunk2 = Chunk.new(3, 6)
    manifest = manifest.evolve(blocks=((chunk1, chunk2),), size=6)

    async with aws.lock_entry_id(manifest.id):
        # Set chunks and manifests
        await aws.set_chunk(chunk1.id, data1)
        await aws.set_chunk(chunk2.id, data2)
        await aws.set_manifest(manifest.id, manifest)

        # Set a new version of the manifest without the chunks
        removed_ids = {chunk1.id, chunk2.id}
        new_manifest = manifest.evolve(blocks=())
        await aws.set_manifest(
            manifest.id, new_manifest, cache_only=cache_only, removed_ids=removed_ids
        )

        # The chunks are still accessible.
        # These comparisons must be asserted: a bare `await ... == ...`
        # expression is evaluated and thrown away, so it verifies nothing.
        if cache_only:
            assert await aws.get_chunk(chunk1.id) == data1
            assert await aws.get_chunk(chunk2.id) == data2

        # The chunks are gone
        else:
            with pytest.raises(FSLocalMissError):
                await aws.get_chunk(chunk1.id)
            with pytest.raises(FSLocalMissError):
                await aws.get_chunk(chunk2.id)

        # Now flush the manifest
        if clear_manifest:
            await aws.clear_manifest(manifest.id)
        else:
            await aws.ensure_manifest_persistent(manifest.id)

        # The chunks are gone
        with pytest.raises(FSLocalMissError):
            await aws.get_chunk(chunk1.id)
        with pytest.raises(FSLocalMissError):
            await aws.get_chunk(chunk2.id)

        # Idempotency
        await aws.manifest_storage._ensure_manifest_persistent(manifest.id)
def prepare_reshape(
    manifest: LocalFileManifest,
) -> Iterator[Tuple[Chunks, Chunk, Callable, Set[Union[BlockID, ChunkID]]]]:
    """Yield the operations needed to turn every block slot into a single chunk.

    For each entry of ``manifest.blocks`` that is not already a lone block
    chunk, yield a 4-tuple:

    * the source chunks of the slot,
    * the chunk that should replace them,
    * a callback ``(manifest, new_chunk) -> manifest`` that installs a chunk
      as the only content of that slot,
    * the set of ids to remove afterwards.

    A slot holding a single pseudo-block chunk is yielded with that chunk as
    its own replacement and an empty removal set.
    """

    def update_manifest(
        index: int, manifest: LocalFileManifest, new_chunk: Chunk
    ) -> LocalFileManifest:
        # Replace slot `index` with a one-chunk tuple, leaving other slots as-is
        slots = list(manifest.blocks)
        slots[index] = (new_chunk,)
        return manifest.evolve(blocks=tuple(slots))

    for index, chunks in enumerate(manifest.blocks):
        # `lone` is the slot's chunk when the slot holds exactly one chunk
        lone = chunks[0] if len(chunks) == 1 else None

        # Already a block: nothing to do for this slot
        if lone is not None and lone.is_block:
            continue

        # Callback bound to this slot's index
        block_update = partial(update_manifest, index)

        # Already a pseudo-block: reuse the chunk itself, nothing to remove
        if lone is not None and lone.is_pseudo_block:
            yield (chunks, lone, block_update, set())
            continue

        # Otherwise build one chunk covering the whole span of the slot;
        # all the source chunks are to be cleaned up afterwards
        merged = Chunk.new(chunks[0].start, chunks[-1].stop)
        yield (chunks, merged, block_update, chunk_id_set(chunks))
async def test_chunk_interface(alice_workspace_storage):
    """Exercise get/set/clear of a chunk and the chunk storage bookkeeping."""
    storage = alice_workspace_storage
    payload = b"0123456"
    chunk = Chunk.new(0, 7)

    async def expect_missing():
        # Both reading and clearing an unknown chunk must report a local miss
        with pytest.raises(FSLocalMissError):
            await storage.get_chunk(chunk.id)
        with pytest.raises(FSLocalMissError):
            await storage.clear_chunk(chunk.id)

    # Nothing stored yet
    await expect_missing()
    await storage.clear_chunk(chunk.id, miss_ok=True)
    assert not await storage.chunk_storage.is_chunk(chunk.id)
    assert await storage.chunk_storage.get_total_size() == 0

    # Store the chunk and read it back
    await storage.set_chunk(chunk.id, payload)
    assert await storage.get_chunk(chunk.id) == payload
    assert await storage.chunk_storage.is_chunk(chunk.id)
    assert await storage.chunk_storage.get_total_size() >= 7

    # Clear it: the storage is back to its initial state
    await storage.clear_chunk(chunk.id)
    await expect_missing()
    assert not await storage.chunk_storage.is_chunk(chunk.id)
    assert await storage.chunk_storage.get_total_size() == 0
    await storage.clear_chunk(chunk.id, miss_ok=True)
async def test_block_interface(alice_workspace_storage):
    """Exercise set/clear of a clean block and reading a dirty block."""
    storage = alice_workspace_storage
    payload = b"0123456"
    chunk = Chunk.new(0, 7).evolve_as_block(payload)
    block_id = chunk.access.id

    async def expect_no_block():
        # The chunk cannot be read and the block storage holds nothing
        with pytest.raises(FSLocalMissError):
            await storage.get_chunk(chunk.id)
        assert not await storage.block_storage.is_chunk(chunk.id)
        assert await storage.block_storage.get_total_size() == 0

    # Clearing a block that was never stored does not raise
    await storage.clear_clean_block(block_id)
    await expect_no_block()

    # Store the clean block and read it back through the chunk interface
    await storage.set_clean_block(block_id, payload)
    assert await storage.get_chunk(chunk.id) == payload
    assert await storage.block_storage.is_chunk(chunk.id)
    assert await storage.block_storage.get_total_size() >= 7

    # Clear it twice in a row: the second call must not raise either
    await storage.clear_clean_block(block_id)
    await storage.clear_clean_block(block_id)
    await expect_no_block()

    # Data stored as a chunk is readable as a dirty block
    await storage.set_chunk(chunk.id, payload)
    assert await storage.get_dirty_block(block_id) == payload
async def test_serialize_non_empty_local_file_manifest(tmpdir, alice, workspace_id):
    """Store a file manifest with several chunks and read it back unchanged.

    The manifest is written through one ``WorkspaceStorage`` context and
    fetched from a second one opened on the same directory.
    """
    # Two block slots: (block chunk + plain chunk) and (plain chunk)
    first_slot = (Chunk.new(0, 7).evolve_as_block(b"0123456"), Chunk.new(7, 8))
    second_slot = (Chunk.new(8, 10),)
    stored = create_manifest(alice, LocalFileManifest).evolve_and_mark_updated(
        blocksize=8, size=10, blocks=(first_slot, second_slot)
    )
    stored.assert_integrity()

    # Write the manifest under its entry lock
    async with WorkspaceStorage.run(alice, tmpdir, workspace_id) as writer:
        async with writer.lock_entry_id(stored.id):
            await writer.set_manifest(stored.id, stored)

    # Read it back from a fresh storage context
    async with WorkspaceStorage.run(alice, tmpdir, workspace_id) as reader:
        loaded = await reader.get_manifest(stored.id)
        assert loaded == stored
async def test_garbage_collection(tmpdir, alice, workspace_id):
    """Check the block storage never holds more blocks than the cache allows.

    The cache size equals one block, so after each of three insertions the
    storage must report exactly one block.
    """
    block_size = DEFAULT_BLOCK_SIZE
    payload = b"\x00" * block_size
    blocks = [Chunk.new(0, block_size).evolve_as_block(payload) for _ in range(3)]

    async with WorkspaceStorage.run(
        alice, tmpdir, workspace_id, cache_size=1 * block_size
    ) as storage:
        count_blocks = storage.block_storage.get_nb_blocks
        assert await count_blocks() == 0

        # Each insertion leaves a single block in the storage
        for block in blocks:
            await storage.set_clean_block(block.access.id, payload)
            assert await count_blocks() == 1

        # Explicit cleanup empties the storage
        await storage.block_storage.clear_all_blocks()
        assert await count_blocks() == 0
async def test_block_not_loaded_entry(alice_file_transactions, foo_txt):
    """Reading blocks absent locally fails until their data is stored.

    The manifest references two block chunks whose data is not available;
    ``fd_read`` must raise ``FSRemoteBlockNotFound``. Once the data is set
    in the local storage, the same read succeeds.
    """
    transactions = alice_file_transactions
    storage = transactions.local_storage

    # Describe a 15-byte file made of one block slot holding two block chunks
    first_payload = b"a" * 10
    second_payload = b"b" * 5
    first = Chunk.new(0, 10).evolve_as_block(first_payload)
    second = Chunk.new(10, 15).evolve_as_block(second_payload)
    manifest = await foo_txt.get_manifest()
    manifest = manifest.evolve(blocks=((first, second),), size=15)
    async with storage.lock_entry_id(manifest.parent):
        await foo_txt.set_manifest(manifest)

    fd = foo_txt.open()

    # No data available for the blocks yet
    with pytest.raises(FSRemoteBlockNotFound):
        await transactions.fd_read(fd, 14, 0)

    # Provide the data locally, then read again
    await storage.set_chunk(first.id, first_payload)
    await storage.set_chunk(second.id, second_payload)

    result = await transactions.fd_read(fd, 14, 0)
    assert result == first_payload + second_payload[:4]
def prepare_write(
    manifest: LocalFileManifest, size: int, offset: int
) -> Tuple[LocalFileManifest, List[Tuple[Chunk, int]], Set[Union[BlockID, ChunkID]]]:
    """Plan a write of ``size`` bytes at ``offset`` without touching any data.

    Returns a 3-tuple:

    * the manifest evolved with the new size and block layout,
    * the write operations as ``(chunk, offset)`` pairs, where the offset is
      the value produced by ``split_write`` minus the padding,
    * the ids collected from ``block_write`` as no longer needed.

    When ``offset`` lies beyond the current size, the write is extended
    backwards so that it starts at the current end of the file.
    """
    # Padding: distance between the end of the file and the requested offset
    padding = max(0, offset - manifest.size)
    size += padding
    offset -= padding

    operations: List[Tuple[Chunk, int]] = []
    discarded: Set[Union[BlockID, ChunkID]] = set()

    # Work on a mutable copy of the block slots
    slots = list(manifest.blocks)

    for block, subsize, start, content_offset in split_write(size, offset, manifest.blocksize):
        # Chunk receiving the data written in this slot
        written = Chunk.new(start, start + subsize)
        operations.append((written, content_offset - padding))

        # Lazy block write: combine the new chunk with the slot's current chunks
        merged, more_discarded = block_write(manifest.get_chunks(block), subsize, start, written)
        discarded |= more_discarded

        # Overwrite an existing slot, or append when writing right past the last one
        if len(slots) != block:
            slots[block] = merged
        else:
            slots.append(merged)

    # Evolve manifest
    evolved = manifest.evolve_and_mark_updated(
        size=max(manifest.size, offset + size), blocks=tuple(slots)
    )
    return evolved, operations, discarded
async def test_vacuum(tmpdir, alice, workspace_id):
    """Check disk usage of the data database around vacuum runs.

    A 1MB chunk is stored and cleared; disk usage is compared to the chunk
    size before and after each vacuum. The last check runs after the storage
    context has been exited.
    """
    one_mb = 1 * 1024 * 1024
    chunk = Chunk.new(0, one_mb)

    async with WorkspaceStorage.run(
        alice, tmpdir, workspace_id, vacuum_threshold=one_mb // 2
    ) as storage:
        database = storage.data_localdb

        # Make sure the storage is empty
        payload = b"\x00" * one_mb
        assert await database.get_disk_usage() < one_mb

        # Set and commit a chunk of 1MB
        await storage.set_chunk(chunk.id, payload)
        await database.commit()
        assert await database.get_disk_usage() > one_mb

        # Run the vacuum
        await storage.run_vacuum()
        assert await database.get_disk_usage() > one_mb

        # Clear the chunk 1MB
        await storage.clear_chunk(chunk.id)
        await database.commit()
        assert await database.get_disk_usage() > one_mb

        # Run the vacuum
        await storage.run_vacuum()
        assert await database.get_disk_usage() < one_mb

        # Make sure vacuum can run even if a transaction has started
        await storage.set_chunk(chunk.id, payload)
        await storage.run_vacuum()
        await storage.clear_chunk(chunk.id)
        await storage.run_vacuum()

        # Vacuuming the cache storage is no-op
        await storage.cache_localdb.run_vacuum()

    # Make sure disk usage can be called on a closed storage
    assert await storage.data_localdb.get_disk_usage() < one_mb
async def file_conflict(
    self,
    entry_id: EntryID,
    local_manifest: Union[LocalFolderManifest, LocalFileManifest],
    remote_manifest: BaseRemoteManifest,
) -> None:
    """Resolve a file conflict by keeping the local version under a new name.

    A new placeholder manifest carrying the current local size and blocks is
    registered in the parent under a conflict filename, and ``entry_id`` is
    overwritten with the local form of ``remote_manifest``. Returns without
    doing anything if ``entry_id`` is no longer found in the parent.

    Emits ``FS_ENTRY_UPDATED`` for the new entry and for the parent, then
    ``FS_ENTRY_FILE_CONFLICT_RESOLVED`` with the new entry as ``backup_id``.
    """
    # This is the only transaction that affects more than one manifest.
    # That's because the local version of the file has to be registered in the
    # parent as a new child while the remote version has to be set as the actual
    # version. In practice, this should not be an issue.

    # Lock parent then child
    parent_id = local_manifest.parent
    async with self.local_storage.lock_manifest(parent_id) as parent_manifest:
        async with self.local_storage.lock_manifest(entry_id) as current_manifest:

            # Make sure the file still exists
            filename = get_filename(parent_manifest, entry_id)
            if filename is None:
                return

            # Copy blocks
            new_blocks = []
            for chunks in current_manifest.blocks:
                new_chunks = []
                for chunk in chunks:
                    # Duplicate the chunk data under a freshly created chunk id
                    data = await self.local_storage.get_chunk(chunk.id)
                    new_chunk = Chunk.new(chunk.start, chunk.stop)
                    await self.local_storage.set_chunk(new_chunk.id, data)
                    if len(chunks) == 1:
                        new_chunk = new_chunk.evolve_as_block(data)
                    # NOTE(review): the ORIGINAL `chunk` is appended here, so the
                    # `new_chunk` built (and stored) just above is never referenced
                    # by the new manifest. This looks unintended, but confirm how
                    # chunks that only cover part of their stored data should be
                    # copied before switching to `new_chunk`.
                    new_chunks.append(chunk)
                new_blocks.append(tuple(new_chunks))
            new_blocks: Tuple[Tuple[Any, ...], ...] = tuple(new_blocks)

            # Prepare
            new_name = get_conflict_filename(
                filename, list(parent_manifest.children), remote_manifest.author
            )
            new_manifest = LocalFileManifest.new_placeholder(
                self.local_author, parent=parent_id
            ).evolve(size=current_manifest.size, blocks=new_blocks)
            new_parent_manifest = parent_manifest.evolve_children_and_mark_updated(
                {new_name: new_manifest.id},
                pattern_filter=self.local_storage.get_pattern_filter(),
            )
            other_manifest = BaseLocalManifest.from_remote(remote_manifest)

            # Set manifests
            # The new entry is not locked by this transaction, hence
            # `check_lock_status=False` for it only
            await self.local_storage.set_manifest(
                new_manifest.id, new_manifest, check_lock_status=False
            )
            await self.local_storage.set_manifest(parent_id, new_parent_manifest)
            await self.local_storage.set_manifest(entry_id, other_manifest)

            # Notify listeners: new entry, updated parent, resolved conflict
            self._send_event(ClientEvent.FS_ENTRY_UPDATED, id=new_manifest.id)
            self._send_event(ClientEvent.FS_ENTRY_UPDATED, id=parent_id)
            self._send_event(
                ClientEvent.FS_ENTRY_FILE_CONFLICT_RESOLVED,
                id=entry_id,
                backup_id=new_manifest.id,
            )
def evolve(m, n):
    """Return ``m`` evolved into an ``n``-byte file backed by a single block.

    The block is a chunk spanning ``[0, n)`` built from ``n`` bytes of
    ``b"a"``. The result comes from ``m.evolve_and_mark_updated``.

    The manifest passed as ``m`` is the one that gets evolved; the previous
    body ignored the argument and always started from the outer ``m1``.
    """
    chunk = Chunk.new(0, n).evolve_as_block(b"a" * n)
    blocks = ((chunk,),)
    return m.evolve_and_mark_updated(size=n, blocks=blocks)
def test_complete_scenario():
    """Walk one file manifest through writes, an overwrite, resizes and a reshape.

    After every step the readable content, the chunk layout of
    ``manifest.blocks`` and the ``updated`` timestamp are checked against the
    initial placeholder manifest (block size 16).
    """
    store = Storage()

    # Day 1: fresh placeholder
    with freeze_time("2000-01-01"):
        origin = LocalFileManifest.new_placeholder(
            DeviceID.new(), parent=EntryID(), blocksize=16
        )
        current = origin
        assert current == origin.evolve(size=0)

    # Day 2: first write at the start of the file
    with freeze_time("2000-01-02") as t2:
        current = store.write(current, b"Hello ", 0)
        assert store.read(current, 6, 0) == b"Hello "

    ((hello,),) = current.blocks
    assert current == origin.evolve(size=6, blocks=((hello,),), updated=t2)
    assert hello == Chunk(
        id=hello.id, start=0, stop=6, raw_offset=0, raw_size=6, access=None
    )
    assert store[hello.id] == b"Hello "

    # Day 3: append within the same block slot
    with freeze_time("2000-01-03") as t3:
        current = store.write(current, b"world !", 6)
        assert store.read(current, 13, 0) == b"Hello world !"

    ((_, world),) = current.blocks
    assert current == origin.evolve(size=13, blocks=((hello, world),), updated=t3)
    assert world == Chunk(
        id=world.id, start=6, stop=13, raw_offset=6, raw_size=7, access=None
    )
    assert store[world.id] == b"world !"

    # Day 4: append across the boundary between two block slots
    with freeze_time("2000-01-04") as t4:
        current = store.write(current, b"\n More kontent", 13)
        assert store.read(current, 27, 0) == b"Hello world !\n More kontent"

    (_, _, more_head), (more_tail,) = current.blocks
    first_slot = (hello, world, more_head)
    assert store[more_head.id] == b"\n M"
    assert store[more_tail.id] == b"ore kontent"
    assert current == origin.evolve(
        size=27, blocks=(first_slot, (more_tail,)), updated=t4
    )

    # Day 5: overwrite one byte in the middle of the second slot
    with freeze_time("2000-01-05") as t5:
        current = store.write(current, b"c", 20)
        assert store.read(current, 27, 0) == b"Hello world !\n More content"

    left, patch, right = current.blocks[1]
    # Both sides of the patch still point at the data of the overwritten chunk
    assert more_tail.id == left.id == right.id
    assert store[patch.id] == b"c"
    assert current == origin.evolve(
        size=27, blocks=(first_slot, (left, patch, right)), updated=t5
    )

    # Day 6: grow the file, new space reads as zeros
    with freeze_time("2000-01-06") as t6:
        current = store.resize(current, 40)
        grown = b"Hello world !\n More content" + b"\x00" * 13
        assert store.read(current, 40, 0) == grown

    (_, _, _, pad_a), (pad_b,) = current.blocks[1:]
    assert store[pad_a.id] == b"\x00" * 5
    assert store[pad_b.id] == b"\x00" * 8
    assert current == origin.evolve(
        size=40,
        blocks=(first_slot, (left, patch, right, pad_a), (pad_b,)),
        updated=t6,
    )

    # Day 7: shrink the file below its previous content
    with freeze_time("2000-01-07") as t7:
        current = store.resize(current, 25)
        shrunk = b"Hello world !\n More conte"
        assert store.read(current, 25, 0) == shrunk

    ((_, _, trimmed),) = current.blocks[1:]
    assert trimmed.id == right.id
    assert current == origin.evolve(
        size=25, blocks=(first_slot, (left, patch, trimmed)), updated=t7
    )

    # Day 8: reshape into one chunk per slot; `updated` still equals t7
    with freeze_time("2000-01-08"):
        assert not current.is_reshaped()
        current = store.reshape(current)
        reshaped = b"Hello world !\n More conte"
        assert store.read(current, 25, 0) == reshaped
        assert current.is_reshaped()
        (block_a,), (block_b,) = current.blocks
        assert store[block_a.id] == b"Hello world !\n M"
        assert store[block_b.id] == b"ore conte"
        assert current == origin.evolve(
            size=25, blocks=((block_a,), (block_b,)), updated=t7
        )