def test_perform_block_create(app):
    content = 'foo'
    with freeze_time('2012-01-01') as frozen_datetime:
        eff = app.perform_block_create(EBlockCreate(content))
        block_id = perform_sequence([], eff)
        assert app.last_modified == Arrow.fromdatetime(frozen_datetime())
        eff = app.perform_block_read(EBlockRead(block_id))
        block = perform_sequence([], eff)
        assert block['content'] == content
def test_perform_block_read(app, app_no_cache):
    local_content = 'foo'
    eff = app.perform_block_create(EBlockCreate(local_content))
    block_id = perform_sequence([], eff)
    # Read block in new blocks
    eff = app.perform_block_read(EBlockRead(block_id))
    block = perform_sequence([], eff)
    assert sorted(list(block.keys())) == ['content', 'id']
    assert block['id']
    assert block['content'] == local_content
    remote_content = b'bar'
    # Read remote block
    assert app.block_cache.currsize == 0
    eff = app.perform_block_read(EBlockRead('123'))
    sequence = [
        (EBackendBlockRead('123'),
            const(Block('123', remote_content)))
    ]
    block = perform_sequence(sequence, eff)
    assert sorted(list(block.keys())) == ['content', 'id']
    assert block['id']
    assert block['content'] == remote_content
    assert app.block_cache.currsize == 1
    # Read remote block with cache disabled
    assert app_no_cache.block_cache.currsize == 0
    eff = app_no_cache.perform_block_read(EBlockRead('123'))
    sequence = [
        (EBackendBlockRead('123'),
            const(Block('123', remote_content)))
    ]
    block = perform_sequence(sequence, eff)
    assert sorted(list(block.keys())) == ['content', 'id']
    assert block['id']
    assert block['content'] == remote_content
    assert app_no_cache.block_cache.currsize == 0
    # Read block in cache
    eff = app.perform_block_read(EBlockRead('123'))
    block = perform_sequence([], eff)
    assert sorted(list(block.keys())) == ['content', 'id']
    assert block['id']
    assert block['content'] == remote_content
    # Delete block from cache
    eff = app.perform_block_delete(EBlockDelete('123'))
    perform_sequence([], eff)
    # Not found
    eff = app.perform_block_read(EBlockRead('123'))
    sequence = [
        (EBackendBlockRead('123'),
            conste(BlockNotFound('Block not found.')))
    ]
    with pytest.raises(BlockNotFound):
        perform_sequence(sequence, eff)
    eff = app.perform_block_read(EBlockRead('123'))
    sequence = [
        (EBackendBlockRead('123'),
            conste(BlockError('Block error.')))  # TODO keep it? useful with multiple backends...
    ]
    with pytest.raises(BlockNotFound):
        perform_sequence(sequence, eff)
def read(self, size=None, offset=0):
    yield self.flush()
    # Get data
    matching_blocks = yield self._find_matching_blocks(size, offset)
    data = matching_blocks['pre_included_data']
    for blocks_and_key in matching_blocks['included_blocks']:
        block_key = blocks_and_key['key']
        decoded_block_key = from_jsonb64(block_key)
        encryptor = load_sym_key(decoded_block_key)
        for block_properties in blocks_and_key['blocks']:
            block = yield Effect(EBlockRead(block_properties['block']))
            # Decrypt
            # TODO: clean this hack
            if isinstance(block['content'], str):
                block_content = from_jsonb64(block['content'])
            else:
                block_content = from_jsonb64(block['content'].decode())
            chunk_data = encryptor.decrypt(block_content)
            # Check integrity
            assert digest(chunk_data) == block_properties['digest']
            assert len(chunk_data) == block_properties['size']
            data += chunk_data
    data += matching_blocks['post_included_data']
    return data
def _find_matching_blocks(self, size=None, offset=0):
    if size is None:
        size = sys.maxsize
    version = self.get_version()
    vlob = yield Effect(EVlobRead(self.id, self.read_trust_seed, version))
    blob = vlob['blob']
    encrypted_blob = from_jsonb64(blob)
    blob = self.encryptor.decrypt(encrypted_blob)
    blob = ejson_loads(blob.decode())
    pre_excluded_blocks = []
    included_blocks = []
    post_excluded_blocks = []
    cursor = 0
    pre_excluded_data = b''
    pre_included_data = b''
    post_included_data = b''
    post_excluded_data = b''
    for blocks_and_key in blob:
        block_key = blocks_and_key['key']
        decoded_block_key = from_jsonb64(block_key)
        encryptor = load_sym_key(decoded_block_key)
        for block_properties in blocks_and_key['blocks']:
            cursor += block_properties['size']
            if cursor <= offset:
                # Block ends before the requested range
                if len(pre_excluded_blocks) and pre_excluded_blocks[-1]['key'] == block_key:
                    pre_excluded_blocks[-1]['blocks'].append(block_properties)
                else:
                    pre_excluded_blocks.append({'blocks': [block_properties], 'key': block_key})
            elif cursor > offset and cursor - block_properties['size'] < offset:
                # Block straddles the start of the requested range
                delta = cursor - offset
                block = yield Effect(EBlockRead(block_properties['block']))
                content = from_jsonb64(block['content'])
                block_data = encryptor.decrypt(content)
                pre_excluded_data = block_data[:-delta]
                pre_included_data = block_data[-delta:][:size]
                if size < len(block_data[-delta:]):
                    post_excluded_data = block_data[-delta:][size:]
            elif cursor > offset and cursor <= offset + size:
                # Block is fully inside the requested range
                if len(included_blocks) and included_blocks[-1]['key'] == block_key:
                    included_blocks[-1]['blocks'].append(block_properties)
                else:
                    included_blocks.append({'blocks': [block_properties], 'key': block_key})
            elif cursor > offset + size and cursor - block_properties['size'] < offset + size:
                # Block straddles the end of the requested range
                delta = offset + size - (cursor - block_properties['size'])
                block = yield Effect(EBlockRead(block_properties['block']))
                content = from_jsonb64(block['content'])
                block_data = encryptor.decrypt(content)
                post_included_data = block_data[:delta]
                post_excluded_data = block_data[delta:]
            else:
                # Block begins after the requested range
                if len(post_excluded_blocks) and post_excluded_blocks[-1]['key'] == block_key:
                    post_excluded_blocks[-1]['blocks'].append(block_properties)
                else:
                    post_excluded_blocks.append({'blocks': [block_properties], 'key': block_key})
    return {
        'pre_excluded_blocks': pre_excluded_blocks,
        'pre_excluded_data': pre_excluded_data,
        'pre_included_data': pre_included_data,
        'included_blocks': included_blocks,
        'post_included_data': post_included_data,
        'post_excluded_data': post_excluded_data,
        'post_excluded_blocks': post_excluded_blocks
    }
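# Illustrative sketch, not part of the module: the partitioning above can be
# reasoned about on plain block sizes alone. With a running cumulative cursor,
# each block falls into one of five buckets relative to the requested window
# [offset, offset + size): fully before, straddling the start, fully inside,
# straddling the end, or fully after. The helper below is a hypothetical,
# dependency-free reimplementation of just that bucketing (the name
# `classify_blocks` and its return shape are assumptions, not part of the API);
# it can help sanity-check expected `included_blocks` in tests.
def classify_blocks(block_sizes, size=None, offset=0):
    if size is None:
        size = float('inf')  # read to the end, mirroring sys.maxsize above
    buckets = {'pre': [], 'pre_partial': [], 'included': [], 'post_partial': [], 'post': []}
    cursor = 0
    for index, block_size in enumerate(block_sizes):
        cursor += block_size
        start = cursor - block_size
        if cursor <= offset:
            buckets['pre'].append(index)
        elif start < offset:
            buckets['pre_partial'].append(index)
        elif cursor <= offset + size:
            buckets['included'].append(index)
        elif start < offset + size:
            buckets['post_partial'].append(index)
        else:
            buckets['post'].append(index)
    return buckets

# Example with the chunk sizes used in test_read (5, 9 and 10 bytes),
# offset=5 and size=9: only the second block is fully included.
# classify_blocks([5, 9, 10], size=9, offset=5)
# -> {'pre': [0], 'pre_partial': [], 'included': [1], 'post_partial': [], 'post': [2]}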
def test_find_matching_blocks(self, file):
    vlob_id = '1234'
    block_size = 4096
    # Contents
    contents = {}
    total_length = 0
    for index, length in enumerate([block_size + 1,
                                    block_size - 1,
                                    block_size,
                                    2 * block_size + 2,
                                    2 * block_size - 2,
                                    2 * block_size]):
        content = b''.join([str(random.randint(1, 9)).encode() for i in range(0, length)])
        contents[index] = content
        total_length += length
    # Blocks

    def generator():
        i = 2000
        while True:
            yield str(i)
            i += 1

    gen = generator()
    blocks = {}
    block_contents = {}
    block_id = 2000
    for index, content in contents.items():
        chunks = [content[i:i + block_size] for i in range(0, len(content), block_size)]
        if not chunks:
            chunks = [b'']
        sequence = []
        for chunk in chunks:
            encoded_chunk = to_jsonb64(chunk)
            sequence.append((EBlockCreate(encoded_chunk), lambda _: next(gen)))
            block_contents[str(block_id)] = encoded_chunk
            block_id += 1
        blocks[index] = perform_sequence(sequence, file._build_file_blocks(content))
    # Create file
    blob = ejson_dumps([blocks[i] for i in range(0, len(blocks))]).encode()
    blob = to_jsonb64(blob)
    # All matching blocks
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1}))
    ]
    matching_blocks = perform_sequence(sequence, file._find_matching_blocks())
    assert matching_blocks == {'pre_excluded_blocks': [],
                               'pre_excluded_data': b'',
                               'pre_included_data': b'',
                               'included_blocks': [blocks[i] for i in range(0, len(blocks))],
                               'post_included_data': b'',
                               'post_excluded_data': b'',
                               'post_excluded_blocks': []}
    # With offset
    delta = 10
    offset = (blocks[0]['blocks'][0]['size'] +
              blocks[0]['blocks'][1]['size'] +
              blocks[1]['blocks'][0]['size'] +
              blocks[2]['blocks'][0]['size'] -
              delta)
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
        (EBlockRead('2003'),
            const({'content': block_contents['2003'],
                   'creation_date': '2012-01-01T00:00:00'}))
    ]
    matching_blocks = perform_sequence(sequence, file._find_matching_blocks(None, offset))
    pre_excluded_data = contents[2][:blocks[2]['blocks'][0]['size'] - delta]
    pre_included_data = contents[2][-delta:]
    assert matching_blocks == {'pre_excluded_blocks': [blocks[0], blocks[1]],
                               'pre_excluded_data': pre_excluded_data,
                               'pre_included_data': pre_included_data,
                               'included_blocks': [blocks[i] for i in range(3, 6)],
                               'post_included_data': b'',
                               'post_excluded_data': b'',
                               'post_excluded_blocks': []}
    # With small size
    delta = 10
    size = 5
    offset = (blocks[0]['blocks'][0]['size'] +
              blocks[0]['blocks'][1]['size'] +
              blocks[1]['blocks'][0]['size'] +
              blocks[2]['blocks'][0]['size'] -
              delta)
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
        (EBlockRead('2003'),
            const({'content': block_contents['2003'],
                   'creation_date': '2012-01-01T00:00:00'}))
    ]
    matching_blocks = perform_sequence(sequence, file._find_matching_blocks(size, offset))
    pre_excluded_data = contents[2][:blocks[2]['blocks'][0]['size'] - delta]
    pre_included_data = contents[2][-delta:][:size]
    post_excluded_data = contents[2][-delta:][size:]
    assert matching_blocks == {'pre_excluded_blocks': [blocks[0], blocks[1]],
                               'pre_excluded_data': pre_excluded_data,
                               'pre_included_data': pre_included_data,
                               'included_blocks': [],
                               'post_included_data': b'',
                               'post_excluded_data': post_excluded_data,
                               'post_excluded_blocks': [blocks[i] for i in range(3, 6)]}
    # With big size
    delta = 10
    size = delta
    size += blocks[3]['blocks'][0]['size']
    size += blocks[3]['blocks'][1]['size']
    size += blocks[3]['blocks'][2]['size']
    size += 2 * delta
    offset = (blocks[0]['blocks'][0]['size'] +
              blocks[0]['blocks'][1]['size'] +
              blocks[1]['blocks'][0]['size'] +
              blocks[2]['blocks'][0]['size'] -
              delta)
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
        (EBlockRead('2003'),
            const({'content': block_contents['2003'],
                   'creation_date': '2012-01-01T00:00:00'})),
        (EBlockRead('2007'),
            const({'content': block_contents['2007'],
                   'creation_date': '2012-01-01T00:00:00'}))
    ]
    matching_blocks = perform_sequence(sequence, file._find_matching_blocks(size, offset))
    pre_excluded_data = contents[2][:-delta]
    pre_included_data = contents[2][-delta:]
    post_included_data = contents[4][:2 * delta]
    post_excluded_data = contents[4][:block_size][2 * delta:]
    partial_block_4 = deepcopy(blocks[4])
    del partial_block_4['blocks'][0]
    assert matching_blocks == {'pre_excluded_blocks': [blocks[0], blocks[1]],
                               'pre_excluded_data': pre_excluded_data,
                               'pre_included_data': pre_included_data,
                               'included_blocks': [blocks[3]],
                               'post_included_data': post_included_data,
                               'post_excluded_data': post_excluded_data,
                               'post_excluded_blocks': [partial_block_4, blocks[5]]}
    # With big size and no delta
    size = blocks[3]['blocks'][0]['size']
    size += blocks[3]['blocks'][1]['size']
    size += blocks[3]['blocks'][2]['size']
    offset = (blocks[0]['blocks'][0]['size'] +
              blocks[0]['blocks'][1]['size'] +
              blocks[1]['blocks'][0]['size'] +
              blocks[2]['blocks'][0]['size'])
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
    ]
    matching_blocks = perform_sequence(sequence, file._find_matching_blocks(size, offset))
    assert matching_blocks == {'pre_excluded_blocks': [blocks[0], blocks[1], blocks[2]],
                               'pre_excluded_data': b'',
                               'pre_included_data': b'',
                               'included_blocks': [blocks[3]],
                               'post_included_data': b'',
                               'post_excluded_data': b'',
                               'post_excluded_blocks': [blocks[4], blocks[5]]}
    # With total size
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
    ]
    matching_blocks = perform_sequence(sequence, file._find_matching_blocks(total_length, 0))
    assert matching_blocks == {'pre_excluded_blocks': [],
                               'pre_excluded_data': b'',
                               'pre_included_data': b'',
                               'included_blocks': [blocks[i] for i in range(0, 6)],
                               'post_included_data': b'',
                               'post_excluded_data': b'',
                               'post_excluded_blocks': []}
def test_commit(self, file):
    vlob_id = '1234'
    content = b'This is a test content.'
    block_ids = ['4567', '5678', '6789']
    new_vlob = {'id': '2345', 'read_trust_seed': 'ABC', 'write_trust_seed': 'DEF'}
    # Original content
    chunk_1 = content[:5]
    chunk_2 = content[5:14]
    chunk_3 = content[14:]
    blob = [{'blocks': [{'block': block_ids[0],
                         'digest': digest(chunk_1),
                         'size': len(chunk_1)},
                        {'block': block_ids[1],
                         'digest': digest(chunk_2),
                         'size': len(chunk_2)}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000003>')},
            {'blocks': [{'block': block_ids[2],
                         'digest': digest(chunk_3),
                         'size': len(chunk_3)}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000004>')}]
    blob = ejson_dumps(blob).encode()
    blob = to_jsonb64(blob)
    # New content after truncate
    new_chunk_2 = b'is a'
    new_block_id = '7654'
    new_blob = [{'blocks': [{'block': block_ids[0],
                             'digest': digest(chunk_1),
                             'size': len(chunk_1)}],
                 'key': to_jsonb64(b'<dummy-key-00000000000000000003>')},
                {'blocks': [{'block': new_block_id,
                             'digest': digest(new_chunk_2),
                             'size': len(new_chunk_2)}],
                 'key': to_jsonb64(b'<dummy-key-00000000000000000003>')}]
    new_blob = ejson_dumps(new_blob).encode()
    new_blob = to_jsonb64(new_blob)
    file.truncate(9)
    sequence = [
        (EVlobRead('1234', '42', 1),
            const({'id': '1234', 'blob': blob, 'version': 1})),
        (EVlobRead('1234', '42', 1),
            const({'id': '1234', 'blob': blob, 'version': 1})),
        (EBlockRead(block_ids[1]),
            const({'content': to_jsonb64(chunk_2),
                   'creation_date': '2012-01-01T00:00:00'})),
        (EBlockCreate(to_jsonb64(new_chunk_2)),
            const(new_block_id)),
        (EVlobUpdate(vlob_id, '43', 1, new_blob),
            noop),
        (EVlobRead('1234', '42', 1),
            const({'id': '1234', 'blob': new_blob, 'version': 1})),
        (EBlockDelete('5678'),
            conste(BlockNotFound('Block not found.'))),
        (EBlockDelete('6789'),
            noop),
        (EVlobRead('1234', '42', 1),
            const({'id': '1234', 'blob': new_blob, 'version': 1})),
        (EBlockSynchronize('4567'),
            const(True)),
        (EBlockSynchronize('7654'),
            const(False)),
        (EVlobSynchronize('1234'),
            const(new_vlob))
    ]
    ret = perform_sequence(sequence, file.commit())
    new_vlob['key'] = to_jsonb64(b'<dummy-key-00000000000000000002>')
    assert ret == new_vlob
    assert file.dirty is False
    assert file.version == 1
def test_flush(self, file):
    file.truncate(9)
    file.write(b'IS', 5)
    file.write(b'IS a nice test content.', 5)
    file.dirty = False
    file.version = 2
    vlob_id = '1234'
    content = b'This is a test content.'
    block_ids = ['4567', '5678', '6789']
    # Original content
    chunk_1 = content[:5]
    chunk_2 = content[5:14]
    chunk_3 = content[14:]
    blob = [{'blocks': [{'block': block_ids[0],
                         'digest': digest(chunk_1),
                         'size': len(chunk_1)},
                        {'block': block_ids[1],
                         'digest': digest(chunk_2),
                         'size': len(chunk_2)}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000001>')},
            {'blocks': [{'block': block_ids[2],
                         'digest': digest(chunk_3),
                         'size': len(chunk_3)}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000002>')}]
    blob = ejson_dumps(blob).encode()
    blob = to_jsonb64(blob)
    # New content after truncate
    new_chunk_2 = b'is a'
    new_block_id = '7654'
    new_blob = [{'blocks': [{'block': block_ids[0],
                             'digest': digest(chunk_1),
                             'size': len(chunk_1)}],
                 'key': to_jsonb64(b'<dummy-key-00000000000000000001>')},
                {'blocks': [{'block': new_block_id,
                             'digest': digest(new_chunk_2),
                             'size': len(new_chunk_2)}],
                 'key': to_jsonb64(b'<dummy-key-00000000000000000003>')}]
    new_blob = ejson_dumps(new_blob).encode()
    new_blob = to_jsonb64(new_blob)
    # New content after write
    new_block_2_id = '6543'
    new_chunk_4 = b'IS a nice test content.'
    new_blob_2 = [{'blocks': [{'block': block_ids[0],
                               'digest': digest(chunk_1),
                               'size': len(chunk_1)}],
                   'key': to_jsonb64(b'<dummy-key-00000000000000000001>')},
                  {'blocks': [{'block': new_block_2_id,
                               'digest': digest(new_chunk_4),
                               'size': len(new_chunk_4)}],
                   'key': to_jsonb64(b'<dummy-key-00000000000000000004>')}]
    new_blob_2 = ejson_dumps(new_blob_2).encode()
    new_blob_2 = to_jsonb64(new_blob_2)
    sequence = [
        (EVlobRead(vlob_id, '42', 2),  # Get blocks
            const({'id': vlob_id, 'blob': blob, 'version': 2})),
        (EVlobRead(vlob_id, '42', 2),  # Matching blocks
            const({'id': vlob_id, 'blob': blob, 'version': 2})),
        (EBlockRead(block_ids[1]),
            const({'content': to_jsonb64(chunk_2),
                   'creation_date': '2012-01-01T00:00:00'})),
        (EBlockCreate(to_jsonb64(new_chunk_2)),
            const(new_block_id)),
        (EVlobUpdate(vlob_id, '43', 3, new_blob),
            noop),
        (EVlobRead(vlob_id, '42', 3),  # Matching blocks
            const({'id': vlob_id, 'blob': new_blob, 'version': 3})),
        (EBlockCreate(to_jsonb64(new_chunk_4)),
            const(new_block_2_id)),
        (EVlobUpdate(vlob_id, '43', 3, new_blob_2),
            noop),
        (EVlobRead(vlob_id, '42', 3),
            const({'id': vlob_id, 'blob': new_blob_2, 'version': 3})),
        (EBlockDelete('5678'),
            conste(BlockNotFound('Block not found.'))),
        (EBlockDelete('6789'),
            noop),
    ]
    ret = perform_sequence(sequence, file.flush())
    assert ret is None
    assert file.dirty is True
    assert file.version == 2
def test_read(self, file):
    file.dirty = False
    file.version = 1
    # Empty file
    vlob_id = '1234'
    chunk_digest = digest(b'')
    blob = [{'blocks': [{'block': '4567', 'digest': chunk_digest, 'size': 0}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000001>')}]
    blob = ejson_dumps(blob).encode()
    blob = to_jsonb64(blob)
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
    ]
    read_content = perform_sequence(sequence, file.read())
    assert read_content == b''
    # Not empty file
    content = b'This is a test content.'
    block_ids = ['4567', '5678', '6789']
    chunk_1 = content[:5]
    chunk_2 = content[5:14]
    chunk_3 = content[14:]
    blob = [{'blocks': [{'block': block_ids[0],
                         'digest': digest(chunk_1),
                         'size': len(chunk_1)},
                        {'block': block_ids[1],
                         'digest': digest(chunk_2),
                         'size': len(chunk_2)}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000001>')},
            {'blocks': [{'block': block_ids[2],
                         'digest': digest(chunk_3),
                         'size': len(chunk_3)}],
             'key': to_jsonb64(b'<dummy-key-00000000000000000002>')}]
    blob = ejson_dumps(blob).encode()
    blob = to_jsonb64(blob)
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
        (EBlockRead(block_ids[0]),
            const({'content': to_jsonb64(chunk_1),
                   'creation_date': '2012-01-01T00:00:00'})),
        (EBlockRead(block_ids[1]),
            const({'content': to_jsonb64(chunk_2),
                   'creation_date': '2012-01-01T00:00:00'})),
        (EBlockRead(block_ids[2]),
            const({'content': to_jsonb64(chunk_3),
                   'creation_date': '2012-01-01T00:00:00'}))
    ]
    read_content = perform_sequence(sequence, file.read())
    assert read_content == content
    # Offset
    offset = 5
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
        (EBlockRead(block_ids[1]),
            const({'content': to_jsonb64(chunk_2),
                   'creation_date': '2012-01-01T00:00:00'})),
        (EBlockRead(block_ids[2]),
            const({'content': to_jsonb64(chunk_3),
                   'creation_date': '2012-01-01T00:00:00'}))
    ]
    read_content = perform_sequence(sequence, file.read(offset=offset))
    assert read_content == content[offset:]
    # Size
    size = 9
    sequence = [
        (EVlobRead(vlob_id, '42', 1),
            const({'id': vlob_id, 'blob': blob, 'version': 1})),
        (EBlockRead(block_ids[1]),
            const({'content': to_jsonb64(chunk_2),
                   'creation_date': '2012-01-01T00:00:00'}))
    ]
    read_content = perform_sequence(sequence, file.read(offset=offset, size=size))
    assert read_content == content[offset:][:size]
    assert file.dirty is False
    assert file.version == 1