def testhcu03hashcodes_missing(self):
    ''' Test the hashcodes_missing function.
    '''
    M1 = self.S
    KS1 = set()
    for _ in range(16):
        data = make_randblock(rand0(8193))
        h = M1.add(data)
        KS1.add(h)
    with MappingStore(
        "M2MappingStore", mapping={}, hashclass=M1.hashclass
    ) as M2:
        KS2 = set()
        # construct M2 as a mix of M1 and random new blocks
        for _ in range(16):
            if randbool():
                data = make_randblock(rand0(8193))
                h = M2.add(data)
                KS2.add(h)
            else:
                M1ks = list(M1.hashcodes())
                if not M1ks:
                    continue
                M1hash = M1ks[rand0(len(M1ks))]
                data = M1[M1hash]
                h = M2.add(data)
                self.assertEqual(h, M1hash)
                self.assertIn(h, M2)
                KS2.add(h)
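# The random-data helpers used throughout these tests (rand0, make_randblock,
# randbool) come from the test support code. The functions below are
# illustrative sketches of the behaviour the tests rely on, inferred from
# usage; they are assumptions for the reader, not the real helpers. They use
# the stdlib `random` module already imported in this test module.
def _example_rand0(maxn):
    ''' Sketch of rand0: a pseudorandom int in the range 0..maxn-1. '''
    return random.randrange(maxn)

def _example_make_randblock(size):
    ''' Sketch of make_randblock: `size` bytes of pseudorandom data. '''
    return bytes(random.randrange(256) for _ in range(size))

def _example_randbool():
    ''' Sketch of randbool: a pseudorandom boolean. '''
    return bool(random.getrandbits(1))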
def testhcu01test_hashcodes_from(self):
    ''' Test the hashcodes_from method.
    '''
    # fill map1 with 16 random data blocks
    M1 = self.S
    hashcodes_added = set()
    for _ in range(16):
        data = make_randblock(rand0(8193))
        h = M1.add(data)
        hashcodes_added.add(h)
    # make a block not in the map
    data2 = make_randblock(rand0(8193))
    hashcode_other = self.S.hash(data2)
    self.assertNotIn(
        hashcode_other, hashcodes_added,
        "abort test: %s in previous blocks" % (hashcode_other,)
    )
    #
    # extract hashes using Store.hashcodes_from, check results
    #
    ks = sorted(hashcodes_added)
    for start_hashcode in [None] + list(hashcodes_added) + [hashcode_other]:
        with self.subTest(
            M1type=type(M1).__name__, start_hashcode=start_hashcode
        ):
            hashcodes_from = list(
                M1.hashcodes_from(start_hashcode=start_hashcode)
            )
            self.assertIsOrdered(hashcodes_from, strict=True)
            if start_hashcode is not None:
                for h in hashcodes_from:
                    self.assertGreaterEqual(
                        h, start_hashcode,
                        "NOT start_hashcode=%s <= h=%s" % (start_hashcode, h)
                    )
                self.assertTrue(
                    all(map(lambda h: h >= start_hashcode, hashcodes_from))
                )
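# `assertIsOrdered` above is a custom assertion supplied by the test mixin,
# not part of unittest. The function below is a minimal standalone sketch of
# the semantics these tests appear to rely on (strict=True additionally
# rejects adjacent equal hashcodes); it is an illustrative assumption, not the
# mixin's implementation.
def _example_assert_is_ordered(testcase, items, strict=False):
    ''' Assert that `items` are in ascending order.
        With `strict`, adjacent equal items are also rejected.
    '''
    for prev, item in zip(items, items[1:]):
        if strict:
            testcase.assertLess(prev, item)
        else:
            testcase.assertLessEqual(prev, item)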
def testhcu00first(self):
    ''' Trivial test adding 2 blocks.
    '''
    M1 = self.S
    KS1 = set()
    # test emptiness
    self.assertLen(M1, 0)
    # add one block
    data = make_randblock(rand0(8193))
    h = M1.add(data)
    self.assertIn(h, M1)
    self.assertEqual(M1[h], data)
    KS1.add(h)
    self.assertIn(h, M1)
    mks = set(M1.keys())
    self.assertIn(h, mks)
    mks = set(M1.hashcodes())
    ##self.assertEqual(set(M1.hashcodes()), KS1)
    if mks != KS1:
        warning(
            "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r",
            KS1 - mks, mks - KS1
        )
    # add another block
    data2 = make_randblock(rand0(8193))
    h2 = M1.add(data2)
    KS1.add(h2)
    mks2 = set(M1.hashcodes())
    ##self.assertEqual(mks2, KS1)
    if mks2 != KS1:
        warning(
            "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r",
            KS1 - mks2, mks2 - KS1
        )
def _make_random_Block(self, block_type=None, leaf_only=False):
    with self.subTest(
        task="_make_random_Block",
        block_type=block_type,
        leaf_only=leaf_only,
    ):
        if block_type is None:
            choices = [
                BlockType.BT_HASHCODE,
                BlockType.BT_RLE,
                BlockType.BT_LITERAL,
            ]
            if not leaf_only:
                choices.append(BlockType.BT_SUBBLOCK)
                choices.append(BlockType.BT_INDIRECT)
            block_type = choice(choices)
        with self.subTest(
            subtask="instantiate",
            block_type=block_type,
        ):
            if block_type == BlockType.BT_INDIRECT:
                subblocks = [
                    self._make_random_Block() for _ in range(rand0(8))
                ]
                B = IndirectBlock.from_subblocks(subblocks, force=True)
            elif block_type == BlockType.BT_HASHCODE:
                rs = next(self.random_chunk_source)
                B = Block(data=rs)
                # we can get a literal block back - this is acceptable
                if B.type == BlockType.BT_LITERAL:
                    block_type = BlockType.BT_LITERAL
            elif block_type == BlockType.BT_RLE:
                rb = bytes((rand0(256),))
                B = RLEBlock(rand0(65535), rb)
            elif block_type == BlockType.BT_LITERAL:
                rs = next(self.random_chunk_source)
                B = LiteralBlock(data=rs)
            elif block_type == BlockType.BT_SUBBLOCK:
                B2 = self._make_random_Block()
                self._verify_block(B2)
                if B2:
                    suboffset = rand0(B2.span)
                    subspan = rand0(B2.span - suboffset)
                else:
                    suboffset = 0
                    subspan = 0
                B = SubBlock(B2, suboffset, subspan)
                # SubBlock returns an empty literal for an empty subblock
                if subspan == 0:
                    block_type = BlockType.BT_LITERAL
            else:
                raise ValueError("unknown block type")
            self.assertEqual(
                B.type, block_type,
                "new Block is wrong type: %r, should be %r" % (
                    B.type, block_type,
                )
            )
            self._verify_block(B)
    return B
def test10IndirectBlock(self):
    ''' Construct various random indirect blocks and test.
    '''
    S = self.S
    with S:
        for _ in range(64):
            with self.subTest(loop=_):
                chunks = []
                subblocks = []
                total_length = 0
                for _ in range(rand0(16)):
                    B = self._make_random_Block()
                    subblocks.append(B)
                    total_length += B.span
                    chunks.append(B.get_spanned_data())
                fullblock = b''.join(chunks)
                IB = IndirectBlock.from_subblocks(
                    subblocks=subblocks, force=True
                )
                self._verify_block(IB, recurse=True)
                IBspan = IB.span
                self.assertEqual(
                    IBspan, total_length,
                    "IBspan(%d) != total_length(%d)" % (IB.span, total_length)
                )
                IBH = IB.superblock.hashcode
                IBdata = IB.get_spanned_data()
                self.assertEqual(len(IBdata), total_length)
                self.assertEqual(IBdata, fullblock)
                # refetch block by hashcode
                IB2 = IndirectBlock.from_hashcode(
                    hashcode=IBH, span=len(IBdata)
                )
                self._verify_block(IB2, recurse=True)
                IB2data = IB2.get_spanned_data()
                self.assertEqual(
                    IBdata, IB2data,
                    "IB: %s\nIB2: %s" % (hexify(IBdata), hexify(IB2data))
                )
                for _ in range(32):
                    with self.subTest(loop2=_):
                        start = rand0(len(IB) + 1)
                        length = (
                            rand0(len(IB) - start + 1)
                            if start < len(IB) else 0
                        )
                        end = start + length
                        with self.subTest(start=start, end=end):
                            chunk1 = IB[start:end]
                            self.assertEqual(len(chunk1), length)
                            chunk1a = fullblock[start:end]
                            self.assertEqual(len(chunk1a), length)
                            self.assertEqual(
                                chunk1, chunk1a,
                                "IB[%d:%d] != fullblock[%d:%d]" %
                                (start, end, start, end)
                            )
                            chunk2 = IB2[start:end]
                            self.assertEqual(len(chunk2), length)
                            self.assertEqual(
                                chunk1, chunk2,
                                "IB[%d:%d] != IB2[%d:%d]" %
                                (start, end, start, end)
                            )
def test_shuffled_randomblocks(self):
    ''' Save RUN_SIZE random blocks, close, retrieve in random order.
    '''
    # save random blocks to a file
    blocks = {}
    with open(self.pathname, 'wb') as f:
        for n in range(RUN_SIZE):
            with self.subTest(put_block_n=n):
                data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                dr = DataRecord(data)
                offset = f.tell()
                blocks[offset] = data
                f.write(bytes(dr))
    # shuffle the block offsets
    offsets = list(blocks.keys())
    random.shuffle(offsets)
    # retrieve the blocks in random order, check for correct content
    with open(self.pathname, 'rb') as f:
        for n, offset in enumerate(offsets):
            with self.subTest(shuffled_offsets_n=n, offset=offset):
                f.seek(offset)
                bfr = CornuCopyBuffer.from_file(f)
                dr = DataRecord.parse(bfr)
                data = dr.data
                self.assertEqual(data, blocks[offset])
def test_shuffled_randomblocks(self):
    ''' Save RUN_SIZE random blocks, close, retrieve in random order.
    '''
    for cls in RawBackingFile, CompressibleBackingFile:
        for _, hashclass in sorted(HASHCLASS_BY_NAME.items()):
            with self.subTest(cls=cls, hashclass=hashclass):
                with NamedTemporaryFile(
                    dir='.', prefix=cls.__name__ + '-'
                ) as T:
                    blocks = {}
                    index = BinaryHashCodeIndex(
                        hashclass=hashclass,
                        binary_index={},
                        index_entry_class=BackingFileIndexEntry
                    )
                    total_length = 0
                    # open and save data
                    with cls(T.name, hashclass=hashclass, index=index) as bf:
                        for _ in range(RUN_SIZE):
                            data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                            h = bf.add(data)
                            blocks[h] = data
                            total_length += len(data)
                    # reopen and retrieve
                    with cls(T.name, hashclass=hashclass, index=index) as bf:
                        # retrieve in random order
                        hashcodes = list(blocks.keys())
                        random.shuffle(hashcodes)
                        for h in hashcodes:
                            data = bf[h]
                            self.assertEqual(data, blocks[h])
def test_shuffled_randomblocks_vtd(self):
    ''' Like test_shuffled_randomblocks but using a .vtd file and binary
        index file: save RUN_SIZE random blocks, close, retrieve in
        random order.
    '''
    for _, hashclass in sorted(HASHCLASS_BY_NAME.items()):
        with self.subTest(hashclass=hashclass):
            with TemporaryDirectory(dir='.') as TDname:
                with NamedTemporaryFile(
                    dir=TDname, prefix='VTDStore-', suffix='.vtd'
                ) as T:
                    blocks = {}
                    total_length = 0
                    # open and save data
                    with VTDStore(T.name, T.name, hashclass=hashclass) as S:
                        for _ in range(RUN_SIZE):
                            data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                            h = S.add(data)
                            blocks[h] = data
                            total_length += len(data)
                    # reopen and retrieve
                    with VTDStore(T.name, T.name, hashclass=hashclass) as S:
                        # retrieve in random order
                        hashcodes = list(blocks.keys())
                        random.shuffle(hashcodes)
                        for h in hashcodes:
                            data = S[h]
                            self.assertEqual(data, blocks[h])
def test04random_mixed(self):
    ''' Fill both maps with some overlap.
    '''
    ks1 = set()
    ks2 = set()
    for n in range(32):
        data = make_randblock(rand0(8193))
        choice = randint(0, 2)
        if choice <= 1:
            h1 = self.map1.add(data)
            ks1.add(h1)
        if choice >= 1:
            h2 = self.map2.add(data)
            ks2.add(h2)
    for window_size in 1, 7, 16, 23, 32, 1024:
        with self.subTest(window_size=window_size):
            # items in map1 not in map2
            missing = set(
                self.miss_generator(
                    self.map2, self.map1, window_size=window_size
                )
            )
            self.assertEqual(missing, ks1 - ks2)
            # items in map2 not in map1
            missing = set(
                self.miss_generator(
                    self.map1, self.map2, window_size=window_size
                )
            )
            self.assertEqual(missing, ks2 - ks1)
def test02full_duplex_random_payloads(self):
    ''' Throw 16 packets up, collect responses after requests queued.
    '''
    rqs = []
    for _ in range(16):
        size = rand0(16385)
        data = make_randblock(size)
        flags = rand0(65537)
        R = self.local_conn.request(0, flags, data, self._decode_response, 0)
        rqs.append((R, flags, data))
    random.shuffle(rqs)
    for rq in rqs:
        R, flags, data = rq
        ok, flags, payload = R()
        self.assertTrue(ok, "response status not ok")
        self.assertEqual(flags, 0x11)
        self.assertEqual(payload, bytes(reversed(data)))
def test02random1only(self):
    ''' Fill map1 with random blocks, nothing in map2.
    '''
    for n in range(32):
        data = make_randblock(rand0(8193))
        h1 = self.map1.add(data)
    missing = list(self.miss_generator(self.map1, self.map2))
    self.assertEqual(len(missing), 0)
def test03random2only(self):
    ''' Fill map2 with random blocks, nothing in map1.
    '''
    ks2 = set()
    for n in range(32):
        data = make_randblock(rand0(8193))
        h2 = self.map2.add(data)
        ks2.add(h2)
    missing = list(self.miss_generator(self.map1, self.map2))
    self.assertEqual(len(missing), len(ks2))
def test01random_identical(self):
    ''' Fill map1 and map2 with the same random blocks.
    '''
    for _ in range(32):
        data = make_randblock(rand0(8193))
        h1 = self.map1.add(data)
        h2 = self.map2.add(data)
        self.assertEqual(h1, h2)
    missing = list(self.miss_generator(self.map1, self.map2))
    self.assertEqual(len(missing), 0)
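# `self.miss_generator` in the tests above is a fixture chosen by each test
# class. The contract exercised here is: yield the hashcodes known to the
# second Store but absent from the first, scanning in batches of
# `window_size`. The generator below is an illustrative equivalent built only
# on the mapping interface used in these tests; it sketches the expected
# semantics and is an assumption, not the fixture itself.
def _example_missing_hashcodes(store1, store2, window_size=64):
    ''' Yield hashcodes present in `store2` but not in `store1`,
        working through `store2` in windows of `window_size` hashcodes.
    '''
    window = []
    for hashcode in sorted(store2.hashcodes()):
        window.append(hashcode)
        if len(window) >= window_size:
            for h in window:
                if h not in store1:
                    yield h
            window = []
    for h in window:
        if h not in store1:
            yield h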
def test02RoundTripSingleBlock(self):
    ''' Generate various block types, serialise then deserialise each.
    '''
    S = self.S
    with S:
        for block_type in (
            BlockType.BT_HASHCODE,
            BlockType.BT_RLE,
            BlockType.BT_LITERAL,
            BlockType.BT_SUBBLOCK,
            BlockType.BT_INDIRECT,
        ):
            size = rand0(16385)
            with self.subTest(type=block_type, size=size):
                B = self._make_random_Block(block_type=block_type)
                Bserial = B.encode()
                BR2, offset = BlockRecord.parse_bytes(Bserial)
                B2 = BR2.block
                self.assertEqual(
                    offset, len(Bserial),
                    "decoded %d bytes but len(Bserial)=%d" %
                    (offset, len(Bserial))
                )
                self._verify_block(B2)
                if block_type != BlockType.BT_INDIRECT:
                    self.assertEqual(B.type, B2.type, "block types differ")
                    self.assertEqual(
                        B.indirect, B2.indirect, "block indirects differ"
                    )
                    self.assertEqual(B.span, B2.span, "span lengths differ")
                    self.assertEqual(
                        B.get_spanned_data(), B2.get_spanned_data(),
                        "spanned data differ"
                    )
                Btype = B2.type
                if Btype == BlockType.BT_INDIRECT:
                    self.assertTrue(B.indirect)
                    self._verify_block(B2.superblock)
                else:
                    self.assertFalse(B.indirect)
                    self.assertEqual(B.span, sum(map(len, B)))
                    if Btype == BlockType.BT_HASHCODE:
                        self.assertEqual(B.hashcode, B2.hashcode)
                    elif Btype == BlockType.BT_RLE:
                        self.assertEqual(
                            B2.get_spanned_data(), B2.octet * B2.span
                        )
                    elif Btype == BlockType.BT_LITERAL:
                        raise unittest.SkipTest(
                            "no specific test for LiteralBlock"
                        )
                    elif Btype == BlockType.BT_SUBBLOCK:
                        self._verify_block(B2.superblock)
                    else:
                        raise unittest.SkipTest(
                            "no type specific tests for Block type %r" %
                            (block_type,)
                        )
def testhcu02hashcodes(self):
    ''' Various tests.
    '''
    M1 = self.S
    KS1 = set()
    # add 16 random blocks to the map with some sanity checks along the way
    for n in range(16):
        data = make_randblock(rand0(8193))
        h = M1.add(data)
        self.assertIn(h, M1)
        self.assertNotIn(h, KS1)
        KS1.add(h)
        sleep(0.1)
        ##self.assertLen(M1, n + 1)
        ##self.assertEqual(len(KS1), n + 1)
        ##self.assertEqual(set(iter(M1)), KS1)
        ##self.assertEqual(set(M1.hashcodes()), KS1)
    # asking for 0 hashcodes is forbidden
    with self.assertRaises(ValueError):
        # NB: using list() to iterate over the generator, thus executing .hashcodes
        hs = list(M1.hashcodes(length=0))
    # fetch the leading n hashcodes from the map, with and without `after`
    for after in False, True:
        with self.subTest(after=after):
            for n in range(1, 16):
                if after:
                    start_hashcode = None
                    for mincode in accumulate(iter(M1), min):
                        start_hashcode = mincode
                    if start_hashcode is None:
                        # no start_hashcode, skip when after is true
                        continue
                else:
                    start_hashcode = None
                hs = list(
                    M1.hashcodes(
                        start_hashcode=start_hashcode, after=after, length=n
                    )
                )
                self.assertIsOrdered(hs, False)
                hn = min(n, 15 if after else 16)
                self.assertEqual(len(hs), hn)
    # traverse the map in various sized steps, including random
    sorted_keys = sorted(KS1)
    for step_size in 1, 2, 3, 7, 8, 15, 16, None:
        with self.subTest(step_size=step_size):
            start_hashcode = None
            keys_offset = 0
            seen = set()
            while keys_offset < len(sorted_keys):
                if step_size is None:
                    n = random.randint(1, 7)
                else:
                    n = step_size
                with self.subTest(
                    start_hashcode=start_hashcode,
                    keys_offset=keys_offset,
                    n=n,
                ):
                    after = start_hashcode is not None
                    hs = list(
                        M1.hashcodes(
                            start_hashcode=start_hashcode,
                            length=n,
                            after=after
                        )
                    )
                    # verify that no key has been seen before
                    for h in hs:
                        self.assertNotIn(h, seen)
                    # verify ordering of returned list
                    self.assertIsOrdered(hs, strict=True)
                    # verify that least key is > start_hashcode
                    if start_hashcode is not None:
                        self.assertLess(start_hashcode, hs[0])
                    hn = min(len(sorted_keys) - keys_offset, n)
                    self.assertEqual(len(hs), hn)
                    # verify returned keys against master list
                    for i in range(hn):
                        self.assertEqual(sorted_keys[keys_offset + i], hs[i])
                    # note these keys, advance
                    seen.update(hs)
                    keys_offset += hn
                    start_hashcode = hs[-1]
            # verify that all keys have been retrieved
            self.assertEqual(sorted_keys, sorted(seen))