def testhcu00first(self):
  ''' Trivial test adding 2 blocks. '''
  store = self.S
  expected = set()
  # a fresh Store holds nothing
  self.assertLen(store, 0)
  # store one random block and verify it round-trips
  block1 = make_randblock(rand0(8193))
  hash1 = store.add(block1)
  self.assertIn(hash1, store)
  self.assertEqual(store[hash1], block1)
  expected.add(hash1)
  self.assertIn(hash1, store)
  found = set(store.keys())
  self.assertIn(hash1, found)
  found = set(store.hashcodes())
  ##self.assertEqual(set(M1.hashcodes()), KS1)
  if found != expected:
    warning(
        "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r",
        expected - found, found - expected
    )
  # store a second random block and recheck the hashcode set
  block2 = make_randblock(rand0(8193))
  expected.add(store.add(block2))
  found2 = set(store.hashcodes())
  ##self.assertEqual(mks2, KS1)
  if found2 != expected:
    warning(
        "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r",
        expected - found2, found2 - expected
    )
def testhcu03hashcodes_missing(self):
  ''' Test the hashcodes_missing function. '''
  store1 = self.S
  keys1 = set()
  # seed store1 with 16 random blocks
  for _ in range(16):
    chunk = make_randblock(rand0(8193))
    keys1.add(store1.add(chunk))
  with MappingStore("M2MappingStore", mapping={},
                    hashclass=store1.hashclass) as store2:
    keys2 = set()
    # construct store2 as a mix of store1's blocks and fresh random ones
    for _ in range(16):
      if randbool():
        # brand new random block
        chunk = make_randblock(rand0(8193))
        keys2.add(store2.add(chunk))
      else:
        # copy a block already present in store1, if any
        known = list(store1.hashcodes())
        if not known:
          continue
        src_hash = known[rand0(len(known))]
        chunk = store1[src_hash]
        copied = store2.add(chunk)
        # same content must yield the same hashcode
        self.assertEqual(copied, src_hash)
        self.assertIn(copied, store2)
        keys2.add(copied)
def testhcu01test_hashcodes_from(self):
  ''' Test the hashcodes_from method. '''
  # fill map1 with 16 random data blocks
  M1 = self.S
  hashcodes_added = set()
  for _ in range(16):
    data = make_randblock(rand0(8193))
    h = M1.add(data)
    hashcodes_added.add(h)
  # make a block not in the map
  data2 = make_randblock(rand0(8193))
  hashcode_other = self.S.hash(data2)
  self.assertNotIn(
      hashcode_other, hashcodes_added,
      "abort test: %s in previous blocks" % (hashcode_other,)
  )
  # extract hashes using Store.hashcodes_from, check results:
  # try no start point, every known hashcode, and an absent hashcode
  for start_hashcode in [None] + list(hashcodes_added) + [hashcode_other]:
    with self.subTest(M1type=type(M1).__name__,
                      start_hashcode=start_hashcode):
      hashcodes_from = list(M1.hashcodes_from(start_hashcode=start_hashcode))
      # results must be strictly increasing
      self.assertIsOrdered(hashcodes_from, strict=True)
      if start_hashcode is not None:
        # every returned hashcode must be >= the requested start point
        # (fixed message typo; dropped the redundant assertTrue(all(...))
        # which repeated this same check with no diagnostics)
        for h in hashcodes_from:
          self.assertGreaterEqual(
              h, start_hashcode,
              "NOT start_hashcode=%s <= h=%s" % (start_hashcode, h)
          )
def test_shuffled_randomblocks(self):
  ''' Save RUN_SIZE random blocks, close, retrieve in random order. '''
  # save random blocks to a file, remembering each block by its offset
  blocks = {}
  with open(self.pathname, 'wb') as f:
    for n in range(RUN_SIZE):
      with self.subTest(put_block_n=n):
        data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
        dr = DataRecord(data)
        offset = f.tell()
        blocks[offset] = data
        f.write(bytes(dr))
  # shuffle the block offsets
  offsets = list(blocks.keys())
  random.shuffle(offsets)
  # retrieve the blocks in random order, check for correct content
  with open(self.pathname, 'rb') as f:
    for n, offset in enumerate(offsets):
      with self.subTest(shuffled_offsets_n=n, offset=offset):
        f.seek(offset)
        bfr = CornuCopyBuffer.from_file(f)
        dr = DataRecord.parse(bfr)
        data = dr.data
        # assertEqual (not assertTrue(data == ...)) so a mismatch
        # reports both values instead of a bare "False is not true"
        self.assertEqual(data, blocks[offset])
def test_shuffled_randomblocks(self):
  ''' Save RUN_SIZE random blocks, close, retrieve in random order. '''
  for cls in RawBackingFile, CompressibleBackingFile:
    for _, hashclass in sorted(HASHCLASS_BY_NAME.items()):
      with self.subTest(cls=cls, hashclass=hashclass):
        with NamedTemporaryFile(dir='.', prefix=cls.__name__ + '-') as T:
          by_hash = {}
          index = BinaryHashCodeIndex(
              hashclass=hashclass,
              binary_index={},
              index_entry_class=BackingFileIndexEntry
          )
          total_length = 0
          # first pass: open the backing file and save RUN_SIZE blocks
          with cls(T.name, hashclass=hashclass, index=index) as bf:
            for _ in range(RUN_SIZE):
              chunk = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
              by_hash[bf.add(chunk)] = chunk
              total_length += len(chunk)
          # second pass: reopen and fetch every block in random order
          with cls(T.name, hashclass=hashclass, index=index) as bf:
            shuffled = list(by_hash)
            random.shuffle(shuffled)
            for h in shuffled:
              self.assertEqual(bf[h], by_hash[h])
def test_shuffled_randomblocks_vtd(self):
  ''' Like test_shuffled_randomblocks but using a .vtd file and binary
      index file: save RUN_SIZE random blocks, close, retrieve in
      random order.
  '''
  for _, hashclass in sorted(HASHCLASS_BY_NAME.items()):
    with self.subTest(hashclass=hashclass):
      with TemporaryDirectory(dir='.') as TDname:
        with NamedTemporaryFile(dir=TDname, prefix='VTDStore-',
                                suffix='.vtd') as T:
          saved = {}
          total_length = 0
          # first pass: open the .vtd Store and save RUN_SIZE blocks
          with VTDStore(T.name, T.name, hashclass=hashclass) as S:
            for _ in range(RUN_SIZE):
              chunk = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
              saved[S.add(chunk)] = chunk
              total_length += len(chunk)
          # second pass: reopen and fetch every block in random order
          with VTDStore(T.name, T.name, hashclass=hashclass) as S:
            shuffled = list(saved)
            random.shuffle(shuffled)
            for h in shuffled:
              self.assertEqual(S[h], saved[h])
def test04random_mixed(self):
  ''' Fill both maps with some overlap. '''
  ks1 = set()
  ks2 = set()
  for _ in range(32):
    data = make_randblock(rand0(8193))
    choice = randint(0, 2)
    # choice 0 -> map1 only, 1 -> both maps, 2 -> map2 only
    if choice <= 1:
      ks1.add(self.map1.add(data))
    if choice >= 1:
      ks2.add(self.map2.add(data))
  for window_size in 1, 7, 16, 23, 32, 1024:
    with self.subTest(window_size=window_size):
      # hashes present in map1 but absent from map2
      only_in_1 = set(
          self.miss_generator(self.map2, self.map1, window_size=window_size)
      )
      self.assertEqual(only_in_1, ks1 - ks2)
      # hashes present in map2 but absent from map1
      only_in_2 = set(
          self.miss_generator(self.map1, self.map2, window_size=window_size)
      )
      self.assertEqual(only_in_2, ks2 - ks1)
def test02random1only(self):
  ''' Fill map1 with random blocks, nothing in map2. '''
  # populate map1 only; neither the loop index nor the returned
  # hashcodes were used, so the unused locals are dropped
  for _ in range(32):
    self.map1.add(make_randblock(rand0(8193)))
  # map2 is empty, so nothing can be reported missing from map1
  missing = list(self.miss_generator(self.map1, self.map2))
  self.assertEqual(len(missing), 0)
def test03random2only(self):
  ''' Fill map2 with random blocks, nothing in map1. '''
  ks2 = set()
  for _ in range(32):
    block = make_randblock(rand0(8193))
    ks2.add(self.map2.add(block))
  # every hashcode in map2 should be reported missing from map1
  missing = list(self.miss_generator(self.map1, self.map2))
  self.assertEqual(len(missing), len(ks2))
def test01random_identical(self):
  ''' Fill map1 and map2 with identical some random blocks. '''
  for _ in range(32):
    block = make_randblock(rand0(8193))
    # adding the same data to both maps must yield the same hashcode
    self.assertEqual(self.map1.add(block), self.map2.add(block))
  # identical content: nothing is missing from either side
  missing = list(self.miss_generator(self.map1, self.map2))
  self.assertEqual(len(missing), 0)
def test02add_get(self):
  ''' Add random chunks, get them back. '''
  S = self.S
  self.assertLen(S, 0)
  stored = {}
  for _ in range(16):
    chunk = make_randblock(random.randint(127, 16384))
    precomputed = S.hash(chunk)
    # add() must return the same hashcode hash() computes
    self.assertEqual(precomputed, S.add(chunk))
    stored[precomputed] = chunk
  # every stored chunk must come back intact
  for hashcode, chunk in stored.items():
    fetched = S.get(hashcode)
    self.assertIsNot(fetched, None)
    self.assertEqual(fetched, chunk)
def test01add_new_block(self):
  ''' Add a block and check that it worked. '''
  S = self.S
  # compute the block's hashcode without storing the data
  data = make_randblock(random.randint(127, 16384))
  h = S.hash(data)
  # the Store must not claim to hold the block yet
  self.assertFalse(S.contains(h))
  self.assertNotIn(h, S)
  # now store the block and re-test membership
  self.assertEqual(h, S.add(data))
  self.assertTrue(S.contains(h))
  self.assertIn(h, S)
def test02full_duplex_random_payloads(self):
  ''' Throw 16 packets up, collect responses after requests queued. '''
  pending = []
  # queue all 16 requests before collecting any response
  for _ in range(16):
    payload = make_randblock(rand0(16385))
    flags = rand0(65537)
    R = self.local_conn.request(0, flags, payload, self._decode_response, 0)
    pending.append((R, flags, payload))
  # collect the responses in a random order
  random.shuffle(pending)
  for R, flags, payload in pending:
    ok, flags, result = R()
    self.assertTrue(ok, "response status not ok")
    self.assertEqual(flags, 0x11)
    # the service echoes the payload reversed
    self.assertEqual(result, bytes(reversed(payload)))
def testhcu02hashcodes(self):
  ''' Various tests. '''
  # Exercises Store.hashcodes(): the length limit, the `after` flag,
  # start_hashcode based pagination, and a full traversal of the Store
  # in fixed and random sized steps.
  M1 = self.S
  KS1 = set()
  # add 16 random blocks to the map with some sanity checks along the way
  for n in range(16):
    data = make_randblock(rand0(8193))
    h = M1.add(data)
    self.assertIn(h, M1)
    self.assertNotIn(h, KS1)
    KS1.add(h)
    sleep(0.1)
    ##self.assertLen(M1, n + 1)
    ##self.assertEqual(len(KS1), n + 1)
    ##self.assertEqual(set(iter(M1)), KS1)
    ##self.assertEqual(set(M1.hashcodes()), KS1)
  # asking for 0 hashcodes is forbidden
  with self.assertRaises(ValueError):
    # NB: using list() to iterate over the generator, thus executing .hashcodes
    hs = list(M1.hashcodes(length=0))
  # fetch the leading n hashcodes from the map, with and without `after`
  for after in False, True:
    with self.subTest(after=after):
      for n in range(1, 16):
        if after:
          # `after` needs a start_hashcode: use the smallest hashcode
          # in the Store (final value of the running minimum)
          start_hashcode = None
          for mincode in accumulate(iter(M1), min):
            start_hashcode = mincode
          if start_hashcode is None:
            # no start_hashcode, skip when after is true
            continue
        else:
          start_hashcode = None
        hs = list(
            M1.hashcodes(
                start_hashcode=start_hashcode, after=after, length=n
            )
        )
        self.assertIsOrdered(hs, False)
        # with after=True the minimum hashcode itself is excluded,
        # so at most 15 of the 16 keys can be returned
        hn = min(n, 15 if after else 16)
        self.assertEqual(len(hs), hn)
  # traverse the map in various sized steps, including random
  sorted_keys = sorted(KS1)
  for step_size in 1, 2, 3, 7, 8, 15, 16, None:
    with self.subTest(step_size=step_size):
      start_hashcode = None
      keys_offset = 0
      seen = set()
      while keys_offset < len(sorted_keys):
        if step_size is None:
          # step_size None means: use a fresh random step each page
          n = random.randint(1, 7)
        else:
          n = step_size
        with self.subTest(
            start_hashcode=start_hashcode,
            keys_offset=keys_offset,
            n=n,
        ):
          # after the first page, resume strictly after the last key seen
          after = start_hashcode is not None
          hs = list(
              M1.hashcodes(
                  start_hashcode=start_hashcode, length=n, after=after
              )
          )
          # verify that no key has been seen before
          for h in hs:
            self.assertNotIn(h, seen)
          # verify ordering of returned list
          self.assertIsOrdered(hs, strict=True)
          # verify that least key is > start_hashcode
          if start_hashcode is not None:
            self.assertLess(start_hashcode, hs[0])
          hn = min(len(sorted_keys) - keys_offset, n)
          self.assertEqual(len(hs), hn)
          # verify returned keys against master list
          for i in range(hn):
            self.assertEqual(sorted_keys[keys_offset + i], hs[i])
          # note these keys, advance
          seen.update(hs)
          keys_offset += hn
          start_hashcode = hs[-1]
      # verify that all keys have been retrieved
      self.assertEqual(sorted_keys, sorted(seen))