Exemple #1
0
 def testhcu03hashcodes_missing(self):
   ''' Populate two stores ready for a hashcodes_missing comparison.

       `self.S` receives 16 random blocks. A fresh `MappingStore`
       with the same hashclass then receives up to 16 additions,
       each either a new random block or a copy of a block already
       held by `self.S`; a copied block must produce the same
       hashcode in both stores.

       NOTE(review): nothing in this body calls hashcodes_missing
       and the collected hashcode sets are not compared -
       the comparison step appears to be absent; confirm.
   '''
   store1 = self.S
   hashcodes1 = set()
   for _ in range(16):
     hashcodes1.add(store1.add(make_randblock(rand0(8193))))
   with MappingStore("M2MappingStore", mapping={},
                     hashclass=store1.hashclass) as store2:
     hashcodes2 = set()
     for _ in range(16):
       if randbool():
         # a brand new random block, known only to store2
         hashcodes2.add(store2.add(make_randblock(rand0(8193))))
         continue
       # otherwise copy a randomly chosen block across from store1
       known = list(store1.hashcodes())
       if not known:
         continue
       picked = known[rand0(len(known))]
       copied = store2.add(store1[picked])
       self.assertEqual(copied, picked)
       self.assertIn(copied, store2)
       hashcodes2.add(copied)
Exemple #2
0
 def testhcu01test_hashcodes_from(self):
   ''' Test the hashcodes_from method.

       Adds 16 random blocks to `self.S`, then calls
       `hashcodes_from` starting from `None`, from each added
       hashcode and from the hashcode of a block which was never
       added. Every result must be strictly ordered and, when a
       start hashcode is supplied, must contain nothing below it.
   '''
   # fill the map with 16 random data blocks
   M1 = self.S
   hashcodes_added = set()
   for _ in range(16):
     data = make_randblock(rand0(8193))
     h = M1.add(data)
     hashcodes_added.add(h)
   # make a block not in the map
   data2 = make_randblock(rand0(8193))
   hashcode_other = self.S.hash(data2)
   self.assertNotIn(
       hashcode_other, hashcodes_added,
       "abort test: %s in previous blocks" % (hashcode_other,)
   )
   #
   # extract hashes using Store.hashcodes_from, check results
   #
   # visit the start points in hashcode order rather than in
   # arbitrary set order so that the subtests run in a stable sequence
   ks = sorted(hashcodes_added)
   for start_hashcode in [None] + ks + [hashcode_other]:
     with self.subTest(M1type=type(M1).__name__,
                       start_hashcode=start_hashcode):
       hashcodes_from = list(M1.hashcodes_from(start_hashcode=start_hashcode))
       self.assertIsOrdered(hashcodes_from, strict=True)
       if start_hashcode is not None:
         # nothing returned may sort below the requested start point
         for h in hashcodes_from:
           self.assertGreaterEqual(
               h, start_hashcode,
               "NOT start_hashcode=%s <= h=%s" % (start_hashcode, h)
           )
Exemple #3
0
 def testhcu00first(self):
   ''' Trivial test adding 2 blocks.

       Checks that `self.S` starts empty, that a first block can be
       added, found and read back, and that its hashcode is listed
       by `keys()`. After each of the two additions the set from
       `hashcodes()` is compared with the hashcodes added so far;
       a difference is reported via `warning` and does not fail
       the test.
   '''
   store = self.S
   expected = set()
   # the store starts out empty
   self.assertLen(store, 0)
   # first block: must be present and read back unchanged
   block1 = make_randblock(rand0(8193))
   hashcode1 = store.add(block1)
   self.assertIn(hashcode1, store)
   self.assertEqual(store[hashcode1], block1)
   expected.add(hashcode1)
   self.assertIn(hashcode1, store)
   self.assertIn(hashcode1, set(store.keys()))
   listed = set(store.hashcodes())
   if listed != expected:
     # only logged: the strict equality assertion is not applied here
     warning(
         "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r",
         expected - listed, listed - expected
     )
   # second block
   block2 = make_randblock(rand0(8193))
   expected.add(store.add(block2))
   listed2 = set(store.hashcodes())
   if listed2 != expected:
     warning(
         "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r",
         expected - listed2, listed2 - expected
     )
Exemple #4
0
 def _make_random_Block(self, block_type=None, leaf_only=False):
     ''' Construct, verify and return a random Block.

         Parameters:
         * `block_type`: the `BlockType` to construct;
           if `None` (the default) a type is picked at random.
         * `leaf_only`: if true, the random pick leaves out
           `BT_SUBBLOCK` and `BT_INDIRECT`.
           It has no effect when `block_type` is supplied
           and is not passed on to the recursive calls.

         The new Block's type is asserted to match the requested
         type, allowing for the two substitutions noted in the body,
         and the Block is passed to `self._verify_block`.

         An unrecognised `block_type` raises `ValueError` from
         within the subTest context.
     '''
     with self.subTest(task="_make_random_Block",
                       block_type=block_type,
                       leaf_only=leaf_only):
         if block_type is None:
             choices = [
                 BlockType.BT_HASHCODE,
                 BlockType.BT_RLE,
                 BlockType.BT_LITERAL,
             ]
             if not leaf_only:
                 choices.append(BlockType.BT_SUBBLOCK)
                 choices.append(BlockType.BT_INDIRECT)
             block_type = choice(choices)
         with self.subTest(
                 subtask="instantiate",
                 block_type=block_type,
         ):
             if block_type == BlockType.BT_INDIRECT:
                 # between 0 and 7 random subblocks of any type
                 subblocks = [
                     self._make_random_Block() for _ in range(rand0(8))
                 ]
                 B = IndirectBlock.from_subblocks(subblocks, force=True)
             elif block_type == BlockType.BT_HASHCODE:
                 rs = next(self.random_chunk_source)
                 B = Block(data=rs)
                 # we can get a literal block back - this is acceptable
                 if B.type == BlockType.BT_LITERAL:
                     block_type = BlockType.BT_LITERAL
             elif block_type == BlockType.BT_RLE:
                 # a single random octet with a random repeat count
                 rb = bytes((rand0(256), ))
                 B = RLEBlock(rand0(65535), rb)
             elif block_type == BlockType.BT_LITERAL:
                 rs = next(self.random_chunk_source)
                 B = LiteralBlock(data=rs)
             elif block_type == BlockType.BT_SUBBLOCK:
                 # a random window onto another random Block
                 B2 = self._make_random_Block()
                 self._verify_block(B2)
                 if B2:
                     suboffset = rand0(B2.span)
                     subspan = rand0(B2.span - suboffset)
                 else:
                     suboffset = 0
                     subspan = 0
                 B = SubBlock(B2, suboffset, subspan)
                 # SubBlock returns an empty literal for an empty subblock
                 if subspan == 0:
                     block_type = BlockType.BT_LITERAL
             else:
                 # name the offending value so the failure is diagnosable
                 raise ValueError("unknown block type: %r" % (block_type, ))
             self.assertEqual(
                 B.type, block_type,
                 "new Block is wrong type: %r, should be %r" % (
                     B.type,
                     block_type,
                 ))
             self._verify_block(B)
     return B
Exemple #5
0
 def test10IndirectBlock(self):
     ''' Construct various random indirect blocks and test.

         Each of 64 rounds builds an IndirectBlock from up to 15
         random subblocks, checks its span and data against the
         concatenation of the subblock data, refetches it by the
         hashcode of its superblock, and then compares 32 random
         slices of both blocks with the same slice of the
         concatenated data.
     '''
     with self.S:
         for outer_n in range(64):
             with self.subTest(loop=outer_n):
                 subblocks = []
                 pieces = []
                 expected_span = 0
                 for _ in range(rand0(16)):
                     subblock = self._make_random_Block()
                     subblocks.append(subblock)
                     expected_span += subblock.span
                     pieces.append(subblock.get_spanned_data())
                 expected_data = b''.join(pieces)
                 IB = IndirectBlock.from_subblocks(
                     subblocks=subblocks, force=True)
                 self._verify_block(IB, recurse=True)
                 IBspan = IB.span
                 self.assertEqual(
                     IBspan, expected_span,
                     "IBspan(%d) != total_length(%d)" %
                     (IB.span, expected_span))
                 superblock_hashcode = IB.superblock.hashcode
                 IBdata = IB.get_spanned_data()
                 self.assertEqual(len(IBdata), expected_span)
                 self.assertEqual(IBdata, expected_data)
                 # build a second block from the hashcode alone
                 IB2 = IndirectBlock.from_hashcode(
                     hashcode=superblock_hashcode, span=len(IBdata))
                 self._verify_block(IB2, recurse=True)
                 IB2data = IB2.get_spanned_data()
                 self.assertEqual(
                     IBdata, IB2data,
                     "IB:  %s\nIB2: %s" % (hexify(IBdata), hexify(IB2data)))
                 # random slices must agree across all three sources
                 for inner_n in range(32):
                     with self.subTest(loop2=inner_n):
                         start = rand0(len(IB) + 1)
                         if start < len(IB):
                             length = rand0(len(IB) - start + 1)
                         else:
                             length = 0
                         end = start + length
                         with self.subTest(start=start, end=end):
                             from_IB = IB[start:end]
                             self.assertEqual(len(from_IB), length)
                             from_data = expected_data[start:end]
                             self.assertEqual(len(from_data), length)
                             self.assertEqual(
                                 from_IB, from_data,
                                 "IB[%d:%d] != fullblock[%d:%d]" %
                                 (start, end, start, end))
                             from_IB2 = IB2[start:end]
                             self.assertEqual(len(from_IB2), length)
                             self.assertEqual(
                                 from_IB, from_IB2,
                                 "IB[%d:%d] != IB2[%d:%d]" %
                                 (start, end, start, end))
 def test_shuffled_randomblocks(self):
     ''' Save RUN_SIZE random blocks, close, retrieve in random order.

         Each block is written to `self.pathname` as a `DataRecord`
         and its file offset remembered. The file is then reopened
         and the records are parsed back in shuffled offset order,
         each being compared with the data originally written there.
     '''
     # save random blocks to a file, keyed by the offset of each record
     blocks = {}
     with open(self.pathname, 'wb') as f:
         for n in range(RUN_SIZE):
             with self.subTest(put_block_n=n):
                 data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                 dr = DataRecord(data)
                 offset = f.tell()
                 blocks[offset] = data
                 f.write(bytes(dr))
     # shuffle the block offsets
     offsets = list(blocks)
     random.shuffle(offsets)
     # retrieve the blocks in random order, check for correct content
     with open(self.pathname, 'rb') as f:
         for n, offset in enumerate(offsets):
             with self.subTest(shuffled_offsets_n=n, offset=offset):
                 f.seek(offset)
                 bfr = CornuCopyBuffer.from_file(f)
                 dr = DataRecord.parse(bfr)
                 # assertEqual, as in the sibling tests, so that a
                 # mismatch reports the differing values
                 self.assertEqual(dr.data, blocks[offset])
Exemple #7
0
 def test_shuffled_randomblocks(self):
     ''' Save RUN_SIZE random blocks, close, retrieve in random order.

         Runs once per combination of backing file class and
         hashclass from HASHCLASS_BY_NAME, each against its own
         temporary file in the current directory. The same index
         object is used for the saving and the retrieving sessions.
     '''
     for cls in RawBackingFile, CompressibleBackingFile:
         for _name, hashclass in sorted(HASHCLASS_BY_NAME.items()):
             with self.subTest(cls=cls, hashclass=hashclass), \
                  NamedTemporaryFile(dir='.',
                                     prefix=cls.__name__ + '-') as tmpf:
                 saved = {}
                 index = BinaryHashCodeIndex(
                     hashclass=hashclass,
                     binary_index={},
                     index_entry_class=BackingFileIndexEntry)
                 # running byte count; tallied but not checked below
                 saved_length = 0
                 # first session: store the blocks
                 with cls(tmpf.name, hashclass=hashclass,
                          index=index) as bf:
                     for _ in range(RUN_SIZE):
                         block = make_randblock(
                             rand0(MAX_BLOCK_SIZE + 1))
                         saved[bf.add(block)] = block
                         saved_length += len(block)
                 # second session: fetch everything back, shuffled
                 with cls(tmpf.name, hashclass=hashclass,
                          index=index) as bf:
                     order = list(saved.keys())
                     random.shuffle(order)
                     for h in order:
                         self.assertEqual(bf[h], saved[h])
Exemple #8
0
 def test_shuffled_randomblocks_vtd(self):
     ''' Like test_shuffled_randomblocks but using a .vtd file and binary index file:
         save RUN_SIZE random blocks, close, retrieve in random order.

         Runs once per hashclass from HASHCLASS_BY_NAME, each with
         a temporary `.vtd` file inside its own temporary directory.
         The file's pathname is passed as both of the VTDStore's
         positional arguments.
     '''
     for _name, hashclass in sorted(HASHCLASS_BY_NAME.items()):
         with self.subTest(hashclass=hashclass), \
              TemporaryDirectory(dir='.') as tmpdirpath, \
              NamedTemporaryFile(dir=tmpdirpath,
                                 prefix='VTDStore-',
                                 suffix='.vtd') as tmpf:
             vtdpath = tmpf.name
             saved = {}
             # running byte count; tallied but not checked below
             saved_length = 0
             # first session: store the blocks
             with VTDStore(vtdpath, vtdpath, hashclass=hashclass) as S:
                 for _ in range(RUN_SIZE):
                     block = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                     saved[S.add(block)] = block
                     saved_length += len(block)
             # second session: fetch everything back, shuffled
             with VTDStore(vtdpath, vtdpath, hashclass=hashclass) as S:
                 order = list(saved.keys())
                 random.shuffle(order)
                 for h in order:
                     self.assertEqual(S[h], saved[h])
 def test04random_mixed(self):
   ''' Fill both maps with some overlap.

       Each of 32 random blocks goes to map1 only, to both maps or
       to map2 only. For several window sizes the miss generator
       must then report exactly the hashcodes held by one map and
       not the other, in both directions.
   '''
   in_map1 = set()
   in_map2 = set()
   for _ in range(32):
     block = make_randblock(rand0(8193))
     # 0: map1 only, 1: both maps, 2: map2 only
     where = randint(0, 2)
     if where <= 1:
       in_map1.add(self.map1.add(block))
     if where >= 1:
       in_map2.add(self.map2.add(block))
   for window_size in 1, 7, 16, 23, 32, 1024:
     with self.subTest(window_size=window_size):
       # hashcodes held by map1 but absent from map2
       absent_from_2 = set(
           self.miss_generator(self.map2, self.map1, window_size=window_size)
       )
       self.assertEqual(absent_from_2, in_map1 - in_map2)
       # hashcodes held by map2 but absent from map1
       absent_from_1 = set(
           self.miss_generator(self.map1, self.map2, window_size=window_size)
       )
       self.assertEqual(absent_from_1, in_map2 - in_map1)
Exemple #10
0
 def test02full_duplex_random_payloads(self):
   ''' Throw 16 packets up, collect responses after requests queued.

       All 16 requests, each with a random payload and random
       flags, are submitted before any result is collected; the
       results are then collected in shuffled order. Every response
       must be ok, carry flags 0x11 and return the request payload
       reversed.
   '''
   pending = []
   for _ in range(16):
     sent = make_randblock(rand0(16385))
     sent_flags = rand0(65537)
     R = self.local_conn.request(
         0, sent_flags, sent, self._decode_response, 0
     )
     pending.append((R, sent_flags, sent))
   random.shuffle(pending)
   for R, _sent_flags, sent in pending:
     ok, reply_flags, reply = R()
     self.assertTrue(ok, "response status not ok")
     self.assertEqual(reply_flags, 0x11)
     self.assertEqual(reply, bytes(reversed(sent)))
Exemple #11
0
 def test02random1only(self):
   ''' Fill map1 with random blocks, nothing in map2.

       With 32 blocks added to map1 only, the miss generator
       called with (map1, map2) must yield nothing.
   '''
   for _ in range(32):
     self.map1.add(make_randblock(rand0(8193)))
   missing = list(self.miss_generator(self.map1, self.map2))
   self.assertEqual(len(missing), 0)
Exemple #12
0
 def test03random2only(self):
   ''' Fill map2 with random blocks, nothing in map1.

       With 32 blocks added to map2 only, the miss generator
       called with (map1, map2) must yield as many items as there
       are distinct hashcodes in map2.
   '''
   added = set()
   for _ in range(32):
     added.add(self.map2.add(make_randblock(rand0(8193))))
   missing = list(self.miss_generator(self.map1, self.map2))
   self.assertEqual(len(missing), len(added))
Exemple #13
0
 def test01random_identical(self):
   ''' Fill map1 and map2 with the same random blocks.

       Each of 32 blocks is added to both maps and must produce
       the same hashcode in each; the miss generator called with
       (map1, map2) must then yield nothing.
   '''
   for _ in range(32):
     block = make_randblock(rand0(8193))
     self.assertEqual(self.map1.add(block), self.map2.add(block))
   missing = list(self.miss_generator(self.map1, self.map2))
   self.assertEqual(len(missing), 0)
Exemple #14
0
 def test02RoundTripSingleBlock(self):
     ''' Generate various block types, serialise then deserialise each.

         For each of the five block types a random Block is made,
         encoded, and parsed back with `BlockRecord.parse_bytes`,
         which must consume the whole encoding. The decoded Block
         must match the original in span and spanned data (and in
         type and indirectness unless an indirect block was
         requested), followed by checks specific to the decoded type.
     '''
     with self.S:
         every_type = (
             BlockType.BT_HASHCODE,
             BlockType.BT_RLE,
             BlockType.BT_LITERAL,
             BlockType.BT_SUBBLOCK,
             BlockType.BT_INDIRECT,
         )
         for block_type in every_type:
             # only used to label the subtest
             size = rand0(16385)
             with self.subTest(type=block_type, size=size):
                 original = self._make_random_Block(block_type=block_type)
                 encoded = original.encode()
                 record, offset = BlockRecord.parse_bytes(encoded)
                 decoded = record.block
                 self.assertEqual(
                     offset, len(encoded),
                     "decoded %d bytes but len(Bserial)=%d" %
                     (offset, len(encoded)))
                 self._verify_block(decoded)
                 if block_type != BlockType.BT_INDIRECT:
                     self.assertEqual(
                         original.type, decoded.type, "block types differ")
                     self.assertEqual(
                         original.indirect, decoded.indirect,
                         "block indirects differ")
                 self.assertEqual(
                     original.span, decoded.span, "span lengths differ")
                 self.assertEqual(
                     original.get_spanned_data(),
                     decoded.get_spanned_data(),
                     "spanned data differ")
                 decoded_type = decoded.type
                 if decoded_type == BlockType.BT_INDIRECT:
                     self.assertTrue(original.indirect)
                     self._verify_block(decoded.superblock)
                     continue
                 # everything below applies to direct blocks only
                 self.assertFalse(original.indirect)
                 self.assertEqual(
                     original.span, sum(len(piece) for piece in original))
                 if decoded_type == BlockType.BT_HASHCODE:
                     self.assertEqual(original.hashcode, decoded.hashcode)
                 elif decoded_type == BlockType.BT_RLE:
                     self.assertEqual(
                         decoded.get_spanned_data(),
                         decoded.octet * decoded.span)
                 elif decoded_type == BlockType.BT_LITERAL:
                     raise unittest.SkipTest(
                         "no specific test for LiteralBlock")
                 elif decoded_type == BlockType.BT_SUBBLOCK:
                     self._verify_block(decoded.superblock)
                 else:
                     raise unittest.SkipTest(
                         "no type specific tests for Block type %r" %
                         (block_type, ))
Exemple #15
0
 def testhcu02hashcodes(self):
   ''' Various tests of the hashcodes method.

       Adds 16 random blocks to `self.S`, then checks that:
       * asking for 0 hashcodes raises `ValueError`
       * asking for the leading `n` hashcodes, with and without
         `after`, returns an ordered list of the expected length
       * walking the whole map in steps of various sizes returns
         every key exactly once, in sorted order
   '''
   M1 = self.S
   KS1 = set()
   # add 16 random blocks to the map with some sanity checks along the way
   for n in range(16):
     data = make_randblock(rand0(8193))
     h = M1.add(data)
     self.assertIn(h, M1)
     self.assertNotIn(h, KS1)
     KS1.add(h)
     # NOTE(review): presumably this pause lets slower stores
     # settle between additions - confirm before removing
     sleep(0.1)
     ##self.assertLen(M1, n + 1)
     ##self.assertEqual(len(KS1), n + 1)
     ##self.assertEqual(set(iter(M1)), KS1)
     ##self.assertEqual(set(M1.hashcodes()), KS1)
   # asking for 0 hashcodes is forbidden
   with self.assertRaises(ValueError):
     # NB: using list() to iterate over the generator, thus executing .hashcodes
     list(M1.hashcodes(length=0))
   # fetch the leading n hashcodes from the map, with and without `after`
   for after in False, True:
     with self.subTest(after=after):
       for n in range(1, 16):
         start_hashcode = None
         if after:
           # start from the smallest hashcode currently in the map
           start_hashcode = min(M1, default=None)
           if start_hashcode is None:
             # no start_hashcode, skip when after is true
             continue
         hs = list(
             M1.hashcodes(
                 start_hashcode=start_hashcode, after=after, length=n
             )
         )
         self.assertIsOrdered(hs, False)
         # with `after` the smallest of the 16 keys is excluded
         hn = min(n, 15 if after else 16)
         self.assertEqual(len(hs), hn)
   # traverse the map in various sized steps, including random
   sorted_keys = sorted(KS1)
   for step_size in 1, 2, 3, 7, 8, 15, 16, None:
     with self.subTest(step_size=step_size):
       start_hashcode = None
       keys_offset = 0
       seen = set()
       while keys_offset < len(sorted_keys):
         if step_size is None:
           n = random.randint(1, 7)
         else:
           n = step_size
         # A failure inside the subTest below is recorded and then
         # suppressed by unittest, which would leave keys_offset
         # unchanged and this loop spinning forever.
         # Track whether the step completed so that we can stop.
         advanced = False
         with self.subTest(
             start_hashcode=start_hashcode,
             keys_offset=keys_offset,
             n=n,
         ):
           after = start_hashcode is not None
           hs = list(
               M1.hashcodes(
                   start_hashcode=start_hashcode, length=n, after=after
               )
           )
           # verify that no key has been seen before
           for h in hs:
             self.assertNotIn(h, seen)
           # verify ordering of returned list
           self.assertIsOrdered(hs, strict=True)
           # verify the length first: hn >= 1 here, so this also
           # guarantees that hs[0] and hs[-1] below exist
           hn = min(len(sorted_keys) - keys_offset, n)
           self.assertEqual(len(hs), hn)
           # verify that least key is > start_hashcode
           if start_hashcode is not None:
             self.assertLess(start_hashcode, hs[0])
           # verify returned keys against master list
           for i in range(hn):
             self.assertEqual(sorted_keys[keys_offset + i], hs[i])
           # note these keys, advance
           seen.update(hs)
           keys_offset += hn
           start_hashcode = hs[-1]
           advanced = True
         if not advanced:
           # the step failed: abandon this walk instead of retrying
           break
       # verify that all keys have been retrieved
       self.assertEqual(sorted_keys, sorted(seen))