Example #1
 def testhcu00first(self):
   ''' Trivial test adding 2 blocks.
   '''
   M1 = self.S
   KS1 = set()
   # test emptiness
   self.assertLen(M1, 0)
   # add one block
   data = make_randblock(rand0(8193))
   h = M1.add(data)
   self.assertIn(h, M1)
   self.assertEqual(M1[h], data)
   KS1.add(h)
   self.assertIn(h, M1)
   mks = set(M1.keys())
   self.assertIn(h, mks)
   mks = set(M1.hashcodes())
   ##self.assertEqual(set(M1.hashcodes()), KS1)
   if mks != KS1:
     warning(
         "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r", KS1 - mks,
         mks - KS1
     )
   # add another block
   data2 = make_randblock(rand0(8193))
   h2 = M1.add(data2)
   KS1.add(h2)
   mks2 = set(M1.hashcodes())
   ##self.assertEqual(mks2, KS1)
   if mks2 != KS1:
     warning(
         "M1.hashcodes != KS1: M1 missing %r, KS1 missing %r", KS1 - mks2,
         mks2 - KS1
     )
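These snippets rely throughout on two helpers, rand0 and make_randblock, imported from the surrounding test support code. A minimal stand-in, assuming rand0(n) returns a random integer in [0, n) and make_randblock(size) returns size bytes of random data (the real helpers may differ):

import os
import random

def rand0(maxn):
  ''' Assumed behaviour: a pseudorandom integer in the range 0..maxn-1. '''
  return random.randrange(maxn)

def make_randblock(size):
  ''' Assumed behaviour: return `size` bytes of random data. '''
  return os.urandom(size)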
Example #2
 def testhcu03hashcodes_missing(self):
   ''' Test the hashcodes_missing function.
   '''
   M1 = self.S
   KS1 = set()
   for _ in range(16):
     data = make_randblock(rand0(8193))
     h = M1.add(data)
     KS1.add(h)
   with MappingStore("M2MappingStore", mapping={},
                     hashclass=M1.hashclass) as M2:
     KS2 = set()
     # construct M2 as a mix of M1 and random new blocks
     for _ in range(16):
       if randbool():
         data = make_randblock(rand0(8193))
         h = M2.add(data)
         KS2.add(h)
       else:
         M1ks = list(M1.hashcodes())
         if not M1ks:
           continue
         M1hash = M1ks[rand0(len(M1ks))]
         data = M1[M1hash]
         h = M2.add(data)
         self.assertEqual(h, M1hash)
         self.assertIn(h, M2)
         KS2.add(h)
Example #3
 def testhcu01test_hashcodes_from(self):
   ''' Test the hashcodes_from method.
   '''
   # fill map1 with 16 random data blocks
   M1 = self.S
   hashcodes_added = set()
   for _ in range(16):
     data = make_randblock(rand0(8193))
     h = M1.add(data)
     hashcodes_added.add(h)
   # make a block not in the map
   data2 = make_randblock(rand0(8193))
   hashcode_other = self.S.hash(data2)
   self.assertNotIn(
       hashcode_other, hashcodes_added,
       "abort test: %s in previous blocks" % (hashcode_other,)
   )
   #
   # extract hashes using Store.hashcodes_from, check results
   #
   ks = sorted(hashcodes_added)
   for start_hashcode in [None] + list(hashcodes_added) + [hashcode_other]:
     with self.subTest(M1type=type(M1).__name__,
                       start_hashcode=start_hashcode):
       hashcodes_from = list(M1.hashcodes_from(start_hashcode=start_hashcode))
       self.assertIsOrdered(hashcodes_from, strict=True)
       if start_hashcode is not None:
         for h in hashcodes_from:
           self.assertGreaterEqual(
               h, start_hashcode,
               "NOT start_hashocde=%s <= h=%s" % (start_hashcode, h)
           )
         self.assertTrue(
             all(map(lambda h: h >= start_hashcode, hashcodes_from))
         )
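The assertions above pin down the expected semantics of hashcodes_from: hashcodes come back in strictly increasing order, and none is below start_hashcode when one is given. A naive illustrative model of that contract (not the real implementation):

def naive_hashcodes_from(store, start_hashcode=None):
  ''' Illustrative only: yield the store's hashcodes in increasing order,
      starting at `start_hashcode` if supplied.
  '''
  for h in sorted(store.hashcodes()):
    if start_hashcode is None or h >= start_hashcode:
      yield h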
Example #4
 def test_shuffled_randomblocks(self):
     ''' Save RUN_SIZE random blocks, close, retrieve in random order.
     '''
     # save random blocks to a file
     blocks = {}
     with open(self.pathname, 'wb') as f:
         for n in range(RUN_SIZE):
             with self.subTest(put_block_n=n):
                 data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                 dr = DataRecord(data)
                 offset = f.tell()
                 blocks[offset] = data
                 f.write(bytes(dr))
     # shuffle the block offsets
     offsets = list(blocks.keys())
     random.shuffle(offsets)
     # retrieve the blocks in random order, check for correct content
     with open(self.pathname, 'rb') as f:
         for n, offset in enumerate(offsets):
             with self.subTest(shuffled_offsets_n=n, offset=offset):
                 f.seek(offset)
                 bfr = CornuCopyBuffer.from_file(f)
                 dr = DataRecord.parse(bfr)
                 data = dr.data
                 self.assertEqual(data, blocks[offset])
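The pattern exercised here, serialise a record and note its file offset, then seek back later and parse it, can be sketched with a hypothetical length-prefixed record; the real DataRecord on-disk format may differ:

import struct

def write_record(f, data):
  ''' Append a hypothetical length-prefixed record, return its start offset. '''
  offset = f.tell()
  f.write(struct.pack('>I', len(data)))
  f.write(data)
  return offset

def read_record(f, offset):
  ''' Seek to `offset` and read back one length-prefixed record. '''
  f.seek(offset)
  length, = struct.unpack('>I', f.read(4))
  return f.read(length)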
Example #5
 def test_shuffled_randomblocks(self):
     ''' Save RUN_SIZE random blocks, close, retrieve in random order.
     '''
     for cls in RawBackingFile, CompressibleBackingFile:
         for _, hashclass in sorted(HASHCLASS_BY_NAME.items()):
             with self.subTest(cls=cls, hashclass=hashclass):
                 with NamedTemporaryFile(dir='.',
                                         prefix=cls.__name__ + '-') as T:
                     blocks = {}
                     index = BinaryHashCodeIndex(
                         hashclass=hashclass,
                         binary_index={},
                         index_entry_class=BackingFileIndexEntry)
                     total_length = 0
                     # open and save data
                     with cls(T.name, hashclass=hashclass,
                              index=index) as bf:
                         for _ in range(RUN_SIZE):
                             data = make_randblock(rand0(MAX_BLOCK_SIZE +
                                                         1))
                             h = bf.add(data)
                             blocks[h] = data
                             total_length += len(data)
                     # reopen and retrieve
                     with cls(T.name, hashclass=hashclass,
                              index=index) as bf:
                         # retrieve in random order
                         hashcodes = list(blocks.keys())
                         random.shuffle(hashcodes)
                         for h in hashcodes:
                             data = bf[h]
                             self.assertEqual(data, blocks[h])
Example #6
 def test_shuffled_randomblocks_vtd(self):
     ''' Like test_shuffled_randomblocks but using a .vtd file and binary index file:
     save RUN_SIZE random blocks, close, retrieve in random order.
     '''
     for _, hashclass in sorted(HASHCLASS_BY_NAME.items()):
         with self.subTest(hashclass=hashclass):
             with TemporaryDirectory(dir='.') as TDname:
                 with NamedTemporaryFile(dir=TDname,
                                         prefix='VTDStore-',
                                         suffix='.vtd') as T:
                     blocks = {}
                     total_length = 0
                     # open and save data
                     with VTDStore(T.name, T.name,
                                   hashclass=hashclass) as S:
                         for _ in range(RUN_SIZE):
                             data = make_randblock(rand0(MAX_BLOCK_SIZE +
                                                         1))
                             h = S.add(data)
                             blocks[h] = data
                             total_length += len(data)
                     # reopen and retrieve
                     with VTDStore(T.name, T.name,
                                   hashclass=hashclass) as S:
                         # retrieve in random order
                         hashcodes = list(blocks.keys())
                         random.shuffle(hashcodes)
                         for h in hashcodes:
                             data = S[h]
                             self.assertEqual(data, blocks[h])
Example #7
 def test04random_mixed(self):
   ''' Fill both maps with some overlap.
   '''
   ks1 = set()
   ks2 = set()
   for n in range(32):
     data = make_randblock(rand0(8193))
     choice = randint(0, 2)
     if choice <= 1:
       h1 = self.map1.add(data)
       ks1.add(h1)
     if choice >= 1:
       h2 = self.map2.add(data)
       ks2.add(h2)
   for window_size in 1, 7, 16, 23, 32, 1024:
     with self.subTest(window_size=window_size):
       # items in map1 not in map2
       missing = set(
           self.miss_generator(self.map2, self.map1, window_size=window_size)
       )
       self.assertEqual(missing, ks1 - ks2)
       # items in map2 not in map1
       missing = set(
           self.miss_generator(self.map1, self.map2, window_size=window_size)
       )
       self.assertEqual(missing, ks2 - ks1)
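Together with the next three tests, these assertions pin down miss_generator(map_a, map_b) as yielding the hashcodes present in map_b but absent from map_a. A naive reference version, ignoring the window_size batching the real generator uses:

def naive_miss_generator(map_a, map_b, window_size=None):
  ''' Illustrative only: yield hashcodes stored in `map_b` but missing
      from `map_a`; `window_size` is ignored in this sketch.
  '''
  for h in sorted(map_b.hashcodes()):
    if h not in map_a:
      yield h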
Example #8
 def test02random1only(self):
   ''' Fill map1 with random blocks, nothing in map2.
   '''
   for n in range(32):
     data = make_randblock(rand0(8193))
     h1 = self.map1.add(data)
   missing = list(self.miss_generator(self.map1, self.map2))
   self.assertEqual(len(missing), 0)
Example #9
 def test03random2only(self):
   ''' Fill map2 with random blocks, nothing in map1.
   '''
   ks2 = set()
   for n in range(32):
     data = make_randblock(rand0(8193))
     h2 = self.map2.add(data)
     ks2.add(h2)
   missing = list(self.miss_generator(self.map1, self.map2))
   self.assertEqual(len(missing), len(ks2))
Example #10
 def test01random_identical(self):
   ''' Fill map1 and map2 with some identical random blocks.
   '''
   for _ in range(32):
     data = make_randblock(rand0(8193))
     h1 = self.map1.add(data)
     h2 = self.map2.add(data)
     self.assertEqual(h1, h2)
   missing = list(self.miss_generator(self.map1, self.map2))
   self.assertEqual(len(missing), 0)
Example #11
 def test02add_get(self):
   ''' Add random chunks, get them back.
   '''
   S = self.S
   self.assertLen(S, 0)
   random_chunk_map = {}
   for _ in range(16):
     size = random.randint(127, 16384)
     data = make_randblock(size)
     h = S.hash(data)
     h2 = S.add(data)
     self.assertEqual(h, h2)
     random_chunk_map[h] = data
   for h in random_chunk_map:
     chunk = S.get(h)
     self.assertIsNot(chunk, None)
     self.assertEqual(chunk, random_chunk_map[h])
Example #12
 def test01add_new_block(self):
   ''' Add a block and check that it worked.
   '''
   S = self.S
   # compute block hash but do not store
   size = random.randint(127, 16384)
   data = make_randblock(size)
   h = S.hash(data)
   ok = S.contains(h)
   self.assertFalse(ok)
   self.assertNotIn(h, S)
   # now add the block
   h2 = S.add(data)
   self.assertEqual(h, h2)
   ok = S.contains(h)
   self.assertTrue(ok)
   self.assertIn(h, S)
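These two tests only assume a small store interface: hash(data), add(data), contains(h), membership via "in", get(h) and indexing. A dict-backed stand-in, using SHA-1 purely for illustration (the real Store classes are configured with their own hashclass):

import hashlib

class MinimalStore:
  ''' Illustrative dict-backed sketch of the store interface exercised above. '''

  def __init__(self):
    self._blocks = {}

  def __len__(self):
    return len(self._blocks)

  def hash(self, data):
    ''' Compute the hashcode for `data` without storing it. '''
    return hashlib.sha1(data).digest()

  def add(self, data):
    ''' Store `data` and return its hashcode. '''
    h = self.hash(data)
    self._blocks[h] = data
    return h

  def contains(self, h):
    return h in self._blocks

  __contains__ = contains

  def get(self, h, default=None):
    return self._blocks.get(h, default)

  def __getitem__(self, h):
    return self._blocks[h]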
Example #13
 def test02full_duplex_random_payloads(self):
   ''' Throw 16 packets up, collect responses after requests queued.
   '''
   rqs = []
   for _ in range(16):
     size = rand0(16385)
     data = make_randblock(size)
     flags = rand0(65537)
     R = self.local_conn.request(0, flags, data, self._decode_response, 0)
     rqs.append((R, flags, data))
   random.shuffle(rqs)
   for rq in rqs:
     R, flags, data = rq
     ok, flags, payload = R()
     self.assertTrue(ok, "response status not ok")
     self.assertEqual(flags, 0x11)
     self.assertEqual(payload, bytes(reversed(data)))
Example #14
 def testhcu02hashcodes(self):
   ''' Various tests of the hashcodes() method.
   '''
   M1 = self.S
   KS1 = set()
   # add 16 random blocks to the map with some sanity checks along the way
   for n in range(16):
     data = make_randblock(rand0(8193))
     h = M1.add(data)
     self.assertIn(h, M1)
     self.assertNotIn(h, KS1)
     KS1.add(h)
     sleep(0.1)
     ##self.assertLen(M1, n + 1)
     ##self.assertEqual(len(KS1), n + 1)
     ##self.assertEqual(set(iter(M1)), KS1)
     ##self.assertEqual(set(M1.hashcodes()), KS1)
   # asking for 0 hashcodes is forbidden
   with self.assertRaises(ValueError):
     # NB: using list() to iterate over the generator, thus executing .hashcodes
     hs = list(M1.hashcodes(length=0))
   # fetch the leading n hashcodes from the map, with and without `after`
   for after in False, True:
     with self.subTest(after=after):
       for n in range(1, 16):
         if after:
           start_hashcode = None
           for mincode in accumulate(iter(M1), min):
             start_hashcode = mincode
           if start_hashcode is None:
             # no start_hashcode, skip when after is true
             continue
         else:
           start_hashcode = None
         hs = list(
             M1.hashcodes(
                 start_hashcode=start_hashcode, after=after, length=n
             )
         )
         self.assertIsOrdered(hs, False)
         hn = min(n, 15 if after else 16)
         self.assertEqual(len(hs), hn)
   # traverse the map in various sized steps, including random
   sorted_keys = sorted(KS1)
   for step_size in 1, 2, 3, 7, 8, 15, 16, None:
     with self.subTest(step_size=step_size):
       start_hashcode = None
       keys_offset = 0
       seen = set()
       while keys_offset < len(sorted_keys):
         if step_size is None:
           n = random.randint(1, 7)
         else:
           n = step_size
         with self.subTest(
             start_hashcode=start_hashcode,
             keys_offset=keys_offset,
             n=n,
         ):
           after = start_hashcode is not None
           hs = list(
               M1.hashcodes(
                   start_hashcode=start_hashcode, length=n, after=after
               )
           )
           # verify that no key has been seen before
           for h in hs:
             self.assertNotIn(h, seen)
           # verify ordering of returned list
           self.assertIsOrdered(hs, strict=True)
           # verify that least key is > start_hashcode
           if start_hashcode is not None:
             self.assertLess(start_hashcode, hs[0])
           hn = min(len(sorted_keys) - keys_offset, n)
           self.assertEqual(len(hs), hn)
           # verify returned keys against master list
           for i in range(hn):
             self.assertEqual(sorted_keys[keys_offset + i], hs[i])
           # note these keys, advance
           seen.update(hs)
           keys_offset += hn
           start_hashcode = hs[-1]
       # verify that all keys have been retrieved
       self.assertEqual(sorted_keys, sorted(seen))
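For readers following the walk above, the hashcodes() behaviour being checked (sorted results, an optional start_hashcode, an after flag that excludes the starting code, a length cap, and ValueError for length=0) can be modelled naively as follows; this is an illustration of the asserted semantics, not the real implementation:

def naive_hashcodes(store, start_hashcode=None, after=False, length=None):
  ''' Illustrative model of the hashcodes() semantics asserted above. '''
  if length is not None and length < 1:
    raise ValueError("length must be >= 1")
  hs = sorted(store)
  if start_hashcode is not None:
    hs = [
        h for h in hs
        if (h > start_hashcode if after else h >= start_hashcode)
    ]
  return hs if length is None else hs[:length]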