class TestRecipeFinder(unittest.TestCase, WorkdirHelper):
    """Tests for RecipeFinder driven by a BlocksDB created with the value 3.

    The same value 3 is passed to RecipeFinder; it is presumably the block
    size in bytes (the known block used below is the 3-byte string "aaa").
    """

    def setUp(self):
        """Build the fixtures: a temp-backed BlocksDB, a fake piece handler
        and an IntegerSet."""
        # Must exist before createTmpName() is called; presumably the helper
        # records the generated name here -- confirm against WorkdirHelper.
        self.remove_at_teardown = []
        self.dbname = self.createTmpName()
        self.blocksdb = BlocksDB(self.dbname, 3)
        self.piece_handler = FakePieceHandler()
        self.integer_set = IntegerSet(1)

    def testSimpleUnaligned(self):
        """A known block that starts at a non-block-aligned offset is found.

        The fed data is "XXXaaa", delivered in 2-byte chunks so that the
        known block "aaa" straddles chunk boundaries. The expected recipe is
        3 original bytes followed by one reference to the stored block.
        """
        self.integer_set.add(3298534883712)  # "aaa"
        recipe_finder = RecipeFinder(
            self.blocksdb, 3, self.integer_set, None,
            original_piece_handler=self.piece_handler)

        # Register "aaa" as a block at offset 0 of a blob whose id equals the
        # block's own md5 (i.e. the blob consists of exactly "aaa").
        self.blocksdb.begin()
        self.blocksdb.add_block("47bce5c74f589f4867dbd57e9ca9f808", 0,
                                "47bce5c74f589f4867dbd57e9ca9f808")
        self.blocksdb.commit()

        recipe_finder.feed("XX")
        recipe_finder.feed("Xa")
        recipe_finder.feed("aa")
        recipe_finder.close()

        recipe = recipe_finder.get_recipe()
        # assertEqual instead of the deprecated assertEquals alias, and a
        # plain int instead of the Python-2-only long literal 3L (the two
        # compare equal on Python 2, so the expectation is unchanged).
        self.assertEqual(recipe, {
            'md5sum': '5afc35e6684b843ceb498f5031f22660',
            'method': 'concat',
            'size': 6,
            'pieces': [
                {'source': 'FAKEBLOB', 'size': 3, 'original': True,
                 'repeat': 1, 'offset': 0},
                {'source': u'47bce5c74f589f4867dbd57e9ca9f808', 'size': 3,
                 'original': False, 'repeat': 1, 'offset': 0},
            ],
        })
def setUp(self):
    """Prepare fixtures: a BlocksDB on a fresh temp name, a fake piece
    handler and an IntegerSet.

    Both BlocksDB and IntegerSet receive the same constructor arguments as
    before (3 and 1 respectively).
    """
    # Initialised first: it has to exist before createTmpName() runs
    # (presumably the helper appends the new name to it -- confirm).
    self.remove_at_teardown = []

    tmp_name = self.createTmpName()
    self.dbname = tmp_name
    self.blocksdb = BlocksDB(tmp_name, 3)

    self.piece_handler = FakePieceHandler()
    self.integer_set = IntegerSet(1)
def setUp(self):
    """Create a scratch directory and open a BlocksDB inside it.

    The database file is "database.sqlite" in the new directory and is
    opened with 2**16 as the second BlocksDB argument.
    """
    # Initialised first: it has to exist before createTmpName() runs
    # (presumably the helper appends the new name to it -- confirm).
    self.remove_at_teardown = []

    # Chained assignments keep the attribute set before the next call runs,
    # exactly as in a plain attribute-by-attribute setup.
    self.workdir = scratch_dir = self.createTmpName()
    os.mkdir(scratch_dir)
    self.dbfile = db_path = os.path.join(scratch_dir, "database.sqlite")
    self.db = BlocksDB(db_path, 2**16)
class TestBlockLocationsDB(unittest.TestCase, WorkdirHelper):
    """Tests for BlocksDB: corruption detection, rolling-value storage and
    block-location storage."""

    def setUp(self):
        """Create a scratch directory and open a BlocksDB inside it."""
        # Paths listed here are removed by tearDown(); it must exist before
        # createTmpName() is called.
        self.remove_at_teardown = []
        self.workdir = self.createTmpName()
        os.mkdir(self.workdir)
        self.dbfile = os.path.join(self.workdir, "database.sqlite")
        self.db = BlocksDB(self.dbfile, 2**16)

    def testCompleteCorruption(self):
        """Opening a file full of garbage raises SoftCorruptionError."""
        # Drop our reference to the open database before overwriting its file.
        del self.db
        with open(self.dbfile, "w") as f:
            f.write("X" * 100000)
        self.assertRaises(SoftCorruptionError, BlocksDB, self.dbfile, 2**16)

    def testCrcCorruption(self):
        """Tampering with a stored row behind BlocksDB's back is detected."""
        self.db.begin()
        self.db.add_block("d41d8cd98f00b204e9800998ecf8427e", 0,
                          "00000000000000000000000000000000")
        self.db.commit()

        # Modify the row through a separate raw connection. The connection is
        # closed explicitly so the test does not leak an open sqlite handle.
        con = sqlite3.connect(self.dbfile)
        try:
            con.execute("UPDATE blocks SET offset = 1")
            con.commit()
        finally:
            con.close()

        self.assertRaises(SoftCorruptionError, self.db.get_block_locations,
                          "00000000000000000000000000000000")

    def testRollingEmpty(self):
        """A fresh database has no rolling values."""
        self.assertEqual(self.db.get_all_rolling(), [])

    def testRollingSimple(self):
        """A single added rolling value is returned."""
        self.db.begin()
        self.db.add_rolling(17)
        self.db.commit()
        self.assertEqual(self.db.get_all_rolling(), [17])

    def testRollingDuplicate(self):
        """Adding the same rolling value twice stores it once."""
        self.db.begin()
        self.db.add_rolling(17)
        self.db.add_rolling(17)
        self.db.commit()
        self.assertEqual(self.db.get_all_rolling(), [17])

    def testRollingRange(self):
        """Rolling values span 0 .. 2**64 - 1; anything outside overflows."""
        self.db.begin()
        self.db.add_rolling(0)
        self.db.add_rolling(2**64 - 1)
        self.db.commit()
        # Order of the returned values is not asserted, hence the sets.
        self.assertEqual(set(self.db.get_all_rolling()), {0, 2**64 - 1})
        self.db.begin()
        self.assertRaises(OverflowError, self.db.add_rolling, -1)
        self.assertRaises(OverflowError, self.db.add_rolling, 2**64)

    def testHighBlock(self):
        """Offsets above 2**32, up to 2**64 - 1, are stored and returned."""
        self.db.begin()
        self.db.add_block("d41d8cd98f00b204e9800998ecf8427e", 2**32 + 1,
                          "00000000000000000000000000000000")
        self.db.add_block("d41d8cd98f00b204e9800998ecf8427e", 2**64 - 1,
                          "00000000000000000000000000000001")
        self.db.commit()
        self.assertEqual(
            list(self.db.get_block_locations("00000000000000000000000000000000")),
            [("d41d8cd98f00b204e9800998ecf8427e", 2**32 + 1)])
        self.assertEqual(
            list(self.db.get_block_locations("00000000000000000000000000000001")),
            [("d41d8cd98f00b204e9800998ecf8427e", 2**64 - 1)])

    def testOffsetLimits(self):
        """Offsets below 0 or above 2**64 - 1 raise OverflowError."""
        self.db.begin()
        self.assertRaises(OverflowError, self.db.add_block,
                          "d41d8cd98f00b204e9800998ecf8427e", -1,
                          "00000000000000000000000000000002")
        self.assertRaises(OverflowError, self.db.add_block,
                          "d41d8cd98f00b204e9800998ecf8427e", 2**64,
                          "00000000000000000000000000000002")

    def testBlockSimple(self):
        """A stored block is found again by its md5."""
        # blob, offset, md5
        self.db.begin()
        self.db.add_block("d41d8cd98f00b204e9800998ecf8427e", 0,
                          "00000000000000000000000000000000")
        self.db.commit()
        self.assertEqual(
            list(self.db.get_block_locations("00000000000000000000000000000000")),
            [("d41d8cd98f00b204e9800998ecf8427e", 0)])

    def testBlockDuplicate(self):
        """Adding the identical block twice yields a single location."""
        # blob, offset, md5
        self.db.begin()
        self.db.add_block("d41d8cd98f00b204e9800998ecf8427e", 0,
                          "00000000000000000000000000000000")
        self.db.add_block("d41d8cd98f00b204e9800998ecf8427e", 0,
                          "00000000000000000000000000000000")
        self.db.commit()
        self.assertEqual(
            list(self.db.get_block_locations("00000000000000000000000000000000")),
            [("d41d8cd98f00b204e9800998ecf8427e", 0)])

    def tearDown(self):
        """Remove every path registered in remove_at_teardown (best effort)."""
        for d in self.remove_at_teardown:
            shutil.rmtree(d, ignore_errors=True)
# Benchmark scaffold for BlocksDB.
#
# As written, importing/running this module only opens (or creates) a
# BlocksDB at a hard-coded path; the actual insert loop is disabled inside
# the bare string literal at the bottom of the file.

# Python 2/3 compatibility shims from the `future` package. These are kept
# ahead of the remaining imports.
from future import standard_library
standard_library.install_aliases()
from builtins import str
from builtins import range
from deduplication import BlocksDB
# NOTE(review): tempfile, random and time are referenced only by the
# disabled code below.
import tempfile
import random
from time import time
import os

#tmpfile = tempfile.NamedTemporaryFile(dir="/tmp")
# Hard-coded, machine-specific location of the benchmark database.
filename = "/gigant/tmp/benchmark.db"
#filename = tmpfile.name
#print "Using db", tmpfile.name

blocksize = 2**16
# Side effect at import time: BlocksDB is constructed on `filename`.
db = BlocksDB(filename, blocksize)
#db = BlockLocationsDB(blocksize, ":memory:")
#db = BlockLocationsDB(blocksize, "/gigant/tmp/benchmark.db")

#for n in range(0, random.randrange(20000)):
# The string below is a no-op expression statement used to disable the
# benchmark loop (100 transactions of 100000 add_block/add_rolling calls
# each). It still contains a Python 2 `print` statement and would need
# updating before being re-enabled under Python 3.
"""
for c in range(0, 100):
    db.begin()
    for n in range(0, 100000):
        db.add_block(blob = "ablob".zfill(32), offset = 0, md5 = str(random.randrange(2**64)).zfill(32))
        db.add_rolling(random.randrange(2**64))
    db.commit()
    print c
"""