def test_create(self):
    """Incomplete trees can be built for assorted leaf counts, and a
    fresh tree knows no hashes but maps leaves to node indices."""
    # construction alone must succeed for non-power-of-two sizes too
    hashtree.IncompleteHashTree(6)
    hashtree.IncompleteHashTree(9)
    tree = hashtree.IncompleteHashTree(8)
    # nothing is known yet, whether queried by node index or leaf number
    self.failUnlessEqual(tree[0], None)
    self.failUnlessEqual(tree.get_leaf(0), None)
    # leaf numbers past the end are rejected
    self.failUnlessRaises(IndexError, tree.get_leaf, 8)
    # an 8-leaf tree has 15 nodes; leaf 0 lives at node 7
    self.failUnlessEqual(tree.get_leaf_index(0), 7)
def _setup_download(self):
    """Prepare per-share readers and hash trees before fetching shares.

    Reads self.verinfo and self.servermap (populated by an earlier
    servermap update), builds self.remaining_sharemap, one read proxy
    per (shnum, server) pair, one empty block hash tree per share, and
    the file-wide share hash tree seeded with the signed root hash.
    Calls self._raise_notenoughshareserror() if fewer than k shares
    are reachable.
    """
    self._started = time.time()
    self._status.set_status("Retrieving Shares")

    # how many shares do we need?
    (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
     offsets_tuple) = self.verinfo

    # first, which servers can we use?
    versionmap = self.servermap.make_versionmap()
    shares = versionmap[self.verinfo]
    # this sharemap is consumed as we decide to send requests
    self.remaining_sharemap = DictOfSets()
    for (shnum, server, timestamp) in shares:
        self.remaining_sharemap.add(shnum, server)
        # Reuse the SlotReader from the servermap, when one was cached
        # there; otherwise build a fresh read proxy for this share.
        key = (self.verinfo, server.get_serverid(),
               self._storage_index, shnum)
        if key in self.servermap.proxies:
            reader = self.servermap.proxies[key]
        else:
            reader = MDMFSlotReadProxy(server.get_rref(),
                                       self._storage_index, shnum, None)
        reader.server = server
        self.readers[shnum] = reader

    # without at least k distinct shares we cannot decode at all
    if len(self.remaining_sharemap) < k:
        self._raise_notenoughshareserror()

    self.shares = {} # maps shnum to validated blocks
    self._active_readers = [] # list of active readers for this dl.
    self._block_hash_trees = {} # shnum => hashtree
    for i in xrange(self._total_shares):
        # So we don't have to do this later.
        self._block_hash_trees[i] = hashtree.IncompleteHashTree(self._num_segments)

    # We need one share hash tree for the entire file; its leaves
    # are the roots of the block hash trees for the shares that
    # comprise it, and its root is in the verinfo.
    self.share_hash_tree = hashtree.IncompleteHashTree(N)
    self.share_hash_tree.set_hashes({0: root_hash})
def _got_results_one_share(self, shnum, peerid, got_prefix,
                           got_hash_and_data):
    """Validate one share's hashes against the signed root hash.

    Checks that the prefix still matches the servermap's view of this
    version, rebuilds the (single-leaf, SDMF) block hash tree from the
    share data, and verifies the share hash chain.

    Raises UncoordinatedWriteError if the prefix changed since the
    servermap was read, and CorruptShareError if any hash check fails.
    """
    self.log("_got_results: got shnum #%d from peerid %s"
             % (shnum, idlib.shortnodeid_b2a(peerid)))
    (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
     offsets_tuple) = self.verinfo
    assert len(got_prefix) == len(prefix), (len(got_prefix), len(prefix))
    if got_prefix != prefix:
        msg = "someone wrote to the data since we read the servermap: prefix changed"
        raise UncoordinatedWriteError(msg)
    (share_hash_chain, block_hash_tree,
     share_data) = unpack_share_data(self.verinfo, got_hash_and_data)

    assert isinstance(share_data, str)
    # build the block hash tree. SDMF has only one leaf.
    leaves = [hashutil.block_hash(share_data)]
    t = hashtree.HashTree(leaves)
    if list(t) != block_hash_tree:
        raise CorruptShareError(peerid, shnum, "block hash tree failure")
    share_hash_leaf = t[0]
    t2 = hashtree.IncompleteHashTree(N)
    # root_hash was checked by the signature
    t2.set_hashes({0: root_hash})
    try:
        t2.set_hashes(hashes=share_hash_chain,
                      leaves={shnum: share_hash_leaf})
    # "except ... as e" (valid on Python 2.6+ and 3.x) replaces the
    # py2-only "except ..., e" form used previously.
    except (hashtree.BadHashError, hashtree.NotEnoughHashesError,
            IndexError) as e:
        msg = "corrupt hashes: %s" % (e,)
        raise CorruptShareError(peerid, shnum, msg)
def _got_results_one_share(self, shnum, peerid, data):
    """Validate one complete share: prefix, hash chain, block tree.

    The prefix is checked (and its signature verified elsewhere via
    check_prefix/_compare_prefix), then the share's block hash tree and
    share hash chain are verified against the signed root hash.

    Raises CorruptShareError if any hash check fails.
    """
    self.check_prefix(peerid, shnum, data)

    # the [seqnum:signature] pieces are validated by _compare_prefix,
    # which checks their signature against the pubkey known to be
    # associated with this file.
    (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
     share_hash_chain, block_hash_tree, share_data,
     enc_privkey) = unpack_share(data)

    # validate [share_hash_chain,block_hash_tree,share_data]
    leaves = [hashutil.block_hash(share_data)]
    t = hashtree.HashTree(leaves)
    if list(t) != block_hash_tree:
        raise CorruptShareError(peerid, shnum, "block hash tree failure")
    share_hash_leaf = t[0]
    t2 = hashtree.IncompleteHashTree(N)
    # root_hash was checked by the signature
    t2.set_hashes({0: root_hash})
    try:
        t2.set_hashes(hashes=share_hash_chain,
                      leaves={shnum: share_hash_leaf})
    # "except ... as e" (valid on Python 2.6+ and 3.x) replaces the
    # py2-only "except ..., e" form used previously.
    except (hashtree.BadHashError, hashtree.NotEnoughHashesError,
            IndexError) as e:
        msg = "corrupt hashes: %s" % (e,)
        raise CorruptShareError(peerid, shnum, msg)
def test_needed_hashes(self):
    """needed_hashes() reports the node indices of the uncle chain
    required to validate a leaf; the optional second argument also
    includes the leaf's own node."""
    # (tree size, [((needed_hashes args), expected node set), ...])
    cases = [
        (8, [((0,), set([8, 4, 2])),
             ((0, True), set([7, 8, 4, 2])),
             ((1,), set([7, 4, 2])),
             ((7,), set([13, 5, 1])),
             ((7, False), set([13, 5, 1])),
             ((7, True), set([14, 13, 5, 1]))]),
        # a one-leaf tree is its own root: nothing else is needed
        (1, [((0,), set([]))]),
        # a six-leaf tree is padded out to eight leaves internally
        (6, [((0,), set([8, 4, 2])),
             ((0, True), set([7, 8, 4, 2])),
             ((1,), set([7, 4, 2])),
             ((5,), set([11, 6, 1])),
             ((5, False), set([11, 6, 1])),
             ((5, True), set([12, 11, 6, 1]))]),
    ]
    for num_leaves, queries in cases:
        tree = hashtree.IncompleteHashTree(num_leaves)
        for args, expected in queries:
            self.failUnlessEqual(tree.needed_hashes(*args), expected)
def test_depth_of(self):
    """depth_of() gives a node index's distance from the root."""
    hashtree.IncompleteHashTree(8)
    # node 0 is the root itself
    self.failUnlessEqual(hashtree.depth_of(0), 0)
    # successive rows of the binary tree: 2 nodes, then 4, then 8
    rows = [(1, [1, 2]),
            (2, [3, 4, 5, 6]),
            (3, [7, 8, 9, 10, 11, 12, 13, 14])]
    for depth, nodes in rows:
        for node in nodes:
            self.failUnlessEqual(hashtree.depth_of(node), depth,
                                 "i=%d" % node)
def do_test_speed(self, SIZE):
    """Exercise the expensive steps of building a complete tree of SIZE
    leaves and feeding every needed hash into an incomplete one.

    On my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB
    segsize) takes:
     7s to build the (complete) HashTree
     13s to set up the dictionary
     10s to run set_hashes()
    """
    complete = make_tree(SIZE)
    incomplete = hashtree.IncompleteHashTree(SIZE)
    # collect every node index needed to validate any leaf, leaves included
    needed = set()
    for leafnum in range(SIZE):
        needed.update(complete.needed_hashes(leafnum, True))
    # named 'known' rather than shadowing the builtin 'all'
    known = dict([(i, complete[i]) for i in needed])
    incomplete.set_hashes(hashes=known)
def __init__(self, sharenum, bucket, share_hash_tree, num_blocks,
             block_size, share_size):
    """ share_hash_tree is required to have already been initialized with
    the root hash (the number-0 hash), using the share_root_hash from the
    UEB"""
    precondition(share_hash_tree[0] is not None, share_hash_tree)
    # log prefix embeds the share number, bucket, and the first 8 bytes
    # of the root hash so log lines from different shares can be told apart
    prefix = "%d-%s-%s" % (sharenum, bucket,
                           base32.b2a_l(share_hash_tree[0][:8], 60))
    log.PrefixingLogMixin.__init__(self,
                                   facility="tahoe.immutable.download",
                                   prefix=prefix)
    self.sharenum = sharenum              # which share of the file this is
    self.bucket = bucket                  # remote source for block reads
    self.share_hash_tree = share_hash_tree
    self.num_blocks = num_blocks          # number of blocks in this share
    self.block_size = block_size          # size of each full block
    self.share_size = share_size          # total size of this share
    # per-share block hash tree; filled in as blocks are fetched/validated
    self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks)
def _setup_download(self):
    """Prepare per-share readers and the share hash tree before retrieval.

    NOTE(review): this is an older variant of _setup_download that reads
    from the node cache and asserts on share count, rather than reusing
    servermap proxies and raising a not-enough-shares error.
    """
    self._started = time.time()
    self._status.set_status("Retrieving Shares")

    # how many shares do we need?
    (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
     offsets_tuple) = self.verinfo

    # first, which servers can we use?
    versionmap = self.servermap.make_versionmap()
    shares = versionmap[self.verinfo]
    # this sharemap is consumed as we decide to send requests
    self.remaining_sharemap = DictOfSets()
    for (shnum, server, timestamp) in shares:
        self.remaining_sharemap.add(shnum, server)
        # If the servermap update fetched anything, it fetched at least 1
        # KiB, so we ask for that much.
        # TODO: Change the cache methods to allow us to fetch all of the
        # data that they have, then change this method to do that.
        any_cache = self._node._read_from_cache(self.verinfo, shnum,
                                                0, 1000)
        reader = MDMFSlotReadProxy(server.get_rref(),
                                   self._storage_index,
                                   shnum,
                                   any_cache)
        reader.server = server
        self.readers[shnum] = reader

    # NOTE(review): assert is stripped under "python -O"; raising an
    # explicit not-enough-shares exception would be safer here.
    assert len(self.remaining_sharemap) >= k

    self.shares = {} # maps shnum to validated blocks
    self._active_readers = [] # list of active readers for this dl.
    self._block_hash_trees = {} # shnum => hashtree

    # We need one share hash tree for the entire file; its leaves
    # are the roots of the block hash trees for the shares that
    # comprise it, and its root is in the verinfo.
    self.share_hash_tree = hashtree.IncompleteHashTree(N)
    self.share_hash_tree.set_hashes({0: root_hash})
def test_check(self):
    """Exercise set_hashes()/needed_hashes(): missing hashes, wrong leaf
    hashes, conflicting node/leaf hashes, corrupt internal hashes, and
    finally a successful validation — all against a 6-leaf tree."""
    # first create a complete hash tree
    ht = make_tree(6)
    # then create a corresponding incomplete tree
    iht = hashtree.IncompleteHashTree(6)

    # suppose we wanted to validate leaf[0]
    # leaf[0] is the same as node[7]
    self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
    self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2]))
    self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
    iht[0] = ht[0] # set the root
    # knowing the root (or node 5) does not change which uncles are needed
    self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
    self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
    iht[5] = ht[5]
    self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
    self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))

    # reset
    iht = hashtree.IncompleteHashTree(6)

    current_hashes = list(iht)
    # this should fail because there aren't enough hashes known
    try:
        iht.set_hashes(leaves={0: tagged_hash("tag", "0")})
    except hashtree.NotEnoughHashesError:
        pass
    else:
        self.fail("didn't catch not enough hashes")

    # and the set of hashes stored in the tree should still be the same
    self.failUnlessEqual(list(iht), current_hashes)
    # and we should still need the same
    self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))

    # the uncle chain for leaf 0 (node 7): nodes 8, 4, 2 plus the root
    chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]}
    # this should fail because the leaf hash is just plain wrong
    try:
        iht.set_hashes(chain, leaves={0: tagged_hash("bad tag", "0")})
    except hashtree.BadHashError:
        pass
    else:
        self.fail("didn't catch bad hash")

    # this should fail because we give it conflicting hashes: one as an
    # internal node, another as a leaf
    try:
        iht.set_hashes(chain, leaves={1: tagged_hash("bad tag", "1")})
    except hashtree.BadHashError:
        pass
    else:
        self.fail("didn't catch bad hash")

    bad_chain = chain.copy()
    bad_chain[2] = ht[2] + "BOGUS"
    # this should fail because the internal hash is wrong
    try:
        iht.set_hashes(bad_chain, leaves={0: tagged_hash("tag", "0")})
    except hashtree.BadHashError:
        pass
    else:
        self.fail("didn't catch bad hash")

    # this should succeed
    try:
        iht.set_hashes(chain,
                       leaves={0: tagged_hash("tag", "0")})
    except hashtree.BadHashError, e:
        self.fail("bad hash: %s" % e)
def _setup_encoding_parameters(self):
    """ I set up the encoding parameters, including k, n, the number of
    segments associated with this file, and the segment decoders. """
    (seqnum, root_hash, IV, segsize, datalength, k, n, known_prefix,
     offsets_tuple) = self.verinfo
    self._required_shares = k
    self._total_shares = n
    self._segment_size = segsize
    self._data_length = datalength

    # an empty IV marks an MDMF version; SDMF always carries one
    if not IV:
        self._version = MDMF_VERSION
    else:
        self._version = SDMF_VERSION

    if datalength and segsize:
        self._num_segments = mathutil.div_ceil(datalength, segsize)
        self._tail_data_size = datalength % segsize
    else:
        self._num_segments = 0
        self._tail_data_size = 0

    self._segment_decoder = codec.CRSDecoder()
    self._segment_decoder.set_params(segsize, k, n)

    # a zero remainder means the tail segment is a full segment
    if not self._tail_data_size:
        self._tail_data_size = segsize

    # erasure coding requires the segment size to be a multiple of k
    self._tail_segment_size = mathutil.next_multiple(
        self._tail_data_size, self._required_shares)
    if self._tail_segment_size == self._segment_size:
        # full-size tail: reuse the main decoder
        self._tail_decoder = self._segment_decoder
    else:
        self._tail_decoder = codec.CRSDecoder()
        self._tail_decoder.set_params(self._tail_segment_size,
                                      self._required_shares,
                                      self._total_shares)

    self.log("got encoding parameters: "
             "k: %d "
             "n: %d "
             "%d segments of %d bytes each (%d byte tail segment)" % \
             (k, n, self._num_segments, self._segment_size,
              self._tail_segment_size))

    if self._block_hash_trees is not None:
        for i in xrange(self._total_shares):
            # So we don't have to do this later.
            self._block_hash_trees[i] = hashtree.IncompleteHashTree(
                self._num_segments)

    # Our last task is to tell the downloader where to start and
    # where to stop. We use three parameters for that:
    #  - self._start_segment: the segment that we need to start
    #    downloading from.
    #  - self._current_segment: the next segment that we need to
    #    download.
    #  - self._last_segment: The last segment that we were asked to
    #    download.
    #
    # We say that the download is complete when
    # self._current_segment > self._last_segment. We use
    # self._start_segment and self._last_segment to know when to
    # strip things off of segments, and how much to strip.
    if self._offset:
        self.log("got offset: %d" % self._offset)
        # our start segment is the first segment containing the
        # offset we were given.
        start = self._offset // self._segment_size
        assert start < self._num_segments
        self._start_segment = start
        self.log("got start segment: %d" % self._start_segment)
    else:
        self._start_segment = 0

    # If self._read_length is None, then we want to read the whole
    # file. Otherwise, we want to read only part of the file, and
    # need to figure out where to stop reading.
    if self._read_length is not None:
        # our end segment is the last segment containing part of the
        # segment that we were asked to read.
        self.log("got read length %d" % self._read_length)
        if self._read_length != 0:
            end_data = self._offset + self._read_length
            # We don't actually need to read the byte at end_data,
            # but the one before it.
            end = (end_data - 1) // self._segment_size
            assert end < self._num_segments
            self._last_segment = end
        else:
            # zero-length read: start and end coincide
            self._last_segment = self._start_segment
        self.log("got end segment: %d" % self._last_segment)
    else:
        self._last_segment = self._num_segments - 1

    self._current_segment = self._start_segment