def _got_ueb(vup):
    share_hash_tree = IncompleteHashTree(vcap.total_shares)
    share_hash_tree.set_hashes({0: vup.share_root_hash})

    vrbp = ValidatedReadBucketProxy(sharenum, b,
                                    share_hash_tree,
                                    vup.num_segments,
                                    vup.block_size,
                                    vup.share_size)

    # note: normal download doesn't use get_all_sharehashes(),
    # because it gets more data than necessary. We've discussed the
    # security properties of having verification and download look
    # identical (so the server couldn't, say, provide good responses
    # for one and not the other), but I think that full verification
    # is more important than defending against inconsistent server
    # behavior. Besides, they can't pass the verifier without storing
    # all the data, so there's not so much to be gained by behaving
    # inconsistently.
    d = vrbp.get_all_sharehashes()
    # we fill share_hash_tree before fetching any blocks, so the
    # block fetches won't send redundant share-hash-tree requests, to
    # speed things up. Then we fetch+validate all the blockhashes.
    d.addCallback(lambda ign: vrbp.get_all_blockhashes())

    cht = IncompleteHashTree(vup.num_segments)
    cht.set_hashes({0: vup.crypttext_root_hash})
    d.addCallback(lambda ign: vrbp.get_all_crypttext_hashes(cht))

    d.addCallback(lambda ign: vrbp)
    return d
def _parse_and_store_UEB(self, UEB_s):
    # Note: the UEB contains needed_shares and total_shares. These are
    # redundant and inferior (the filecap contains the authoritative
    # values). However, because it is possible to encode the same file in
    # multiple ways, and the encoders might choose (poorly) to use the
    # same key for both (therefore getting the same SI), we might
    # encounter shares for both types. The UEB hashes will be different,
    # however, and we'll disregard the "other" encoding's shares as
    # corrupted.

    # therefore, we ignore d['total_shares'] and d['needed_shares'].

    d = uri.unpack_extension(UEB_s)

    log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
            ueb=repr(uri.unpack_extension_readable(UEB_s)),
            vcap=self._verifycap.to_string(),
            level=log.NOISY, parent=self._lp, umid="cVqZnA")

    k, N = self._verifycap.needed_shares, self._verifycap.total_shares

    self.segment_size = d['segment_size']
    self._segsize_observers.fire(self.segment_size)

    r = self._calculate_sizes(self.segment_size)
    self.tail_segment_size = r["tail_segment_size"]
    self.tail_segment_padded = r["tail_segment_padded"]
    self.num_segments = r["num_segments"]
    self.block_size = r["block_size"]
    self.tail_block_size = r["tail_block_size"]
    log.msg("actual sizes: %s" % (r,),
            level=log.NOISY, parent=self._lp, umid="PY6P5Q")
    if (self.segment_size == self.guessed_segment_size
        and self.num_segments == self.guessed_num_segments):
        log.msg("my guess was right!",
                level=log.NOISY, parent=self._lp, umid="x340Ow")
    else:
        log.msg("my guess was wrong! Extra round trips for me.",
                level=log.NOISY, parent=self._lp, umid="tb7RJw")

    # zfec.Decode() instantiation is fast, but still, let's use the same
    # codec instance for all but the last segment. 3-of-10 takes 15us on
    # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
    # 2.5ms, worst-case 254-of-255 is 9.3ms
    self._codec = CRSDecoder()
    self._codec.set_params(self.segment_size, k, N)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B. self.ciphertext_hash_tree was a guess before:
    # this is where we create it for real.
    self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
    self.ciphertext_hash_tree_leaves = self.num_segments
    self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

    self.share_hash_tree.set_hashes({0: d['share_root_hash']})
def _build_guessed_tables(self, max_segment_size):
    size = min(self._verifycap.size, max_segment_size)
    s = mathutil.next_multiple(size, self._verifycap.needed_shares)
    self.guessed_segment_size = s
    r = self._calculate_sizes(self.guessed_segment_size)
    self.guessed_num_segments = r["num_segments"]
    # as with CommonShare, our ciphertext_hash_tree is a stub until we
    # get the real num_segments
    self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
    self.ciphertext_hash_tree_leaves = self.guessed_num_segments
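# Illustrative example (not part of the original module): assuming the
# default max_segment_size is 128 KiB (131072 bytes), a 1 MiB file with a
# 3-of-10 verify-cap guesses
#     size = min(1048576, 131072) = 131072
#     guessed_segment_size = next_multiple(131072, 3) = 131073
# and guessed_num_segments then falls out of _calculate_sizes(). The guess
# is replaced by the authoritative values once the UEB arrives.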
def __init__(self, best_numsegs, si_prefix, shnum, logparent):
    self.si_prefix = si_prefix
    self.shnum = shnum
    # in the beginning, before we have the real UEB, we can only guess at
    # the number of segments. But we want to ask for block hashes early.
    # So if we're asked for which block hashes are needed before we know
    # numsegs for sure, we return a guess.
    self._block_hash_tree = IncompleteHashTree(best_numsegs)
    self._block_hash_tree_is_authoritative = False
    self._block_hash_tree_leaves = best_numsegs
    self._logparent = logparent
def _guess_offsets(self, verifycap, guessed_segment_size):
    self.guessed_segment_size = guessed_segment_size
    size = verifycap.size
    k = verifycap.needed_shares
    N = verifycap.total_shares
    r = self._node._calculate_sizes(guessed_segment_size)
    # num_segments, block_size/tail_block_size
    # guessed_segment_size/tail_segment_size/tail_segment_padded
    share_size = mathutil.div_ceil(size, k)
    # share_size is the amount of block data that will be put into each
    # share, summed over all segments. It does not include hashes, the
    # UEB, or other overhead.

    # use the upload-side code to get this as accurate as possible
    ht = IncompleteHashTree(N)
    num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
    wbp = make_write_bucket_proxy(None, None, share_size, r["block_size"],
                                  r["num_segments"], num_share_hashes, 0)
    self._fieldsize = wbp.fieldsize
    self._fieldstruct = wbp.fieldstruct
    self.guessed_offsets = wbp._offsets
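# Illustrative note (not part of the original module): continuing the
# hypothetical 1 MiB, 3-of-10 example, share_size = div_ceil(1048576, 3)
# = 349526 bytes of block data per share. The write bucket proxy built
# above has no real remote bucket behind it; it is only used to reproduce
# the uploader's share layout, so the downloader can guess the field size
# and offset table before the real offsets have been fetched.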
def set_authoritative_num_segments(self, numsegs):
    if self._block_hash_tree_leaves != numsegs:
        self._block_hash_tree = IncompleteHashTree(numsegs)
        self._block_hash_tree_leaves = numsegs
    self._block_hash_tree_is_authoritative = True
def __init__(self, verifycap, storage_broker, secret_holder,
             terminator, history, download_status):
    assert isinstance(verifycap, uri.CHKFileVerifierURI)
    self._verifycap = verifycap
    self._storage_broker = storage_broker
    self._si_prefix = base32.b2a(verifycap.storage_index[:8])[:12]
    self.running = True
    if terminator:
        terminator.register(self) # calls self.stop() at stopService()
    # the rules are:
    # 1: Only send network requests if you're active (self.running is True)
    # 2: Use TimerService, not reactor.callLater
    # 3: You can do eventual-sends any time.
    # These rules should mean that once
    # stopService()+flushEventualQueue() fires, everything will be done.
    self._secret_holder = secret_holder
    self._history = history
    self._download_status = download_status

    self.share_hash_tree = IncompleteHashTree(self._verifycap.total_shares)

    # we guess the segment size, so Segmentation can pull non-initial
    # segments in a single roundtrip. This populates
    # .guessed_segment_size, .guessed_num_segments, and
    # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
    # we'll need)
    self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)

    # filled in when we parse a valid UEB
    self.have_UEB = False
    self.segment_size = None
    self.tail_segment_size = None
    self.tail_segment_padded = None
    self.num_segments = None
    self.block_size = None
    self.tail_block_size = None

    # things to track callers that want data

    # _segment_requests can have duplicates
    self._segment_requests = [] # (segnum, d, cancel_handle, seg_ev, lp)
    self._active_segment = None # a SegmentFetcher, with .segnum

    self._segsize_observers = observer.OneShotObserverList()

    # we create one top-level logparent for this _Node, and another one
    # for each read() call. Segmentation and get_segment() messages are
    # associated with the read() call, everything else is tied to the
    # _Node's log entry.
    lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:"
                 " size=%(size)d,"
                 " guessed_segsize=%(guessed_segsize)d,"
                 " guessed_numsegs=%(guessed_numsegs)d",
                 si=self._si_prefix, size=verifycap.size,
                 guessed_segsize=self.guessed_segment_size,
                 guessed_numsegs=self.guessed_num_segments,
                 level=log.OPERATIONAL, umid="uJ0zAQ")
    self._lp = lp

    self._sharefinder = ShareFinder(storage_broker, verifycap, self,
                                    self._download_status, lp)
    self._shares = set()