Example 1
    def _parse_and_store_UEB(self, UEB_s):
        # Note: the UEB contains needed_shares and total_shares. These are
        # redundant and inferior (the filecap contains the authoritative
        # values). However, because it is possible to encode the same file in
        # multiple ways, and the encoders might choose (poorly) to use the
        # same key for both (therefore getting the same SI), we might
        # encounter shares for both types. The UEB hashes will be different,
        # however, and we'll disregard the "other" encoding's shares as
        # corrupted.

        # therefore, we ignore d['total_shares'] and d['needed_shares'].

        d = uri.unpack_extension(UEB_s)

        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
                ueb=repr(uri.unpack_extension_readable(UEB_s)),
                vcap=self._verifycap.to_string(),
                level=log.NOISY, parent=self._lp, umid="cVqZnA")

        k, N = self._verifycap.needed_shares, self._verifycap.total_shares

        self.segment_size = d['segment_size']
        self._segsize_observers.fire(self.segment_size)

        r = self._calculate_sizes(self.segment_size)
        self.tail_segment_size = r["tail_segment_size"]
        self.tail_segment_padded = r["tail_segment_padded"]
        self.num_segments = r["num_segments"]
        self.block_size = r["block_size"]
        self.tail_block_size = r["tail_block_size"]
        log.msg("actual sizes: %s" % (r,),
                level=log.NOISY, parent=self._lp, umid="PY6P5Q")
        if (self.segment_size == self.guessed_segment_size
            and self.num_segments == self.guessed_num_segments):
            log.msg("my guess was right!",
                    level=log.NOISY, parent=self._lp, umid="x340Ow")
        else:
            log.msg("my guess was wrong! Extra round trips for me.",
                    level=log.NOISY, parent=self._lp, umid="tb7RJw")

        # zfec.Decode() instantiation is fast, but still, let's use the same
        # codec instance for all but the last segment. 3-of-10 takes 15us on
        # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
        # 2.5ms, worst-case 254-of-255 is 9.3ms
        self._codec = CRSDecoder()
        self._codec.set_params(self.segment_size, k, N)


        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B. self.ciphertext_hash_tree was a guess before:
        # this is where we create it for real.
        self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
        self.ciphertext_hash_tree_leaves = self.num_segments
        self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

        self.share_hash_tree.set_hashes({0: d['share_root_hash']})
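
The pattern above (pin the root hash taken from the already-validated UEB, then accept leaf hashes later only if they chain back up to that root) is the heart of IncompleteHashTree. A minimal sketch of the idea, assuming plain sha256 instead of Tahoe's tagged hashes; this is not the allmydata.hashtree API:

import hashlib

def h(data):
    return hashlib.sha256(data).digest()

def check_leaf(root, leaf_hash, uncle_chain):
    # uncle_chain: (sibling_hash, sibling_is_left) pairs, from the leaf
    # level upwards; hashing along the chain must reproduce the root
    cur = leaf_hash
    for sibling, sibling_is_left in uncle_chain:
        cur = h(sibling + cur) if sibling_is_left else h(cur + sibling)
    return cur == root

# two-segment toy tree: root = h(h(seg0) + h(seg1))
leaf0, leaf1 = h(b"segment-0"), h(b"segment-1")
root = h(leaf0 + leaf1)
assert check_leaf(root, leaf0, [(leaf1, False)])
assert check_leaf(root, leaf1, [(leaf0, True)])
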
Example 2
 def _build_guessed_tables(self, max_segment_size):
     size = min(self._verifycap.size, max_segment_size)
     s = mathutil.next_multiple(size, self._verifycap.needed_shares)
     self.guessed_segment_size = s
     r = self._calculate_sizes(self.guessed_segment_size)
     self.guessed_num_segments = r["num_segments"]
     # as with CommonShare, our ciphertext_hash_tree is a stub until we
     # get the real num_segments
     self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
     self.ciphertext_hash_tree_leaves = self.guessed_num_segments
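
A worked illustration of the guess above, with hypothetical inputs (a file larger than the maximum segment size, k=3); next_multiple mirrors allmydata.util.mathutil.next_multiple:

def next_multiple(n, k):
    # smallest multiple of k that is >= n
    return ((n + k - 1) // k) * k

file_size = 1000000            # verifycap.size (hypothetical)
needed_shares = 3              # k (hypothetical)
max_segment_size = 131072      # hypothetical cap (128 KiB)

size = min(file_size, max_segment_size)                     # 131072
guessed_segment_size = next_multiple(size, needed_shares)   # 131073
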
Example 3
    def __init__(self, best_numsegs, si_prefix, shnum, logparent):
        self.si_prefix = si_prefix
        self.shnum = shnum

        # in the beginning, before we have the real UEB, we can only guess at
        # the number of segments. But we want to ask for block hashes early.
        # So if we're asked for which block hashes are needed before we know
        # numsegs for sure, we return a guess.
        self._block_hash_tree = IncompleteHashTree(best_numsegs)
        self._block_hash_tree_is_authoritative = False
        self._block_hash_tree_leaves = best_numsegs
        self._logparent = logparent
Example 4
        def _got_ueb(vup):
            share_hash_tree = IncompleteHashTree(vcap.total_shares)
            share_hash_tree.set_hashes({0: vup.share_root_hash})

            vrbp = ValidatedReadBucketProxy(sharenum, b, share_hash_tree,
                                            vup.num_segments, vup.block_size,
                                            vup.share_size)

            # note: normal download doesn't use get_all_sharehashes(),
            # because it gets more data than necessary. We've discussed the
            # security properties of having verification and download look
            # identical (so the server couldn't, say, provide good responses
            # for one and not the other), but I think that full verification
            # is more important than defending against inconsistent server
            # behavior. Besides, they can't pass the verifier without storing
            # all the data, so there's not so much to be gained by behaving
            # inconsistently.
            d = vrbp.get_all_sharehashes()
            # we fill share_hash_tree before fetching any blocks, so the
            # block fetches won't send redundant share-hash-tree requests, to
            # speed things up. Then we fetch+validate all the blockhashes.
            d.addCallback(lambda ign: vrbp.get_all_blockhashes())

            cht = IncompleteHashTree(vup.num_segments)
            cht.set_hashes({0: vup.crypttext_root_hash})
            d.addCallback(lambda ign: vrbp.get_all_crypttext_hashes(cht))

            d.addCallback(lambda ign: vrbp)
            return d
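
The verifier path above is a plain Deferred chain: each get_all_*() step runs only after the previous one has succeeded, and the final callback swaps the result for the proxy itself. A minimal sketch of that control flow, assuming Twisted is available; the fetch functions are stand-ins, not the real ValidatedReadBucketProxy methods:

from twisted.internet import defer

def fetch_sharehashes():            # stand-in for vrbp.get_all_sharehashes()
    return defer.succeed("share hashes")

def fetch_blockhashes(_ign):        # stand-in for vrbp.get_all_blockhashes()
    return defer.succeed("block hashes")

def fetch_crypttext_hashes(_ign):   # stand-in for vrbp.get_all_crypttext_hashes(cht)
    return defer.succeed("crypttext hashes")

d = fetch_sharehashes()
d.addCallback(fetch_blockhashes)
d.addCallback(fetch_crypttext_hashes)
d.addCallback(lambda _ign: "the proxy")   # analogous to addCallback(lambda ign: vrbp)
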
Example 5
    def _parse_and_store_UEB(self, UEB_s):
        # Note: the UEB contains needed_shares and total_shares. These are
        # redundant and inferior (the filecap contains the authoritative
        # values). However, because it is possible to encode the same file in
        # multiple ways, and the encoders might choose (poorly) to use the
        # same key for both (therefore getting the same SI), we might
        # encounter shares for both types. The UEB hashes will be different,
        # however, and we'll disregard the "other" encoding's shares as
        # corrupted.

        # therefore, we ignore d['total_shares'] and d['needed_shares'].

        d = uri.unpack_extension(UEB_s)

        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
                ueb=repr(uri.unpack_extension_readable(UEB_s)),
                vcap=self._verifycap.to_string(),
                level=log.NOISY, parent=self._lp, umid="cVqZnA")

        k, N = self._verifycap.needed_shares, self._verifycap.total_shares

        self.segment_size = d['segment_size']
        self._segsize_observers.fire(self.segment_size)

        r = self._calculate_sizes(self.segment_size)
        self.tail_segment_size = r["tail_segment_size"]
        self.tail_segment_padded = r["tail_segment_padded"]
        self.num_segments = r["num_segments"]
        self.block_size = r["block_size"]
        self.tail_block_size = r["tail_block_size"]
        log.msg("actual sizes: %s" % (r,),
                level=log.NOISY, parent=self._lp, umid="PY6P5Q")
        if (self.segment_size == self.guessed_segment_size
            and self.num_segments == self.guessed_num_segments):
            log.msg("my guess was right!",
                    level=log.NOISY, parent=self._lp, umid="x340Ow")
        else:
            log.msg("my guess was wrong! Extra round trips for me.",
                    level=log.NOISY, parent=self._lp, umid="tb7RJw")

        # zfec.Decode() instantiation is fast, but still, let's use the same
        # codec instance for all but the last segment. 3-of-10 takes 15us on
        # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
        # 2.5ms, worst-case 254-of-255 is 9.3ms
        self._codec = CRSDecoder()
        self._codec.set_params(self.segment_size, k, N)


        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B. self.ciphertext_hash_tree was a guess before:
        # this is where we create it for real.
        self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
        self.ciphertext_hash_tree_leaves = self.num_segments
        self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

        self.share_hash_tree.set_hashes({0: d['share_root_hash']})
Example 6
 def _build_guessed_tables(self, max_segment_size):
     size = min(self._verifycap.size, max_segment_size)
     s = mathutil.next_multiple(size, self._verifycap.needed_shares)
     self.guessed_segment_size = s
     r = self._calculate_sizes(self.guessed_segment_size)
     self.guessed_num_segments = r["num_segments"]
     # as with CommonShare, our ciphertext_hash_tree is a stub until we
     # get the real num_segments
     self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
     self.ciphertext_hash_tree_leaves = self.guessed_num_segments
Example 7
    def _guess_offsets(self, verifycap, guessed_segment_size):
        self.guessed_segment_size = guessed_segment_size
        size = verifycap.size
        k = verifycap.needed_shares
        N = verifycap.total_shares
        r = self._node._calculate_sizes(guessed_segment_size)
        # num_segments, block_size/tail_block_size
        # guessed_segment_size/tail_segment_size/tail_segment_padded
        share_size = mathutil.div_ceil(size, k)
        # share_size is the amount of block data that will be put into each
        # share, summed over all segments. It does not include hashes, the
        # UEB, or other overhead.

        # use the upload-side code to get this as accurate as possible
        ht = IncompleteHashTree(N)
        num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
        wbp = make_write_bucket_proxy(None, None, share_size, r["block_size"],
                                      r["num_segments"], num_share_hashes, 0)
        self._fieldsize = wbp.fieldsize
        self._fieldstruct = wbp.fieldstruct
        self.guessed_offsets = wbp._offsets
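
A small sketch of the share_size estimate used above, with hypothetical numbers; div_ceil mirrors allmydata.util.mathutil.div_ceil. Each share carries roughly 1/k of the ciphertext, rounded up, not counting hashes or the UEB:

def div_ceil(n, d):
    # integer ceiling division
    return (n + d - 1) // d

size = 1000000                   # verifycap.size (hypothetical)
k = 3                            # needed_shares (hypothetical)
share_size = div_ceil(size, k)   # 333334 bytes of block data per share
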
Example 8
        def _got_ueb(vup):
            share_hash_tree = IncompleteHashTree(vcap.total_shares)
            share_hash_tree.set_hashes({0: vup.share_root_hash})

            vrbp = ValidatedReadBucketProxy(sharenum, b,
                                            share_hash_tree,
                                            vup.num_segments,
                                            vup.block_size,
                                            vup.share_size)

            # note: normal download doesn't use get_all_sharehashes(),
            # because it gets more data than necessary. We've discussed the
            # security properties of having verification and download look
            # identical (so the server couldn't, say, provide good responses
            # for one and not the other), but I think that full verification
            # is more important than defending against inconsistent server
            # behavior. Besides, they can't pass the verifier without storing
            # all the data, so there's not so much to be gained by behaving
            # inconsistently.
            d = vrbp.get_all_sharehashes()
            # we fill share_hash_tree before fetching any blocks, so the
            # block fetches won't send redundant share-hash-tree requests, to
            # speed things up. Then we fetch+validate all the blockhashes.
            d.addCallback(lambda ign: vrbp.get_all_blockhashes())

            cht = IncompleteHashTree(vup.num_segments)
            cht.set_hashes({0: vup.crypttext_root_hash})
            d.addCallback(lambda ign: vrbp.get_all_crypttext_hashes(cht))

            d.addCallback(lambda ign: vrbp)
            return d
Example 9
 def set_authoritative_num_segments(self, numsegs):
     if self._block_hash_tree_leaves != numsegs:
         self._block_hash_tree = IncompleteHashTree(numsegs)
         self._block_hash_tree_leaves = numsegs
     self._block_hash_tree_is_authoritative = True
Example 10
class CommonShare(object):
    # TODO: defer creation of the hashtree until somebody uses us. There will
    # be a lot of unused shares, and we shouldn't spend the memory on a large
    # hashtree unless necessary.
    """I hold data that is common across all instances of a single share,
    like sh2 on both servers A and B. This is just the block hash tree.
    """
    def __init__(self, best_numsegs, si_prefix, shnum, logparent):
        self.si_prefix = si_prefix
        self.shnum = shnum

        # in the beginning, before we have the real UEB, we can only guess at
        # the number of segments. But we want to ask for block hashes early.
        # So if we're asked for which block hashes are needed before we know
        # numsegs for sure, we return a guess.
        self._block_hash_tree = IncompleteHashTree(best_numsegs)
        self._block_hash_tree_is_authoritative = False
        self._block_hash_tree_leaves = best_numsegs
        self._logparent = logparent

    def __repr__(self):
        return "CommonShare(%s-sh%d)" % (self.si_prefix, self.shnum)

    def set_authoritative_num_segments(self, numsegs):
        if self._block_hash_tree_leaves != numsegs:
            self._block_hash_tree = IncompleteHashTree(numsegs)
            self._block_hash_tree_leaves = numsegs
        self._block_hash_tree_is_authoritative = True

    def need_block_hash_root(self):
        return bool(not self._block_hash_tree[0])

    def set_block_hash_root(self, roothash):
        assert self._block_hash_tree_is_authoritative
        self._block_hash_tree.set_hashes({0: roothash})

    def get_desired_block_hashes(self, segnum):
        if segnum < self._block_hash_tree_leaves:
            return self._block_hash_tree.needed_hashes(segnum,
                                                       include_leaf=True)

        # the segnum might be out-of-bounds. Originally it was due to a race
        # between the receipt of the UEB on one share (from which we learn
        # the correct number of segments, update all hash trees to the right
        # size, and queue a BADSEGNUM to the SegmentFetcher) and the delivery
        # of a new Share to the SegmentFetcher while that BADSEGNUM was
        # queued (which sends out requests to the stale segnum, now larger
        # than the hash tree). I fixed that (by making SegmentFetcher.loop
        # check for a bad segnum at the start of each pass, instead of using
        # the queued BADSEGNUM or a flag it sets), but just in case this
        # still happens, I'm leaving the < in place. If it gets hit, there's
        # a potential lost-progress problem, but I'm pretty sure that it will
        # get cleared up on the following turn.
        return []

    def get_needed_block_hashes(self, segnum):
        assert self._block_hash_tree_is_authoritative
        # XXX: include_leaf=True needs thought: how did the old downloader do
        # it? I think it grabbed *all* block hashes and set them all at once.
        # Since we want to fetch less data, we either need to fetch the leaf
        # too, or wait to set the block hashes until we've also received the
        # block itself, so we can hash it too, and set the chain+leaf all at
        # the same time.
        return self._block_hash_tree.needed_hashes(segnum, include_leaf=True)

    def process_block_hashes(self, block_hashes):
        assert self._block_hash_tree_is_authoritative
        # this may raise BadHashError or NotEnoughHashesError
        self._block_hash_tree.set_hashes(block_hashes)

    def check_block(self, segnum, block):
        assert self._block_hash_tree_is_authoritative
        h = hashutil.block_hash(block)
        # this may raise BadHashError or NotEnoughHashesError
        self._block_hash_tree.set_hashes(leaves={segnum: h})
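
needed_hashes(segnum, include_leaf=True) above boils down to the Merkle uncle chain of one leaf. This is not the allmydata.hashtree API (which also subtracts hashes it already holds); it is just a sketch of the index arithmetic for a flat binary tree with the root at index 0 and the children of node i at 2i+1 and 2i+2:

def uncle_chain(leafnum, num_leaves):
    # pad the leaf count up to a power of two, as flat Merkle trees do
    n = 1
    while n < num_leaves:
        n *= 2
    idx = n - 1 + leafnum          # flat-array index of the leaf
    needed = [idx]                 # include_leaf=True: the leaf hash itself
    while idx > 0:
        sibling = idx + 1 if idx % 2 else idx - 1
        needed.append(sibling)     # the sibling needed at this level
        idx = (idx - 1) // 2       # move up to the parent
    # the root (index 0) is never in the chain: it arrives separately,
    # e.g. via set_block_hash_root() above
    return needed

print(uncle_chain(0, 6))   # [7, 8, 4, 2]: leaf 0, its sibling, and two uncles
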
Example 11
    def __init__(self, verifycap, storage_broker, secret_holder,
                 terminator, history, download_status):
        assert isinstance(verifycap, uri.CHKFileVerifierURI)
        self._verifycap = verifycap
        self._storage_broker = storage_broker
        self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60)
        self.running = True
        if terminator:
            terminator.register(self) # calls self.stop() at stopService()
        # the rules are:
        # 1: Only send network requests if you're active (self.running is True)
        # 2: Use TimerService, not reactor.callLater
        # 3: You can do eventual-sends any time.
        # These rules should mean that once
        # stopService()+flushEventualQueue() fires, everything will be done.
        self._secret_holder = secret_holder
        self._history = history
        self._download_status = download_status

        k, N = self._verifycap.needed_shares, self._verifycap.total_shares
        self.share_hash_tree = IncompleteHashTree(N)

        # we guess the segment size, so Segmentation can pull non-initial
        # segments in a single roundtrip. This populates
        # .guessed_segment_size, .guessed_num_segments, and
        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
        # we'll need)
        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)

        # filled in when we parse a valid UEB
        self.have_UEB = False
        self.segment_size = None
        self.tail_segment_size = None
        self.tail_segment_padded = None
        self.num_segments = None
        self.block_size = None
        self.tail_block_size = None

        # things to track callers that want data

        # _segment_requests can have duplicates
        self._segment_requests = [] # (segnum, d, cancel_handle, logparent)
        self._active_segment = None # a SegmentFetcher, with .segnum

        self._segsize_observers = observer.OneShotObserverList()

        # we create one top-level logparent for this _Node, and another one
        # for each read() call. Segmentation and get_segment() messages are
        # associated with the read() call, everything else is tied to the
        # _Node's log entry.
        lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:"
                     " size=%(size)d,"
                     " guessed_segsize=%(guessed_segsize)d,"
                     " guessed_numsegs=%(guessed_numsegs)d",
                     si=self._si_prefix, size=verifycap.size,
                     guessed_segsize=self.guessed_segment_size,
                     guessed_numsegs=self.guessed_num_segments,
                     level=log.OPERATIONAL, umid="uJ0zAQ")
        self._lp = lp

        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
                                        self._download_status, lp)
        self._shares = set()
Example 12
class DownloadNode:
    """Internal class which manages downloads and holds state. External
    callers use CiphertextFileNode instead."""

    # Share._node points to me
    def __init__(self, verifycap, storage_broker, secret_holder,
                 terminator, history, download_status):
        assert isinstance(verifycap, uri.CHKFileVerifierURI)
        self._verifycap = verifycap
        self._storage_broker = storage_broker
        self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60)
        self.running = True
        if terminator:
            terminator.register(self) # calls self.stop() at stopService()
        # the rules are:
        # 1: Only send network requests if you're active (self.running is True)
        # 2: Use TimerService, not reactor.callLater
        # 3: You can do eventual-sends any time.
        # These rules should mean that once
        # stopService()+flushEventualQueue() fires, everything will be done.
        self._secret_holder = secret_holder
        self._history = history
        self._download_status = download_status

        k, N = self._verifycap.needed_shares, self._verifycap.total_shares
        self.share_hash_tree = IncompleteHashTree(N)

        # we guess the segment size, so Segmentation can pull non-initial
        # segments in a single roundtrip. This populates
        # .guessed_segment_size, .guessed_num_segments, and
        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
        # we'll need)
        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)

        # filled in when we parse a valid UEB
        self.have_UEB = False
        self.segment_size = None
        self.tail_segment_size = None
        self.tail_segment_padded = None
        self.num_segments = None
        self.block_size = None
        self.tail_block_size = None

        # things to track callers that want data

        # _segment_requests can have duplicates
        self._segment_requests = [] # (segnum, d, cancel_handle, logparent)
        self._active_segment = None # a SegmentFetcher, with .segnum

        self._segsize_observers = observer.OneShotObserverList()

        # we create one top-level logparent for this _Node, and another one
        # for each read() call. Segmentation and get_segment() messages are
        # associated with the read() call, everything else is tied to the
        # _Node's log entry.
        lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:"
                     " size=%(size)d,"
                     " guessed_segsize=%(guessed_segsize)d,"
                     " guessed_numsegs=%(guessed_numsegs)d",
                     si=self._si_prefix, size=verifycap.size,
                     guessed_segsize=self.guessed_segment_size,
                     guessed_numsegs=self.guessed_num_segments,
                     level=log.OPERATIONAL, umid="uJ0zAQ")
        self._lp = lp

        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
                                        self._download_status, lp)
        self._shares = set()

    def _build_guessed_tables(self, max_segment_size):
        size = min(self._verifycap.size, max_segment_size)
        s = mathutil.next_multiple(size, self._verifycap.needed_shares)
        self.guessed_segment_size = s
        r = self._calculate_sizes(self.guessed_segment_size)
        self.guessed_num_segments = r["num_segments"]
        # as with CommonShare, our ciphertext_hash_tree is a stub until we
        # get the real num_segments
        self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
        self.ciphertext_hash_tree_leaves = self.guessed_num_segments

    def __repr__(self):
        return "ImmutableDownloadNode(%s)" % (self._si_prefix,)

    def stop(self):
        # called by the Terminator at shutdown, mostly for tests
        if self._active_segment:
            self._active_segment.stop()
            self._active_segment = None
        self._sharefinder.stop()

    # things called by outside callers, via CiphertextFileNode. get_segment()
    # may also be called by Segmentation.

    def read(self, consumer, offset=0, size=None, read_ev=None):
        """I am the main entry point, from which FileNode.read() can get
        data. I feed the consumer with the desired range of ciphertext. I
        return a Deferred that fires (with the consumer) when the read is
        finished.

        Note that there is no notion of a 'file pointer': each call to read()
        uses an independent offset= value."""
        # for concurrent operations: each gets its own Segmentation manager
        if size is None:
            size = self._verifycap.size
        # ignore overruns: clip size so offset+size does not go past EOF, and
        # so size is not negative (which indicates that offset >= EOF)
        size = max(0, min(size, self._verifycap.size-offset))
        if read_ev is None:
            read_ev = self._download_status.add_read_event(offset, size, now())

        lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
                     si=base32.b2a(self._verifycap.storage_index)[:8],
                     offset=offset, size=size,
                     level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww")
        if self._history:
            sp = self._history.stats_provider
            sp.count("downloader.files_downloaded", 1) # really read() calls
            sp.count("downloader.bytes_downloaded", size)
        if size == 0:
            read_ev.finished(now())
            # no data, so no producer, so no register/unregisterProducer
            return defer.succeed(consumer)
        s = Segmentation(self, offset, size, consumer, read_ev, lp)
        # this raises an interesting question: what segments to fetch? if
        # offset=0, always fetch the first segment, and then allow
        # Segmentation to be responsible for pulling the subsequent ones if
        # the first wasn't large enough. If offset>0, we're going to need an
        # extra roundtrip to get the UEB (and therefore the segment size)
        # before we can figure out which segment to get. TODO: allow the
        # offset-table-guessing code (which starts by guessing the segsize)
        # to assist the offset>0 process.
        d = s.start()
        def _done(res):
            read_ev.finished(now())
            return res
        d.addBoth(_done)
        return d

    def get_segment(self, segnum, logparent=None):
        """Begin downloading a segment. I return a tuple (d, c): 'd' is a
        Deferred that fires with (offset,data) when the desired segment is
        available, and c is an object on which c.cancel() can be called to
        disavow interest in the segment (after which 'd' will never fire).

        You probably need to know the segment size before calling this,
        unless you want the first few bytes of the file. If you ask for a
        segment number which turns out to be too large, the Deferred will
        errback with BadSegmentNumberError.

        The Deferred fires with the offset of the first byte of the data
        segment, so that you can call get_segment() before knowing the
        segment size, and still know which data you received.

        The Deferred can also errback with other fatal problems, such as
        NotEnoughSharesError, NoSharesError, or BadCiphertextHashError.
        """
        lp = log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)",
                     si=base32.b2a(self._verifycap.storage_index)[:8],
                     segnum=segnum,
                     level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ")
        self._download_status.add_segment_request(segnum, now())
        d = defer.Deferred()
        c = Cancel(self._cancel_request)
        self._segment_requests.append( (segnum, d, c, lp) )
        self._start_new_segment()
        return (d, c)

    def get_segsize(self):
        """Return a Deferred that fires when we know the real segment size."""
        if self.segment_size:
            return defer.succeed(self.segment_size)
        # TODO: this downloads (and discards) the first segment of the file.
        # We could make this more efficient by writing
        # fetcher.SegmentSizeFetcher, with the job of finding a single valid
        # share and extracting the UEB. We'd add Share.get_UEB() to request
        # just the UEB.
        (d,c) = self.get_segment(0)
        # this ensures that an error during get_segment() will errback the
        # caller, so Repair won't wait forever on completely missing files
        d.addCallback(lambda ign: self._segsize_observers.when_fired())
        return d

    # things called by the Segmentation object used to transform
    # arbitrary-sized read() calls into quantized segment fetches

    def _start_new_segment(self):
        if self._active_segment is None and self._segment_requests:
            segnum = self._segment_requests[0][0]
            k = self._verifycap.needed_shares
            lp = self._segment_requests[0][3]
            log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d",
                    node=repr(self), segnum=segnum,
                    level=log.NOISY, parent=lp, umid="wAlnHQ")
            self._active_segment = fetcher = SegmentFetcher(self, segnum, k, lp)
            active_shares = [s for s in self._shares if s.is_alive()]
            fetcher.add_shares(active_shares) # this triggers the loop


    # called by our child ShareFinder
    def got_shares(self, shares):
        self._shares.update(shares)
        if self._active_segment:
            self._active_segment.add_shares(shares)
    def no_more_shares(self):
        self._no_more_shares = True
        if self._active_segment:
            self._active_segment.no_more_shares()

    # things called by our Share instances

    def validate_and_store_UEB(self, UEB_s):
        log.msg("validate_and_store_UEB",
                level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw")
        h = hashutil.uri_extension_hash(UEB_s)
        if h != self._verifycap.uri_extension_hash:
            raise BadHashError
        self._parse_and_store_UEB(UEB_s) # sets self._stuff
        # TODO: a malformed (but authentic) UEB could throw an assertion in
        # _parse_and_store_UEB, and we should abandon the download.
        self.have_UEB = True

        # inform the ShareFinder about our correct number of segments. This
        # will update the block-hash-trees in all existing CommonShare
        # instances, and will populate new ones with the correct value.
        self._sharefinder.update_num_segments()

    def _parse_and_store_UEB(self, UEB_s):
        # Note: the UEB contains needed_shares and total_shares. These are
        # redundant and inferior (the filecap contains the authoritative
        # values). However, because it is possible to encode the same file in
        # multiple ways, and the encoders might choose (poorly) to use the
        # same key for both (therefore getting the same SI), we might
        # encounter shares for both types. The UEB hashes will be different,
        # however, and we'll disregard the "other" encoding's shares as
        # corrupted.

        # therefore, we ignore d['total_shares'] and d['needed_shares'].

        d = uri.unpack_extension(UEB_s)

        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
                ueb=repr(uri.unpack_extension_readable(UEB_s)),
                vcap=self._verifycap.to_string(),
                level=log.NOISY, parent=self._lp, umid="cVqZnA")

        k, N = self._verifycap.needed_shares, self._verifycap.total_shares

        self.segment_size = d['segment_size']
        self._segsize_observers.fire(self.segment_size)

        r = self._calculate_sizes(self.segment_size)
        self.tail_segment_size = r["tail_segment_size"]
        self.tail_segment_padded = r["tail_segment_padded"]
        self.num_segments = r["num_segments"]
        self.block_size = r["block_size"]
        self.tail_block_size = r["tail_block_size"]
        log.msg("actual sizes: %s" % (r,),
                level=log.NOISY, parent=self._lp, umid="PY6P5Q")
        if (self.segment_size == self.guessed_segment_size
            and self.num_segments == self.guessed_num_segments):
            log.msg("my guess was right!",
                    level=log.NOISY, parent=self._lp, umid="x340Ow")
        else:
            log.msg("my guess was wrong! Extra round trips for me.",
                    level=log.NOISY, parent=self._lp, umid="tb7RJw")

        # zfec.Decode() instantiation is fast, but still, let's use the same
        # codec instance for all but the last segment. 3-of-10 takes 15us on
        # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
        # 2.5ms, worst-case 254-of-255 is 9.3ms
        self._codec = CRSDecoder()
        self._codec.set_params(self.segment_size, k, N)


        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B. self.ciphertext_hash_tree was a guess before:
        # this is where we create it for real.
        self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
        self.ciphertext_hash_tree_leaves = self.num_segments
        self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

        self.share_hash_tree.set_hashes({0: d['share_root_hash']})

        # Our job is a fast download, not verification, so we ignore any
        # redundant fields. The Verifier uses a different code path which
        # does not ignore them.

    def _calculate_sizes(self, segment_size):
        # segments of ciphertext
        size = self._verifycap.size
        k = self._verifycap.needed_shares

        # this assert matches the one in encode.py:127 inside
        # Encoded._got_all_encoding_parameters, where the UEB is constructed
        assert segment_size % k == 0

        # the last segment is usually short. We don't store a whole segsize,
        # but we do pad the segment up to a multiple of k, because the
        # encoder requires that.
        tail_segment_size = size % segment_size
        if tail_segment_size == 0:
            tail_segment_size = segment_size
        padded = mathutil.next_multiple(tail_segment_size, k)
        tail_segment_padded = padded

        num_segments = mathutil.div_ceil(size, segment_size)

        # each segment is turned into N blocks. All but the last are of size
        # block_size, and the last is of size tail_block_size
        block_size = segment_size / k
        tail_block_size = tail_segment_padded / k

        return { "tail_segment_size": tail_segment_size,
                 "tail_segment_padded": tail_segment_padded,
                 "num_segments": num_segments,
                 "block_size": block_size,
                 "tail_block_size": tail_block_size,
                 }


    def process_share_hashes(self, share_hashes):
        for hashnum in share_hashes:
            if hashnum >= len(self.share_hash_tree):
                # "BadHashError" is normally for e.g. a corrupt block. We
                # sort of abuse it here to mean a badly numbered hash (which
                # indicates corruption in the number bytes, rather than in
                # the data bytes).
                raise BadHashError("hashnum %d doesn't fit in hashtree(%d)"
                                   % (hashnum, len(self.share_hash_tree)))
        self.share_hash_tree.set_hashes(share_hashes)

    def get_desired_ciphertext_hashes(self, segnum):
        if segnum < self.ciphertext_hash_tree_leaves:
            return self.ciphertext_hash_tree.needed_hashes(segnum,
                                                           include_leaf=True)
        return []
    def get_needed_ciphertext_hashes(self, segnum):
        cht = self.ciphertext_hash_tree
        return cht.needed_hashes(segnum, include_leaf=True)

    def process_ciphertext_hashes(self, hashes):
        assert self.num_segments is not None
        # this may raise BadHashError or NotEnoughHashesError
        self.ciphertext_hash_tree.set_hashes(hashes)


    # called by our child SegmentFetcher

    def want_more_shares(self):
        self._sharefinder.hungry()

    def fetch_failed(self, sf, f):
        assert sf is self._active_segment
        # deliver error upwards
        for (d,c) in self._extract_requests(sf.segnum):
            eventually(self._deliver, d, c, f)
        self._active_segment = None
        self._start_new_segment()

    def process_blocks(self, segnum, blocks):
        d = defer.maybeDeferred(self._decode_blocks, segnum, blocks)
        d.addCallback(self._check_ciphertext_hash, segnum)
        def _deliver(result):
            ds = self._download_status
            if isinstance(result, Failure):
                ds.add_segment_error(segnum, now())
            else:
                (offset, segment, decodetime) = result
                ds.add_segment_delivery(segnum, now(),
                                        offset, len(segment), decodetime)
            log.msg(format="delivering segment(%(segnum)d)",
                    segnum=segnum,
                    level=log.OPERATIONAL, parent=self._lp,
                    umid="j60Ojg")
            for (d,c) in self._extract_requests(segnum):
                eventually(self._deliver, d, c, result)
            self._active_segment = None
            self._start_new_segment()
        d.addBoth(_deliver)
        d.addErrback(lambda f:
                     log.err("unhandled error during process_blocks",
                             failure=f, level=log.WEIRD,
                             parent=self._lp, umid="MkEsCg"))

    def _decode_blocks(self, segnum, blocks):
        tail = (segnum == self.num_segments-1)
        codec = self._codec
        block_size = self.block_size
        decoded_size = self.segment_size
        if tail:
            # account for the padding in the last segment
            codec = CRSDecoder()
            k, N = self._verifycap.needed_shares, self._verifycap.total_shares
            codec.set_params(self.tail_segment_padded, k, N)
            block_size = self.tail_block_size
            decoded_size = self.tail_segment_padded

        shares = []
        shareids = []
        for (shareid, share) in blocks.iteritems():
            assert len(share) == block_size
            shareids.append(shareid)
            shares.append(share)
        del blocks

        start = now()
        d = codec.decode(shares, shareids)   # segment
        del shares
        def _process(buffers):
            decodetime = now() - start
            segment = "".join(buffers)
            assert len(segment) == decoded_size
            del buffers
            if tail:
                segment = segment[:self.tail_segment_size]
            return (segment, decodetime)
        d.addCallback(_process)
        return d

    def _check_ciphertext_hash(self, (segment, decodetime), segnum):
        assert self._active_segment.segnum == segnum
        assert self.segment_size is not None
        offset = segnum * self.segment_size

        h = hashutil.crypttext_segment_hash(segment)
        try:
            self.ciphertext_hash_tree.set_hashes(leaves={segnum: h})
            return (offset, segment, decodetime)
        except (BadHashError, NotEnoughHashesError):
            format = ("hash failure in ciphertext_hash_tree:"
                      " segnum=%(segnum)d, SI=%(si)s")
            log.msg(format=format, segnum=segnum, si=self._si_prefix,
                    failure=Failure(),
                    level=log.WEIRD, parent=self._lp, umid="MTwNnw")
            # this is especially weird, because we made it past the share
            # hash tree. It implies that we're using the wrong encoding, or
            # that the uploader deliberately constructed a bad UEB.
            msg = format % {"segnum": segnum, "si": self._si_prefix}
            raise BadCiphertextHashError(msg)
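
To make the size arithmetic in _calculate_sizes concrete, here is a standalone sketch with hypothetical inputs (a 1,000,000-byte file, k=3, segment_size=131073); div_ceil and next_multiple mirror allmydata.util.mathutil:

def div_ceil(n, d):
    return (n + d - 1) // d

def next_multiple(n, k):
    return div_ceil(n, k) * k

size = 1000000            # verifycap.size (hypothetical)
k = 3                     # needed_shares (hypothetical)
segment_size = 131073     # must be a multiple of k, as the assert requires

tail_segment_size = size % segment_size or segment_size    # 82489
tail_segment_padded = next_multiple(tail_segment_size, k)  # 82491
num_segments = div_ceil(size, segment_size)                # 8
block_size = segment_size // k                             # 43691
tail_block_size = tail_segment_padded // k                 # 27497
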
Example 13
    def __init__(self, verifycap, storage_broker, secret_holder,
                 terminator, history, download_status):
        assert isinstance(verifycap, uri.CHKFileVerifierURI)
        self._verifycap = verifycap
        self._storage_broker = storage_broker
        self._si_prefix = base32.b2a(verifycap.storage_index[:8])[:12]
        self.running = True
        if terminator:
            terminator.register(self) # calls self.stop() at stopService()
        # the rules are:
        # 1: Only send network requests if you're active (self.running is True)
        # 2: Use TimerService, not reactor.callLater
        # 3: You can do eventual-sends any time.
        # These rules should mean that once
        # stopService()+flushEventualQueue() fires, everything will be done.
        self._secret_holder = secret_holder
        self._history = history
        self._download_status = download_status

        self.share_hash_tree = IncompleteHashTree(self._verifycap.total_shares)

        # we guess the segment size, so Segmentation can pull non-initial
        # segments in a single roundtrip. This populates
        # .guessed_segment_size, .guessed_num_segments, and
        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
        # we'll need)
        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)

        # filled in when we parse a valid UEB
        self.have_UEB = False
        self.segment_size = None
        self.tail_segment_size = None
        self.tail_segment_padded = None
        self.num_segments = None
        self.block_size = None
        self.tail_block_size = None

        # things to track callers that want data

        # _segment_requests can have duplicates
        self._segment_requests = [] # (segnum, d, cancel_handle, seg_ev, lp)
        self._active_segment = None # a SegmentFetcher, with .segnum

        self._segsize_observers = observer.OneShotObserverList()

        # we create one top-level logparent for this _Node, and another one
        # for each read() call. Segmentation and get_segment() messages are
        # associated with the read() call, everything else is tied to the
        # _Node's log entry.
        lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:"
                     " size=%(size)d,"
                     " guessed_segsize=%(guessed_segsize)d,"
                     " guessed_numsegs=%(guessed_numsegs)d",
                     si=self._si_prefix, size=verifycap.size,
                     guessed_segsize=self.guessed_segment_size,
                     guessed_numsegs=self.guessed_num_segments,
                     level=log.OPERATIONAL, umid="uJ0zAQ")
        self._lp = lp

        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
                                        self._download_status, lp)
        self._shares = set()
Example 14
class DownloadNode(object):
    """Internal class which manages downloads and holds state. External
    callers use CiphertextFileNode instead."""

    # Share._node points to me
    def __init__(self, verifycap, storage_broker, secret_holder,
                 terminator, history, download_status):
        assert isinstance(verifycap, uri.CHKFileVerifierURI)
        self._verifycap = verifycap
        self._storage_broker = storage_broker
        self._si_prefix = base32.b2a(verifycap.storage_index[:8])[:12]
        self.running = True
        if terminator:
            terminator.register(self) # calls self.stop() at stopService()
        # the rules are:
        # 1: Only send network requests if you're active (self.running is True)
        # 2: Use TimerService, not reactor.callLater
        # 3: You can do eventual-sends any time.
        # These rules should mean that once
        # stopService()+flushEventualQueue() fires, everything will be done.
        self._secret_holder = secret_holder
        self._history = history
        self._download_status = download_status

        self.share_hash_tree = IncompleteHashTree(self._verifycap.total_shares)

        # we guess the segment size, so Segmentation can pull non-initial
        # segments in a single roundtrip. This populates
        # .guessed_segment_size, .guessed_num_segments, and
        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
        # we'll need)
        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)

        # filled in when we parse a valid UEB
        self.have_UEB = False
        self.segment_size = None
        self.tail_segment_size = None
        self.tail_segment_padded = None
        self.num_segments = None
        self.block_size = None
        self.tail_block_size = None

        # things to track callers that want data

        # _segment_requests can have duplicates
        self._segment_requests = [] # (segnum, d, cancel_handle, seg_ev, lp)
        self._active_segment = None # a SegmentFetcher, with .segnum

        self._segsize_observers = observer.OneShotObserverList()

        # we create one top-level logparent for this _Node, and another one
        # for each read() call. Segmentation and get_segment() messages are
        # associated with the read() call, everything else is tied to the
        # _Node's log entry.
        lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:"
                     " size=%(size)d,"
                     " guessed_segsize=%(guessed_segsize)d,"
                     " guessed_numsegs=%(guessed_numsegs)d",
                     si=self._si_prefix, size=verifycap.size,
                     guessed_segsize=self.guessed_segment_size,
                     guessed_numsegs=self.guessed_num_segments,
                     level=log.OPERATIONAL, umid="uJ0zAQ")
        self._lp = lp

        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
                                        self._download_status, lp)
        self._shares = set()

    def _build_guessed_tables(self, max_segment_size):
        size = min(self._verifycap.size, max_segment_size)
        s = mathutil.next_multiple(size, self._verifycap.needed_shares)
        self.guessed_segment_size = s
        r = self._calculate_sizes(self.guessed_segment_size)
        self.guessed_num_segments = r["num_segments"]
        # as with CommonShare, our ciphertext_hash_tree is a stub until we
        # get the real num_segments
        self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
        self.ciphertext_hash_tree_leaves = self.guessed_num_segments

    def __repr__(self):
        return "ImmutableDownloadNode(%r)" % (self._si_prefix,)

    def stop(self):
        # called by the Terminator at shutdown, mostly for tests
        if self._active_segment:
            self._active_segment.stop()
            self._active_segment = None
        self._sharefinder.stop()

    # things called by outside callers, via CiphertextFileNode. get_segment()
    # may also be called by Segmentation.

    def read(self, consumer, offset, size):
        """I am the main entry point, from which FileNode.read() can get
        data. I feed the consumer with the desired range of ciphertext. I
        return a Deferred that fires (with the consumer) when the read is
        finished.

        Note that there is no notion of a 'file pointer': each call to read()
        uses an independent offset= value.
        """
        # for concurrent operations: each gets its own Segmentation manager
        if size is None:
            size = self._verifycap.size
        # ignore overruns: clip size so offset+size does not go past EOF, and
        # so size is not negative (which indicates that offset >= EOF)
        size = max(0, min(size, self._verifycap.size-offset))

        read_ev = self._download_status.add_read_event(offset, size, now())
        if IDownloadStatusHandlingConsumer.providedBy(consumer):
            consumer.set_download_status_read_event(read_ev)
            consumer.set_download_status(self._download_status)

        lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
                     si=base32.b2a(self._verifycap.storage_index)[:8],
                     offset=offset, size=size,
                     level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww")
        if self._history:
            sp = self._history.stats_provider
            sp.count("downloader.files_downloaded", 1) # really read() calls
            sp.count("downloader.bytes_downloaded", size)
        if size == 0:
            read_ev.finished(now())
            # no data, so no producer, so no register/unregisterProducer
            return defer.succeed(consumer)

        # for concurrent operations, each read() gets its own Segmentation
        # manager
        s = Segmentation(self, offset, size, consumer, read_ev, lp)

        # this raises an interesting question: what segments to fetch? if
        # offset=0, always fetch the first segment, and then allow
        # Segmentation to be responsible for pulling the subsequent ones if
        # the first wasn't large enough. If offset>0, we're going to need an
        # extra roundtrip to get the UEB (and therefore the segment size)
        # before we can figure out which segment to get. TODO: allow the
        # offset-table-guessing code (which starts by guessing the segsize)
        # to assist the offset>0 process.
        d = s.start()
        def _done(res):
            read_ev.finished(now())
            return res
        d.addBoth(_done)
        return d

    def get_segment(self, segnum, logparent=None):
        """Begin downloading a segment. I return a tuple (d, c): 'd' is a
        Deferred that fires with (offset,data) when the desired segment is
        available, and c is an object on which c.cancel() can be called to
        disavow interest in the segment (after which 'd' will never fire).

        You probably need to know the segment size before calling this,
        unless you want the first few bytes of the file. If you ask for a
        segment number which turns out to be too large, the Deferred will
        errback with BadSegmentNumberError.

        The Deferred fires with the offset of the first byte of the data
        segment, so that you can call get_segment() before knowing the
        segment size, and still know which data you received.

        The Deferred can also errback with other fatal problems, such as
        NotEnoughSharesError, NoSharesError, or BadCiphertextHashError.
        """
        lp = log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)",
                     si=base32.b2a(self._verifycap.storage_index)[:8],
                     segnum=segnum,
                     level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ")
        seg_ev = self._download_status.add_segment_request(segnum, now())
        d = defer.Deferred()
        c = Cancel(self._cancel_request)
        self._segment_requests.append( (segnum, d, c, seg_ev, lp) )
        self._start_new_segment()
        return (d, c)

    def get_segsize(self):
        """Return a Deferred that fires when we know the real segment size."""
        if self.segment_size:
            return defer.succeed(self.segment_size)
        # TODO: this downloads (and discards) the first segment of the file.
        # We could make this more efficient by writing
        # fetcher.SegmentSizeFetcher, with the job of finding a single valid
        # share and extracting the UEB. We'd add Share.get_UEB() to request
        # just the UEB.
        (d,c) = self.get_segment(0)
        # this ensures that an error during get_segment() will errback the
        # caller, so Repair won't wait forever on completely missing files
        d.addCallback(lambda ign: self._segsize_observers.when_fired())
        return d

    # things called by the Segmentation object used to transform
    # arbitrary-sized read() calls into quantized segment fetches

    def _start_new_segment(self):
        if self._active_segment is None and self._segment_requests:
            (segnum, d, c, seg_ev, lp) = self._segment_requests[0]
            k = self._verifycap.needed_shares
            log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d",
                    node=repr(self), segnum=segnum,
                    level=log.NOISY, parent=lp, umid="wAlnHQ")
            self._active_segment = fetcher = SegmentFetcher(self, segnum, k, lp)
            seg_ev.activate(now())
            active_shares = [s for s in self._shares if s.is_alive()]
            fetcher.add_shares(active_shares) # this triggers the loop


    # called by our child ShareFinder
    def got_shares(self, shares):
        self._shares.update(shares)
        if self._active_segment:
            self._active_segment.add_shares(shares)
    def no_more_shares(self):
        self._no_more_shares = True
        if self._active_segment:
            self._active_segment.no_more_shares()

    # things called by our Share instances

    def validate_and_store_UEB(self, UEB_s):
        log.msg("validate_and_store_UEB",
                level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw")
        h = hashutil.uri_extension_hash(UEB_s)
        if h != self._verifycap.uri_extension_hash:
            raise BadHashError
        self._parse_and_store_UEB(UEB_s) # sets self._stuff
        # TODO: a malformed (but authentic) UEB could throw an assertion in
        # _parse_and_store_UEB, and we should abandon the download.
        self.have_UEB = True

        # inform the ShareFinder about our correct number of segments. This
        # will update the block-hash-trees in all existing CommonShare
        # instances, and will populate new ones with the correct value.
        self._sharefinder.update_num_segments()

    def _parse_and_store_UEB(self, UEB_s):
        # Note: the UEB contains needed_shares and total_shares. These are
        # redundant and inferior (the filecap contains the authoritative
        # values). However, because it is possible to encode the same file in
        # multiple ways, and the encoders might choose (poorly) to use the
        # same key for both (therefore getting the same SI), we might
        # encounter shares for both types. The UEB hashes will be different,
        # however, and we'll disregard the "other" encoding's shares as
        # corrupted.

        # therefore, we ignore d['total_shares'] and d['needed_shares'].

        d = uri.unpack_extension(UEB_s)

        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
                ueb=repr(uri.unpack_extension_readable(UEB_s)),
                vcap=self._verifycap.to_string(),
                level=log.NOISY, parent=self._lp, umid="cVqZnA")

        k, N = self._verifycap.needed_shares, self._verifycap.total_shares

        self.segment_size = d['segment_size']
        self._segsize_observers.fire(self.segment_size)

        r = self._calculate_sizes(self.segment_size)
        self.tail_segment_size = r["tail_segment_size"]
        self.tail_segment_padded = r["tail_segment_padded"]
        self.num_segments = r["num_segments"]
        self.block_size = r["block_size"]
        self.tail_block_size = r["tail_block_size"]
        log.msg("actual sizes: %s" % (r,),
                level=log.NOISY, parent=self._lp, umid="PY6P5Q")
        if (self.segment_size == self.guessed_segment_size
            and self.num_segments == self.guessed_num_segments):
            log.msg("my guess was right!",
                    level=log.NOISY, parent=self._lp, umid="x340Ow")
        else:
            log.msg("my guess was wrong! Extra round trips for me.",
                    level=log.NOISY, parent=self._lp, umid="tb7RJw")

        # zfec.Decode() instantiation is fast, but still, let's use the same
        # codec instance for all but the last segment. 3-of-10 takes 15us on
        # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
        # 2.5ms, worst-case 254-of-255 is 9.3ms
        self._codec = CRSDecoder()
        self._codec.set_params(self.segment_size, k, N)


        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B. self.ciphertext_hash_tree was a guess before:
        # this is where we create it for real.
        self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
        self.ciphertext_hash_tree_leaves = self.num_segments
        self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

        self.share_hash_tree.set_hashes({0: d['share_root_hash']})

        # Our job is a fast download, not verification, so we ignore any
        # redundant fields. The Verifier uses a different code path which
        # does not ignore them.

    def _calculate_sizes(self, segment_size):
        # segments of ciphertext
        size = self._verifycap.size
        k = self._verifycap.needed_shares

        # this assert matches the one in encode.py:127 inside
        # Encoded._got_all_encoding_parameters, where the UEB is constructed
        assert segment_size % k == 0

        # the last segment is usually short. We don't store a whole segsize,
        # but we do pad the segment up to a multiple of k, because the
        # encoder requires that.
        tail_segment_size = size % segment_size
        if tail_segment_size == 0:
            tail_segment_size = segment_size
        padded = mathutil.next_multiple(tail_segment_size, k)
        tail_segment_padded = padded

        num_segments = mathutil.div_ceil(size, segment_size)

        # each segment is turned into N blocks. Blocks from all but the last
        # segment are block_size bytes long; blocks from the last segment are
        # tail_block_size bytes long.
        block_size = segment_size // k
        tail_block_size = tail_segment_padded // k

        return { "tail_segment_size": tail_segment_size,
                 "tail_segment_padded": tail_segment_padded,
                 "num_segments": num_segments,
                 "block_size": block_size,
                 "tail_block_size": tail_block_size
                 }
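
        # Worked example (the numbers are illustrative, not defaults): with
        # size=1000000, k=3, and segment_size=131073 (a multiple of k), the
        # values computed above come out as:
        #   num_segments        = div_ceil(1000000, 131073)  = 8
        #   tail_segment_size   = 1000000 % 131073            = 82489
        #   tail_segment_padded = next_multiple(82489, 3)     = 82491
        #   block_size          = 131073 // 3                 = 43691
        #   tail_block_size     = 82491 // 3                  = 27497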


    def process_share_hashes(self, share_hashes):
        for hashnum in share_hashes:
            if hashnum >= len(self.share_hash_tree):
                # "BadHashError" is normally for e.g. a corrupt block. We
                # sort of abuse it here to mean a badly numbered hash (which
                # indicates corruption in the number bytes, rather than in
                # the data bytes).
                raise BadHashError("hashnum %d doesn't fit in hashtree(%d)"
                                   % (hashnum, len(self.share_hash_tree)))
        self.share_hash_tree.set_hashes(share_hashes)

    def get_desired_ciphertext_hashes(self, segnum):
        if segnum < self.ciphertext_hash_tree_leaves:
            return self.ciphertext_hash_tree.needed_hashes(segnum,
                                                           include_leaf=True)
        return []

    def get_needed_ciphertext_hashes(self, segnum):
        cht = self.ciphertext_hash_tree
        return cht.needed_hashes(segnum, include_leaf=True)
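
    # The "desired" variant above guards segnum against
    # ciphertext_hash_tree_leaves because the tree may still be the guessed,
    # possibly-too-small one built before the UEB arrived; the "needed"
    # variant skips that guard and is presumably only used once the real
    # tree from _parse_and_store_UEB is in place.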

    def process_ciphertext_hashes(self, hashes):
        assert self.num_segments is not None
        # this may raise BadHashError or NotEnoughHashesError
        self.ciphertext_hash_tree.set_hashes(hashes)


    # called by our child SegmentFetcher

    def want_more_shares(self):
        self._sharefinder.hungry()

    def fetch_failed(self, sf, f):
        assert sf is self._active_segment
        # deliver error upwards
        for (d,c,seg_ev) in self._extract_requests(sf.segnum):
            seg_ev.error(now())
            eventually(self._deliver, d, c, f)
        self._active_segment = None
        self._start_new_segment()

    def process_blocks(self, segnum, blocks):
        start = now()
        d = defer.maybeDeferred(self._decode_blocks, segnum, blocks)
        d.addCallback(self._check_ciphertext_hash, segnum)
        def _deliver(result):
            log.msg(format="delivering segment(%(segnum)d)",
                    segnum=segnum,
                    level=log.OPERATIONAL, parent=self._lp,
                    umid="j60Ojg")
            when = now()
            if isinstance(result, Failure):
                # this catches failures in decode or ciphertext hash
                for (d,c,seg_ev) in self._extract_requests(segnum):
                    seg_ev.error(when)
                    eventually(self._deliver, d, c, result)
            else:
                (offset, segment, decodetime) = result
                for (d,c,seg_ev) in self._extract_requests(segnum):
                    # when we have two requests for the same segment, the
                    # second one will not be "activated" before the data is
                    # delivered, so to allow the status-reporting code to see
                    # consistent behavior, we activate them all now. The
                    # SegmentEvent will ignore duplicate activate() calls.
                    # Note that this will result in an inaccurate "receive
                    # speed" for the second request.
                    seg_ev.activate(when)
                    seg_ev.deliver(when, offset, len(segment), decodetime)
                    eventually(self._deliver, d, c, result)
            self._download_status.add_misc_event("process_block", start, now())
            self._active_segment = None
            self._start_new_segment()
        d.addBoth(_deliver)
        d.addErrback(log.err, "unhandled error during process_blocks",
                     level=log.WEIRD, parent=self._lp, umid="MkEsCg")
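        # Note on the callback chain: _deliver is attached with addBoth, so
        # it runs whether the decode/hash-check succeeded or failed, and it
        # consumes any Failure after handing it to the waiters. The trailing
        # addErrback therefore only fires for exceptions raised inside
        # _deliver itself, logging them as WEIRD instead of losing them
        # silently.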

    def _decode_blocks(self, segnum, blocks):
        start = now()
        tail = (segnum == self.num_segments-1)
        codec = self._codec
        block_size = self.block_size
        decoded_size = self.segment_size
        if tail:
            # account for the padding in the last segment
            codec = CRSDecoder()
            k, N = self._verifycap.needed_shares, self._verifycap.total_shares
            codec.set_params(self.tail_segment_padded, k, N)
            block_size = self.tail_block_size
            decoded_size = self.tail_segment_padded

        shares = []
        shareids = []
        for (shareid, share) in blocks.items():
            assert len(share) == block_size
            shareids.append(shareid)
            shares.append(share)
        del blocks

        d = codec.decode(shares, shareids)   # segment
        del shares
        def _process(buffers):
            decodetime = now() - start
            segment = b"".join(buffers)
            assert len(segment) == decoded_size
            del buffers
            if tail:
                segment = segment[:self.tail_segment_size]
            self._download_status.add_misc_event("decode", start, now())
            return (segment, decodetime)
        d.addCallback(_process)
        return d
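
        # Continuing the illustrative numbers from _calculate_sizes: for the
        # tail segment the codec is re-parameterized to tail_segment_padded
        # (82491) bytes, so decoding any k=3 blocks of tail_block_size=27497
        # bytes yields 3 * 27497 = 82491 bytes, and the final
        # segment[:self.tail_segment_size] slice trims that back down to the
        # true 82489-byte tail.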

    def _check_ciphertext_hash(self, segment_and_decodetime, segnum):
        (segment, decodetime) = segment_and_decodetime
        start = now()
        assert self._active_segment.segnum == segnum
        assert self.segment_size is not None
        offset = segnum * self.segment_size

        h = hashutil.crypttext_segment_hash(segment)
        try:
            self.ciphertext_hash_tree.set_hashes(leaves={segnum: h})
            self._download_status.add_misc_event("CThash", start, now())
            return (offset, segment, decodetime)
        except (BadHashError, NotEnoughHashesError):
            format = ("hash failure in ciphertext_hash_tree:"
                      " segnum=%(segnum)d, SI=%(si)r")
            log.msg(format=format, segnum=segnum, si=self._si_prefix,
                    failure=Failure(),
                    level=log.WEIRD, parent=self._lp, umid="MTwNnw")
            # this is especially weird, because we made it past the share
            # hash tree. It implies that we're using the wrong encoding, or
            # that the uploader deliberately constructed a bad UEB.
            msg = format % {"segnum": segnum, "si": self._si_prefix}
            raise BadCiphertextHashError(msg)

    def _deliver(self, d, c, result):
        # this method exists to handle cancel() that occurs between
        # _got_segment and _deliver
        if c.active:
            c.active = False # it is now too late to cancel
            d.callback(result) # might actually be an errback

    def _extract_requests(self, segnum):
        """Remove matching requests and return their (d,c) tuples so that the
        caller can retire them."""
        retire = [(d,c,seg_ev)
                  for (segnum0,d,c,seg_ev,lp) in self._segment_requests
                  if segnum0 == segnum]
        self._segment_requests = [t for t in self._segment_requests
                                  if t[0] != segnum]
        return retire

    def _cancel_request(self, cancel):
        self._segment_requests = [t for t in self._segment_requests
                                  if t[2] != cancel]
        segnums = [segnum for (segnum,d,c,seg_ev,lp) in self._segment_requests]
        # self._active_segment might be None in rare circumstances, so make
        # sure we tolerate it
        if self._active_segment and self._active_segment.segnum not in segnums:
            self._active_segment.stop()
            self._active_segment = None
            self._start_new_segment()

    # called by ShareFinder to choose hashtree sizes in CommonShares, and by
    # SegmentFetcher to tell if it is still fetching a valid segnum.
    def get_num_segments(self):
        # returns (best_num_segments, authoritative)
        if self.num_segments is None:
            return (self.guessed_num_segments, False)
        return (self.num_segments, True)
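
    # Hypothetical usage sketch (the names below are illustrative, not from
    # the original source): a fetcher checking whether a segment number can
    # still be valid might do
    #   (numsegs, authoritative) = node.get_num_segments()
    #   if authoritative and segnum >= numsegs:
    #       ...  # this segnum can never exist; stop fetching it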