Example #1
    def make_versionmap(self):
        """Return a dict that maps versionid to sets of (shnum, peerid,
        timestamp) tuples."""
        versionmap = DictOfSets()
        for ( (peerid, shnum), (verinfo, timestamp) ) in self.servermap.items():
            versionmap.add(verinfo, (shnum, peerid, timestamp))
        return versionmap
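
Every example here leans on DictOfSets, a small multimap helper (a dict whose values are sets). The following is a minimal sketch inferred from how these examples use it (add, discard, keys(), and plain dict indexing); the real allmydata.util.dictutil class may differ in detail.

class DictOfSets(dict):
    # minimal multimap sketch, reconstructed from usage in these examples
    def add(self, key, value):
        # create the set on first use, then accumulate values
        if key in self:
            self[key].add(value)
        else:
            self[key] = set([value])

    def discard(self, key, value):
        # drop one value; remove the key entirely once its set is empty
        if key not in self:
            return
        self[key].discard(value)
        if not self[key]:
            del self[key]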
Example #2
    def download(self):
        self._done_deferred = defer.Deferred()
        self._started = time.time()
        self._status.set_status("Retrieving Shares")

        # first, which servers can we use?
        versionmap = self.servermap.make_versionmap()
        shares = versionmap[self.verinfo]
        # this sharemap is consumed as we decide to send requests
        self.remaining_sharemap = DictOfSets()
        for (shnum, peerid, timestamp) in shares:
            self.remaining_sharemap.add(shnum, peerid)

        self.shares = {} # maps shnum to validated blocks

        # how many shares do we need?
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        assert len(self.remaining_sharemap) >= k
        # we start with the lowest shnums we have available, since FEC is
        # faster if we're using "primary shares"
        self.active_shnums = set(sorted(self.remaining_sharemap.keys())[:k])
        for shnum in self.active_shnums:
            # we use an arbitrary peer who has the share. If shares are
            # doubled up (more than one share per peer), we could make this
            # run faster by spreading the load among multiple peers. But the
            # algorithm to do that is more complicated than I want to write
            # right now, and a well-provisioned grid shouldn't have multiple
            # shares per peer.
            peerid = list(self.remaining_sharemap[shnum])[0]
            self.get_data(shnum, peerid)

        # control flow beyond this point: state machine. Receiving responses
        # from queries is the input. We might send out more queries, or we
        # might produce a result.

        return self._done_deferred
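
To make the "lowest shnums first" selection concrete, here is a hypothetical run of that step with k=3 and five shares available; the peer names and share numbers are invented purely for illustration.

# Hypothetical illustration of the active_shnums selection in download():
# with k=3 and shares 0, 2, 3, 5, 7 available, the three lowest win,
# because FEC decodes fastest from "primary" (low-numbered) shares.
remaining_sharemap = {0: set(["peerA"]), 2: set(["peerB"]),
                      3: set(["peerA"]), 5: set(["peerC"]),
                      7: set(["peerB"])}
k = 3
active_shnums = set(sorted(remaining_sharemap.keys())[:k])
assert active_shnums == set([0, 2, 3])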
Example #3
    def make_sharemap(self):
        """Return a dict that maps shnum to a set of peerids that hold it."""
        sharemap = DictOfSets()
        for (peerid, shnum) in self.servermap:
            sharemap.add(shnum, peerid)
        return sharemap
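
For contrast with make_versionmap, here is a hypothetical servermap pushed through both helpers; the peer ids, verinfo value, and timestamps are made up solely to show the shapes of the two resulting maps.

# Hypothetical servermap contents: keys are (peerid, shnum), values are
# (verinfo, timestamp), matching what Examples #1 and #3 iterate over.
servermap = {
    ("peerA", 0): ("verinfo-1", 1000.0),
    ("peerA", 1): ("verinfo-1", 1000.1),
    ("peerB", 1): ("verinfo-1", 1000.2),
}

sharemap = DictOfSets()
for (peerid, shnum) in servermap:
    sharemap.add(shnum, peerid)
# sharemap == {0: set(["peerA"]), 1: set(["peerA", "peerB"])}

versionmap = DictOfSets()
for ((peerid, shnum), (verinfo, timestamp)) in servermap.items():
    versionmap.add(verinfo, (shnum, peerid, timestamp))
# versionmap["verinfo-1"] == set of (shnum, peerid, timestamp) tuples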
Example #4
class Retrieve:
    # this class is currently single-use. Eventually (in MDMF) we will make
    # it multi-use, in which case you can call download(range) multiple
    # times, and each will have a separate response chain. However the
    # Retrieve object will remain tied to a specific version of the file, and
    # will use a single ServerMap instance.

    def __init__(self, filenode, servermap, verinfo, fetch_privkey=False):
        self._node = filenode
        assert self._node.get_pubkey()
        self._storage_index = filenode.get_storage_index()
        assert self._node.get_readkey()
        self._last_failure = None
        prefix = si_b2a(self._storage_index)[:5]
        self._log_number = log.msg("Retrieve(%s): starting" % prefix)
        self._outstanding_queries = {} # maps (peerid,shnum) to start_time
        self._running = True
        self._decoding = False
        self._bad_shares = set()

        self.servermap = servermap
        assert self._node.get_pubkey()
        self.verinfo = verinfo
        # during repair, we may be called upon to grab the private key, since
        # it wasn't picked up during a verify=False checker run, and we'll
        # need it for repair to generate a new version.
        self._need_privkey = fetch_privkey
        if self._node.get_privkey():
            self._need_privkey = False

        self._status = RetrieveStatus()
        self._status.set_storage_index(self._storage_index)
        self._status.set_helper(False)
        self._status.set_progress(0.0)
        self._status.set_active(True)
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        self._status.set_size(datalength)
        self._status.set_encoding(k, N)

    def get_status(self):
        return self._status

    def log(self, *args, **kwargs):
        if "parent" not in kwargs:
            kwargs["parent"] = self._log_number
        if "facility" not in kwargs:
            kwargs["facility"] = "tahoe.mutable.retrieve"
        return log.msg(*args, **kwargs)

    def download(self):
        self._done_deferred = defer.Deferred()
        self._started = time.time()
        self._status.set_status("Retrieving Shares")

        # first, which servers can we use?
        versionmap = self.servermap.make_versionmap()
        shares = versionmap[self.verinfo]
        # this sharemap is consumed as we decide to send requests
        self.remaining_sharemap = DictOfSets()
        for (shnum, peerid, timestamp) in shares:
            self.remaining_sharemap.add(shnum, peerid)

        self.shares = {} # maps shnum to validated blocks

        # how many shares do we need?
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        assert len(self.remaining_sharemap) >= k
        # we start with the lowest shnums we have available, since FEC is
        # faster if we're using "primary shares"
        self.active_shnums = set(sorted(self.remaining_sharemap.keys())[:k])
        for shnum in self.active_shnums:
            # we use an arbitrary peer who has the share. If shares are
            # doubled up (more than one share per peer), we could make this
            # run faster by spreading the load among multiple peers. But the
            # algorithm to do that is more complicated than I want to write
            # right now, and a well-provisioned grid shouldn't have multiple
            # shares per peer.
            peerid = list(self.remaining_sharemap[shnum])[0]
            self.get_data(shnum, peerid)

        # control flow beyond this point: state machine. Receiving responses
        # from queries is the input. We might send out more queries, or we
        # might produce a result.

        return self._done_deferred

    def get_data(self, shnum, peerid):
        self.log(format="sending sh#%(shnum)d request to [%(peerid)s]",
                 shnum=shnum,
                 peerid=idlib.shortnodeid_b2a(peerid),
                 level=log.NOISY)
        ss = self.servermap.connections[peerid]
        started = time.time()
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        offsets = dict(offsets_tuple)

        # we read the checkstring, to make sure that the data we grab is from
        # the right version.
        readv = [ (0, struct.calcsize(SIGNED_PREFIX)) ]

        # We also read the data, and the hashes necessary to validate them
        # (share_hash_chain, block_hash_tree, share_data). We don't read the
        # signature or the pubkey, since that was handled during the
        # servermap phase, and we'll be comparing the share hash chain
        # against the roothash that was validated back then.

        readv.append( (offsets['share_hash_chain'],
                       offsets['enc_privkey'] - offsets['share_hash_chain'] ) )

        # if we need the private key (for repair), we also fetch that
        if self._need_privkey:
            readv.append( (offsets['enc_privkey'],
                           offsets['EOF'] - offsets['enc_privkey']) )

        m = Marker()
        self._outstanding_queries[m] = (peerid, shnum, started)

        # ask the cache first
        got_from_cache = False
        datavs = []
        for (offset, length) in readv:
            (data, timestamp) = self._node._read_from_cache(self.verinfo, shnum,
                                                            offset, length)
            if data is not None:
                datavs.append(data)
        if len(datavs) == len(readv):
            self.log("got data from cache")
            got_from_cache = True
            d = fireEventually({shnum: datavs})
            # datavs is a dict mapping shnum to a pair of strings
        else:
            d = self._do_read(ss, peerid, self._storage_index, [shnum], readv)
        self.remaining_sharemap.discard(shnum, peerid)

        d.addCallback(self._got_results, m, peerid, started, got_from_cache)
        d.addErrback(self._query_failed, m, peerid)
        # errors that aren't handled by _query_failed (and errors caused by
        # _query_failed) get logged, but we still want to check for doneness.
        def _oops(f):
            self.log(format="problem in _query_failed for sh#%(shnum)d to %(peerid)s",
                     shnum=shnum,
                     peerid=idlib.shortnodeid_b2a(peerid),
                     failure=f,
                     level=log.WEIRD, umid="W0xnQA")
        d.addErrback(_oops)
        d.addBoth(self._check_for_done)
        # any error during _check_for_done means the download fails. If the
        # download is successful, _check_for_done will fire _done by itself.
        d.addErrback(self._done)
        d.addErrback(log.err)
        return d # purely for testing convenience

    def _do_read(self, ss, peerid, storage_index, shnums, readv):
        # isolate the callRemote to a separate method, so tests can subclass
        # Retrieve and override it
        d = ss.callRemote("slot_readv", storage_index, shnums, readv)
        return d

    def remove_peer(self, peerid):
        for shnum in list(self.remaining_sharemap.keys()):
            self.remaining_sharemap.discard(shnum, peerid)

    def _got_results(self, datavs, marker, peerid, started, got_from_cache):
        now = time.time()
        elapsed = now - started
        if not got_from_cache:
            self._status.add_fetch_timing(peerid, elapsed)
        self.log(format="got results (%(shares)d shares) from [%(peerid)s]",
                 shares=len(datavs),
                 peerid=idlib.shortnodeid_b2a(peerid),
                 level=log.NOISY)
        self._outstanding_queries.pop(marker, None)
        if not self._running:
            return

        # note that we only ask for a single share per query, so we only
        # expect a single share back. On the other hand, we use the extra
        # shares if we get them; that seems better than an assert().

        for shnum,datav in datavs.items():
            (prefix, hash_and_data) = datav[:2]
            try:
                self._got_results_one_share(shnum, peerid,
                                            prefix, hash_and_data)
            except CorruptShareError as e:
                # log it and give the other shares a chance to be processed
                f = failure.Failure()
                self.log(format="bad share: %(f_value)s",
                         f_value=str(f.value), failure=f,
                         level=log.WEIRD, umid="7fzWZw")
                self.notify_server_corruption(peerid, shnum, str(e))
                self.remove_peer(peerid)
                self.servermap.mark_bad_share(peerid, shnum, prefix)
                self._bad_shares.add( (peerid, shnum) )
                self._status.problems[peerid] = f
                self._last_failure = f
                pass
            if self._need_privkey and len(datav) > 2:
                lp = None
                self._try_to_validate_privkey(datav[2], peerid, shnum, lp)
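
In get_data above, each outstanding query is tagged with a Marker, and a cache hit hands the data back through fireEventually so callbacks can be attached before the Deferred fires. Below is a rough sketch of those two small helpers, offered as assumptions rather than the actual Tahoe/foolscap code (foolscap.eventual uses an eventual-send queue rather than callLater).

from twisted.internet import defer, reactor

class Marker(object):
    # a unique, hashable token used as the key into _outstanding_queries
    pass

def fireEventually(value=None):
    # return a Deferred that fires with `value` on a later reactor turn,
    # so the caller can attach callbacks before it fires
    d = defer.Deferred()
    reactor.callLater(0, d.callback, value)
    return d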
Example #5
    def _send_shares(self, needed):
        self.log("_send_shares")

        # we're finally ready to send out our shares. If we encounter any
        # surprises here, it's because somebody else is writing at the same
        # time. (Note: in the future, when we remove the _query_peers() step
        # and instead speculate about [or remember] which shares are where,
        # surprises here are *not* indications of UncoordinatedWriteError,
        # and we'll need to respond to them more gracefully.)

        # needed is a set of (peerid, shnum) tuples. The first thing we do is
        # organize it by peerid.

        peermap = DictOfSets()
        for (peerid, shnum) in needed:
            peermap.add(peerid, shnum)

        # the next thing is to build up a bunch of test vectors. The
        # semantics of Publish are that we perform the operation if the world
        # hasn't changed since the ServerMap was constructed (more or less).
        # For every share we're trying to place, we create a test vector that
        # tests to see if the server*share still corresponds to the
        # map.

        all_tw_vectors = {} # maps peerid to tw_vectors
        sm = self._servermap.servermap

        for key in needed:
            (peerid, shnum) = key

            if key in sm:
                # an old version of that share already exists on the
                # server, according to our servermap. We will create a
                # request that attempts to replace it.
                old_versionid, old_timestamp = sm[key]
                (old_seqnum, old_root_hash, old_salt, old_segsize,
                 old_datalength, old_k, old_N, old_prefix,
                 old_offsets_tuple) = old_versionid
                old_checkstring = pack_checkstring(old_seqnum,
                                                   old_root_hash,
                                                   old_salt)
                testv = (0, len(old_checkstring), "eq", old_checkstring)

            elif key in self.bad_share_checkstrings:
                old_checkstring = self.bad_share_checkstrings[key]
                testv = (0, len(old_checkstring), "eq", old_checkstring)

            else:
                # add a testv that requires the share not exist

                # Unfortunately, foolscap-0.2.5 has a bug in the way inbound
                # constraints are handled. If the same object is referenced
                # multiple times inside the arguments, foolscap emits a
                # 'reference' token instead of a distinct copy of the
                # argument. The bug is that these 'reference' tokens are not
                # accepted by the inbound constraint code. To work around
                # this, we need to prevent python from interning the
                # (constant) tuple, by creating a new copy of this vector
                # each time.

                # This bug is fixed in foolscap-0.2.6, and even though this
                # version of Tahoe requires foolscap-0.3.1 or newer, we are
                # supposed to be able to interoperate with older versions of
                # Tahoe which are allowed to use older versions of foolscap,
                # including foolscap-0.2.5 . In addition, I've seen other
                # foolscap problems triggered by 'reference' tokens (see #541
                # for details). So we must keep this workaround in place.

                #testv = (0, 1, 'eq', "")
                testv = tuple([0, 1, 'eq', ""])

            testvs = [testv]
            # the write vector is simply the share
            writev = [(0, self.shares[shnum])]

            if peerid not in all_tw_vectors:
                all_tw_vectors[peerid] = {}
                # maps shnum to (testvs, writevs, new_length)
            assert shnum not in all_tw_vectors[peerid]

            all_tw_vectors[peerid][shnum] = (testvs, writev, None)

        # we read the checkstring back from each share, however we only use
        # it to detect whether there was a new share that we didn't know
        # about. The success or failure of the write will tell us whether
        # there was a collision or not. If there is a collision, the first
        # thing we'll do is update the servermap, which will find out what
        # happened. We could conceivably reduce a roundtrip by using the
        # readv checkstring to populate the servermap, but really we'd have
        # to read enough data to validate the signatures too, so it wouldn't
        # be an overall win.
        read_vector = [(0, struct.calcsize(SIGNED_PREFIX))]

        # ok, send the messages!
        self.log("sending %d shares" % len(all_tw_vectors), level=log.NOISY)
        started = time.time()
        for (peerid, tw_vectors) in all_tw_vectors.items():

            write_enabler = self._node.get_write_enabler(peerid)
            renew_secret = self._node.get_renewal_secret(peerid)
            cancel_secret = self._node.get_cancel_secret(peerid)
            secrets = (write_enabler, renew_secret, cancel_secret)
            shnums = tw_vectors.keys()

            for shnum in shnums:
                self.outstanding.add( (peerid, shnum) )

            d = self._do_testreadwrite(peerid, secrets,
                                       tw_vectors, read_vector)
            d.addCallbacks(self._got_write_answer, self._got_write_error,
                           callbackArgs=(peerid, shnums, started),
                           errbackArgs=(peerid, shnums, started))
            # tolerate immediate errback, like with DeadReferenceError
            d.addBoth(fireEventually)
            d.addCallback(self.loop)
            d.addErrback(self._fatal_error)

        self._update_status()
        self.log("%d shares sent" % len(all_tw_vectors), level=log.NOISY)
Example #6
    def update_goal(self):
        # if log.recording_noisy
        if True:
            self.log_goal(self.goal, "before update: ")

        # first, remove any bad peers from our goal
        self.goal = set([ (peerid, shnum)
                          for (peerid, shnum) in self.goal
                          if peerid not in self.bad_peers ])

        # find the homeless shares:
        homefull_shares = set([shnum for (peerid, shnum) in self.goal])
        homeless_shares = set(range(self.total_shares)) - homefull_shares
        homeless_shares = sorted(list(homeless_shares))
        # place them somewhere. We prefer unused servers at the beginning of
        # the available peer list.

        if not homeless_shares:
            return

        # if an old share X is on a node, put the new share X there too.
        # TODO: 1: redistribute shares to achieve one-per-peer, by copying
        #       shares from existing peers to new (less-crowded) ones. The
        #       old shares must still be updated.
        # TODO: 2: move those shares instead of copying them, to reduce future
        #       update work

        # this is a bit CPU intensive but easy to analyze. We create a sort
        # order for each peerid. If the peerid is marked as bad, we don't
        # even put them in the list. Then we care about the number of shares
        # which have already been assigned to them. After that we care about
        # their permutation order.
        old_assignments = DictOfSets()
        for (peerid, shnum) in self.goal:
            old_assignments.add(peerid, shnum)

        peerlist = []
        for i, (peerid, ss) in enumerate(self.full_peerlist):
            if peerid in self.bad_peers:
                continue
            entry = (len(old_assignments.get(peerid, [])), i, peerid, ss)
            peerlist.append(entry)
        peerlist.sort()

        if not peerlist:
            raise NotEnoughServersError("Ran out of non-bad servers, "
                                        "first_error=%s" %
                                        str(self._first_write_error),
                                        self._first_write_error)

        # we then index this peerlist with an integer, because we may have to
        # wrap. We update the goal as we go.
        i = 0
        for shnum in homeless_shares:
            (ignored1, ignored2, peerid, ss) = peerlist[i]
            # if we are forced to send a share to a server that already has
            # one, we may have two write requests in flight, and the
            # servermap (which was computed before either request was sent)
            # won't reflect the new shares, so the second response will be
            # surprising. There is code in _got_write_answer() to tolerate
            # this, otherwise it would cause the publish to fail with an
            # UncoordinatedWriteError. See #546 for details of the trouble
            # this used to cause.
            self.goal.add( (peerid, shnum) )
            self.connections[peerid] = ss
            i += 1
            if i >= len(peerlist):
                i = 0
        if True:
            self.log_goal(self.goal, "after update: ")