    def make_versionmap(self):
        """Return a dict that maps versionid to sets of (shnum, peerid,
        timestamp) tuples."""
        versionmap = DictOfSets()
        for ( (peerid, shnum), (verinfo, timestamp) ) in self.servermap.items():
            versionmap.add(verinfo, (shnum, peerid, timestamp))
        return versionmap
    def make_sharemap(self):
        """Return a dict that maps shnum to a set of peerids that hold it."""
        sharemap = DictOfSets()
        for (peerid, shnum) in self.servermap:
            sharemap.add(shnum, peerid)
        return sharemap
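    # Illustrative sketch (not part of the original code): assuming DictOfSets
    # behaves like a dict whose values are sets, the two helpers above return
    # structures of roughly this shape:
    #
    #   versionmap = { verinfo: set([ (shnum, peerid, timestamp), ... ]), ... }
    #   sharemap   = { shnum:   set([ peerid, ... ]), ... }
    #
    # where each verinfo key is the (seqnum, root_hash, IV, segsize,
    # datalength, k, N, prefix, offsets_tuple) tuple unpacked elsewhere in
    # this module.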
    def _send_shares(self, needed):
        self.log("_send_shares")
        # we're finally ready to send out our shares. If we encounter any
        # surprises here, it's because somebody else is writing at the same
        # time. (Note: in the future, when we remove the _query_peers() step
        # and instead speculate about [or remember] which shares are where,
        # surprises here are *not* indications of UncoordinatedWriteError,
        # and we'll need to respond to them more gracefully.)

        # needed is a set of (peerid, shnum) tuples. The first thing we do is
        # organize it by peerid.

        peermap = DictOfSets()
        for (peerid, shnum) in needed:
            peermap.add(peerid, shnum)

        # the next thing is to build up a bunch of test vectors. The
        # semantics of Publish are that we perform the operation if the world
        # hasn't changed since the ServerMap was constructed (more or less).
        # For every share we're trying to place, we create a test vector that
        # tests to see if the server*share still corresponds to the map.

        all_tw_vectors = {} # maps peerid to tw_vectors
        sm = self._servermap.servermap

        for key in needed:
            (peerid, shnum) = key

            if key in sm:
                # an old version of that share already exists on the
                # server, according to our servermap. We will create a
                # request that attempts to replace it.
                old_versionid, old_timestamp = sm[key]
                (old_seqnum, old_root_hash, old_salt, old_segsize,
                 old_datalength, old_k, old_N, old_prefix,
                 old_offsets_tuple) = old_versionid
                old_checkstring = pack_checkstring(old_seqnum,
                                                   old_root_hash,
                                                   old_salt)
                testv = (0, len(old_checkstring), "eq", old_checkstring)

            elif key in self.bad_share_checkstrings:
                old_checkstring = self.bad_share_checkstrings[key]
                testv = (0, len(old_checkstring), "eq", old_checkstring)

            else:
                # add a testv that requires the share not exist

                # Unfortunately, foolscap-0.2.5 has a bug in the way inbound
                # constraints are handled. If the same object is referenced
                # multiple times inside the arguments, foolscap emits a
                # 'reference' token instead of a distinct copy of the
                # argument. The bug is that these 'reference' tokens are not
                # accepted by the inbound constraint code. To work around
                # this, we need to prevent python from interning the
                # (constant) tuple, by creating a new copy of this vector
                # each time.

                # This bug is fixed in foolscap-0.2.6, and even though this
                # version of Tahoe requires foolscap-0.3.1 or newer, we are
                # supposed to be able to interoperate with older versions of
                # Tahoe which are allowed to use older versions of foolscap,
                # including foolscap-0.2.5 . In addition, I've seen other
                # foolscap problems triggered by 'reference' tokens (see #541
                # for details). So we must keep this workaround in place.

                #testv = (0, 1, 'eq', "")
                testv = tuple([0, 1, 'eq', ""])

            testvs = [testv]
            # the write vector is simply the share
            writev = [(0, self.shares[shnum])]

            if peerid not in all_tw_vectors:
                all_tw_vectors[peerid] = {}
                # maps shnum to (testvs, writevs, new_length)
            assert shnum not in all_tw_vectors[peerid]

            all_tw_vectors[peerid][shnum] = (testvs, writev, None)

        # we read the checkstring back from each share, however we only use
        # it to detect whether there was a new share that we didn't know
        # about. The success or failure of the write will tell us whether
        # there was a collision or not. If there is a collision, the first
        # thing we'll do is update the servermap, which will find out what
        # happened. We could conceivably reduce a roundtrip by using the
        # readv checkstring to populate the servermap, but really we'd have
        # to read enough data to validate the signatures too, so it wouldn't
        # be an overall win.
        read_vector = [(0, struct.calcsize(SIGNED_PREFIX))]

        # ok, send the messages!
        self.log("sending %d shares" % len(all_tw_vectors), level=log.NOISY)
        started = time.time()
        for (peerid, tw_vectors) in all_tw_vectors.items():

            write_enabler = self._node.get_write_enabler(peerid)
            renew_secret = self._node.get_renewal_secret(peerid)
            cancel_secret = self._node.get_cancel_secret(peerid)
            secrets = (write_enabler, renew_secret, cancel_secret)
            shnums = tw_vectors.keys()

            for shnum in shnums:
                self.outstanding.add( (peerid, shnum) )

            d = self._do_testreadwrite(peerid, secrets,
                                       tw_vectors, read_vector)
            d.addCallbacks(self._got_write_answer, self._got_write_error,
                           callbackArgs=(peerid, shnums, started),
                           errbackArgs=(peerid, shnums, started))
            # tolerate immediate errback, like with DeadReferenceError
            d.addBoth(fireEventually)
            d.addCallback(self.loop)
            d.addErrback(self._fatal_error)

        self._update_status()
        self.log("%d shares sent" % len(all_tw_vectors), level=log.NOISY)
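    # Illustrative note (not part of the original code): the all_tw_vectors
    # structure built by _send_shares() above has roughly this shape, e.g.
    # for a peer being asked to hold shares 0 and 3:
    #
    #   { peerid: { 0: ([(0, len(old_checkstring), "eq", old_checkstring)],
    #                   [(0, share_0_data)],
    #                   None),
    #               3: ([(0, 1, "eq", "")],
    #                   [(0, share_3_data)],
    #                   None) } }
    #
    # i.e. shnum -> (test vectors, write vectors, new_length). Each test
    # vector (offset, length, operator, specimen) must pass on the server
    # before the write vectors are applied, which is how we insist that the
    # share still matches our servermap (or, for the (0, 1, "eq", "") form,
    # that the share does not exist yet).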
class Retrieve:
    # this class is currently single-use. Eventually (in MDMF) we will make
    # it multi-use, in which case you can call download(range) multiple
    # times, and each will have a separate response chain. However the
    # Retrieve object will remain tied to a specific version of the file, and
    # will use a single ServerMap instance.

    def __init__(self, filenode, servermap, verinfo, fetch_privkey=False):
        self._node = filenode
        assert self._node.get_pubkey()
        self._storage_index = filenode.get_storage_index()
        assert self._node.get_readkey()
        self._last_failure = None
        prefix = si_b2a(self._storage_index)[:5]
        self._log_number = log.msg("Retrieve(%s): starting" % prefix)
        self._outstanding_queries = {} # maps (peerid,shnum) to start_time
        self._running = True
        self._decoding = False
        self._bad_shares = set()

        self.servermap = servermap
        assert self._node.get_pubkey()
        self.verinfo = verinfo
        # during repair, we may be called upon to grab the private key, since
        # it wasn't picked up during a verify=False checker run, and we'll
        # need it for repair to generate a new version.
        self._need_privkey = fetch_privkey
        if self._node.get_privkey():
            self._need_privkey = False

        self._status = RetrieveStatus()
        self._status.set_storage_index(self._storage_index)
        self._status.set_helper(False)
        self._status.set_progress(0.0)
        self._status.set_active(True)
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        self._status.set_size(datalength)
        self._status.set_encoding(k, N)

    def get_status(self):
        return self._status

    def log(self, *args, **kwargs):
        if "parent" not in kwargs:
            kwargs["parent"] = self._log_number
        if "facility" not in kwargs:
            kwargs["facility"] = "tahoe.mutable.retrieve"
        return log.msg(*args, **kwargs)

    def download(self):
        self._done_deferred = defer.Deferred()
        self._started = time.time()
        self._status.set_status("Retrieving Shares")

        # first, which servers can we use?
        versionmap = self.servermap.make_versionmap()
        shares = versionmap[self.verinfo]
        # this sharemap is consumed as we decide to send requests
        self.remaining_sharemap = DictOfSets()
        for (shnum, peerid, timestamp) in shares:
            self.remaining_sharemap.add(shnum, peerid)

        self.shares = {} # maps shnum to validated blocks

        # how many shares do we need?
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        assert len(self.remaining_sharemap) >= k
        # we start with the lowest shnums we have available, since FEC is
        # faster if we're using "primary shares"
        self.active_shnums = set(sorted(self.remaining_sharemap.keys())[:k])
        for shnum in self.active_shnums:
            # we use an arbitrary peer who has the share. If shares are
            # doubled up (more than one share per peer), we could make this
            # run faster by spreading the load among multiple peers. But the
            # algorithm to do that is more complicated than I want to write
            # right now, and a well-provisioned grid shouldn't have multiple
            # shares per peer.
            peerid = list(self.remaining_sharemap[shnum])[0]
            self.get_data(shnum, peerid)

        # control flow beyond this point: state machine. Receiving responses
        # from queries is the input. We might send out more queries, or we
        # might produce a result.
        return self._done_deferred

    def get_data(self, shnum, peerid):
        self.log(format="sending sh#%(shnum)d request to [%(peerid)s]",
                 shnum=shnum,
                 peerid=idlib.shortnodeid_b2a(peerid),
                 level=log.NOISY)
        ss = self.servermap.connections[peerid]
        started = time.time()
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.verinfo
        offsets = dict(offsets_tuple)

        # we read the checkstring, to make sure that the data we grab is from
        # the right version.
        readv = [ (0, struct.calcsize(SIGNED_PREFIX)) ]

        # We also read the data, and the hashes necessary to validate them
        # (share_hash_chain, block_hash_tree, share_data). We don't read the
        # signature or the pubkey, since that was handled during the
        # servermap phase, and we'll be comparing the share hash chain
        # against the roothash that was validated back then.

        readv.append( (offsets['share_hash_chain'],
                       offsets['enc_privkey'] - offsets['share_hash_chain'] ) )

        # if we need the private key (for repair), we also fetch that
        if self._need_privkey:
            readv.append( (offsets['enc_privkey'],
                           offsets['EOF'] - offsets['enc_privkey']) )

        m = Marker()
        self._outstanding_queries[m] = (peerid, shnum, started)

        # ask the cache first
        got_from_cache = False
        datavs = []
        for (offset, length) in readv:
            (data, timestamp) = self._node._read_from_cache(self.verinfo,
                                                            shnum,
                                                            offset, length)
            if data is not None:
                datavs.append(data)
        if len(datavs) == len(readv):
            self.log("got data from cache")
            got_from_cache = True
            d = fireEventually({shnum: datavs})
            # datavs is a dict mapping shnum to a pair of strings
        else:
            d = self._do_read(ss, peerid, self._storage_index, [shnum], readv)
        self.remaining_sharemap.discard(shnum, peerid)

        d.addCallback(self._got_results, m, peerid, started, got_from_cache)
        d.addErrback(self._query_failed, m, peerid)
        # errors that aren't handled by _query_failed (and errors caused by
        # _query_failed) get logged, but we still want to check for doneness.
        def _oops(f):
            self.log(format="problem in _query_failed for sh#%(shnum)d to %(peerid)s",
                     shnum=shnum,
                     peerid=idlib.shortnodeid_b2a(peerid),
                     failure=f,
                     level=log.WEIRD, umid="W0xnQA")
        d.addErrback(_oops)
        d.addBoth(self._check_for_done)
        # any error during _check_for_done means the download fails. If the
        # download is successful, _check_for_done will fire _done by itself.
        d.addErrback(self._done)
        d.addErrback(log.err)
        return d # purely for testing convenience

    def _do_read(self, ss, peerid, storage_index, shnums, readv):
        # isolate the callRemote to a separate method, so tests can subclass
        # Retrieve and override it
        d = ss.callRemote("slot_readv", storage_index, shnums, readv)
        return d

    def remove_peer(self, peerid):
        for shnum in list(self.remaining_sharemap.keys()):
            self.remaining_sharemap.discard(shnum, peerid)

    def _got_results(self, datavs, marker, peerid, started, got_from_cache):
        now = time.time()
        elapsed = now - started
        if not got_from_cache:
            self._status.add_fetch_timing(peerid, elapsed)
        self.log(format="got results (%(shares)d shares) from [%(peerid)s]",
                 shares=len(datavs),
                 peerid=idlib.shortnodeid_b2a(peerid),
                 level=log.NOISY)
        self._outstanding_queries.pop(marker, None)
        if not self._running:
            return

        # note that we only ask for a single share per query, so we only
        # expect a single share back. On the other hand, we use the extra
        # shares if we get them.. seems better than an assert().
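        # Illustrative note (not part of the original code): each readv sent
        # by get_data() is a list of (offset, length) pairs, and (as the
        # cache branch there suggests) the datavs response maps each
        # requested shnum to the list of strings read, one per pair, e.g.
        #
        #   readv  = [ (0, prefix_size),
        #              (share_hash_chain_offset, hashes_and_data_size) ]
        #   datavs = { shnum: [ prefix_bytes, hashes_and_data_bytes ] }
        #
        # with an optional third string (the encrypted private key) appended
        # when self._need_privkey is True; the loop below indexes each datav
        # on that assumption.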
        for shnum,datav in datavs.items():
            (prefix, hash_and_data) = datav[:2]
            try:
                self._got_results_one_share(shnum, peerid,
                                            prefix, hash_and_data)
            except CorruptShareError, e:
                # log it and give the other shares a chance to be processed
                f = failure.Failure()
                self.log(format="bad share: %(f_value)s",
                         f_value=str(f.value), failure=f,
                         level=log.WEIRD, umid="7fzWZw")
                self.notify_server_corruption(peerid, shnum, str(e))
                self.remove_peer(peerid)
                self.servermap.mark_bad_share(peerid, shnum, prefix)
                self._bad_shares.add( (peerid, shnum) )
                self._status.problems[peerid] = f
                self._last_failure = f
                pass
            if self._need_privkey and len(datav) > 2:
                lp = None
                self._try_to_validate_privkey(datav[2], peerid, shnum, lp)
    def update_goal(self):
        # if log.recording_noisy
        if True:
            self.log_goal(self.goal, "before update: ")

        # first, remove any bad peers from our goal
        self.goal = set([ (peerid, shnum)
                          for (peerid, shnum) in self.goal
                          if peerid not in self.bad_peers ])

        # find the homeless shares:
        homefull_shares = set([shnum for (peerid, shnum) in self.goal])
        homeless_shares = set(range(self.total_shares)) - homefull_shares
        homeless_shares = sorted(list(homeless_shares))
        # place them somewhere. We prefer unused servers at the beginning of
        # the available peer list.

        if not homeless_shares:
            return

        # if an old share X is on a node, put the new share X there too.
        # TODO: 1: redistribute shares to achieve one-per-peer, by copying
        #       shares from existing peers to new (less-crowded) ones. The
        #       old shares must still be updated.
        # TODO: 2: move those shares instead of copying them, to reduce
        #       future update work

        # this is a bit CPU intensive but easy to analyze. We create a sort
        # order for each peerid. If the peerid is marked as bad, we don't
        # even put them in the list. Then we care about the number of shares
        # which have already been assigned to them. After that we care about
        # their permutation order.
        old_assignments = DictOfSets()
        for (peerid, shnum) in self.goal:
            old_assignments.add(peerid, shnum)

        peerlist = []
        for i, (peerid, ss) in enumerate(self.full_peerlist):
            if peerid in self.bad_peers:
                continue
            entry = (len(old_assignments.get(peerid, [])), i, peerid, ss)
            peerlist.append(entry)
        peerlist.sort()

        if not peerlist:
            raise NotEnoughServersError("Ran out of non-bad servers, "
                                        "first_error=%s" %
                                        str(self._first_write_error),
                                        self._first_write_error)

        # we then index this peerlist with an integer, because we may have to
        # wrap. We update the goal as we go.
        i = 0
        for shnum in homeless_shares:
            (ignored1, ignored2, peerid, ss) = peerlist[i]
            # if we are forced to send a share to a server that already has
            # one, we may have two write requests in flight, and the
            # servermap (which was computed before either request was sent)
            # won't reflect the new shares, so the second response will be
            # surprising. There is code in _got_write_answer() to tolerate
            # this, otherwise it would cause the publish to fail with an
            # UncoordinatedWriteError. See #546 for details of the trouble
            # this used to cause.
            self.goal.add( (peerid, shnum) )
            self.connections[peerid] = ss
            i += 1
            if i >= len(peerlist):
                i = 0

        if True:
            self.log_goal(self.goal, "after update: ")
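    # Illustrative note (not part of the original code): the peerlist entries
    # built above are plain tuples, so Python's lexicographic sort gives
    # exactly the priority order described in the comments: fewest
    # already-assigned shares first, then earliest permutation index. With
    # hypothetical values:
    #
    #   (0, 2, peerid_C, ss_C)   # holds no shares yet, 3rd in permuted order
    #   (0, 5, peerid_F, ss_F)   # holds no shares yet, but later in the list
    #   (1, 0, peerid_A, ss_A)   # already holds one share
    #
    # Homeless shares are then dealt out round-robin across this sorted list,
    # wrapping back to the start if there are more shares than usable peers.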