def _remove_shareholder(self, why, shareid, where):
    """Drop the landlord for ``shareid`` after a failed remote call.

    The failure is logged, the landlord (when one is still registered for
    this share) is aborted and removed from ``self.landlords`` and
    ``self.servermap``, and happiness is recomputed from the remaining
    servermap.

    :param why: the failure being reported; it is logged and appended to
        the error message if the upload has to be abandoned.
    :param shareid: share number whose landlord failed.
    :param where: name of the remote method that was being sent (only used
        in the log line).
    :raises UploadUnhappinessError: when the recomputed happiness is below
        ``self.servers_of_happiness``.
    """
    ln = self.log(
        format="error while sending %(method)s to shareholder=%(shnum)d",
        method=where, shnum=shareid, level=log.UNUSUAL, failure=why)

    if shareid not in self.landlords:
        # even more UNUSUAL
        self.log("they weren't in our list of landlords", parent=ln,
                 level=log.WEIRD, umid="TQGFRw")
    else:
        landlord = self.landlords[shareid]
        landlord.abort()
        peerid = landlord.get_peerid()
        assert peerid
        del self.landlords[shareid]
        # Forget the share entirely once its last holder is gone.
        holders = self.servermap[shareid]
        holders.remove(peerid)
        if not holders:
            del self.servermap[shareid]

    happiness = happinessutil.servers_of_happiness(self.servermap)
    if happiness >= self.servers_of_happiness:
        self.log("but we can still continue with %s shares, we'll be happy "
                 "with at least %s" % (happiness, self.servers_of_happiness),
                 parent=ln)
        return

    by_server = happinessutil.shares_by_server(self.servermap)
    server_count = len(set(by_server.keys()))
    msg = happinessutil.failure_message(server_count,
                                        self.required_shares,
                                        self.servers_of_happiness,
                                        happiness)
    raise UploadUnhappinessError("%s: %s" % (msg, why))
def _remove_shareholder(self, why, shareid, where):
    """Give up on the shareholder for ``shareid`` after an error.

    Logs the error, aborts and unregisters the landlord for that share (if
    it is still in ``self.landlords``), removes its peer from
    ``self.servermap``, and then checks whether the remaining servermap
    still satisfies ``self.min_happiness``.

    :param why: the failure that triggered the removal.
    :param shareid: share number of the failing shareholder.
    :param where: name of the method being sent when the error occurred.
    :raises UploadUnhappinessError: if happiness drops below
        ``self.min_happiness``.
    """
    log_parent = self.log(
        format="error while sending %(method)s to shareholder=%(shnum)d",
        method=where,
        shnum=shareid,
        level=log.UNUSUAL,
        failure=why)

    if shareid in self.landlords:
        failed = self.landlords[shareid]
        failed.abort()
        peerid = failed.get_peerid()
        assert peerid
        del self.landlords[shareid]
        remaining = self.servermap[shareid]
        remaining.remove(peerid)
        if not remaining:
            # nobody holds this share any more
            del self.servermap[shareid]
    else:
        # even more UNUSUAL
        self.log("they weren't in our list of landlords",
                 parent=log_parent, level=log.WEIRD, umid="TQGFRw")

    threshold = self.min_happiness
    happiness = happinessutil.servers_of_happiness(self.servermap)
    if happiness < threshold:
        peerids = set(
            happinessutil.shares_by_server(self.servermap).keys())
        base_msg = happinessutil.failure_message(
            len(peerids), self.required_shares, self.min_happiness,
            happiness)
        raise UploadUnhappinessError("%s: %s" % (base_msg, why))

    self.log("but we can still continue with %s shares, we'll be happy "
             "with at least %s" % (happiness, self.min_happiness),
             parent=log_parent)
def _gather_repair_results(self, ur, cr, crr):
    """Populate ``crr`` with post-repair results and return it.

    The post-repair sharemap is the union of the sharemap from the
    pre-repair check results (``cr``) and the sharemap from the upload
    results (``ur``). Servers that appear in the upload sharemap are also
    counted as responding.

    :param ur: upload results; must provide ``IUploadResults``.
    :param cr: check results gathered before the repair.
    :param crr: check-and-repair results object to fill in.
    :returns: ``crr``, with ``repair_successful`` and
        ``post_repair_results`` set.
    """
    assert IUploadResults.providedBy(ur), ur
    verifycap = self._verifycap
    responders = set(cr.get_servers_responding())

    # Start from the pre-repair shares, then layer the newly uploaded ones
    # on top to form the post-repair picture.
    merged = DictOfSets()
    assert isinstance(cr.get_sharemap(), DictOfSets)
    for shnum, holders in cr.get_sharemap().items():
        for holder in holders:
            merged.add(shnum, holder)
    for shnum, holders in ur.get_sharemap().items():
        for holder in holders:
            merged.add(shnum, holder)
            responders.add(holder)
    responders = sorted(responders)

    good_hosts = len(set().union(*merged.values()))
    share_count = len(merged)
    is_healthy = share_count >= verifycap.total_shares
    is_recoverable = share_count >= verifycap.needed_shares
    happiness = servers_of_happiness(merged)

    prr = CheckResults(
        cr.get_uri(),
        cr.get_storage_index(),
        healthy=is_healthy,
        recoverable=is_recoverable,
        count_happiness=happiness,
        count_shares_needed=verifycap.needed_shares,
        count_shares_expected=verifycap.total_shares,
        count_shares_good=share_count,
        count_good_share_hosts=good_hosts,
        count_recoverable_versions=int(is_recoverable),
        count_unrecoverable_versions=int(not is_recoverable),
        servers_responding=list(responders),
        sharemap=merged,
        count_wrong_shares=0,  # no such thing as wrong, for immutable
        list_corrupt_shares=cr.get_corrupt_shares(),
        count_corrupt_shares=len(cr.get_corrupt_shares()),
        list_incompatible_shares=cr.get_incompatible_shares(),
        count_incompatible_shares=len(cr.get_incompatible_shares()),
        summary="",
        report=[],
        share_problems=[],
        servermap=None)

    crr.repair_successful = is_healthy
    crr.post_repair_results = prr
    return crr
def _gather_repair_results(self, ur, cr, crr):
    """Derive post-repair check results from ``cr`` plus ``ur``.

    Shares listed in the pre-repair check results (``cr``) and shares
    listed in the upload results (``ur``) are combined into one sharemap,
    from which health, recoverability and happiness are computed. The
    outcome is stored on ``crr``.

    :param ur: upload results (asserted to provide ``IUploadResults``).
    :param cr: pre-repair check results.
    :param crr: check-and-repair results to update.
    :returns: the same ``crr`` object.
    """
    assert IUploadResults.providedBy(ur), ur
    # clone the cr (check results) to form the basis of the
    # prr (post-repair results)
    cap = self._verifycap
    responding = set(cr.get_servers_responding())
    post_map = DictOfSets()
    assert isinstance(cr.get_sharemap(), DictOfSets)

    for sharenum, server_set in cr.get_sharemap().items():
        for srv in server_set:
            post_map.add(sharenum, srv)
    for sharenum, server_set in ur.get_sharemap().items():
        for srv in server_set:
            post_map.add(sharenum, srv)
            # a server we just uploaded to has evidently responded
            responding.add(srv)
    responding = sorted(responding)

    all_hosts = set()
    all_hosts = all_hosts.union(*post_map.values())
    good_hosts = len(all_hosts)
    n_shares = len(post_map)
    healthy = bool(n_shares >= cap.total_shares)
    recoverable = bool(n_shares >= cap.needed_shares)
    count_happiness = servers_of_happiness(post_map)

    fields = dict(
        healthy=healthy,
        recoverable=recoverable,
        count_happiness=count_happiness,
        count_shares_needed=cap.needed_shares,
        count_shares_expected=cap.total_shares,
        count_shares_good=n_shares,
        count_good_share_hosts=good_hosts,
        count_recoverable_versions=int(recoverable),
        count_unrecoverable_versions=int(not recoverable),
        servers_responding=list(responding),
        sharemap=post_map,
        count_wrong_shares=0,  # no such thing as wrong, for immutable
        list_corrupt_shares=cr.get_corrupt_shares(),
        count_corrupt_shares=len(cr.get_corrupt_shares()),
        list_incompatible_shares=cr.get_incompatible_shares(),
        count_incompatible_shares=len(cr.get_incompatible_shares()),
        summary="",
        report=[],
        share_problems=[],
        servermap=None)
    prr = CheckResults(cr.get_uri(), cr.get_storage_index(), **fields)

    crr.repair_successful = healthy
    crr.post_repair_results = prr
    return crr
def _format_results(self, results):
    """Fold per-server verification results into a ``CheckResults``.

    :param results: iterable of 5-tuples
        ``(verified, server, corrupt, incompatible, responded)`` where the
        three share collections hold share numbers and ``responded`` is
        truthy if the server answered.
    :returns: a ``CheckResults`` built from ``self._verifycap`` and the
        aggregated counts.
    """
    verifycap = self._verifycap
    SI = verifycap.get_storage_index()

    verifiedshares = dictutil.DictOfSets()  # {sharenum: set(server)}
    shares_on = {}                          # {server: set(sharenums)}
    corrupt_locators = []                   # (server, storageindex, sharenum)
    incompatible_locators = []              # (server, storageindex, sharenum)
    responders = set()                      # server

    for verified, server, corrupt, incompatible, responded in results:
        shares_on.setdefault(server, set()).update(verified)
        for sharenum in verified:
            verifiedshares.setdefault(sharenum, set()).add(server)
        corrupt_locators.extend(
            (server, SI, sharenum) for sharenum in corrupt)
        incompatible_locators.extend(
            (server, SI, sharenum) for sharenum in incompatible)
        if responded:
            responders.add(server)

    # Only servers holding at least one verified share count as good hosts.
    good_share_hosts = sum(1 for held in shares_on.values() if held)

    found = len(verifiedshares)
    total = verifycap.total_shares
    needed = verifycap.needed_shares
    assert found <= total, (verifiedshares.keys(), total)

    healthy = (found == total)
    if healthy:
        summary = "Healthy"
    else:
        summary = ("Not Healthy: %d shares (enc %d-of-%d)"
                   % (found, needed, total))

    recoverable = int(found >= needed)
    unrecoverable = 1 - recoverable

    count_happiness = servers_of_happiness(verifiedshares)

    return CheckResults(
        verifycap, SI,
        healthy=healthy,
        recoverable=bool(recoverable),
        count_happiness=count_happiness,
        count_shares_needed=needed,
        count_shares_expected=total,
        count_shares_good=found,
        count_good_share_hosts=good_share_hosts,
        count_recoverable_versions=recoverable,
        count_unrecoverable_versions=unrecoverable,
        servers_responding=list(responders),
        sharemap=verifiedshares,
        count_wrong_shares=0,  # no such thing, for immutable
        list_corrupt_shares=corrupt_locators,
        count_corrupt_shares=len(corrupt_locators),
        list_incompatible_shares=incompatible_locators,
        count_incompatible_shares=len(incompatible_locators),
        summary=summary,
        report=[],
        share_problems=[],
        servermap=None)
def test_servers_of_happiness_utility_function(self):
    """Check servers_of_happiness() against a series of known layouts."""
    # These tests are concerned with the servers_of_happiness() utility
    # function and its underlying matching algorithm. Other aspects of
    # the servers_of_happiness behavior are tested elsewhere. These tests
    # exist to ensure that servers_of_happiness neither undercounts nor
    # overcounts the happiness value for given inputs.

    # servers_of_happiness expects a dict of shnum => set(serverids) as
    # its preexisting-shares argument.
    layout = dict((i, set(["server%d" % i])) for i in range(1, 5))
    self.failUnlessEqual(4, servers_of_happiness(layout))

    # Reassigning share 4 to server1 duplicates a server, so happiness
    # should drop from 4 to 3.
    layout[4] = set(["server1"])
    self.failUnlessEqual(3, servers_of_happiness(layout))

    # The second argument of merge_servers should be a set of objects
    # with serverid and buckets as attributes. In actual use these will
    # be ServerTracker instances, but for testing a FakeServerTracker
    # that merely holds those instance variables is sufficient.
    trackers = [FakeServerTracker("server%d" % i, [i])
                for i in range(5, 9)]

    # Recall that layout has servers_of_happiness = 3. Since there is no
    # overlap between its shnum -> set([serverid]) correspondences and
    # those in trackers, the merged result should be 7.
    merged = merge_servers(layout, set(trackers))
    self.failUnlessEqual(7, servers_of_happiness(merged))

    # Now add an overlapping server to trackers. This is redundant, so
    # it should not change the previously reported happiness value.
    trackers.append(FakeServerTracker("server1", [1]))
    merged = merge_servers(layout, set(trackers))
    self.failUnlessEqual(7, servers_of_happiness(merged))

    # An empty layout has no happiness at all.
    self.failUnlessEqual(0, servers_of_happiness({}))

    # Test a more substantial overlap between the trackers and the
    # existing assignments.
    existing = dict((i, set(["server%d" % i])) for i in range(1, 5))
    trackers = [
        FakeServerTracker("server5", [4]),
        FakeServerTracker("server6", [3, 5]),
    ]
    # The value returned by servers_of_happiness is the size of a
    # maximum matching in the bipartite graph that
    # servers_of_happiness() makes between serverids and share numbers.
    # It should find something like this:
    #   (server 1, share 1)
    #   (server 2, share 2)
    #   (server 3, share 3)
    #   (server 5, share 4)
    #   (server 6, share 5)
    #
    # and, since there are 5 edges in this matching, it should return 5.
    merged = merge_servers(existing, set(trackers))
    self.failUnlessEqual(5, servers_of_happiness(merged))

    # Zooko's first puzzle:
    # (from http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:156)
    #
    # server 1: shares 0, 1
    # server 2: shares 1, 2
    # server 3: share 2
    #
    # This should yield happiness of 3.
    first_puzzle = {
        0: set(["server1"]),
        1: set(["server1", "server2"]),
        2: set(["server2", "server3"]),
    }
    self.failUnlessEqual(3, servers_of_happiness(first_puzzle))

    # Zooko's second puzzle:
    # (from http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:158)
    #
    # server 1: shares 0, 1
    # server 2: share 1
    #
    # This should yield happiness of 2.
    second_puzzle = {
        0: set(["server1"]),
        1: set(["server1", "server2"]),
    }
    self.failUnlessEqual(2, servers_of_happiness(second_puzzle))
def _make_checker_results(self, smap):
    """Build a ``CheckResults`` for this mutable file from a servermap.

    Produces a human-readable report and summary, decides whether the file
    is healthy, gathers share counters for the best recoverable version
    (or, failing that, for some unrecoverable version, or arbitrary
    defaults), records every entry of ``self.bad_shares`` as a corrupt
    share, and builds a sharemap keyed by ``"<version summary>-sh<shnum>"``.

    :param smap: the servermap to summarize.
    :returns: the populated ``CheckResults``.
    """
    self._monitor.raise_if_cancelled()
    healthy = True
    report = []
    summary_parts = []
    vmap = smap.make_versionmap()
    recoverable = smap.recoverable_versions()
    unrecoverable = smap.unrecoverable_versions()

    def describe(versions):
        # "<number of shares>*<version summary>" per version, "/"-joined
        return "/".join(["%d*%s" % (len(vmap[v]), smap.summarize_version(v))
                         for v in versions])

    if recoverable:
        report.append("Recoverable Versions: " + describe(recoverable))
    if unrecoverable:
        report.append("Unrecoverable Versions: " + describe(unrecoverable))

    # Each condition that holds marks the file unhealthy and contributes
    # one summary fragment and one report line.
    unhealthy_checks = [
        (smap.unrecoverable_versions(),
         "some versions are unrecoverable",
         "Unhealthy: some versions are unrecoverable"),
        (len(recoverable) == 0,
         "no versions are recoverable",
         "Unhealthy: no versions are recoverable"),
        (len(recoverable) > 1,
         "multiple versions are recoverable",
         "Unhealthy: there are multiple recoverable versions"),
    ]
    for triggered, brief, detail in unhealthy_checks:
        if triggered:
            healthy = False
            summary_parts.append(brief)
            report.append(detail)

    if recoverable:
        best_version = smap.best_recoverable_version()
        report.append("Best Recoverable Version: " +
                      smap.summarize_version(best_version))
        counters = self._count_shares(smap, best_version)
        good = counters["count-shares-good"]
        needed = counters["count-shares-needed"]
        expected = counters["count-shares-expected"]
        if good < expected:
            healthy = False
            report.append("Unhealthy: best version has only %d shares "
                          "(encoding is %d-of-%d)" % (good, needed, expected))
            summary_parts.append("%d shares (enc %d-of-%d)"
                                 % (good, needed, expected))
    elif unrecoverable:
        healthy = False
        # find a k and N from somewhere; this is not exactly the best
        # version, but that doesn't matter too much
        some_version = list(unrecoverable)[0]
        counters = self._count_shares(smap, some_version)
    else:
        # couldn't find anything at all
        counters = {
            "count-shares-good": 0,
            "count-shares-needed": 3,  # arbitrary defaults
            "count-shares-expected": 10,
            "count-good-share-hosts": 0,
            "count-wrong-shares": 0,
        }

    corrupt_share_locators = []
    problems = []
    if self.bad_shares:
        report.append("Corrupt Shares:")
        summary_parts.append("Corrupt Shares:")
    for (server, shnum, f) in sorted(self.bad_shares):
        serverid = server.get_serverid()
        corrupt_share_locators.append((server, self._storage_index, shnum))
        share_label = "%s-sh%d" % (server.get_name(), shnum)
        if f.check(CorruptShareError):
            reason = f.value.reason
        else:
            reason = str(f)
        report.append(" %s: %s" % (share_label, reason))
        summary_parts.append(share_label)
        problems.append((serverid, self._storage_index, shnum, f))
        msg = ("CorruptShareError during mutable verify, "
               "serverid=%(serverid)s, si=%(si)s, shnum=%(shnum)d, "
               "where=%(where)s")
        log.msg(format=msg, serverid=server.get_name(),
                si=base32.b2a(self._storage_index),
                shnum=shnum,
                where=reason,
                level=log.WEIRD, umid="EkK8QA")

    sharemap = dictutil.DictOfSets()
    for verinfo in vmap:
        for (shnum, server, _timestamp) in vmap[verinfo]:
            shareid = "%s-sh%d" % (smap.summarize_version(verinfo), shnum)
            sharemap.add(shareid, server)

    if healthy:
        summary = "Healthy"
    else:
        summary = "Unhealthy: " + " ".join(summary_parts)

    count_happiness = servers_of_happiness(sharemap)

    return CheckResults(
        from_string(self._node.get_uri()),
        self._storage_index,
        healthy=healthy,
        recoverable=bool(recoverable),
        count_happiness=count_happiness,
        count_shares_needed=counters["count-shares-needed"],
        count_shares_expected=counters["count-shares-expected"],
        count_shares_good=counters["count-shares-good"],
        count_good_share_hosts=counters["count-good-share-hosts"],
        count_recoverable_versions=len(recoverable),
        count_unrecoverable_versions=len(unrecoverable),
        servers_responding=list(smap.get_reachable_servers()),
        sharemap=sharemap,
        count_wrong_shares=counters["count-wrong-shares"],
        list_corrupt_shares=corrupt_share_locators,
        count_corrupt_shares=len(corrupt_share_locators),
        list_incompatible_shares=[],
        count_incompatible_shares=0,
        summary=summary,
        report=report,
        share_problems=problems,
        servermap=smap.copy())
def _format_results(self, results):
    """Aggregate verifier output from every server into ``CheckResults``.

    :param results: iterable of
        ``(verified, server, corrupt, incompatible, responded)`` tuples;
        ``verified``, ``corrupt`` and ``incompatible`` are collections of
        share numbers reported for ``server``.
    :returns: ``CheckResults`` describing health, recoverability,
        happiness and the corrupt/incompatible share locators.
    """
    cap = self._verifycap
    SI = cap.get_storage_index()

    verified_by_share = dictutil.DictOfSets()  # {sharenum: set(server)}
    verified_by_server = {}                    # {server: set(sharenums)}
    corrupt = []       # (server, storageindex, sharenum)
    incompatible = []  # (server, storageindex, sharenum)
    responding = set()  # server

    for (good_nums, server, corrupt_nums, incompatible_nums,
         responded) in results:
        verified_by_server.setdefault(server, set()).update(good_nums)
        for num in good_nums:
            verified_by_share.setdefault(num, set()).add(server)
        for num in corrupt_nums:
            corrupt.append((server, SI, num))
        for num in incompatible_nums:
            incompatible.append((server, SI, num))
        if responded:
            responding.add(server)

    # A server is a "good share host" only if it holds a verified share.
    good_share_hosts = len(
        [srv for srv, nums in verified_by_server.items() if nums])

    n_verified = len(verified_by_share)
    assert n_verified <= cap.total_shares, (
        verified_by_share.keys(), cap.total_shares)

    if n_verified == cap.total_shares:
        healthy = True
        summary = "Healthy"
    else:
        healthy = False
        summary = ("Not Healthy: %d shares (enc %d-of-%d)"
                   % (n_verified, cap.needed_shares, cap.total_shares))

    if n_verified >= cap.needed_shares:
        recoverable, unrecoverable = 1, 0
    else:
        recoverable, unrecoverable = 0, 1

    count_happiness = servers_of_happiness(verified_by_share)

    cr = CheckResults(
        cap, SI,
        healthy=healthy,
        recoverable=bool(recoverable),
        count_happiness=count_happiness,
        count_shares_needed=cap.needed_shares,
        count_shares_expected=cap.total_shares,
        count_shares_good=n_verified,
        count_good_share_hosts=good_share_hosts,
        count_recoverable_versions=recoverable,
        count_unrecoverable_versions=unrecoverable,
        servers_responding=list(responding),
        sharemap=verified_by_share,
        count_wrong_shares=0,  # no such thing, for immutable
        list_corrupt_shares=corrupt,
        count_corrupt_shares=len(corrupt),
        list_incompatible_shares=incompatible,
        count_incompatible_shares=len(incompatible),
        summary=summary,
        report=[],
        share_problems=[],
        servermap=None)
    return cr