Exemple #1
0
 def _remove_shareholder(self, why, shareid, where):
     """Forget the holder of ``shareid`` after a failed remote call.

     The failure is logged, the landlord registered for ``shareid`` (if
     any) is aborted and dropped from ``self.landlords``, and its peer is
     removed from ``self.servermap``. Happiness is then recomputed from
     the updated servermap.

     :param why: the failure being reported; attached to the log entry
         and appended to the error message.
     :param shareid: share number whose holder failed.
     :param where: name of the remote method that was being sent.
     :raises UploadUnhappinessError: when the recomputed happiness is
         below ``self.servers_of_happiness``.
     """
     failure_entry = self.log(
         format="error while sending %(method)s to shareholder=%(shnum)d",
         method=where, shnum=shareid,
         level=log.UNUSUAL, failure=why)

     if shareid not in self.landlords:
         # even more UNUSUAL
         self.log("they weren't in our list of landlords",
                  parent=failure_entry,
                  level=log.WEIRD, umid="TQGFRw")
     else:
         landlord = self.landlords[shareid]
         landlord.abort()
         peerid = landlord.get_peerid()
         assert peerid
         del self.landlords[shareid]
         holders = self.servermap[shareid]
         holders.remove(peerid)
         # drop the share entry entirely once nobody holds it any more
         if not holders:
             del self.servermap[shareid]

     happiness = happinessutil.servers_of_happiness(self.servermap)
     if happiness >= self.servers_of_happiness:
         self.log("but we can still continue with %s shares, we'll be happy "
                  "with at least %s" % (happiness,
                                        self.servers_of_happiness),
                  parent=failure_entry)
         return

     # Not enough happiness left: report it, including the original failure.
     peerids = set(happinessutil.shares_by_server(self.servermap).keys())
     msg = happinessutil.failure_message(len(peerids),
                                         self.required_shares,
                                         self.servers_of_happiness,
                                         happiness)
     raise UploadUnhappinessError("%s: %s" % (msg, why))
Exemple #2
0
 def _remove_shareholder(self, why, shareid, where):
     """Drop the shareholder for ``shareid`` after an error talking to it.

     Logs the error, aborts and unregisters the landlord for ``shareid``
     when one is known, removes its peer from ``self.servermap``, and
     re-evaluates happiness against ``self.min_happiness``.

     :param why: the failure being reported; attached to the log entry
         and appended to the error message.
     :param shareid: share number whose holder failed.
     :param where: name of the remote method that was being sent.
     :raises UploadUnhappinessError: when the remaining happiness is
         below ``self.min_happiness``.
     """
     parent_entry = self.log(
         format="error while sending %(method)s to shareholder=%(shnum)d",
         method=where, shnum=shareid, level=log.UNUSUAL, failure=why)

     if shareid not in self.landlords:
         # even more UNUSUAL
         self.log("they weren't in our list of landlords",
                  parent=parent_entry, level=log.WEIRD, umid="TQGFRw")
     else:
         failed = self.landlords[shareid]
         failed.abort()
         peerid = failed.get_peerid()
         assert peerid
         del self.landlords[shareid]
         remaining = self.servermap[shareid]
         remaining.remove(peerid)
         # a share with no holders left does not belong in the servermap
         if not remaining:
             del self.servermap[shareid]

     happiness = happinessutil.servers_of_happiness(self.servermap)
     if happiness < self.min_happiness:
         by_server = happinessutil.shares_by_server(self.servermap)
         server_count = len(set(by_server.keys()))
         msg = happinessutil.failure_message(server_count,
                                             self.required_shares,
                                             self.min_happiness,
                                             happiness)
         raise UploadUnhappinessError("%s: %s" % (msg, why))

     self.log("but we can still continue with %s shares, we'll be happy "
              "with at least %s" % (happiness, self.min_happiness),
              parent=parent_entry)
Exemple #3
0
    def _gather_repair_results(self, ur, cr, crr):
        """Record the post-repair state on ``crr`` and return it.

        The share map from the check results ``cr`` is merged with the
        share map from the upload results ``ur``. A new CheckResults built
        from the merged map is stored as ``crr.post_repair_results``, and
        ``crr.repair_successful`` is set to whether the merged map holds at
        least ``total_shares`` distinct shares.

        :param ur: upload results; must provide IUploadResults.
        :param cr: check results used as the basis for the new results.
        :param crr: check-and-repair results object to update.
        :return: ``crr``.
        """
        assert IUploadResults.providedBy(ur), ur

        verifycap = self._verifycap
        responding = set(cr.get_servers_responding())
        merged = DictOfSets()
        assert isinstance(cr.get_sharemap(), DictOfSets)
        # share locations reported by the check results
        for shnum, holders in cr.get_sharemap().items():
            for server in holders:
                merged.add(shnum, server)
        # share locations reported by the upload results; every server
        # named there is also counted as responding
        for shnum, holders in ur.get_sharemap().items():
            for server in holders:
                merged.add(shnum, server)
                responding.add(server)
        responding = sorted(responding)

        # number of distinct servers holding at least one share
        distinct_hosts = set()
        for holders in merged.values():
            distinct_hosts.update(holders)
        host_count = len(distinct_hosts)

        share_count = len(merged)
        healthy = share_count >= verifycap.total_shares
        recoverable = share_count >= verifycap.needed_shares

        happiness = servers_of_happiness(merged)

        post_repair = CheckResults(
            cr.get_uri(),
            cr.get_storage_index(),
            healthy=healthy,
            recoverable=recoverable,
            count_happiness=happiness,
            count_shares_needed=verifycap.needed_shares,
            count_shares_expected=verifycap.total_shares,
            count_shares_good=len(merged),
            count_good_share_hosts=host_count,
            count_recoverable_versions=int(recoverable),
            count_unrecoverable_versions=int(not recoverable),
            servers_responding=responding,
            sharemap=merged,
            count_wrong_shares=0,  # no such thing as wrong, for immutable
            list_corrupt_shares=cr.get_corrupt_shares(),
            count_corrupt_shares=len(cr.get_corrupt_shares()),
            list_incompatible_shares=cr.get_incompatible_shares(),
            count_incompatible_shares=len(cr.get_incompatible_shares()),
            summary="",
            report=[],
            share_problems=[],
            servermap=None)
        crr.repair_successful = healthy
        crr.post_repair_results = post_repair
        return crr
Exemple #4
0
    def _gather_repair_results(self, ur, cr, crr):
        """Attach post-repair check results to ``crr`` and return it.

        Share locations from the check results ``cr`` and from the upload
        results ``ur`` are combined into one map, from which a fresh
        CheckResults is built. That object becomes
        ``crr.post_repair_results``; ``crr.repair_successful`` is True when
        the combined map has at least ``total_shares`` distinct shares.

        :param ur: upload results; must provide IUploadResults.
        :param cr: check results used as the basis for the new results.
        :param crr: check-and-repair results object to update.
        :return: ``crr``.
        """
        assert IUploadResults.providedBy(ur), ur

        cap = self._verifycap
        answered = set(cr.get_servers_responding())
        combined = DictOfSets()
        assert isinstance(cr.get_sharemap(), DictOfSets)
        # locations listed in the check results
        for shnum, servers in cr.get_sharemap().items():
            for server in servers:
                combined.add(shnum, server)
        # locations listed in the upload results; these servers are also
        # added to the responding set
        for shnum, servers in ur.get_sharemap().items():
            for server in servers:
                combined.add(shnum, server)
                answered.add(server)
        answered = sorted(answered)

        # union of all servers that appear anywhere in the combined map
        all_hosts = set()
        for servers in combined.values():
            all_hosts.update(servers)
        good_hosts = len(all_hosts)

        shares_found = len(combined)
        is_healthy = shares_found >= cap.total_shares
        is_recoverable = shares_found >= cap.needed_shares

        happiness = servers_of_happiness(combined)

        prr = CheckResults(cr.get_uri(), cr.get_storage_index(),
                           healthy=is_healthy, recoverable=is_recoverable,
                           count_happiness=happiness,
                           count_shares_needed=cap.needed_shares,
                           count_shares_expected=cap.total_shares,
                           count_shares_good=len(combined),
                           count_good_share_hosts=good_hosts,
                           count_recoverable_versions=int(is_recoverable),
                           count_unrecoverable_versions=int(not is_recoverable),
                           servers_responding=answered,
                           sharemap=combined,
                           count_wrong_shares=0, # no such thing as wrong, for immutable
                           list_corrupt_shares=cr.get_corrupt_shares(),
                           count_corrupt_shares=len(cr.get_corrupt_shares()),
                           list_incompatible_shares=cr.get_incompatible_shares(),
                           count_incompatible_shares=len(cr.get_incompatible_shares()),
                           summary="",
                           report=[],
                           share_problems=[],
                           servermap=None)
        crr.repair_successful = is_healthy
        crr.post_repair_results = prr
        return crr
Exemple #5
0
    def _format_results(self, results):
        """Fold per-server verification outcomes into one CheckResults.

        :param results: iterable of 5-tuples ``(verified, server, corrupt,
            incompatible, responded)``. ``verified``, ``corrupt`` and
            ``incompatible`` are iterated as collections of share numbers;
            ``responded`` is tested for truth.
        :return: a CheckResults for ``self._verifycap``. It is healthy when
            exactly ``total_shares`` distinct shares were verified and
            recoverable when at least ``needed_shares`` were.
        """
        verifycap = self._verifycap
        storage_index = verifycap.get_storage_index()

        share_locations = dictutil.DictOfSets() # {sharenum: set(server)}
        shares_per_server = {} # {server: set(sharenums)}
        corrupt_locators = [] # (server, storageindex, sharenum)
        incompatible_locators = [] # (server, storageindex, sharenum)
        responders = set() # server

        for verified, server, corrupt, incompatible, responded in results:
            shares_per_server.setdefault(server, set()).update(verified)
            for sharenum in verified:
                share_locations.setdefault(sharenum, set()).add(server)
            corrupt_locators.extend(
                [(server, storage_index, sharenum) for sharenum in corrupt])
            incompatible_locators.extend(
                [(server, storage_index, sharenum)
                 for sharenum in incompatible])
            if responded:
                responders.add(server)

        # servers that hold at least one verified share
        good_share_hosts = sum(1 for held in shares_per_server.values()
                               if held)

        total = verifycap.total_shares
        needed = verifycap.needed_shares
        good_shares = len(share_locations)

        assert good_shares <= total, (share_locations.keys(), total)
        healthy = good_shares == total
        if healthy:
            summary = "Healthy"
        else:
            summary = ("Not Healthy: %d shares (enc %d-of-%d)" %
                       (good_shares, needed, total))

        # an immutable file has a single version, so these are 1/0 counts
        recoverable = int(good_shares >= needed)
        unrecoverable = 1 - recoverable

        happiness = servers_of_happiness(share_locations)

        return CheckResults(verifycap, storage_index,
                            healthy=healthy, recoverable=bool(recoverable),
                            count_happiness=happiness,
                            count_shares_needed=needed,
                            count_shares_expected=total,
                            count_shares_good=good_shares,
                            count_good_share_hosts=good_share_hosts,
                            count_recoverable_versions=recoverable,
                            count_unrecoverable_versions=unrecoverable,
                            servers_responding=list(responders),
                            sharemap=share_locations,
                            count_wrong_shares=0, # no such thing, for immutable
                            list_corrupt_shares=corrupt_locators,
                            count_corrupt_shares=len(corrupt_locators),
                            list_incompatible_shares=incompatible_locators,
                            count_incompatible_shares=len(incompatible_locators),
                            summary=summary,
                            report=[],
                            share_problems=[],
                            servermap=None)
    def test_servers_of_happiness_utility_function(self):
        # Exercise the servers_of_happiness() utility function and the
        # matching algorithm underneath it. Other aspects of the
        # servers-of-happiness behavior are tested elsewhere; the checks
        # below only make sure the happiness value is neither under- nor
        # overcounted for the given inputs.

        # servers_of_happiness takes a dict of shnum => set(serverids)
        # describing the shares that already exist.
        existing = {
            1: set(["server1"]),
            2: set(["server2"]),
            3: set(["server3"]),
            4: set(["server4"])
        }
        self.failUnlessEqual(4, servers_of_happiness(existing))

        # Put share 4 on server1 instead: server1 is now a duplicate, so
        # happiness drops from 4 to 3.
        existing[4] = set(["server1"])
        self.failUnlessEqual(3, servers_of_happiness(existing))

        # merge_servers takes, as its second argument, a set of objects
        # carrying serverid and buckets attributes. In real use these are
        # ServerTracker instances; a FakeServerTracker that just holds
        # those attributes is enough here.
        trackers = [FakeServerTracker("server%d" % i, [i])
                    for i in range(5, 9)]
        # `existing` has happiness 3, and none of its shnum ->
        # set([serverid]) pairs overlap with the trackers, so the merged
        # layout should come out at 7.
        merged = merge_servers(existing, set(trackers))
        self.failUnlessEqual(7, servers_of_happiness(merged))

        # A tracker that overlaps with the existing layout is redundant
        # and must leave the reported happiness unchanged.
        trackers.append(FakeServerTracker("server1", [1]))
        merged = merge_servers(existing, set(trackers))
        self.failUnlessEqual(7, servers_of_happiness(merged))

        # An empty layout has no happiness at all.
        self.failUnlessEqual(0, servers_of_happiness({}))

        # A more substantial overlap between the trackers and the
        # existing assignments.
        existing = {
            1: set(['server1']),
            2: set(['server2']),
            3: set(['server3']),
            4: set(['server4']),
        }
        trackers = [
            FakeServerTracker('server5', [4]),
            FakeServerTracker('server6', [3, 5]),
        ]
        # servers_of_happiness returns the size of a maximum matching in
        # the bipartite graph it builds between serverids and share
        # numbers. It should find something like this:
        # (server 1, share 1)
        # (server 2, share 2)
        # (server 3, share 3)
        # (server 5, share 4)
        # (server 6, share 5)
        #
        # That matching has 5 edges, so the answer should be 5.
        merged = merge_servers(existing, set(trackers))
        self.failUnlessEqual(5, servers_of_happiness(merged))

        # Zooko's first puzzle:
        # (from http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:156)
        #
        # server 1: shares 0, 1
        # server 2: shares 1, 2
        # server 3: share 2
        #
        # This should yield happiness of 3.
        first_puzzle = {
            0: set(['server1']),
            1: set(['server1', 'server2']),
            2: set(['server2', 'server3']),
        }
        self.failUnlessEqual(3, servers_of_happiness(first_puzzle))

        # Zooko's second puzzle:
        # (from http://allmydata.org/trac/tahoe-lafs/ticket/778#comment:158)
        #
        # server 1: shares 0, 1
        # server 2: share 1
        #
        # This should yield happiness of 2.
        second_puzzle = {
            0: set(['server1']),
            1: set(['server1', 'server2']),
        }
        self.failUnlessEqual(2, servers_of_happiness(second_puzzle))
Exemple #7
0
    def _make_checker_results(self, smap):
        """Build a CheckResults describing the state recorded in ``smap``.

        The method first gives the monitor a chance to raise if the
        operation was cancelled. It then derives a health verdict, a
        human-readable ``report`` (list of lines) and a ``summary`` string
        from the recoverable/unrecoverable versions in ``smap`` and from
        ``self.bad_shares``.

        The file is marked unhealthy when any version is unrecoverable,
        when no version is recoverable, when more than one version is
        recoverable, or when the best recoverable version has fewer good
        shares than expected. Corrupt shares are listed in the report and
        summary, but this method does not change ``healthy`` on their
        account.

        :param smap: servermap object; queried for its version map,
            recoverable/unrecoverable versions, reachable servers, and
            copied into the result.
        :return: a CheckResults instance.
        """
        self._monitor.raise_if_cancelled()
        healthy = True
        report = []   # detailed lines, passed through as a list
        summary = []  # short fragments, joined into one string below
        vmap = smap.make_versionmap()
        recoverable = smap.recoverable_versions()
        unrecoverable = smap.unrecoverable_versions()

        # Each listed version is rendered as "<number of vmap entries>*<summary>".
        if recoverable:
            report.append("Recoverable Versions: " +
                          "/".join(["%d*%s" % (len(vmap[v]),
                                               smap.summarize_version(v))
                                    for v in recoverable]))
        if unrecoverable:
            report.append("Unrecoverable Versions: " +
                          "/".join(["%d*%s" % (len(vmap[v]),
                                               smap.summarize_version(v))
                                    for v in unrecoverable]))
        # NOTE(review): this queries the servermap a second time rather than
        # reusing `unrecoverable`; presumably both return the same value.
        if smap.unrecoverable_versions():
            healthy = False
            summary.append("some versions are unrecoverable")
            report.append("Unhealthy: some versions are unrecoverable")
        if len(recoverable) == 0:
            healthy = False
            summary.append("no versions are recoverable")
            report.append("Unhealthy: no versions are recoverable")
        if len(recoverable) > 1:
            healthy = False
            summary.append("multiple versions are recoverable")
            report.append("Unhealthy: there are multiple recoverable versions")

        # Pick the version whose share counters go into the results.
        if recoverable:
            best_version = smap.best_recoverable_version()
            report.append("Best Recoverable Version: " +
                          smap.summarize_version(best_version))
            counters = self._count_shares(smap, best_version)
            s = counters["count-shares-good"]
            k = counters["count-shares-needed"]
            N = counters["count-shares-expected"]
            if s < N:
                healthy = False
                report.append("Unhealthy: best version has only %d shares "
                              "(encoding is %d-of-%d)" % (s, k, N))
                summary.append("%d shares (enc %d-of-%d)" % (s, k, N))
        elif unrecoverable:
            healthy = False
            # find a k and N from somewhere
            first = list(unrecoverable)[0]
            # not exactly the best version, but that doesn't matter too much
            counters = self._count_shares(smap, first)
        else:
            # couldn't find anything at all
            counters = {
                "count-shares-good": 0,
                "count-shares-needed": 3, # arbitrary defaults
                "count-shares-expected": 10,
                "count-good-share-hosts": 0,
                "count-wrong-shares": 0,
                }

        # Describe every entry of self.bad_shares in the report, the summary
        # and the log, and collect locators/problems for the results object.
        corrupt_share_locators = []
        problems = []
        if self.bad_shares:
            report.append("Corrupt Shares:")
            summary.append("Corrupt Shares:")
        for (server, shnum, f) in sorted(self.bad_shares):
            serverid = server.get_serverid()
            locator = (server, self._storage_index, shnum)
            corrupt_share_locators.append(locator)
            # note: `s` is reused here as the "<server name>-sh<shnum>" label
            s = "%s-sh%d" % (server.get_name(), shnum)
            # use the specific reason for a CorruptShareError, otherwise the
            # string form of the failure
            if f.check(CorruptShareError):
                ft = f.value.reason
            else:
                ft = str(f)
            report.append(" %s: %s" % (s, ft))
            summary.append(s)
            p = (serverid, self._storage_index, shnum, f)
            problems.append(p)
            msg = ("CorruptShareError during mutable verify, "
                   "serverid=%(serverid)s, si=%(si)s, shnum=%(shnum)d, "
                   "where=%(where)s")
            log.msg(format=msg, serverid=server.get_name(),
                    si=base32.b2a(self._storage_index),
                    shnum=shnum,
                    where=ft,
                    level=log.WEIRD, umid="EkK8QA")

        # Map "<version summary>-sh<shnum>" -> set of servers, across every
        # version in the version map.
        sharemap = dictutil.DictOfSets()
        for verinfo in vmap:
            for (shnum, server, timestamp) in vmap[verinfo]:
                shareid = "%s-sh%d" % (smap.summarize_version(verinfo), shnum)
                sharemap.add(shareid, server)
        # From here on `summary` is a single string rather than a list.
        if healthy:
            summary = "Healthy"
        else:
            summary = "Unhealthy: " + " ".join(summary)

        count_happiness = servers_of_happiness(sharemap)

        cr = CheckResults(from_string(self._node.get_uri()),
                          self._storage_index,
                          healthy=healthy, recoverable=bool(recoverable),
                          count_happiness=count_happiness,
                          count_shares_needed=counters["count-shares-needed"],
                          count_shares_expected=counters["count-shares-expected"],
                          count_shares_good=counters["count-shares-good"],
                          count_good_share_hosts=counters["count-good-share-hosts"],
                          count_recoverable_versions=len(recoverable),
                          count_unrecoverable_versions=len(unrecoverable),
                          servers_responding=list(smap.get_reachable_servers()),
                          sharemap=sharemap,
                          count_wrong_shares=counters["count-wrong-shares"],
                          list_corrupt_shares=corrupt_share_locators,
                          count_corrupt_shares=len(corrupt_share_locators),
                          list_incompatible_shares=[],
                          count_incompatible_shares=0,
                          summary=summary,
                          report=report,
                          share_problems=problems,
                          servermap=smap.copy())
        return cr
Exemple #8
0
    def _format_results(self, results):
        """Combine per-server verification outcomes into a CheckResults.

        :param results: iterable of 5-tuples ``(verified, server, corrupt,
            incompatible, responded)``. ``verified``, ``corrupt`` and
            ``incompatible`` are iterated as collections of share numbers;
            ``responded`` is tested for truth.
        :return: a CheckResults for ``self._verifycap``. It is healthy when
            exactly ``total_shares`` distinct shares were verified and
            recoverable when at least ``needed_shares`` were.
        """
        cap = self._verifycap
        si = cap.get_storage_index()

        verified_by_share = dictutil.DictOfSets()  # {sharenum: set(server)}
        verified_by_server = {}  # {server: set(sharenums)}
        corrupt_found = []  # (server, storageindex, sharenum)
        incompatible_found = []  # (server, storageindex, sharenum)
        responded_servers = set()  # server

        for verified, server, corrupt, incompatible, responded in results:
            verified_by_server.setdefault(server, set()).update(verified)
            for sharenum in verified:
                verified_by_share.setdefault(sharenum, set()).add(server)
            corrupt_found.extend(
                [(server, si, sharenum) for sharenum in corrupt])
            incompatible_found.extend(
                [(server, si, sharenum) for sharenum in incompatible])
            if responded:
                responded_servers.add(server)

        # only servers with a non-empty set of verified shares count
        host_count = sum(1 for sharenums in verified_by_server.values()
                         if sharenums)

        expected = cap.total_shares
        needed = cap.needed_shares
        found = len(verified_by_share)

        assert found <= expected, (verified_by_share.keys(), expected)
        healthy = found == expected
        if healthy:
            summary = "Healthy"
        else:
            summary = ("Not Healthy: %d shares (enc %d-of-%d)" %
                       (found, needed, expected))

        # version counts are reported as 1/0, not as booleans
        recoverable = int(found >= needed)
        unrecoverable = 1 - recoverable

        happiness = servers_of_happiness(verified_by_share)

        return CheckResults(
            cap,
            si,
            healthy=healthy,
            recoverable=bool(recoverable),
            count_happiness=happiness,
            count_shares_needed=needed,
            count_shares_expected=expected,
            count_shares_good=found,
            count_good_share_hosts=host_count,
            count_recoverable_versions=recoverable,
            count_unrecoverable_versions=unrecoverable,
            servers_responding=list(responded_servers),
            sharemap=verified_by_share,
            count_wrong_shares=0,  # no such thing, for immutable
            list_corrupt_shares=corrupt_found,
            count_corrupt_shares=len(corrupt_found),
            list_incompatible_shares=incompatible_found,
            count_incompatible_shares=len(incompatible_found),
            summary=summary,
            report=[],
            share_problems=[],
            servermap=None)