    def test_paced_service(self):
        self.basedir = "crawler/Basic/paced_service"
        fileutil.make_dirs(self.basedir)
        serverid = b"\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        sis = [self.write(i, ss, serverid) for i in range(10)]

        statefile = os.path.join(self.basedir, "statefile")
        c = PacedCrawler(ss, statefile)

        did_check_progress = [False]
        def check_progress():
            c.yield_cb = None
            try:
                p = c.get_progress()
                self.failUnlessEqual(p["cycle-in-progress"], True)
                pct = p["cycle-complete-percentage"]
                # after 6 buckets, we happen to be at 76.17% complete. As
                # long as we create shares in deterministic order, this will
                # continue to be true.
                self.failUnlessEqual(int(pct), 76)
                left = p["remaining-sleep-time"]
                self.failUnless(isinstance(left, float), left)
                self.failUnless(left > 0.0, left)
            except Exception as e:
                did_check_progress[0] = e
            else:
                did_check_progress[0] = True
        c.yield_cb = check_progress

        c.setServiceParent(self.s)
        # that should get through 6 buckets, pause for a little while (and
        # run check_progress()), then resume

        d = c.finished_d
        def _check(ignored):
            if did_check_progress[0] is not True:
                raise did_check_progress[0]
            self.failUnless(did_check_progress[0])
            self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))
            # at this point, the crawler should be sitting in the inter-cycle
            # timer, which should be pegged at the minimum cycle time
            self.failUnless(c.timer)
            self.failUnless(c.sleeping_between_cycles)
            self.failUnlessEqual(c.current_sleep_time, c.minimum_cycle_time)

            p = c.get_progress()
            self.failUnlessEqual(p["cycle-in-progress"], False)
            naptime = p["remaining-wait-time"]
            self.failUnless(isinstance(naptime, float), naptime)
            # min-cycle-time is 300, so this is basically testing that it took
            # less than 290s to crawl
            self.failUnless(naptime > 10.0, naptime)
            soon = p["next-crawl-time"] - time.time()
            self.failUnless(soon > 10.0, soon)

        d.addCallback(_check)
        return d
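For reference, here are the get_progress() keys the test above exercises, gathered in one place (a sketch only; the key names come from the assertions, the example values are illustrative):

    # Sketch: keys returned by the crawler's get_progress(), as used above.
    # While a cycle is running (p["cycle-in-progress"] is True):
    #   p["cycle-complete-percentage"]   # float, e.g. 76.17 after 6 of the 10 buckets
    #   p["remaining-sleep-time"]        # float seconds left in the current pause
    # Between cycles (p["cycle-in-progress"] is False):
    #   p["remaining-wait-time"]         # float seconds until the next cycle starts
    #   p["next-crawl-time"]             # absolute timestamp, comparable to time.time()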
Example #2
    def test_immediate(self):
        self.basedir = "crawler/Basic/immediate"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        sis = [self.write(i, ss, serverid) for i in range(10)]
        statefile = os.path.join(self.basedir, "statefile")

        c = BucketEnumeratingCrawler(ss, statefile, allowed_cpu_percentage=.1)
        c.load_state()

        c.start_current_prefix(time.time())
        self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))

        # make sure the statefile has been returned to the starting point
        c.finished_d = defer.Deferred()
        c.all_buckets = []
        c.start_current_prefix(time.time())
        self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))

        # check that a new crawler picks up on the state file properly
        c2 = BucketEnumeratingCrawler(ss, statefile)
        c2.load_state()

        c2.start_current_prefix(time.time())
        self.failUnlessEqual(sorted(sis), sorted(c2.all_buckets))
Example #3
    def test_service(self):
        self.basedir = "crawler/Basic/service"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        sis = [self.write(i, ss, serverid) for i in range(10)]

        statefile = os.path.join(self.basedir, "statefile")
        c = BucketEnumeratingCrawler(ss, statefile)
        c.setServiceParent(self.s)

        # it should be legal to call get_state() and get_progress() right
        # away, even before the first tick is performed. No work should have
        # been done yet.
        s = c.get_state()
        p = c.get_progress()
        self.failUnlessEqual(s["last-complete-prefix"], None)
        self.failUnlessEqual(s["current-cycle"], None)
        self.failUnlessEqual(p["cycle-in-progress"], False)

        d = c.finished_d
        def _check(ignored):
            self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))
        d.addCallback(_check)
        return d
Example #4
    def test_oneshot(self):
        self.basedir = "crawler/Basic/oneshot"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        for i in range(30):
            self.write(i, ss, serverid)

        statefile = os.path.join(self.basedir, "statefile")
        c = OneShotCrawler(ss, statefile)
        c.setServiceParent(self.s)

        d = c.finished_d

        def _finished_first_cycle(ignored):
            return fireEventually(c.counter)

        d.addCallback(_finished_first_cycle)

        def _check(old_counter):
            # the crawler should not do any work after it has been stopped
            self.failUnlessEqual(old_counter, c.counter)
            self.failIf(c.running)
            self.failIf(c.timer)
            self.failIf(c.current_sleep_time)
            s = c.get_state()
            self.failUnlessEqual(s["last-cycle-finished"], 0)
            self.failUnlessEqual(s["current-cycle"], None)

        d.addCallback(_check)
        return d
Example #5
    def test_empty_subclass(self):
        self.basedir = "crawler/Basic/empty_subclass"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        for i in range(10):
            self.write(i, ss, serverid)

        statefile = os.path.join(self.basedir, "statefile")
        c = ShareCrawler(ss, statefile)
        c.slow_start = 0
        c.setServiceParent(self.s)

        # we just let it run for a while, to get figleaf coverage of the
        # empty methods in the base class

        def _check():
            return bool(c.state["last-cycle-finished"] is not None)
        d = self.poll(_check)
        def _done(ignored):
            state = c.get_state()
            self.failUnless(state["last-cycle-finished"] is not None)
        d.addCallback(_done)
        return d
Example #12
 def make_server(self, i, readonly=False):
     serverid = hashutil.tagged_hash(b"serverid", str(i).encode("ascii"))[:20]
     serverdir = os.path.join(self.basedir, "servers",
                              idlib.shortnodeid_b2a(serverid), "storage")
     fileutil.make_dirs(serverdir)
     ss = StorageServer(serverdir, serverid, stats_provider=SimpleStats(),
                        readonly_storage=readonly)
     ss._no_network_server_number = i
     return ss
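A hypothetical usage sketch for this helper, assuming the surrounding test class has already set self.basedir:

     # Hypothetical: build several local servers, the last one read-only.
     servers = [self.make_server(i) for i in range(4)]
     servers.append(self.make_server(4, readonly=True))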
Example #14
    def OFF_test_cpu_usage(self):
        # this test can't actually assert anything, because too many
        # buildslave machines are slow. But on a fast developer machine, it
        # can produce interesting results. So if you care about how well the
        # Crawler is accomplishing its run-slowly goals, re-enable this test
        # and read the stdout when it runs.

        self.basedir = "crawler/Basic/cpu_usage"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        for i in range(10):
            self.write(i, ss, serverid)

        statefile = os.path.join(self.basedir, "statefile")
        c = ConsumingCrawler(ss, statefile)
        c.setServiceParent(self.s)

        # this will run as fast as it can, consuming about 50ms per call to
        # process_bucket(), limited by the Crawler to about 50% cpu. We let
        # it run for a few seconds, then compare how much time
        # process_bucket() got vs wallclock time. It should get between 10%
        # and 70% CPU. This is dicey, there's about 100ms of overhead per
        # 300ms slice (saving the state file takes about 150-200us, but we do
        # it 1024 times per cycle, one for each [empty] prefixdir), leaving
        # 200ms for actual processing, which is enough to get through 4
        # buckets each slice, then the crawler sleeps for 300ms/0.5 = 600ms,
        # giving us 900ms wallclock per slice. In 4.0 seconds we can do 4.4
        # slices, giving us about 17 shares, so we merely assert that we've
        # finished at least one cycle in that time.

        # with a short cpu_slice (so we can keep this test down to 4
        # seconds), the overhead is enough to make a nominal 50% usage more
        # like 30%. Forcing sleep_time to 0 only gets us 67% usage.

        start = time.time()
        d = self.stall(delay=4.0)

        def _done(res):
            elapsed = time.time() - start
            percent = 100.0 * c.accumulated / elapsed
            # our buildslaves vary too much in their speeds and load levels,
            # and many of them only manage to hit 7% usage when our target is
            # 50%. So don't assert anything about the results, just log them.
            print()
            print("crawler: got %d%% percent when trying for 50%%" % percent)
            print("crawler: got %d full cycles" % c.cycles)

        d.addCallback(_done)
        return d
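A back-of-the-envelope restatement of the timing argument in the comments above, with every value assumed rather than measured:

    # Sketch only: the arithmetic behind "about 17 shares" above.
    cpu_slice = 0.300            # nominal CPU budget per slice (seconds)
    overhead = 0.100             # state-file saves and bookkeeping per slice
    per_bucket = 0.050           # ConsumingCrawler burns ~50ms of CPU per bucket
    buckets_per_slice = (cpu_slice - overhead) / per_bucket          # ~4 buckets per slice
    sleep_per_slice = cpu_slice / 0.5                                # ~0.6s at the 50% target
    wallclock_per_slice = cpu_slice + sleep_per_slice                # ~0.9s per slice
    slices_in_four_seconds = 4.0 / wallclock_per_slice               # ~4.4 slices
    buckets_in_four_seconds = slices_in_four_seconds * buckets_per_slice  # ~17 buckets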
Example #16
    def test_not_enough_shares(self):
        """
        If fewer shares are found than are required to reassemble the data then
        ``CHKCheckerAndUEBFetcher.check`` returns a ``Deferred`` that fires
        with ``False``.
        """
        storage_index = b"a" * 16
        serverid = b"b" * 20
        storage = StorageServer(self.mktemp(), serverid)
        rref_with_ueb = LocalWrapper(storage, fireNow)
        ueb = {
            "needed_shares": 2,
            "total_shares": 2,
            "segment_size": 128 * 1024,
            "size": 1024,
        }
        yield write_good_share(rref_with_ueb, storage_index, ueb, [0])

        server_with_ueb = NoNetworkServer(serverid, rref_with_ueb)
        peers = {storage_index: [server_with_ueb]}
        caf = offloaded.CHKCheckerAndUEBFetcher(
            peers.get,
            storage_index,
            None,
        )
        self.assertThat(
            caf.check(),
            succeeded(Equals(False)),
        )
Example #17
    def init_storage(self):
        # should we run a storage server (and publish it for others to use)?
        if not self.get_config("storage", "enabled", True, boolean=True):
            return
        readonly = self.get_config("storage", "readonly", False, boolean=True)

        storedir = os.path.join(self.basedir, self.STOREDIR)

        data = self.get_config("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(data)
        except ValueError:
            log.msg("[storage]reserved_space= contains unparseable value %s"
                    % data)
            raise
        if reserved is None:
            reserved = 0
        discard = self.get_config("storage", "debug_discard", False,
                                  boolean=True)

        expire = self.get_config("storage", "expire.enabled", False, boolean=True)
        if expire:
            mode = self.get_config("storage", "expire.mode") # require a mode
        else:
            mode = self.get_config("storage", "expire.mode", "age")

        o_l_d = self.get_config("storage", "expire.override_lease_duration", None)
        if o_l_d is not None:
            o_l_d = parse_duration(o_l_d)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = self.get_config("storage", "expire.cutoff_date")
            cutoff_date = parse_date(cutoff_date)

        sharetypes = []
        if self.get_config("storage", "expire.immutable", True, boolean=True):
            sharetypes.append("immutable")
        if self.get_config("storage", "expire.mutable", True, boolean=True):
            sharetypes.append("mutable")
        expiration_sharetypes = tuple(sharetypes)

        ss = StorageServer(storedir, self.nodeid,
                           reserved_space=reserved,
                           discard_storage=discard,
                           readonly_storage=readonly,
                           stats_provider=self.stats_provider,
                           expiration_enabled=expire,
                           expiration_mode=mode,
                           expiration_override_lease_duration=o_l_d,
                           expiration_cutoff_date=cutoff_date,
                           expiration_sharetypes=expiration_sharetypes)
        self.add_service(ss)

        furl_file = os.path.join(self.basedir, "private", "storage.furl").encode(get_filesystem_encoding())
        furl = self.tub.registerReference(ss, furlFile=furl_file)
        ann = {"anonymous-storage-FURL": furl,
               "permutation-seed-base32": self._init_permutation_seed(ss),
               }
        self.introducer_client.publish("storage", ann, self._node_key)
Example #18
    def test_enough_shares(self):
        """
        If enough shares are found to reassemble the data then
        ``CHKCheckerAndUEBFetcher.check`` returns a ``Deferred`` that fires
        with share and share placement information.
        """
        storage_index = b"a" * 16
        serverids = list(ch * 20 for ch in [b"b", b"c"])
        storages = list(
            FoolscapStorageServer(StorageServer(self.mktemp(), serverid))
            for serverid in serverids)
        rrefs_with_ueb = list(
            LocalWrapper(storage, fireNow) for storage in storages)
        ueb = {
            "needed_shares": len(serverids),
            "total_shares": len(serverids),
            "segment_size": 128 * 1024,
            "size": 1024,
        }
        for n, rref_with_ueb in enumerate(rrefs_with_ueb):
            yield write_good_share(rref_with_ueb, storage_index, ueb, [n])

        servers_with_ueb = list(
            NoNetworkServer(serverid, rref_with_ueb)
            for (serverid, rref_with_ueb) in zip(serverids, rrefs_with_ueb))
        peers = {storage_index: servers_with_ueb}
        caf = offloaded.CHKCheckerAndUEBFetcher(
            peers.get,
            storage_index,
            None,
        )
        self.assertThat(
            caf.check(),
            succeeded(
                MatchesListwise([
                    Equals({
                        n: {serverid}
                        for (n, serverid) in enumerate(serverids)
                    }),
                    Equals(ueb),
                    IsInstance(bytes),
                ])),
        )
Example #19
 def test_check_ueb_unavailable(self):
     """
     If the UEB cannot be read from any of the peers supplied by the "peer
     getter" then ``CHKCheckerAndUEBFetcher.check`` returns a ``Deferred``
     that fires with ``False``.
     """
     storage_index = b"a" * 16
     serverid = b"b" * 20
     storage = StorageServer(self.mktemp(), serverid)
     rref_without_ueb = LocalWrapper(storage, fireNow)
     yield write_bad_share(rref_without_ueb, storage_index)
     server_without_ueb = NoNetworkServer(serverid, rref_without_ueb)
     peers = {storage_index: [server_without_ueb]}
     caf = offloaded.CHKCheckerAndUEBFetcher(
         peers.get,
         storage_index,
         None,
     )
     self.assertThat(
         caf.check(),
         succeeded(Equals(False)),
     )
Example #20
    def init_storage(self):
        # should we run a storage server (and publish it for others to use)?
        if not self.get_config("storage", "enabled", True, boolean=True):
            return
        readonly = self.get_config("storage", "readonly", False, boolean=True)

        storedir = os.path.join(self.basedir, self.STOREDIR)

        data = self.get_config("storage", "reserved_space", None)
        reserved = None
        try:
            reserved = parse_abbreviated_size(data)
        except ValueError:
            log.msg("[storage]reserved_space= contains unparseable value %s" %
                    data)
        if reserved is None:
            reserved = 0
        discard = self.get_config("storage",
                                  "debug_discard",
                                  False,
                                  boolean=True)

        expire = self.get_config("storage",
                                 "expire.enabled",
                                 False,
                                 boolean=True)
        if expire:
            mode = self.get_config("storage", "expire.mode")  # require a mode
        else:
            mode = self.get_config("storage", "expire.mode", "age")

        o_l_d = self.get_config("storage", "expire.override_lease_duration",
                                None)
        if o_l_d is not None:
            o_l_d = parse_duration(o_l_d)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = self.get_config("storage", "expire.cutoff_date")
            cutoff_date = parse_date(cutoff_date)

        sharetypes = []
        if self.get_config("storage", "expire.immutable", True, boolean=True):
            sharetypes.append("immutable")
        if self.get_config("storage", "expire.mutable", True, boolean=True):
            sharetypes.append("mutable")
        expiration_sharetypes = tuple(sharetypes)

        ss = StorageServer(storedir,
                           self.nodeid,
                           reserved_space=reserved,
                           discard_storage=discard,
                           readonly_storage=readonly,
                           stats_provider=self.stats_provider,
                           expiration_enabled=expire,
                           expiration_mode=mode,
                           expiration_override_lease_duration=o_l_d,
                           expiration_cutoff_date=cutoff_date,
                           expiration_sharetypes=expiration_sharetypes)
        self.add_service(ss)

        d = self.when_tub_ready()

        # we can't do registerReference until the Tub is ready
        def _publish(res):
            furl_file = os.path.join(self.basedir, "private",
                                     "storage.furl").encode(
                                         get_filesystem_encoding())
            furl = self.tub.registerReference(ss, furlFile=furl_file)
            ri_name = RIStorageServer.__remote_name__
            self.introducer_client.publish(furl, "storage", ri_name)

        d.addCallback(_publish)
        d.addErrback(log.err,
                     facility="tahoe.init",
                     level=log.BAD,
                     umid="aLGBKw")
Example #21
 def _setUp(self):
     self.tempdir = FilePath(self.useFixture(TempDir()).join(b"storage"))
     self.storage_server = StorageServer(
         self.tempdir.asBytesMode().path,
         b"x" * 20,
     )
Example #22
    def test_paced(self):
        self.basedir = "crawler/Basic/paced"
        fileutil.make_dirs(self.basedir)
        serverid = "\x00" * 20
        ss = StorageServer(self.basedir, serverid)
        ss.setServiceParent(self.s)

        # put four buckets in each prefixdir
        sis = []
        for i in range(10):
            for tail in range(4):
                sis.append(self.write(i, ss, serverid, tail))

        statefile = os.path.join(self.basedir, "statefile")

        c = PacedCrawler(ss, statefile)
        c.load_state()
        try:
            c.start_current_prefix(time.time())
        except TimeSliceExceeded:
            pass
        # that should stop in the middle of one of the buckets. Since we
        # aren't using its normal scheduler, we have to save its state
        # manually.
        c.save_state()
        c.cpu_slice = PacedCrawler.cpu_slice
        self.failUnlessEqual(len(c.all_buckets), 6)

        c.start_current_prefix(time.time())  # finish it
        self.failUnlessEqual(len(sis), len(c.all_buckets))
        self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))

        # make sure the statefile has been returned to the starting point
        c.finished_d = defer.Deferred()
        c.all_buckets = []
        c.start_current_prefix(time.time())
        self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))
        del c

        # start a new crawler, it should start from the beginning
        c = PacedCrawler(ss, statefile)
        c.load_state()
        try:
            c.start_current_prefix(time.time())
        except TimeSliceExceeded:
            pass
        # that should stop in the middle of one of the buckets. Since we
        # aren't using its normal scheduler, we have to save its state
        # manually.
        c.save_state()
        c.cpu_slice = PacedCrawler.cpu_slice

        # a third crawler should pick up from where it left off
        c2 = PacedCrawler(ss, statefile)
        c2.all_buckets = c.all_buckets[:]
        c2.load_state()
        c2.countdown = -1
        c2.start_current_prefix(time.time())
        self.failUnlessEqual(len(sis), len(c2.all_buckets))
        self.failUnlessEqual(sorted(sis), sorted(c2.all_buckets))
        del c, c2

        # now stop it at the end of a bucket (countdown=4), to exercise a
        # different place that checks the time
        c = PacedCrawler(ss, statefile)
        c.load_state()
        c.countdown = 4
        try:
            c.start_current_prefix(time.time())
        except TimeSliceExceeded:
            pass
        # that should stop at the end of one of the buckets. Again we must
        # save state manually.
        c.save_state()
        c.cpu_slice = PacedCrawler.cpu_slice
        self.failUnlessEqual(len(c.all_buckets), 4)
        c.start_current_prefix(time.time())  # finish it
        self.failUnlessEqual(len(sis), len(c.all_buckets))
        self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))
        del c

        # stop it again at the end of the bucket, check that a new checker
        # picks up correctly
        c = PacedCrawler(ss, statefile)
        c.load_state()
        c.countdown = 4
        try:
            c.start_current_prefix(time.time())
        except TimeSliceExceeded:
            pass
        # that should stop at the end of one of the buckets.
        c.save_state()

        c2 = PacedCrawler(ss, statefile)
        c2.all_buckets = c.all_buckets[:]
        c2.load_state()
        c2.countdown = -1
        c2.start_current_prefix(time.time())
        self.failUnlessEqual(len(sis), len(c2.all_buckets))
        self.failUnlessEqual(sorted(sis), sorted(c2.all_buckets))
        del c, c2
Example #23
    def get_anonymous_storage_server(self):
        """
        Get the anonymous ``IStorageServer`` implementation for this node.

        Note this will return an object even if storage is disabled on this
        node (but the object will not be exposed, peers will not be able to
        access it, and storage will remain disabled).

        The one and only instance for this node is always returned.  It is
        created first if necessary.
        """
        try:
            ss = self.getServiceNamed(StorageServer.name)
        except KeyError:
            pass
        else:
            return ss

        readonly = self.config.get_config("storage",
                                          "readonly",
                                          False,
                                          boolean=True)

        config_storedir = self.get_config(
            "storage",
            "storage_dir",
            self.STOREDIR,
        )
        storedir = self.config.get_config_path(config_storedir)

        data = self.config.get_config("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(data)
        except ValueError:
            log.msg("[storage]reserved_space= contains unparseable value %s" %
                    data)
            raise
        if reserved is None:
            reserved = 0
        discard = self.config.get_config("storage",
                                         "debug_discard",
                                         False,
                                         boolean=True)

        expire = self.config.get_config("storage",
                                        "expire.enabled",
                                        False,
                                        boolean=True)
        if expire:
            mode = self.config.get_config("storage",
                                          "expire.mode")  # require a mode
        else:
            mode = self.config.get_config("storage", "expire.mode", "age")

        o_l_d = self.config.get_config("storage",
                                       "expire.override_lease_duration", None)
        if o_l_d is not None:
            o_l_d = parse_duration(o_l_d)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = self.config.get_config("storage",
                                                 "expire.cutoff_date")
            cutoff_date = parse_date(cutoff_date)

        sharetypes = []
        if self.config.get_config("storage",
                                  "expire.immutable",
                                  True,
                                  boolean=True):
            sharetypes.append("immutable")
        if self.config.get_config("storage",
                                  "expire.mutable",
                                  True,
                                  boolean=True):
            sharetypes.append("mutable")
        expiration_sharetypes = tuple(sharetypes)

        ss = StorageServer(storedir,
                           self.nodeid,
                           reserved_space=reserved,
                           discard_storage=discard,
                           readonly_storage=readonly,
                           stats_provider=self.stats_provider,
                           expiration_enabled=expire,
                           expiration_mode=mode,
                           expiration_override_lease_duration=o_l_d,
                           expiration_cutoff_date=cutoff_date,
                           expiration_sharetypes=expiration_sharetypes)
        ss.setServiceParent(self)
        return ss
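A hypothetical caller, illustrating the create-once, return-always behaviour the docstring describes (the client object is assumed):

# Hypothetical usage: repeated calls return the same service instance;
# it is created and registered only on the first call.
ss_first = client.get_anonymous_storage_server()
ss_again = client.get_anonymous_storage_server()
assert ss_first is ss_again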
Example #24
    def init_storage(self):
        # should we run a storage server (and publish it for others to use)?
        if not self.config.get_config("storage", "enabled", True, boolean=True):
            return
        if not self._is_tub_listening():
            raise ValueError("config error: storage is enabled, but tub "
                             "is not listening ('tub.port=' is empty)")
        readonly = self.config.get_config("storage", "readonly", False, boolean=True)

        config_storedir = self.get_config(
            "storage", "storage_dir", self.STOREDIR,
        ).decode('utf-8')
        storedir = self.config.get_config_path(config_storedir)

        data = self.config.get_config("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(data)
        except ValueError:
            log.msg("[storage]reserved_space= contains unparseable value %s"
                    % data)
            raise
        if reserved is None:
            reserved = 0
        discard = self.config.get_config("storage", "debug_discard", False,
                                         boolean=True)

        expire = self.config.get_config("storage", "expire.enabled", False, boolean=True)
        if expire:
            mode = self.config.get_config("storage", "expire.mode") # require a mode
        else:
            mode = self.config.get_config("storage", "expire.mode", "age")

        o_l_d = self.config.get_config("storage", "expire.override_lease_duration", None)
        if o_l_d is not None:
            o_l_d = parse_duration(o_l_d)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = self.config.get_config("storage", "expire.cutoff_date")
            cutoff_date = parse_date(cutoff_date)

        sharetypes = []
        if self.config.get_config("storage", "expire.immutable", True, boolean=True):
            sharetypes.append("immutable")
        if self.config.get_config("storage", "expire.mutable", True, boolean=True):
            sharetypes.append("mutable")
        expiration_sharetypes = tuple(sharetypes)

        ss = StorageServer(storedir, self.nodeid,
                           reserved_space=reserved,
                           discard_storage=discard,
                           readonly_storage=readonly,
                           stats_provider=self.stats_provider,
                           expiration_enabled=expire,
                           expiration_mode=mode,
                           expiration_override_lease_duration=o_l_d,
                           expiration_cutoff_date=cutoff_date,
                           expiration_sharetypes=expiration_sharetypes)
        ss.setServiceParent(self)

        furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding())
        furl = self.tub.registerReference(ss, furlFile=furl_file)
        ann = {"anonymous-storage-FURL": furl,
               "permutation-seed-base32": self._init_permutation_seed(ss),
               }
        for ic in self.introducer_clients:
            ic.publish("storage", ann, self._node_key)
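For context, a hypothetical tahoe.cfg [storage] section containing the keys this method reads (the values are illustrative assumptions, not recommended settings):

[storage]
enabled = true
readonly = false
reserved_space = 1G
debug_discard = false
expire.enabled = true
expire.mode = cutoff-date
expire.cutoff_date = 2009-01-01
expire.immutable = true
expire.mutable = true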