Beispiel #1
0
    def _modify_and_retry(self, modifier, backoffer, first_time):
        """
        Apply modifier to this version of the mutable file, retrying after
        uncoordinated-write failures.

        The servermap is refreshed first (default mode when first_time is
        true, MODE_CHECK otherwise) and then _modify_once is run. If that
        fails with UncoordinatedWriteError, backoffer is called with
        (self, failure); once it has finished, the whole procedure is
        started again with first_time=False. Any other failure is passed
        along untouched.

        Returns a Deferred that fires with whatever _modify_once produced.
        """
        log.msg("doing modify")

        def _run_modifier(ignored):
            return self._modify_once(modifier, first_time)

        def _try_again(ignored):
            return self._modify_and_retry(modifier, backoffer, False)

        def _on_failure(failure):
            # trap() re-raises anything that is not an uncoordinated write,
            # so only that one kind of failure is retried.
            failure.trap(UncoordinatedWriteError)
            # The servermap may be trusted for the first attempt only. A
            # failure could mean we raced with another uploader; for that
            # race to converge we must learn about the other upload, which
            # is why the retry below re-runs with first_time=False.
            waiting = defer.maybeDeferred(backoffer, self, failure)
            waiting.addCallback(_try_again)
            return waiting

        if first_time:
            d = self._update_servermap()
        else:
            # Something already went wrong once: MODE_CHECK makes the
            # following attempts a little more careful.
            d = self._update_servermap(mode=MODE_CHECK)

        d.addCallback(_run_modifier)
        d.addErrback(_on_failure)
        return d
Beispiel #2
0
 def remote_slot_readv(self, storage_index, shares, readv):
     """Apply a read vector to the mutable shares held for storage_index.

     shares selects the share numbers to read; an empty value means every
     share found in the bucket directory. readv is handed unchanged to
     MutableShareFile.readv for each selected share.

     Returns a dict mapping share number to that share's readv() result;
     the dict is empty when no bucket directory exists. The elapsed time
     is recorded under the "readv" latency category in both cases.
     """
     started = time.time()
     self.count("readv")
     si_s = si_b2a(storage_index)
     lp = log.msg("storage: slot_readv %s %s" % (si_s, shares),
                  facility="tahoe.storage", level=log.OPERATIONAL)
     # shares exist if there is a file for them
     bucketdir = os.path.join(self.sharedir,
                              storage_index_to_dir(storage_index))
     if not os.path.isdir(bucketdir):
         self.add_latency("readv", time.time() - started)
         return {}

     datavs = {}
     for entry in os.listdir(bucketdir):
         try:
             sharenum = int(entry)
         except ValueError:
             # not a share file: its name is not a share number
             continue
         if sharenum not in shares and shares:
             # a non-empty selection was given and this share is not in it
             continue
         share_path = os.path.join(bucketdir, entry)
         share_file = MutableShareFile(share_path, self)
         datavs[sharenum] = share_file.readv(readv)

     log.msg("returning shares %s" % (datavs.keys(),),
             facility="tahoe.storage", level=log.NOISY, parent=lp)
     self.add_latency("readv", time.time() - started)
     return datavs
Beispiel #3
0
 def _lost(self):
     """Record that the connection to this server has been lost.

     Logs the event, stores the current time in last_loss_time and clears
     both the remote reference and the remote host.
     """
     log.msg(facility="tahoe.storage_broker", umid="zbRllw",
             format="lost connection to %(serverid)s",
             serverid=self.serverid_s)
     self.last_loss_time = time.time()
     self.rref = self.remote_host = None
Beispiel #4
0
    def init_introducer_clients(self):
        """
        Create one IntroducerClient for every configured introducer.

        Introducers are read from private/introducers.yaml, from the mapping
        stored under its "introducers" key (petname -> dict with a 'furl'
        entry). A file that cannot be opened or read (EnvironmentError), an
        empty file, or an empty "introducers" entry all mean that no
        introducers are configured there. If tahoe.cfg sets
        [client]introducer.furl, it is added under the petname 'default'.

        Each client is appended to self.introducer_clients, its furl to
        self.introducer_furls, and the client is attached to this node as
        a child service.

        Raises ValueError if introducers.yaml itself defines 'default'.
        """
        self.introducer_clients = []
        self.introducer_furls = []

        introducers_yaml_filename = os.path.join(self.basedir, "private", "introducers.yaml")
        introducers_filepath = FilePath(introducers_yaml_filename)

        try:
            with introducers_filepath.open() as f:
                introducers_yaml = yamlutil.safe_load(f)
        except EnvironmentError:
            introducers = {}
        else:
            # An empty (or comment-only) YAML document loads as None, and an
            # "introducers:" key without entries has the value None. Both
            # used to crash here; treat them as "nothing configured".
            # NOTE(review): assumes yamlutil.safe_load behaves like PyYAML's
            # safe_load in this respect -- confirm.
            introducers = (introducers_yaml or {}).get("introducers") or {}
            log.msg("found %d introducers in private/introducers.yaml" %
                    len(introducers))

        if "default" in introducers:
            raise ValueError("'default' introducer furl cannot be specified in introducers.yaml; please fix impossible configuration.")

        # read furl from tahoe.cfg
        tahoe_cfg_introducer_furl = self.get_config("client", "introducer.furl", None)
        if tahoe_cfg_introducer_furl:
            introducers[u'default'] = {'furl':tahoe_cfg_introducer_furl}

        for petname, introducer in introducers.items():
            # every introducer gets its own cache file, named after its petname
            introducer_cache_filepath = FilePath(os.path.join(self.basedir, "private", "introducer_{}_cache.yaml".format(petname)))
            ic = IntroducerClient(self.tub, introducer['furl'],
                                  self.nickname,
                                  str(allmydata.__full_version__),
                                  str(self.OLDEST_SUPPORTED_VERSION),
                                  self.get_app_versions(), self._sequencer, introducer_cache_filepath)
            self.introducer_clients.append(ic)
            self.introducer_furls.append(introducer['furl'])
            ic.setServiceParent(self)
Beispiel #5
0
 def remote_abort(self):
     """Abort the upload of this share.

     Logs the abort; if the writer has not been closed yet, the
     disconnect notification registered on the canary is withdrawn.
     Then _abort() is run and the owning server's "abort" counter is
     bumped.
     """
     log.msg("storage: aborting sharefile %s" % self.incominghome,
             facility="tahoe.storage", level=log.UNUSUAL)
     still_open = not self.closed
     if still_open:
         marker = self._disconnect_marker
         self._canary.dontNotifyOnDisconnect(marker)
     self._abort()
     self.ss.count("abort")
Beispiel #6
0
 def render_POST(self, ctx):
     """Log a user-submitted incident report and acknowledge it.

     The "details" request argument (empty string when absent) is logged
     at level WEIRD; the reply is a plain-text confirmation.
     """
     request = IRequest(ctx)
     details = get_arg(request, "details", "")
     log.msg(format="User reports incident through web page: %(details)s",
             details=details,
             level=log.WEIRD, umid="LkD9Pw")
     request.setHeader("content-type", "text/plain")
     return "An incident report has been saved to logs/incidents/ in the node directory."
Beispiel #7
0
 def _deliver(result):
     """Hand the outcome of fetching segment `segnum` to every waiter.

     result is either a Failure or an (offset, segment, decodetime)
     tuple. Each request returned by _extract_requests(segnum) has its
     SegmentEvent updated and is then delivered via eventually().
     Afterwards the active segment is cleared and the next one started.
     """
     log.msg(format="delivering segment(%(segnum)d)",
             segnum=segnum,
             level=log.OPERATIONAL, parent=self._lp,
             umid="j60Ojg")
     when = now()
     # a Failure here catches failures in decode or ciphertext hash
     failed = isinstance(result, Failure)
     if not failed:
         (offset, segment, decodetime) = result
     for (d, c, seg_ev) in self._extract_requests(segnum):
         if failed:
             seg_ev.error(when)
         else:
             # When two requests want the same segment, the second one
             # has not been "activated" by the time the data arrives.
             # Activating all of them now gives the status-reporting
             # code consistent behavior; SegmentEvent ignores duplicate
             # activate() calls. The price is an inaccurate "receive
             # speed" for the second request.
             seg_ev.activate(when)
             seg_ev.deliver(when, offset, len(segment), decodetime)
         eventually(self._deliver, d, c, result)
     self._active_segment = None
     self._start_new_segment()
Beispiel #8
0
 def _error(f):
     """Report a failure that happened while serving a GET.

     The failure is always logged. If the response is already finished
     nothing more can be done. Otherwise the failure is remembered on
     the request and either a short text note is appended to a
     partially-written response, or a proper error page is rendered.
     """
     lp = log.msg("error during GET", facility="tahoe.webish", failure=f,
                  level=log.UNUSUAL, umid="xSiF3w")
     if finished:
         log.msg("but it's too late to tell them", parent=lp,
                 level=log.UNUSUAL, umid="j1xIbw")
         return

     req._tahoe_request_had_error = f # for HTTP-style logging

     if not req.startedWriting:
         # Nothing has been written yet, so a sensible error message can
         # still be produced.
         MyExceptionHandler().renderHTTP_exception(ctx, f)
         return

     # The content-type is set and the response code has gone out, so a
     # clean error indication is no longer possible. All we can do is
     # emit text (which a browser might interpret as something else);
     # if a Size header was sent, the client might notice the data was
     # truncated. The message is kept small to improve the chances that
     # the error response is shorter than the intended results.
     #
     # We don't have a lot of options, unfortunately.
     req.write("problem during download\n")
     req.finish()
Beispiel #9
0
    def init_storage(self):
        """
        Create, register and announce this node's storage server.

        Returns immediately when [storage]enabled is false. Otherwise the
        [storage] options (readonly, reserved_space, debug_discard and the
        expire.* family) are read and used to build a StorageServer rooted
        at <basedir>/<STOREDIR>. The server is added as a service,
        registered with the tub (furl kept in private/storage.furl) and
        published through the introducer client.

        A reserved_space value for which parse_abbreviated_size raises
        ValueError is logged and the ValueError is re-raised.
        """
        # should we run a storage server (and publish it for others to use)?
        storage_enabled = self.get_config("storage", "enabled", True,
                                          boolean=True)
        if not storage_enabled:
            return
        readonly = self.get_config("storage", "readonly", False, boolean=True)

        storedir = os.path.join(self.basedir, self.STOREDIR)

        data = self.get_config("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(data)
        except ValueError:
            log.msg("[storage]reserved_space= contains unparseable value %s"
                    % data)
            raise
        reserved = 0 if reserved is None else reserved

        discard = self.get_config("storage", "debug_discard", False,
                                  boolean=True)

        expire = self.get_config("storage", "expire.enabled", False, boolean=True)
        # With expiration switched on no default is supplied for the mode
        # (require a mode); otherwise it falls back to "age".
        mode_lookup = ("storage", "expire.mode")
        if not expire:
            mode_lookup += ("age",)
        mode = self.get_config(*mode_lookup)

        raw_override = self.get_config("storage", "expire.override_lease_duration", None)
        if raw_override is None:
            o_l_d = None
        else:
            o_l_d = parse_duration(raw_override)

        if mode == "cutoff-date":
            cutoff_date = parse_date(
                self.get_config("storage", "expire.cutoff_date"))
        else:
            cutoff_date = None

        # share types subject to expiration; each is on unless disabled
        expiration_sharetypes = tuple(
            sharetype
            for (option, sharetype) in (("expire.immutable", "immutable"),
                                        ("expire.mutable", "mutable"))
            if self.get_config("storage", option, True, boolean=True))

        server_options = dict(
            reserved_space=reserved,
            discard_storage=discard,
            readonly_storage=readonly,
            stats_provider=self.stats_provider,
            expiration_enabled=expire,
            expiration_mode=mode,
            expiration_override_lease_duration=o_l_d,
            expiration_cutoff_date=cutoff_date,
            expiration_sharetypes=expiration_sharetypes,
        )
        ss = StorageServer(storedir, self.nodeid, **server_options)
        self.add_service(ss)

        furl_path = os.path.join(self.basedir, "private", "storage.furl")
        furl_file = furl_path.encode(get_filesystem_encoding())
        furl = self.tub.registerReference(ss, furlFile=furl_file)
        ann = {"anonymous-storage-FURL": furl,
               "permutation-seed-base32": self._init_permutation_seed(ss),
               }
        self.introducer_client.publish("storage", ann, self._node_key)
Beispiel #10
0
 def render_POST(self, ctx):
     """Log a user-submitted incident report and thank the user.

     The "details" request argument (empty string when absent) is logged
     at level WEIRD; the reply is a short plain-text message.
     """
     request = IRequest(ctx)
     details = get_arg(request, "details", "")
     log.msg(format="User reports incident through web page: %(details)s",
             details=details,
             level=log.WEIRD, umid="LkD9Pw")
     request.setHeader("content-type", "text/plain")
     return "Thank you for your report!"
Beispiel #11
0
 def _fetch_next(self):
     """Request the segment that contains the current read offset.

     When nothing is left to read (_size == 0) the read is finished:
     the alive/hungry flags are cleared and the result Deferred fires
     with the consumer. Otherwise the wanted segment number is derived
     from the offset and the node's segment size (or its guessed size
     when the real one is not yet known), the segment is requested, and
     the result/error handlers are attached.
     """
     if self._size == 0:
         # done!
         self._alive = False
         self._hungry = False
         self._deferred.callback(self._consumer)
         return

     node = self._node
     size_is_known = node.segment_size is not None
     guess_s = "" if size_is_known else "probably "
     segment_size = node.segment_size or node.guessed_segment_size
     # Offset zero always lives in segment 0; for any other offset the
     # answer might be a guess when the segment size is only guessed.
     wanted_segnum = 0 if self._offset == 0 else self._offset // segment_size
     log.msg(format="_fetch_next(offset=%(offset)d) %(guess)swants segnum=%(segnum)d",
             offset=self._offset, guess=guess_s, segnum=wanted_segnum,
             level=log.NOISY, parent=self._lp, umid="5WfN0w")
     self._active_segnum = wanted_segnum

     (d, canceller) = node.get_segment(wanted_segnum, self._lp)
     self._cancel_segment_request = canceller
     d.addBoth(self._request_retired)
     d.addCallback(self._got_segment, wanted_segnum)
     if not size_is_known:
         # we can retry once
         d.addErrback(self._retry_bad_segment)
     d.addErrback(self._error)
Beispiel #12
0
    def init_magic_folder(self):
        """
        Start a MagicFolder service for every configured magic folder.

        Raises node.OldConfigOptionError when the old [drop_upload]
        section is still enabled. Does nothing more unless
        [magic_folder]enabled is true. A failure to load the magic-folder
        configuration is logged and re-raised. Each folder is stored in
        self._magic_folders, attached to this node as a child service, and
        told to become ready once the storage broker reports enough
        connected servers.
        """
        #print "init_magic_folder"
        if self.config.get_config("drop_upload", "enabled", False, boolean=True):
            raise node.OldConfigOptionError(
                "The [drop_upload] section must be renamed to [magic_folder].\n"
                "See docs/frontends/magic-folder.rst for more information."
            )

        if not self.config.get_config("magic_folder", "enabled", False, boolean=True):
            return

        from allmydata.frontends import magic_folder

        try:
            magic_folders = magic_folder.load_magic_folders(self.config._basedir)
        except Exception as e:
            log.msg("Error loading magic-folder config: {}".format(e))
            raise

        # start processing the upload queue when we've connected to
        # enough servers
        threshold = min(self.encoding_params["k"],
                        self.encoding_params["happy"] + 1)

        def _mark_ready(ign, mf):
            mf.ready()  # returns a Deferred we ignore
            return None

        for (name, mf_config) in magic_folders.items():
            self.log("Starting magic_folder '{}'".format(name))
            folder = magic_folder.MagicFolder.from_config(self, name, mf_config)
            self._magic_folders[name] = folder
            folder.setServiceParent(self)

            enough_d = self.storage_broker.when_connected_enough(threshold)
            enough_d.addCallback(_mark_ready, folder)
Beispiel #13
0
        def _got(data):
            """Fold one batch of encrypted pieces into a single chunk.

            data is a list of encrypted strings; it is consumed (emptied)
            here. Every piece is fed to both crypttext hashers. The joined
            result, zero-padded up to input_chunk_size when allow_short
            permits a short chunk, is returned appended to a copy of
            previous_chunks.

            Raises UploadAborted if the upload was aborted; the
            precondition() calls reject a total that is too long, or --
            unless allow_short is set -- not exactly input_chunk_size.
            """
            if self._aborted:
                raise UploadAborted()

            pieces = []
            total = 0
            while data:
                piece = data.pop(0)
                total += len(piece)
                crypttext_segment_hasher.update(piece)
                self._crypttext_hasher.update(piece)
                pieces.append(piece)

            precondition(total <= input_chunk_size,
                         "length=%d > input_chunk_size=%d" %
                         (total, input_chunk_size))

            shortfall = input_chunk_size - total
            if allow_short:
                if shortfall > 0:
                    # padding
                    pieces.append('\x00' * shortfall)
            else:
                # non-tail segments should be the full segment size
                if shortfall != 0:
                    log.msg("non-tail segment should be full segment size: %d!=%d"
                            % (total, input_chunk_size),
                            level=log.BAD, umid="jNk5Yw")
                precondition(total == input_chunk_size,
                             "length=%d != input_chunk_size=%d" %
                             (total, input_chunk_size))

            return previous_chunks + ["".join(pieces)]
Beispiel #14
0
    def get_stats(self):
        """
        Return a flat dict of storage-server statistics.

        The dict holds the allocated size, the reserved space, every
        latency figure from get_latencies(), disk usage figures when they
        can be obtained, a 0/1 flag saying whether immutable shares are
        being accepted, and the last complete bucket count when the bucket
        counter has a non-zero one.
        """
        # remember: RIStatsProvider requires that our return dict
        # contains numeric values.
        stats = {
            'storage_server.allocated': self.allocated_size(),
            'storage_server.reserved_space': self.reserved_space,
        }
        for category, per_category in self.get_latencies().items():
            for name, value in per_category.items():
                key = 'storage_server.latencies.%s.%s' % (category, name)
                stats[key] = value

        disk_fields = (
            ('storage_server.disk_total', 'total'),
            ('storage_server.disk_used', 'used'),
            ('storage_server.disk_free_for_root', 'free_for_root'),
            ('storage_server.disk_free_for_nonroot', 'free_for_nonroot'),
            ('storage_server.disk_avail', 'avail'),
        )
        try:
            disk = fileutil.get_disk_stats(self.sharedir, self.reserved_space)
            writeable = disk['avail'] > 0

            # spacetime predictors should use disk_avail / (d(disk_used)/dt)
            for stat_key, disk_key in disk_fields:
                stats[stat_key] = disk[disk_key]
        except AttributeError:
            writeable = True
        except EnvironmentError:
            log.msg("OS call to get disk statistics failed", level=log.UNUSUAL)
            writeable = False

        if self.readonly_storage:
            # a read-only server never has room for new shares
            stats['storage_server.disk_avail'] = 0
            writeable = False

        stats['storage_server.accepting_immutable_shares'] = int(writeable)

        counter_state = self.bucket_counter.get_state()
        bucket_count = counter_state.get("last-complete-bucket-count")
        if bucket_count:
            stats['storage_server.total_bucket_count'] = bucket_count
        return stats
Beispiel #15
0
 def get_stats(self):
     """Collect the statistics of all registered producers.

     The get_stats() dicts of every producer in stats_producers are
     merged (later producers overwrite equal keys) and returned, next to
     this object's counters, as {'counters': ..., 'stats': ...}. The
     result is also logged at level NOISY.
     """
     merged = {}
     for producer in self.stats_producers:
         produced = producer.get_stats()
         merged.update(produced)
     ret = { 'counters': self.counters, 'stats': merged }
     log.msg(format='get_stats() -> %(stats)s', stats=ret, level=log.NOISY)
     return ret
	def _crawl_fold(self, fold, info):
		'''List one folder and prune it if it turns out to be empty.

		Generator that yields the result of each _do_request() call and
		hands its result to defer.returnValue (presumably driven by
		inlineCallbacks -- the decorator is not visible here).
		The result is (fold, sublst), where sublst pairs fold with every
		entry of the listing; when the listing is empty the prune is
		logged and a delete request for the folder is issued first.'''
		listing = yield self._do_request(
			'listdir', self.err503_wrapper, self._listdir, info['id'] )
		sublst = [(fold, ci) for ci in listing]
		if len(sublst) == 0:
			log.msg( 'Pruning empty subdir: {} (id: {})'\
				.format(fold, info['id']), level=log.OPERATIONAL )
			yield self._do_request('delete empty subdir', self._rmdir, info['id'])
		result = (fold, sublst)
		defer.returnValue(result)
Beispiel #17
0
 def stop(self):
     """Stop this fetcher: cancel all requests and drop share state.

     After logging, every outstanding request is cancelled, the running
     flag is cleared, and the attributes that track shares are deleted
     from the instance.
     """
     log.msg("SegmentFetcher(%s).stop" % self._node._si_prefix,
             level=log.NOISY, parent=self._lp, umid="LWyqpg")
     self._cancel_all_requests()
     self._running = False
     # help GC ??? XXX
     del self._shares
     del self._shares_from_server
     del self._active_share_map
     del self._share_observers
Beispiel #18
0
    def __init__(self, storedir, nodeid, reserved_space=0,
                 discard_storage=False, readonly_storage=False,
                 stats_provider=None,
                 expiration_enabled=False,
                 expiration_mode="age",
                 expiration_override_lease_duration=None,
                 expiration_cutoff_date=None,
                 expiration_sharetypes=("mutable", "immutable")):
        """
        Set up the on-disk layout and the helper services of the server.

        storedir: base directory. Shares live in <storedir>/shares and
          incoming shares in <storedir>/shares/incoming; both directories
          are created here.
        nodeid: str of exactly 20 characters, stored as self.my_nodeid.
        reserved_space: stored as int(reserved_space). When it is non-zero
          but get_available_space() returns None, a warning is logged.
        discard_storage: stored as self.no_storage.
        readonly_storage: stored as self.readonly_storage.
        stats_provider: when true, this server registers itself with it
          as a producer.
        expiration_*: passed through unchanged to the lease checker
          (self.LeaseCheckerClass), which becomes a child service.
        """
        service.MultiService.__init__(self)
        assert isinstance(nodeid, str)
        assert len(nodeid) == 20
        self.my_nodeid = nodeid
        self.storedir = storedir
        sharedir = os.path.join(storedir, "shares")
        fileutil.make_dirs(sharedir)
        self.sharedir = sharedir
        # we don't actually create the corruption-advisory dir until necessary
        self.corruption_advisory_dir = os.path.join(storedir,
                                                    "corruption-advisories")
        self.reserved_space = int(reserved_space)
        self.no_storage = discard_storage
        self.readonly_storage = readonly_storage
        self.stats_provider = stats_provider
        if self.stats_provider:
            self.stats_provider.register_producer(self)
        self.incomingdir = os.path.join(sharedir, 'incoming')
        # _clean_incomplete() runs before the incoming directory is
        # (re)created on the next line
        self._clean_incomplete()
        fileutil.make_dirs(self.incomingdir)
        self._active_writers = weakref.WeakKeyDictionary()
        log.msg("StorageServer created", facility="tahoe.storage")

        if reserved_space:
            if self.get_available_space() is None:
                # The keyword is "umid" (it used to be misspelled "umin"),
                # matching every other log call.
                log.msg("warning: [storage]reserved_space= is set, but this platform does not support an API to get disk statistics (statvfs(2) or GetDiskFreeSpaceEx), so this reservation cannot be honored",
                        umid="0wZ27w", level=log.UNUSUAL)

        # one list of latency samples per operation category
        self.latencies = {"allocate": [], # immutable
                          "write": [],
                          "close": [],
                          "read": [],
                          "get": [],
                          "writev": [], # mutable
                          "readv": [],
                          "add-lease": [], # both
                          "renew": [],
                          "cancel": [],
                          }
        self.add_bucket_counter()

        statefile = os.path.join(self.storedir, "lease_checker.state")
        historyfile = os.path.join(self.storedir, "lease_checker.history")
        klass = self.LeaseCheckerClass
        self.lease_checker = klass(self, statefile, historyfile,
                                   expiration_enabled, expiration_mode,
                                   expiration_override_lease_duration,
                                   expiration_cutoff_date,
                                   expiration_sharetypes)
        self.lease_checker.setServiceParent(self)
Beispiel #19
0
    def _unpack_contents(self, data):
        """
        Parse the serialized form of a directory into its children.

        data: str holding the serialized directory (may be empty).

        Returns an AuxValueDict mapping each normalized child name to a
        (child_node, metadata_dict) pair, with the child's raw serialized
        entry kept as the auxiliary value. Children whose cap fails
        validation (CapConstraintError), and children not allowed in an
        immutable directory, are logged and left out.

        Raises ValueError if an immutable directory contains a child with
        a non-empty rwcapdata field.
        """
        # the directory is serialized as a list of netstrings, one per child.
        # Each child is serialized as a list of four netstrings: (name, ro_uri,
        # rwcapdata, metadata), in which the name, ro_uri, metadata are in
        # cleartext. The 'name' is UTF-8 encoded, and should be normalized to NFC.
        # The rwcapdata is formatted as:
        # pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac)
        assert isinstance(data, str), (repr(data), type(data))
        # an empty directory is serialized as an empty string
        if data == "":
            return AuxValueDict()
        writeable = not self.is_readonly()
        mutable = self.is_mutable()
        children = AuxValueDict()
        position = 0
        while position < len(data):
            entries, position = split_netstring(data, 1, position)
            entry = entries[0]
            (namex_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4)
            if not mutable and len(rwcapdata) > 0:
                raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty")

            # A name containing characters that are unassigned in one version of Unicode might
            # not be normalized wrt a later version. See the note in section 'Normalization Stability'
            # at <http://unicode.org/policies/stability_policy.html>.
            # Therefore we normalize names going both in and out of directories.
            name = normalize(namex_utf8.decode("utf-8"))

            rw_uri = ""
            if writeable:
                rw_uri = self._decrypt_rwcapdata(rwcapdata)

            # Since the encryption uses CTR mode, it currently leaks the length of the
            # plaintext rw_uri -- and therefore whether it is present, i.e. whether the
            # dirnode is writeable (ticket #925). By stripping trailing spaces in
            # Tahoe >= 1.6.0, we may make it easier for future versions to plug this leak.
            # ro_uri is treated in the same way for consistency.
            # rw_uri and ro_uri will be either None or a non-empty string.

            rw_uri = rw_uri.rstrip(' ') or None
            ro_uri = ro_uri.rstrip(' ') or None

            try:
                child = self._create_and_validate_node(rw_uri, ro_uri, name)
                if mutable or child.is_allowed_in_immutable_directory():
                    metadata = simplejson.loads(metadata_s)
                    assert isinstance(metadata, dict)
                    children[name] = (child, metadata)
                    children.set_with_aux(name, (child, metadata), auxilliary=entry)
                else:
                    log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
                                   name=quote_output(name, encoding='utf-8'),
                                   facility="tahoe.webish", level=log.UNUSUAL)
            except CapConstraintError as e:
                log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
                               "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
                               facility="tahoe.webish", level=log.UNUSUAL)
        # Hand back what was parsed; without this the non-empty case would
        # return None while the empty case returns an AuxValueDict.
        return children
Beispiel #20
0
 def got_shares(self, shares):
     """Count delivered shares and finish once three have arrived.

     When reneging is being checked and the "no more shares" flag is
     already set, receiving shares is an error: finished_d is errbacked
     with a FailTest and nothing is counted. Otherwise the running total
     is increased by len(shares), logged, and finished_d fires with True
     when the total is exactly 3.
     """
     reneged = self.check_reneging and self._no_more_shares
     if reneged:
         self.finished_d.errback(unittest.FailTest("The node was told by the share finder that it is destined to remain hungry, then was given another share."))
         return
     self.got += len(shares)
     log.msg("yyy 3 %s.got_shares(%s) got: %s" % (self, shares, self.got))
     if self.got != 3:
         return
     self.finished_d.callback(True)
Beispiel #21
0
def _corrupt_crypttext_hash_tree_byte_x221(data, debug=False):
    """Scramble the file data -- the byte at offset 0x221 will have its 7th
    (b1) bit flipped.
    """
    sharevernum = struct.unpack(">L", data[0x0c:0x0c+4])[0]
    assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
    if debug:
        log.msg("original data: %r" % (data,))
    return data[:0x0c+0x221] + chr(ord(data[0x0c+0x221])^0x02) + data[0x0c+0x2210+1:]
Beispiel #22
0
 def _done(filenode):
     """Set the response code for a finished upload and return the URI.

     200 (OK) is used when self.node is true, 201 (CREATED) otherwise.
     """
     log.msg("webish upload complete", facility="tahoe.webish", level=log.NOISY, umid="TCjBGQ")
     # An existing node means a file was replaced (or a mutable file
     # modified) -> 200; no node means a new file was created -> 201.
     code = http.OK if self.node else http.CREATED
     req.setResponseCode(code)
     return filenode.get_uri()
Beispiel #23
0
    def _got_versioned_service(self, rref, lp):
        """
        Record a freshly established connection to the remote service.

        Logs the version information carried by rref (as a child of log
        entry lp), stores the connect time, the peer and the reference
        itself, and asks to have _lost called when rref disconnects.
        """
        service_name = self.name()
        version_info = rref.version
        log.msg(format="%(name)s provided version info %(version)s",
                name=service_name, version=version_info,
                facility="tahoe.storage_broker", umid="SWmJYg",
                level=log.NOISY, parent=lp)

        self.last_connect_time = time.time()
        self.remote_host = rref.getPeer()
        self.rref = rref
        rref.notifyOnDisconnect(self._lost)
Beispiel #24
0
 def remote_get_buckets(self, storage_index):
     """Return a BucketReader for every share held for storage_index.

     The result maps share number to BucketReader, built from the
     (shnum, filename) pairs of _get_bucket_shares(). The call is counted
     and timed under the "get" category.
     """
     started = time.time()
     self.count("get")
     si_s = si_b2a(storage_index)
     log.msg("storage: get_buckets %s" % si_s)
     # k: sharenum, v: BucketReader
     bucketreaders = dict(
         (shnum, BucketReader(self, filename, storage_index, shnum))
         for (shnum, filename) in self._get_bucket_shares(storage_index))
     elapsed = time.time() - started
     self.add_latency("get", elapsed)
     return bucketreaders
Beispiel #25
0
 def _lost(self):
     """Record that the connection to this server has been lost.

     Logs the event, stores the current time in last_loss_time, marks
     the server as not connected and clears the remote host. The remote
     reference itself is deliberately kept.
     """
     server_name = self.get_name()
     log.msg(format="lost connection to %(name)s", name=server_name,
             facility="tahoe.storage_broker", umid="zbRllw")
     self.last_loss_time = time.time()
     # self.rref has gone stale: every callRemote() on it will now get a
     # DeadReferenceError. The stale reference stays in place so that
     # uploader/downloader code (which got this IServer through
     # get_connected_servers() or get_servers_for_psi()) can keep using
     # s.get_rref().callRemote() without worrying about it being None.
     self._is_connected = False
     self.remote_host = None
Beispiel #26
0
def corrupt_field(data, offset, size, debug=False):
    """Return a copy of data with data[offset:offset+size] corrupted.

    With probability one half a single bit inside the field is flipped
    (testutil.flip_one_bit); otherwise the whole field is replaced by
    testutil.insecurerandstr(size). When debug is true the change is
    logged.
    """
    flip_single_bit = random.random() < 0.5
    if not flip_single_bit:
        newval = testutil.insecurerandstr(size)
        if debug:
            log.msg("testing: corrupting offset %d, size %d randomizing field, orig: %r, newval: %r" % (offset, size, data[offset:offset+size], newval))
        return data[:offset]+newval+data[offset+size:]

    newdata = testutil.flip_one_bit(data, offset, size)
    if debug:
        log.msg("testing: corrupting offset %d, size %d flipping one bit orig: %r, newdata: %r" % (offset, size, data[offset:offset+size], newdata[offset:offset+size]))
    return newdata
Beispiel #27
0
 def _start_new_segment(self):
     """Start fetching the first queued segment, if none is active.

     Does nothing while a segment is active or when no segment requests
     are queued. Otherwise a SegmentFetcher is created for the first
     queued request, becomes the active segment, and is given the shares
     that are currently alive.
     """
     if self._active_segment is not None or not self._segment_requests:
         return
     first_request = self._segment_requests[0]
     segnum = first_request[0]
     k = self._verifycap.needed_shares
     lp = first_request[3]
     log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d",
             node=repr(self), segnum=segnum,
             level=log.NOISY, parent=lp, umid="wAlnHQ")
     fetcher = SegmentFetcher(self, segnum, k, lp)
     self._active_segment = fetcher
     alive = [share for share in self._shares if share.is_alive()]
     fetcher.add_shares(alive) # this triggers the loop
def canonical_encoding(encoding):
    """Return a normalized, lower-case name for ``encoding``.

    ``None`` is treated as UTF-8 (with a warning logged). "cp65001" maps
    to 'utf-8'; "us-ascii", "646" and "ansi_x3.4-1968" map to 'ascii'.
    Any other name is returned lower-cased and otherwise untouched.
    """
    if encoding is None:
        log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD)
        encoding = 'utf-8'

    lowered = encoding.lower()
    if lowered == "cp65001":
        return 'utf-8'
    if lowered in ("us-ascii", "646", "ansi_x3.4-1968"):
        return 'ascii'
    return lowered
Beispiel #29
0
 def stopProducing(self):
     """Stop producing: mark this producer dead and fail its Deferred.

     Any outstanding segment request is cancelled and forgotten, then
     ``self._deferred`` is errbacked with a DownloadStopped.
     """
     log.msg("asked to stopProducing",
             level=log.NOISY, parent=self._lp, umid="XIyL9w")
     self._hungry = self._alive = False

     # cancel any outstanding segment request
     pending = self._cancel_segment_request
     if pending:
         pending.cancel()
         self._cancel_segment_request = None

     self._deferred.errback(
         DownloadStopped("our Consumer called stopProducing()"))
Beispiel #30
0
    def get_available_space(self):
        """Return the number of bytes available for share storage.

        Read-only storage always reports 0. Otherwise the 'avail' entry of
        ``self.get_disk_stats()`` is returned. ``None`` means no API to
        obtain this information is available (signalled by AttributeError);
        a failing OS call (EnvironmentError) is logged and reported as 0.
        """
        if self.readonly_storage:
            return 0

        try:
            available = self.get_disk_stats()['avail']
        except AttributeError:
            available = None
        except EnvironmentError:
            log.msg("OS call to get disk statistics failed", level=log.UNUSUAL)
            available = 0
        return available
Beispiel #31
0
    def __init__(self,
                 filenode,
                 storage_broker,
                 servermap,
                 verinfo,
                 fetch_privkey=False,
                 verify=False):
        """Prepare to retrieve one version of a mutable file.

        ``filenode`` must already have a pubkey and a readkey (asserted).
        ``servermap`` and ``verinfo`` are stored on the instance, and
        ``verinfo`` is unpacked as a 9-tuple to seed the status object with
        the data length and the k/N encoding. ``fetch_privkey`` requests the
        private key when the node does not have one; ``verify`` selects
        verification mode and also turns on the private-key fetch.
        """
        self._node = filenode
        assert self._node.get_pubkey()
        self._storage_broker = storage_broker
        self._storage_index = filenode.get_storage_index()
        assert self._node.get_readkey()
        self._last_failure = None
        si_prefix = si_b2a(self._storage_index)[:5]
        self._log_number = log.msg("Retrieve(%s): starting" % si_prefix)
        self._running = True
        self._decoding = False
        self._bad_shares = set()

        self.servermap = servermap
        assert self._node.get_pubkey()
        self.verinfo = verinfo

        # During repair we may be asked to grab the private key: a
        # verify=False checker run does not pick it up, and repair needs it
        # to generate a new version.
        self._need_privkey = verify or (fetch_privkey
                                        and not self._node.get_privkey())
        if self._need_privkey:
            # TODO: Evaluate the need for this. We'll use it if we want
            # to limit how many queries are on the wire for the privkey
            # at once. One Marker is kept for each time we've tried to get
            # the privkey.
            self._privkey_query_markers = []

        # verify means that we are using the downloader logic to verify all
        # of our shares. This tells the downloader a few things.
        #
        # 1. We need to download all of the shares.
        # 2. We don't need to decode or decrypt the shares, since our
        #    caller doesn't care about the plaintext, only the
        #    information about which shares are or are not valid.
        # 3. When we are validating readers, we need to validate the
        #    signature on the prefix. Do we? We already do this in the
        #    servermap update?
        self._verify = verify

        status = RetrieveStatus()
        self._status = status
        status.set_storage_index(self._storage_index)
        status.set_helper(False)
        status.set_progress(0.0)
        status.set_active(True)

        # Only the data length and the encoding parameters are needed here;
        # the full unpack is kept so a malformed verinfo still fails early.
        (_seqnum, _root_hash, _iv, _segsize, datalength, k, N, _prefix,
         _offsets_tuple) = self.verinfo
        status.set_size(datalength)
        status.set_encoding(k, N)

        self.readers = {}
        self._stopped = False
        self._pause_deferred = None
        self._offset = None
        self._read_length = None
        self.log("got seqnum %d" % self.verinfo[0])
Beispiel #32
0
    def _make_checker_results(self, smap):
        """Build and return a CheckResults object from the servermap ``smap``.

        The file is reported unhealthy when any version is unrecoverable,
        when no version is recoverable, when more than one version is
        recoverable, or when the best recoverable version has fewer good
        shares than the expected total (N). Each entry of
        ``self.bad_shares`` is added to the report, the summary and the
        problem list, and is logged at WEIRD level.

        ``self._monitor.raise_if_cancelled()`` is called first, so whatever
        it raises on cancellation propagates to the caller.
        """
        self._monitor.raise_if_cancelled()
        healthy = True
        # report: detailed lines; summary: short fragments joined at the end
        report = []
        summary = []
        vmap = smap.make_versionmap()
        recoverable = smap.recoverable_versions()
        unrecoverable = smap.unrecoverable_versions()

        if recoverable:
            report.append("Recoverable Versions: " +
                          "/".join(["%d*%s" % (len(vmap[v]),
                                               smap.summarize_version(v))
                                    for v in recoverable]))
        if unrecoverable:
            report.append("Unrecoverable Versions: " +
                          "/".join(["%d*%s" % (len(vmap[v]),
                                               smap.summarize_version(v))
                                    for v in unrecoverable]))
        # Each of the following three conditions independently marks the
        # file as unhealthy.
        if smap.unrecoverable_versions():
            healthy = False
            summary.append("some versions are unrecoverable")
            report.append("Unhealthy: some versions are unrecoverable")
        if len(recoverable) == 0:
            healthy = False
            summary.append("no versions are recoverable")
            report.append("Unhealthy: no versions are recoverable")
        if len(recoverable) > 1:
            healthy = False
            summary.append("multiple versions are recoverable")
            report.append("Unhealthy: there are multiple recoverable versions")

        # Pick the version whose share counters are reported.
        if recoverable:
            best_version = smap.best_recoverable_version()
            report.append("Best Recoverable Version: " +
                          smap.summarize_version(best_version))
            counters = self._count_shares(smap, best_version)
            s = counters["count-shares-good"]
            k = counters["count-shares-needed"]
            N = counters["count-shares-expected"]
            if s < N:
                healthy = False
                report.append("Unhealthy: best version has only %d shares "
                              "(encoding is %d-of-%d)" % (s, k, N))
                summary.append("%d shares (enc %d-of-%d)" % (s, k, N))
        elif unrecoverable:
            healthy = False
            # find a k and N from somewhere
            first = list(unrecoverable)[0]
            # not exactly the best version, but that doesn't matter too much
            counters = self._count_shares(smap, first)
        else:
            # couldn't find anything at all
            counters = {
                "count-shares-good": 0,
                "count-shares-needed": 3, # arbitrary defaults
                "count-shares-expected": 10,
                "count-good-share-hosts": 0,
                "count-wrong-shares": 0,
                }

        # Describe every bad share: a locator for CheckResults, a report
        # line, a summary entry, a problem tuple, and a log event.
        corrupt_share_locators = []
        problems = []
        if self.bad_shares:
            report.append("Corrupt Shares:")
            summary.append("Corrupt Shares:")
        for (server, shnum, f) in sorted(self.bad_shares):
            serverid = server.get_serverid()
            locator = (server, self._storage_index, shnum)
            corrupt_share_locators.append(locator)
            s = "%s-sh%d" % (server.get_name(), shnum)
            # For a CorruptShareError use its reason; otherwise fall back to
            # the string form of the failure.
            if f.check(CorruptShareError):
                ft = f.value.reason
            else:
                ft = str(f)
            report.append(" %s: %s" % (s, ft))
            summary.append(s)
            p = (serverid, self._storage_index, shnum, f)
            problems.append(p)
            msg = ("CorruptShareError during mutable verify, "
                   "serverid=%(serverid)s, si=%(si)s, shnum=%(shnum)d, "
                   "where=%(where)s")
            log.msg(format=msg, serverid=server.get_name(),
                    si=base32.b2a(self._storage_index),
                    shnum=shnum,
                    where=ft,
                    level=log.WEIRD, umid="EkK8QA")

        # Map "<version summary>-sh<shnum>" to the set of servers holding
        # that share.
        sharemap = dictutil.DictOfSets()
        for verinfo in vmap:
            for (shnum, server, timestamp) in vmap[verinfo]:
                shareid = "%s-sh%d" % (smap.summarize_version(verinfo), shnum)
                sharemap.add(shareid, server)
        # From here on, summary is a single string rather than a list.
        if healthy:
            summary = "Healthy"
        else:
            summary = "Unhealthy: " + " ".join(summary)

        count_happiness = servers_of_happiness(sharemap)

        cr = CheckResults(from_string(self._node.get_uri()),
                          self._storage_index,
                          healthy=healthy, recoverable=bool(recoverable),
                          count_happiness=count_happiness,
                          count_shares_needed=counters["count-shares-needed"],
                          count_shares_expected=counters["count-shares-expected"],
                          count_shares_good=counters["count-shares-good"],
                          count_good_share_hosts=counters["count-good-share-hosts"],
                          count_recoverable_versions=len(recoverable),
                          count_unrecoverable_versions=len(unrecoverable),
                          servers_responding=list(smap.get_reachable_servers()),
                          sharemap=sharemap,
                          count_wrong_shares=counters["count-wrong-shares"],
                          list_corrupt_shares=corrupt_share_locators,
                          count_corrupt_shares=len(corrupt_share_locators),
                          list_incompatible_shares=[],
                          count_incompatible_shares=0,
                          summary=summary,
                          report=report,
                          share_problems=problems,
                          servermap=smap.copy())
        return cr
Beispiel #33
0
 def log(self, *args, **kwargs):
     """Forward a log event, unchanged, to ``log.msg`` and return its result."""
     emit = log.msg
     return emit(*args, **kwargs)
Beispiel #34
0
 def log(self, *args, **kwargs):
     """Log via ``log.msg``, filling in default 'parent' and 'facility'.

     Explicitly supplied 'parent' or 'facility' keyword arguments win; the
     defaults are ``self._log_number`` and "tahoe.encoder".
     """
     has_parent = "parent" in kwargs
     if not has_parent:
         # Only touch self._log_number when the caller gave no parent.
         kwargs["parent"] = self._log_number
     kwargs.setdefault("facility", "tahoe.encoder")
     return log.msg(*args, **kwargs)
Beispiel #35
0
 def log(self, *args, **kwargs):
     """Log via ``log.msg``, filling in default 'facility' and 'parent'.

     Explicitly supplied keyword arguments win; the defaults are
     "tahoe.helper.chk.checkandUEBfetch" and ``self._logparent``.
     """
     kwargs.setdefault('facility', "tahoe.helper.chk.checkandUEBfetch")
     has_parent = 'parent' in kwargs
     if not has_parent:
         # Only touch self._logparent when the caller gave no parent.
         kwargs['parent'] = self._logparent
     return log.msg(*args, **kwargs)
Beispiel #36
0
    def remote_allocate_buckets(self,
                                storage_index,
                                renew_secret,
                                cancel_secret,
                                sharenums,
                                allocated_size,
                                canary,
                                owner_num=0):
        """Allocate BucketWriters for the requested immutable shares.

        Returns ``(alreadygot, bucketwriters)``: the set of share numbers
        already held for ``storage_index`` (all of them, not only those
        asked about), and a dict mapping each newly accepted share number
        to its BucketWriter. A share is not accepted if it already exists,
        if a partial upload of it is in ``incoming/``, or if the remaining
        space cannot hold ``allocated_size`` more bytes.

        owner_num is not for clients to set, but rather it should be
        curried into the PersonalStorageServer instance that is dedicated
        to a particular owner.
        """
        began = time.time()
        self.count("allocate")
        si_dir = storage_index_to_dir(storage_index)
        si_s = si_b2a(storage_index)

        log.msg("storage: allocate_buckets %s" % si_s)

        # In this implementation, the lease information (including secrets)
        # goes into the share files themselves. It could also be put into a
        # separate database. Note that the lease should not be added until
        # the BucketWriter has been closed.
        lease_duration = 31 * 24 * 60 * 60
        lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret,
                               time.time() + lease_duration, self.my_nodeid)

        per_bucket_limit = allocated_size

        remaining_space = self.get_available_space()
        limited = remaining_space is not None
        if limited:
            # This is a bit conservative, since some of this
            # allocated_size() has already been written to disk, where it
            # will show up in get_available_space.
            remaining_space -= self.allocated_size()
        # self.readonly_storage causes remaining_space <= 0

        # Fill alreadygot with all shares that we have, not just the ones
        # they asked about: this will save them a lot of work. Add or update
        # leases for all of them: if they want us to hold shares for this
        # file, they'll want us to hold leases for this file.
        alreadygot = set()
        for (shnum, fn) in self._get_bucket_shares(storage_index):
            alreadygot.add(shnum)
            ShareFile(fn).add_or_renew_lease(lease_info)

        bucketwriters = {}  # k: shnum, v: BucketWriter
        for shnum in sharenums:
            share_name = "%d" % shnum
            incominghome = os.path.join(self.incomingdir, si_dir, share_name)
            finalhome = os.path.join(self.sharedir, si_dir, share_name)

            # Either we already have the share, or a partial share sits in
            # incoming/. In the latter case no BucketWriter is created, so
            # if a second upload occurs while the first is still in
            # progress, the second uploader will use different servers.
            if os.path.exists(finalhome) or os.path.exists(incominghome):
                continue

            # bummer! not enough space to accept this bucket
            if limited and remaining_space < per_bucket_limit:
                continue

            # ok! we need to create the new share file.
            bw = BucketWriter(self, incominghome, finalhome,
                              per_bucket_limit, lease_info, canary)
            if self.no_storage:
                bw.throw_out_all_data = True
            bucketwriters[shnum] = bw
            self._active_writers[bw] = 1
            if limited:
                remaining_space -= per_bucket_limit

        if bucketwriters:
            fileutil.make_dirs(os.path.join(self.sharedir, si_dir))

        self.add_latency("allocate", time.time() - began)
        return alreadygot, bucketwriters
Beispiel #37
0
 def log(self, *args, **kwargs):
     """Log via ``log.msg``, defaulting 'facility' to "tahoe.storage"."""
     kwargs.setdefault("facility", "tahoe.storage")
     return log.msg(*args, **kwargs)
Beispiel #38
0
 def log_client_error(self, f, tubid):
     """Log, at UNUSUAL level, the failure ``f`` from get_stats() on ``tubid``."""
     message = "StatsGatherer: error in get_stats(), peerid=%s" % tubid
     log.msg(message, level=log.UNUSUAL, failure=f)
Beispiel #39
0
    def _parse_and_validate(self, data):
        """Parse a serialized UEB and validate it against the verify-cap.

        ``data`` is unpacked with ``uri.unpack_extension``. On success the
        following attributes are set: share_size, segment_size, block_size,
        num_segments, tail_data_size, tail_segment_size,
        crypttext_root_hash, share_root_hash and, when present in the UEB,
        crypttext_hash.

        Returns ``self``.

        Raises BadURIExtension when an optional field is inconsistent with
        the verify-cap or with values derived from it, and
        UnsupportedErasureCodec when 'codec_name' is present and not "crs".
        The mandatory fields ('segment_size', 'crypttext_root_hash',
        'share_root_hash') are read by plain indexing, so a missing one
        surfaces as the lookup error of the unpacked mapping.
        """
        self.share_size = mathutil.div_ceil(self._verifycap.size,
                                            self._verifycap.needed_shares)

        d = uri.unpack_extension(data)

        # There are several kinds of things that can be found in a UEB.
        # First, things that we really need to learn from the UEB in order to
        # do this download. Next: things which are optional but not redundant
        # -- if they are present in the UEB they will get used. Next, things
        # that are optional and redundant. These things are required to be
        # consistent: they don't have to be in the UEB, but if they are in
        # the UEB then they will be checked for consistency with the
        # already-known facts, and if they are inconsistent then an exception
        # will be raised. These things aren't actually used -- they are just
        # tested for consistency and ignored. Finally: things which are
        # deprecated -- they ought not be in the UEB at all, and if they are
        # present then a warning will be logged but they are otherwise
        # ignored.

        # First, things that we really need to learn from the UEB:
        # segment_size, crypttext_root_hash, and share_root_hash.
        self.segment_size = d['segment_size']

        self.block_size = mathutil.div_ceil(self.segment_size,
                                            self._verifycap.needed_shares)
        self.num_segments = mathutil.div_ceil(self._verifycap.size,
                                              self.segment_size)

        # The last segment holds whatever is left over; an exact multiple
        # means the tail is a full segment.
        self.tail_data_size = self._verifycap.size % self.segment_size
        if not self.tail_data_size:
            self.tail_data_size = self.segment_size
        # padding for erasure code
        self.tail_segment_size = mathutil.next_multiple(
            self.tail_data_size, self._verifycap.needed_shares)

        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B.
        self.crypttext_root_hash = d['crypttext_root_hash']

        self.share_root_hash = d['share_root_hash']

        # Next: things that are optional and not redundant: crypttext_hash
        if 'crypttext_hash' in d:
            self.crypttext_hash = d['crypttext_hash']
            if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
                raise BadURIExtension(
                    'crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes'
                    % (len(self.crypttext_hash), ))

        # Next: things that are optional, redundant, and required to be
        # consistent: codec_name, codec_params, tail_codec_params,
        # num_segments, size, needed_shares, total_shares
        if 'codec_name' in d:
            if d['codec_name'] != "crs":
                raise UnsupportedErasureCodec(d['codec_name'])

        if 'codec_params' in d:
            ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
            if ucpss != self.segment_size:
                raise BadURIExtension("inconsistent erasure code params: "
                                      "ucpss: %s != self.segment_size: %s" %
                                      (ucpss, self.segment_size))
            if ucpns != self._verifycap.needed_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: ucpns: %s != "
                    "self._verifycap.needed_shares: %s" %
                    (ucpns, self._verifycap.needed_shares))
            if ucpts != self._verifycap.total_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: ucpts: %s != "
                    "self._verifycap.total_shares: %s" %
                    (ucpts, self._verifycap.total_shares))

        if 'tail_codec_params' in d:
            utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
            if utcpss != self.tail_segment_size:
                raise BadURIExtension(
                    "inconsistent erasure code params: utcpss: %s != "
                    "self.tail_segment_size: %s, self._verifycap.size: %s, "
                    "self.segment_size: %s, self._verifycap.needed_shares: %s"
                    % (utcpss, self.tail_segment_size, self._verifycap.size,
                       self.segment_size, self._verifycap.needed_shares))
            if utcpns != self._verifycap.needed_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: utcpns: %s != "
                    "self._verifycap.needed_shares: %s" %
                    (utcpns, self._verifycap.needed_shares))
            if utcpts != self._verifycap.total_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: utcpts: %s != "
                    "self._verifycap.total_shares: %s" %
                    (utcpts, self._verifycap.total_shares))

        if 'num_segments' in d:
            if d['num_segments'] != self.num_segments:
                raise BadURIExtension(
                    "inconsistent num_segments: size: %s, "
                    "segment_size: %s, computed_num_segments: %s, "
                    "ueb_num_segments: %s" %
                    (self._verifycap.size, self.segment_size,
                     self.num_segments, d['num_segments']))

        if 'size' in d:
            if d['size'] != self._verifycap.size:
                raise BadURIExtension(
                    "inconsistent size: URI size: %s, UEB size: %s" %
                    (self._verifycap.size, d['size']))

        if 'needed_shares' in d:
            if d['needed_shares'] != self._verifycap.needed_shares:
                # Report the URI's needed_shares: it is the value that was
                # just compared against.
                raise BadURIExtension(
                    "inconsistent needed shares: URI needed shares: %s, UEB "
                    "needed shares: %s" %
                    (self._verifycap.needed_shares, d['needed_shares']))

        if 'total_shares' in d:
            if d['total_shares'] != self._verifycap.total_shares:
                raise BadURIExtension(
                    "inconsistent total shares: URI total shares: %s, UEB "
                    "total shares: %s" %
                    (self._verifycap.total_shares, d['total_shares']))

        # Finally, things that are deprecated and ignored: plaintext_hash,
        # plaintext_root_hash
        if d.get('plaintext_hash'):
            log.msg(
                "Found plaintext_hash in UEB. This field is deprecated for security reasons "
                "and is no longer used.  Ignoring.  %s" % (self, ))
        if d.get('plaintext_root_hash'):
            log.msg(
                "Found plaintext_root_hash in UEB. This field is deprecated for security "
                "reasons and is no longer used.  Ignoring.  %s" % (self, ))

        return self
Beispiel #40
0
    def __init__(self, verifycap, storage_broker, secret_holder,
                 terminator, history, download_status):
        """Set up the download state for one immutable file.

        ``verifycap`` must be a uri.CHKFileVerifierURI (asserted). When
        ``terminator`` is truthy this object registers itself with it. The
        other arguments are stored on the instance, and ``storage_broker``
        and ``download_status`` are also passed to the ShareFinder created
        at the end.

        Fields learned from the UEB start out as None (with ``have_UEB``
        False); guessed values are installed by
        ``self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)``.
        """
        assert isinstance(verifycap, uri.CHKFileVerifierURI)
        self._verifycap = verifycap
        self._storage_broker = storage_broker
        # short printable storage-index prefix, used in the log message below
        self._si_prefix = base32.b2a(verifycap.storage_index[:8])[:12]
        self.running = True
        if terminator:
            terminator.register(self) # calls self.stop() at stopService()
        # the rules are:
        # 1: Only send network requests if you're active (self.running is True)
        # 2: Use TimerService, not reactor.callLater
        # 3: You can do eventual-sends any time.
        # These rules should mean that once
        # stopService()+flushEventualQueue() fires, everything will be done.
        self._secret_holder = secret_holder
        self._history = history
        self._download_status = download_status

        self.share_hash_tree = IncompleteHashTree(self._verifycap.total_shares)

        # we guess the segment size, so Segmentation can pull non-initial
        # segments in a single roundtrip. This populates
        # .guessed_segment_size, .guessed_num_segments, and
        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
        # we'll need)
        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)

        # filled in when we parse a valid UEB
        self.have_UEB = False
        self.segment_size = None
        self.tail_segment_size = None
        self.tail_segment_padded = None
        self.num_segments = None
        self.block_size = None
        self.tail_block_size = None

        # things to track callers that want data

        # _segment_requests can have duplicates
        self._segment_requests = [] # (segnum, d, cancel_handle, seg_ev, lp)
        self._active_segment = None # a SegmentFetcher, with .segnum

        self._segsize_observers = observer.OneShotObserverList()

        # we create one top-level logparent for this _Node, and another one
        # for each read() call. Segmentation and get_segment() messages are
        # associated with the read() call, everything else is tied to the
        # _Node's log entry.
        # NOTE: this log call reads the guessed_* attributes, so it must
        # stay after _build_guessed_tables() above.
        lp = log.msg(format="Immutable.DownloadNode(%(si)s) created:"
                     " size=%(size)d,"
                     " guessed_segsize=%(guessed_segsize)d,"
                     " guessed_numsegs=%(guessed_numsegs)d",
                     si=self._si_prefix, size=verifycap.size,
                     guessed_segsize=self.guessed_segment_size,
                     guessed_numsegs=self.guessed_num_segments,
                     level=log.OPERATIONAL, umid="uJ0zAQ")
        self._lp = lp

        # the ShareFinder logs under this node's log parent
        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
                                        self._download_status, lp)
        self._shares = set()
Beispiel #41
0
 def log(self, *args, **kwargs):
     """Log via ``log.msg``, defaulting 'facility' to "tahoe.introducer"."""
     kwargs.setdefault("facility", "tahoe.introducer")
     return log.msg(*args, **kwargs)
Beispiel #42
0
    def __init__(self,
                 storedir,
                 nodeid,
                 reserved_space=0,
                 discard_storage=False,
                 readonly_storage=False,
                 stats_provider=None,
                 expiration_enabled=False,
                 expiration_mode="age",
                 expiration_override_lease_duration=None,
                 expiration_cutoff_date=None,
                 expiration_sharetypes=("mutable", "immutable")):
        """Create the storage server rooted at ``storedir``.

        ``nodeid`` must be a 20-character ``str`` (asserted). The
        "shares" and "shares/incoming" directories are created under
        ``storedir``; ``self._clean_incomplete()`` runs before the incoming
        directory is (re)created. ``reserved_space`` is coerced with
        ``int()``. When ``stats_provider`` is given, this server registers
        itself with it as a producer. The ``expiration_*`` arguments are
        passed through to ``self.LeaseCheckerClass``, whose instance is
        attached as a child service.
        """
        service.MultiService.__init__(self)
        assert isinstance(nodeid, str)
        assert len(nodeid) == 20
        self.my_nodeid = nodeid
        self.storedir = storedir
        sharedir = os.path.join(storedir, "shares")
        fileutil.make_dirs(sharedir)
        self.sharedir = sharedir
        # we don't actually create the corruption-advisory dir until necessary
        self.corruption_advisory_dir = os.path.join(storedir,
                                                    "corruption-advisories")
        self.reserved_space = int(reserved_space)
        self.no_storage = discard_storage
        self.readonly_storage = readonly_storage
        self.stats_provider = stats_provider
        if self.stats_provider:
            self.stats_provider.register_producer(self)
        self.incomingdir = os.path.join(sharedir, 'incoming')
        self._clean_incomplete()
        fileutil.make_dirs(self.incomingdir)
        self._active_writers = weakref.WeakKeyDictionary()
        log.msg("StorageServer created", facility="tahoe.storage")

        if reserved_space:
            if self.get_available_space() is None:
                # The message id keyword is "umid", as in every other log
                # call; it was previously misspelled "umin".
                log.msg(
                    "warning: [storage]reserved_space= is set, but this platform does not support an API to get disk statistics (statvfs(2) or GetDiskFreeSpaceEx), so this reservation cannot be honored",
                    umid="0wZ27w",
                    level=log.UNUSUAL)

        # per-operation latency samples, keyed by operation category
        self.latencies = {
            "allocate": [],  # immutable
            "write": [],
            "close": [],
            "read": [],
            "get": [],
            "writev": [],  # mutable
            "readv": [],
            "add-lease": [],  # both
            "renew": [],
            "cancel": [],
        }
        self.add_bucket_counter()

        statefile = os.path.join(self.storedir, "lease_checker.state")
        historyfile = os.path.join(self.storedir, "lease_checker.history")
        klass = self.LeaseCheckerClass
        self.lease_checker = klass(self, statefile, historyfile,
                                   expiration_enabled, expiration_mode,
                                   expiration_override_lease_duration,
                                   expiration_cutoff_date,
                                   expiration_sharetypes)
        self.lease_checker.setServiceParent(self)
Beispiel #43
0
 def log(self, *args, **kwargs):
     """Log via ``log.msg``, defaulting 'facility' to "tahoe.helper"."""
     kwargs.setdefault('facility', "tahoe.helper")
     return log.msg(*args, **kwargs)
Beispiel #44
0
    def remote_slot_testv_and_readv_and_writev(self, storage_index, secrets,
                                               test_and_write_vectors,
                                               read_vector):
        """Test, read, and conditionally write the mutable shares of a slot.

        ``secrets`` is a ``(write_enabler, renew_secret, cancel_secret)``
        tuple. ``test_and_write_vectors`` maps a share number to a
        ``(testv, datav, new_length)`` tuple. ``read_vector`` is applied to
        every share that existed before any write.

        The write enabler is checked against every existing share. The test
        vectors are then evaluated, against an EmptyShare for shares that
        do not exist yet. Only when all of them pass are the write vectors
        applied: a ``new_length`` of 0 unlinks the share, anything else
        writes the data (allocating the share first if needed) and adds or
        renews the lease.

        Returns ``(testv_is_good, read_data)``, where ``read_data`` maps
        each pre-existing share number to the result of its ``readv``.
        """
        start = time.time()
        self.count("writev")
        si_s = si_b2a(storage_index)
        log.msg("storage: slot_writev %s" % si_s)
        si_dir = storage_index_to_dir(storage_index)
        (write_enabler, renew_secret, cancel_secret) = secrets
        # shares exist if there is a file for them
        bucketdir = os.path.join(self.sharedir, si_dir)
        shares = {}
        if os.path.isdir(bucketdir):
            for sharenum_s in os.listdir(bucketdir):
                try:
                    sharenum = int(sharenum_s)
                except ValueError:
                    # not a share file: ignore it
                    continue
                filename = os.path.join(bucketdir, sharenum_s)
                msf = MutableShareFile(filename, self)
                msf.check_write_enabler(write_enabler, si_s)
                shares[sharenum] = msf
        # write_enabler is good for all existing shares.

        # Now evaluate test vectors.
        testv_is_good = True
        for sharenum in test_and_write_vectors:
            (testv, datav, new_length) = test_and_write_vectors[sharenum]
            if sharenum in shares:
                if not shares[sharenum].check_testv(testv):
                    self.log("testv failed: [%d]: %r" % (sharenum, testv))
                    testv_is_good = False
                    break
            else:
                # compare the vectors against an empty share, in which all
                # reads return empty strings.
                if not EmptyShare().check_testv(testv):
                    self.log("testv failed (empty): [%d] %r" %
                             (sharenum, testv))
                    testv_is_good = False
                    break

        # now gather the read vectors, before we do any writes
        read_data = {}
        for sharenum, share in shares.items():
            read_data[sharenum] = share.readv(read_vector)

        ownerid = 1  # TODO
        expire_time = time.time() + 31 * 24 * 60 * 60  # one month
        lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret,
                               expire_time, self.my_nodeid)

        if testv_is_good:
            # now apply the write vectors
            deletion_requested = False
            for sharenum in test_and_write_vectors:
                (testv, datav, new_length) = test_and_write_vectors[sharenum]
                if new_length == 0:
                    deletion_requested = True
                    if sharenum in shares:
                        shares[sharenum].unlink()
                else:
                    if sharenum not in shares:
                        # allocate a new share
                        allocated_size = 2000  # arbitrary, really
                        share = self._allocate_slot_share(bucketdir,
                                                          secrets,
                                                          sharenum,
                                                          allocated_size,
                                                          owner_num=0)
                        shares[sharenum] = share
                    shares[sharenum].writev(datav, new_length)
                    # and update the lease
                    shares[sharenum].add_or_renew_lease(lease_info)

            # Delete empty bucket directories. This is keyed on an explicit
            # flag rather than on the loop variable, which is unbound when
            # there are no write vectors and otherwise only reflects the
            # last vector processed. The directory may not exist at all if
            # the client asked to delete a share we never held, so check
            # before listing it.
            if (deletion_requested and os.path.isdir(bucketdir)
                    and not os.listdir(bucketdir)):
                os.rmdir(bucketdir)

        # all done
        self.add_latency("writev", time.time() - start)
        return (testv_is_good, read_data)
Beispiel #45
0
 def _complain(output_file, message):
     """Print ``message`` to ``output_file`` and log it at WEIRD level.

     A ``str`` message is printed as-is; anything else is printed as its
     ``repr()``. The original object is what gets passed to ``log.msg``.
     """
     # A conditional expression is used instead of "cond and a or b",
     # which wrongly fell through to repr() for an empty-string message.
     text = message if isinstance(message, str) else repr(message)
     print(text, file=output_file)
     log.msg(message, level=log.WEIRD)
Beispiel #46
0
            self.log(" block hash: %s" % base32.b2a_or_none(blockhash))
            if len(blockdata) < 100:
                self.log(" block data: %r" % (blockdata, ))
            else:
                self.log(" block data start/end: %r .. %r" %
                         (blockdata[:50], blockdata[-50:]))
            self.log(" share hash tree:\n" + self.share_hash_tree.dump())
            self.log(" block hash tree:\n" + self.block_hash_tree.dump())
            lines = []
            for i, h in sorted(sharehashes.items()):
                lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
            self.log(" sharehashes:\n" + "\n".join(lines) + "\n")
            lines = []
            for i, h in blockhashes.items():
                lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
            log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
            raise BadOrMissingHash(le)

        # If we made it here, the block is good. If the hash trees didn't
        # like what they saw, they would have raised a BadHashError, causing
        # our caller to see a Failure and thus ignore this block (as well as
        # dropping this bucket).
        return blockdata


class Checker(log.PrefixingLogMixin):
    """I query all servers to see if M uniquely-numbered shares are
    available.

    If the verify flag was passed to my constructor, then for each share I
    download every data block and all metadata from each server and perform a
Beispiel #47
0
    def init_storage(self):
        """Build the storage server from the [storage] config and publish it.

        Returns immediately when "enabled" is false. Otherwise a
        StorageServer is constructed from the configured options and added
        as a service; once the Tub is ready it is registered and its FURL
        is announced through the introducer client.
        """
        # should we run a storage server (and publish it for others to use)?
        enabled = self.get_config("storage", "enabled", True, boolean=True)
        if not enabled:
            return
        readonly = self.get_config("storage", "readonly", False, boolean=True)

        self._maybe_create_node_key()

        storedir = os.path.join(self.basedir, self.STOREDIR)

        raw_reserved = self.get_config("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(raw_reserved)
        except ValueError:
            # an unparseable value is logged and then treated as unset
            log.msg("[storage]reserved_space= contains unparseable value %s" %
                    raw_reserved)
            reserved = None
        if reserved is None:
            reserved = 0

        discard = self.get_config("storage", "debug_discard", False,
                                  boolean=True)
        expire = self.get_config("storage", "expire.enabled", False,
                                 boolean=True)
        if expire:
            # no default: a mode is required when expiration is enabled
            mode = self.get_config("storage", "expire.mode")
        else:
            mode = self.get_config("storage", "expire.mode", "age")

        override_duration = self.get_config(
            "storage", "expire.override_lease_duration", None)
        if override_duration is not None:
            override_duration = parse_duration(override_duration)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = parse_date(
                self.get_config("storage", "expire.cutoff_date"))

        # each share type takes part in expiration unless switched off
        sharetype_options = (
            ("expire.immutable", "immutable"),
            ("expire.mutable", "mutable"),
        )
        expiration_sharetypes = tuple(
            sharetype
            for (option, sharetype) in sharetype_options
            if self.get_config("storage", option, True, boolean=True)
        )

        ss = StorageServer(
            storedir,
            self.nodeid,
            reserved_space=reserved,
            discard_storage=discard,
            readonly_storage=readonly,
            stats_provider=self.stats_provider,
            expiration_enabled=expire,
            expiration_mode=mode,
            expiration_override_lease_duration=override_duration,
            expiration_cutoff_date=cutoff_date,
            expiration_sharetypes=expiration_sharetypes,
        )
        self.add_service(ss)

        d = self.when_tub_ready()

        # we can't do registerReference until the Tub is ready
        def _publish(res):
            furl_path = os.path.join(self.basedir, "private", "storage.furl")
            furl_file = furl_path.encode(get_filesystem_encoding())
            furl = self.tub.registerReference(ss, furlFile=furl_file)
            ann = {
                "anonymous-storage-FURL": furl,
                "permutation-seed-base32": self._init_permutation_seed(ss),
            }
            self.introducer_client.publish("storage", ann, self._server_key)

        d.addCallback(_publish)
        d.addErrback(log.err, facility="tahoe.init", level=log.BAD,
                     umid="aLGBKw")
Beispiel #48
0
    def _fill_checker_results(self, smap, r):
        """Fill the results object r from the servermap smap.

        Collects version counts, share counters, corrupt-share details and
        a share-to-peer map into a data dict, builds the report lines and
        a one-line summary, and stores them on r together with the
        healthy, recoverable and needs-rebalancing flags.
        """
        self._monitor.raise_if_cancelled()
        r.set_servermap(smap.copy())
        healthy = True
        data = {}
        report = []
        summary = []
        vmap = smap.make_versionmap()
        recoverable = smap.recoverable_versions()
        unrecoverable = smap.unrecoverable_versions()
        recoverable_count = len(recoverable)
        data["count-recoverable-versions"] = recoverable_count
        data["count-unrecoverable-versions"] = len(unrecoverable)

        def _describe(versions):
            # "<share count>*<version summary>" per version, joined by "/"
            return "/".join([
                "%d*%s" % (len(vmap[v]), smap.summarize_version(v))
                for v in versions
            ])

        if recoverable:
            report.append("Recoverable Versions: " + _describe(recoverable))
        if unrecoverable:
            report.append("Unrecoverable Versions: " +
                          _describe(unrecoverable))
        if smap.unrecoverable_versions():
            healthy = False
            summary.append("some versions are unrecoverable")
            report.append("Unhealthy: some versions are unrecoverable")
        if recoverable_count == 0:
            healthy = False
            summary.append("no versions are recoverable")
            report.append("Unhealthy: no versions are recoverable")
        if recoverable_count > 1:
            healthy = False
            summary.append("multiple versions are recoverable")
            report.append("Unhealthy: there are multiple recoverable versions")

        needs_rebalancing = False
        if recoverable:
            best_version = smap.best_recoverable_version()
            report.append("Best Recoverable Version: " +
                          smap.summarize_version(best_version))
            counters = self._count_shares(smap, best_version)
            data.update(counters)
            good = counters["count-shares-good"]
            needed = counters["count-shares-needed"]
            expected = counters["count-shares-expected"]
            if good < expected:
                healthy = False
                encoding = (good, needed, expected)
                report.append("Unhealthy: best version has only %d shares "
                              "(encoding is %d-of-%d)" % encoding)
                summary.append("%d shares (enc %d-of-%d)" % encoding)
            hosts = smap.all_peers_for_version(best_version)
            needs_rebalancing = bool(len(hosts) < expected)
        elif unrecoverable:
            healthy = False
            # find a k and N from somewhere: any unrecoverable version will
            # do, it need not be the best one
            some_version = next(iter(unrecoverable))
            data.update(self._count_shares(smap, some_version))
            # leave needs_rebalancing=False: the file being unrecoverable is
            # the bigger problem
        else:
            # couldn't find anything at all
            data.update({
                "count-shares-good": 0,
                "count-shares-needed": 3,  # arbitrary defaults
                "count-shares-expected": 10,
                "count-good-share-hosts": 0,
                "count-wrong-shares": 0,
            })

        if self.bad_shares:
            locators = []
            data["count-corrupt-shares"] = len(self.bad_shares)
            data["list-corrupt-shares"] = locators
            report.append("Corrupt Shares:")
            summary.append("Corrupt Shares:")
            msg = ("CorruptShareError during mutable verify, "
                   "peerid=%(peerid)s, si=%(si)s, shnum=%(shnum)d, "
                   "where=%(where)s")
            for (peerid, shnum, f) in sorted(self.bad_shares):
                locators.append((peerid, self._storage_index, shnum))
                label = "%s-sh%d" % (idlib.shortnodeid_b2a(peerid), shnum)
                # prefer the specific corruption reason when there is one
                where = (f.value.reason if f.check(CorruptShareError)
                         else str(f))
                report.append(" %s: %s" % (label, where))
                summary.append(label)
                r.problems.append((peerid, self._storage_index, shnum, f))
                log.msg(format=msg,
                        peerid=idlib.nodeid_b2a(peerid),
                        si=base32.b2a(self._storage_index),
                        shnum=shnum,
                        where=where,
                        level=log.WEIRD,
                        umid="EkK8QA")
        else:
            data["count-corrupt-shares"] = 0
            data["list-corrupt-shares"] = []

        # map "<version summary>-sh<shnum>" to the peers holding that share
        sharemap = {}
        for verinfo in vmap:
            for (shnum, peerid, timestamp) in vmap[verinfo]:
                shareid = "%s-sh%d" % (smap.summarize_version(verinfo), shnum)
                sharemap.setdefault(shareid, []).append(peerid)
        data["sharemap"] = sharemap
        data["servers-responding"] = list(smap.reachable_peers)

        r.set_healthy(healthy)
        r.set_recoverable(bool(recoverable))
        r.set_needs_rebalancing(needs_rebalancing)
        r.set_data(data)
        r.set_summary("Healthy" if healthy
                      else "Unhealthy: " + " ".join(summary))
        r.set_report(report)
Beispiel #49
0
 def finish(self):
     """Log completion, fold the collected stats into the results, return them."""
     log.msg("deep-check done", parent=self._lp)
     stats = self._stats.get_results()
     self._results.update_stats(stats)
     return self._results
Beispiel #50
0
    def get_anonymous_storage_server(self):
        """
        Return the anonymous ``IStorageServer`` implementation for this node.

        An object is returned even if storage is disabled on this node (in
        which case the object will not be exposed, peers will not be able
        to access it, and storage will remain disabled).

        There is one instance per node: an already-attached service named
        ``StorageServer.name`` is returned as is, otherwise the server is
        built from the ``[storage]`` configuration, attached to this node
        and returned.
        """
        try:
            return self.getServiceNamed(StorageServer.name)
        except KeyError:
            # not created yet; fall through and build it
            pass

        get = self.config.get_config

        readonly = get("storage", "readonly", False, boolean=True)

        # NOTE(review): this one lookup goes through self.get_config while
        # every other one uses self.config.get_config -- confirm intended.
        config_storedir = self.get_config("storage", "storage_dir",
                                          self.STOREDIR)
        storedir = self.config.get_config_path(config_storedir)

        raw_reserved = get("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(raw_reserved)
        except ValueError:
            # log the offending value, then let the error propagate
            log.msg("[storage]reserved_space= contains unparseable value %s" %
                    raw_reserved)
            raise
        if reserved is None:
            reserved = 0

        discard = get("storage", "debug_discard", False, boolean=True)
        expire = get("storage", "expire.enabled", False, boolean=True)
        if expire:
            # no default: a mode is required when expiration is enabled
            mode = get("storage", "expire.mode")
        else:
            mode = get("storage", "expire.mode", "age")

        override_duration = get("storage", "expire.override_lease_duration",
                                None)
        if override_duration is not None:
            override_duration = parse_duration(override_duration)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = parse_date(get("storage", "expire.cutoff_date"))

        # each share type takes part in expiration unless switched off
        sharetype_options = (
            ("expire.immutable", "immutable"),
            ("expire.mutable", "mutable"),
        )
        expiration_sharetypes = tuple(
            sharetype
            for (option, sharetype) in sharetype_options
            if get("storage", option, True, boolean=True)
        )

        ss = StorageServer(
            storedir,
            self.nodeid,
            reserved_space=reserved,
            discard_storage=discard,
            readonly_storage=readonly,
            stats_provider=self.stats_provider,
            expiration_enabled=expire,
            expiration_mode=mode,
            expiration_override_lease_duration=override_duration,
            expiration_cutoff_date=cutoff_date,
            expiration_sharetypes=expiration_sharetypes,
        )
        ss.setServiceParent(self)
        return ss
Beispiel #51
0
 def _lost(self):
     """Note a lost connection: log it, timestamp it, clear the remote state."""
     log.msg(
         format="lost connection to %(name)s",
         name=self.name(),
         facility="tahoe.storage_broker",
         umid="zbRllw",
     )
     self.last_loss_time = time.time()
     # nothing remote is reachable any more
     self.rref = self.remote_host = None
Beispiel #52
0
def create_introducer_clients(config, main_tub, _introducer_factory=None):
    """
    Read, validate and parse any 'introducers.yaml' configuration.

    Introducers come from private/introducers.yaml (a missing file counts
    as "none") plus, under the petname 'default', the 'introducer.furl'
    entry of the [client] section of tahoe.cfg when that is set.

    :param _introducer_factory: for testing; the class to instantiate instead
        of IntroducerClient

    :returns: a list of IntroducerClient instances

    :raises ValueError: if introducers.yaml defines a 'default' introducer,
        or if tahoe.cfg sets 'introducer.furl' to the literal string "None"
    """
    factory = (IntroducerClient if _introducer_factory is None
               else _introducer_factory)

    yaml_filename = config.get_private_path("introducers.yaml")
    yaml_filepath = FilePath(yaml_filename)

    try:
        with yaml_filepath.open() as f:
            parsed = yamlutil.safe_load(f)
            if parsed is None:
                raise EnvironmentError(
                    EPERM,
                    "Can't read '{}'".format(yaml_filename),
                    yaml_filename,
                )
            introducers = parsed.get("introducers", {})
            log.msg(
                "found {} introducers in private/introducers.yaml".format(
                    len(introducers),
                )
            )
    except EnvironmentError as e:
        # only a missing file is tolerated; anything else propagates
        if e.errno != ENOENT:
            raise
        introducers = {}

    if "default" in introducers.keys():
        raise ValueError(
            "'default' introducer furl cannot be specified in introducers.yaml;"
            " please fix impossible configuration."
        )

    # read furl from tahoe.cfg
    cfg_furl = config.get_config("client", "introducer.furl", None)
    if cfg_furl == "None":
        raise ValueError(
            "tahoe.cfg has invalid 'introducer.furl = None':"
            " to disable it, use 'introducer.furl ='"
            " or omit the key entirely"
        )
    if cfg_furl:
        introducers[u'default'] = {'furl': cfg_furl}

    # one client per configured introducer, each with its own cache file
    clients = []
    for petname, introducer in introducers.items():
        cache_name = "introducer_{}_cache.yaml".format(petname)
        cache_filepath = FilePath(config.get_private_path(cache_name))
        clients.append(factory(
            main_tub,
            introducer['furl'].encode("ascii"),
            config.nickname,
            str(allmydata.__full_version__),
            str(_Client.OLDEST_SUPPORTED_VERSION),
            list(node.get_app_versions()),
            partial(_sequencer, config),
            cache_filepath,
        ))
    return clients
Beispiel #53
0
 def log(self, *args, **kwargs):
     """Forward to log.msg, filling in facility and parent when not given."""
     kwargs.setdefault("facility", "tahoe.helper.chkupload.fetch")
     # self._log_parent is only read when the caller gave no parent
     if "parent" not in kwargs:
         kwargs.update(parent=self._log_parent)
     return log.msg(*args, **kwargs)
Beispiel #54
0
 def _log(self, msg):
     """Send msg to log.msg at the NOISY level."""
     noisy = log.NOISY
     log.msg(msg, level=noisy)
Beispiel #55
0
    def init_storage(self):
        """Build the storage server from the [storage] config and publish it.

        Returns immediately when "enabled" is false. Otherwise a
        StorageServer is constructed from the configured options, added as
        a service, registered with the Tub, and announced to every
        introducer client.

        Raises ValueError if storage is enabled but the Tub is not
        listening; an unparseable reserved_space value is logged and its
        ValueError re-raised.
        """
        # should we run a storage server (and publish it for others to use)?
        enabled = self.get_config("storage", "enabled", True, boolean=True)
        if not enabled:
            return
        if not self._tub_is_listening:
            raise ValueError("config error: storage is enabled, but tub "
                             "is not listening ('tub.port=' is empty)")
        readonly = self.get_config("storage", "readonly", False, boolean=True)

        storedir = os.path.join(self.basedir, self.STOREDIR)

        raw_reserved = self.get_config("storage", "reserved_space", None)
        try:
            reserved = parse_abbreviated_size(raw_reserved)
        except ValueError:
            # log the offending value, then let the error propagate
            log.msg("[storage]reserved_space= contains unparseable value %s" %
                    raw_reserved)
            raise
        if reserved is None:
            reserved = 0

        discard = self.get_config("storage", "debug_discard", False,
                                  boolean=True)
        expire = self.get_config("storage", "expire.enabled", False,
                                 boolean=True)
        if expire:
            # no default: a mode is required when expiration is enabled
            mode = self.get_config("storage", "expire.mode")
        else:
            mode = self.get_config("storage", "expire.mode", "age")

        override_duration = self.get_config(
            "storage", "expire.override_lease_duration", None)
        if override_duration is not None:
            override_duration = parse_duration(override_duration)

        cutoff_date = None
        if mode == "cutoff-date":
            cutoff_date = parse_date(
                self.get_config("storage", "expire.cutoff_date"))

        # each share type takes part in expiration unless switched off
        sharetype_options = (
            ("expire.immutable", "immutable"),
            ("expire.mutable", "mutable"),
        )
        expiration_sharetypes = tuple(
            sharetype
            for (option, sharetype) in sharetype_options
            if self.get_config("storage", option, True, boolean=True)
        )

        ss = StorageServer(
            storedir,
            self.nodeid,
            reserved_space=reserved,
            discard_storage=discard,
            readonly_storage=readonly,
            stats_provider=self.stats_provider,
            expiration_enabled=expire,
            expiration_mode=mode,
            expiration_override_lease_duration=override_duration,
            expiration_cutoff_date=cutoff_date,
            expiration_sharetypes=expiration_sharetypes,
        )
        self.add_service(ss)

        furl_path = os.path.join(self.basedir, "private", "storage.furl")
        furl_file = furl_path.encode(get_filesystem_encoding())
        furl = self.tub.registerReference(ss, furlFile=furl_file)
        ann = {
            "anonymous-storage-FURL": furl,
            "permutation-seed-base32": self._init_permutation_seed(ss),
        }
        for ic in self.introducer_clients:
            ic.publish("storage", ann, self._node_key)
Beispiel #56
0
    def _satisfy_offsets(self):
        """Parse the share's version and offset table from received data.

        Returns False when the version field or the offset table is not
        yet available in self._received. On success the parsed offsets are
        stored in self.actual_offsets and True is returned.

        Raises LayoutInvalid (after setting self.had_corruption) for an
        unknown version or for share-hash / block-hash regions whose size
        is negative or not a whole number of entries.
        """
        version_s = self._received.get(0, 4)
        if version_s is None:
            return False
        (version, ) = struct.unpack(">L", version_s)

        # version -> (offset table start, field size, struct format code)
        layouts = {
            1: (0x0c, 0x4, "L"),
            2: (0x14, 0x8, "Q"),
        }
        if version not in layouts:
            self.had_corruption = True
            raise LayoutInvalid("unknown version %d (I understand 1 and 2)" %
                                version)
        table_start, self._fieldsize, self._fieldstruct = layouts[version]

        offset_table_size = 6 * self._fieldsize
        table_s = self._received.pop(table_start, offset_table_size)
        if table_s is None:
            return False
        fields = struct.unpack(">" + 6 * self._fieldstruct, table_s)
        field_names = (
            'data',
            'plaintext_hash_tree',  # UNUSED
            'crypttext_hash_tree',
            'block_hashes',
            'share_hashes',
            'uri_extension',
        )
        offsets = dict(zip(field_names, fields))
        self.actual_offsets = offsets
        offsets_fmt = ("actual offsets: data=%d, plaintext_hash_tree=%d, "
                       "crypttext_hash_tree=%d, block_hashes=%d, "
                       "share_hashes=%d, uri_extension=%d")
        log.msg(offsets_fmt % tuple(fields),
                level=log.NOISY,
                parent=self._lp,
                umid="jedQcw")
        self._received.remove(0, 4)  # don't need this anymore

        # validate the offsets a bit
        # the share hash chain is stored as (hashnum,hash) pairs
        share_entry_size = 2 + HASH_SIZE
        share_hashes_size = offsets["uri_extension"] - offsets["share_hashes"]
        if share_hashes_size < 0 or share_hashes_size % share_entry_size != 0:
            self.had_corruption = True
            raise LayoutInvalid("share hashes malformed -- should be a"
                                " multiple of %d bytes -- not %d" %
                                (share_entry_size, share_hashes_size))
        # the block hash tree is stored as a list of hashes
        block_hashes_size = offsets["share_hashes"] - offsets["block_hashes"]
        if block_hashes_size < 0 or block_hashes_size % HASH_SIZE != 0:
            self.had_corruption = True
            raise LayoutInvalid("block hashes malformed -- should be a"
                                " multiple of %d bytes -- not %d" %
                                (HASH_SIZE, block_hashes_size))
        # we only look at 'crypttext_hash_tree' if the UEB says we're
        # actually using it. Same with 'plaintext_hash_tree'. This gives us
        # some wiggle room: a place to stash data for later extensions.

        return True
Beispiel #57
0
 def log(self, *args, **kwargs):
     """Forward to log.msg, filling in parent and facility when not given."""
     # self._log_number is only read when the caller gave no parent
     if "parent" not in kwargs:
         kwargs.update(parent=self._log_number)
     kwargs.setdefault("facility", "tahoe.mutable.retrieve")
     return log.msg(*args, **kwargs)
Beispiel #58
0
 def _complain(message):
     """Report an unexpected condition on original_stderr and in the log.

     A str message is printed verbatim; any other object is printed as
     its repr(). The original message object is then handed to log.msg
     at level log.WEIRD.
     """
     # A conditional expression (instead of "cond and a or b") keeps an
     # empty str from falling through to repr() and being printed as "''".
     text = message if isinstance(message, str) else repr(message)
     print(text, file=original_stderr)
     log.msg(message, level=log.WEIRD)
Beispiel #59
0
 def log(self, *args, **kwargs):
     """Forward to log.msg, using self._lp as parent when none is given."""
     caller_gave_parent = "parent" in kwargs
     if not caller_gave_parent:
         kwargs.update(parent=self._lp)
     return log.msg(*args, **kwargs)
Beispiel #60
0
    def __init__(self, rref, server, verifycap, commonshare, node,
                 download_status, shnum, dyhb_rtt, logparent):
        self._rref = rref
        self._server = server
        self._node = node  # holds share_hash_tree and UEB
        self.actual_segment_size = node.segment_size  # might still be None
        # XXX change node.guessed_segment_size to
        # node.best_guess_segment_size(), which should give us the real ones
        # if known, else its guess.
        self._guess_offsets(verifycap, node.guessed_segment_size)
        self.actual_offsets = None
        self._UEB_length = None
        self._commonshare = commonshare  # holds block_hash_tree
        self._download_status = download_status
        self._storage_index = verifycap.storage_index
        self._si_prefix = base32.b2a(verifycap.storage_index)[:8]
        self._shnum = shnum
        self._dyhb_rtt = dyhb_rtt
        # self._alive becomes False upon fatal corruption or server error
        self._alive = True
        self._loop_scheduled = False
        self._lp = log.msg(format="%(share)s created",
                           share=repr(self),
                           level=log.NOISY,
                           parent=logparent,
                           umid="P7hv2w")

        self._pending = Spans()  # request sent but no response received yet
        self._received = DataSpans()  # ACK response received, with data
        self._unavailable = Spans()  # NAK response received, no data

        # any given byte of the share can be in one of four states:
        #  in: _wanted, _requested, _received
        #      FALSE    FALSE       FALSE : don't care about it at all
        #      TRUE     FALSE       FALSE : want it, haven't yet asked for it
        #      TRUE     TRUE        FALSE : request is in-flight
        #                                   or didn't get it
        #      FALSE    TRUE        TRUE  : got it, haven't used it yet
        #      FALSE    TRUE        FALSE : got it and used it
        #      FALSE    FALSE       FALSE : block consumed, ready to ask again
        #
        # when we request data and get a NAK, we leave it in _requested
        # to remind ourself to not ask for it again. We don't explicitly
        # remove it from anything (maybe this should change).
        #
        # We retain the hashtrees in the Node, so we leave those spans in
        # _requested (and never ask for them again, as long as the Node is
        # alive). But we don't retain data blocks (too big), so when we
        # consume a data block, we remove it from _requested, so a later
        # download can re-fetch it.

        self._requested_blocks = []  # (segnum, set(observer2..))
        v = server.get_version()
        ver = v["http://allmydata.org/tahoe/protocols/storage/v1"]
        self._overrun_ok = ver["tolerates-immutable-read-overrun"]
        # If _overrun_ok and we guess the offsets correctly, we can get
        # everything in one RTT. If _overrun_ok and we guess wrong, we might
        # need two RTT (but we could get lucky and do it in one). If overrun
        # is *not* ok (tahoe-1.3.0 or earlier), we need four RTT: 1=version,
        # 2=offset table, 3=UEB_length and everything else (hashes, block),
        # 4=UEB.

        self.had_corruption = False  # for unit tests