Example 1
def _describe_immutable_share(abs_sharefile, now, si_s, out):
    class ImmediateReadBucketProxy(ReadBucketProxy):
        def __init__(self, sf):
            self.sf = sf
            ReadBucketProxy.__init__(self, None, None, "")

        def __repr__(self):
            return "<ImmediateReadBucketProxy>"

        def _read(self, offset, size):
            return defer.succeed(sf.read_share_data(offset, size))

    # use a ReadBucketProxy to parse the bucket and find the uri extension
    sf = ShareFile(abs_sharefile)
    bp = ImmediateReadBucketProxy(sf)

    expiration_time = min(lease.get_expiration_time()
                          for lease in sf.get_leases())
    expiration = max(0, expiration_time - now)

    UEB_data = call(bp.get_uri_extension)
    unpacked = uri.unpack_extension_readable(UEB_data)

    k = unpacked["needed_shares"]
    N = unpacked["total_shares"]
    filesize = unpacked["size"]
    ueb_hash = unpacked["UEB_hash"]

    print("CHK %s %d/%d %d %s %d %s" %
          (si_s, k, N, filesize, str(
              ueb_hash, "utf-8"), expiration, quote_output(abs_sharefile)),
          file=out)
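
The call() helper used above to drive bp.get_uri_extension is not shown in this
example. Because ImmediateReadBucketProxy._read answers from local disk, every
Deferred involved has already fired by the time call() looks at it, so a minimal
sketch of such a helper (name and behavior inferred from how it is used here,
not taken from the example) could be:

from twisted.internet import defer
from twisted.python import failure

def call(func, *args, **kwargs):
    # Invoke a Deferred-returning function whose Deferred is expected to have
    # already fired (all reads are local), return its result, re-raise errors.
    results = []
    d = defer.maybeDeferred(func, *args, **kwargs)
    d.addBoth(results.append)
    if isinstance(results[0], failure.Failure):
        results[0].raiseException()
    return results[0]
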
Example 2
    def _fetch(self):
        needed = self._expected_size - self._have
        fetch_size = min(needed, self.CHUNK_SIZE)
        if fetch_size == 0:
            self._upload_helper._upload_status.set_progress(1, 1.0)
            return True  # all done
        percent = 0.0
        if self._expected_size:
            percent = 1.0 * (self._have + fetch_size) / self._expected_size
        self.log(
            format=
            "fetching [%(si)s] %(start)d-%(end)d of %(total)d (%(percent)d%%)",
            si=self._upload_id,
            start=self._have,
            end=self._have + fetch_size,
            total=self._expected_size,
            percent=int(100.0 * percent),
            level=log.NOISY)
        d = self.call("read_encrypted", self._have, fetch_size)

        def _got_data(ciphertext_v):
            for data in ciphertext_v:
                self._f.write(data)
                self._have += len(data)
                self._ciphertext_fetched += len(data)
                self._upload_helper._helper.count(
                    "chk_upload_helper.fetched_bytes", len(data))
                self._upload_helper._upload_status.set_progress(1, percent)
            return False  # not done

        d.addCallback(_got_data)
        return d
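
_fetch() either returns True right away (nothing left to fetch) or a Deferred
that fires with False once a single chunk has been written, so some caller has
to keep re-invoking it until it reports completion. A minimal sketch of such a
driver loop, assuming the usual "from twisted.internet import defer"; the
method name _loop is made up and is not part of the example above:

    def _loop(self):
        # Fetch one chunk at a time until _fetch() says we are done.
        d = defer.maybeDeferred(self._fetch)

        def _maybe_fetch_more(done):
            if done:
                return None      # all expected ciphertext is on disk
            return self._loop()  # otherwise go around again

        d.addCallback(_maybe_fetch_more)
        return d
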
Example 3
 def __init__(self, filename, max_size=None, create=False):
     """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. """
     precondition((max_size is not None) or (not create), max_size, create)
     self.home = filename
     self._max_size = max_size
     if create:
         # touch the file, so later callers will see that we're working on
         # it. Also construct the metadata.
         assert not os.path.exists(self.home)
         fileutil.make_dirs(os.path.dirname(self.home))
         # The second field -- the four-byte share data length -- is no
         # longer used as of Tahoe v1.3.0, but we continue to write it in
         # there in case someone downgrades a storage server from >=
         # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one
         # server to another, etc. We do saturation -- a share data length
         # larger than 2**32-1 (what can fit into the field) is marked as
         # the largest length that can fit into the field. That way, even
         # if this does happen, the old < v1.3.0 server will still allow
         # clients to read the first part of the share.
         with open(self.home, 'wb') as f:
             f.write(struct.pack(">LLL", 1, min(2**32 - 1, max_size), 0))
         self._lease_offset = max_size + 0x0c
         self._num_leases = 0
     else:
         with open(self.home, 'rb') as f:
             filesize = os.path.getsize(self.home)
             (version, unused,
              num_leases) = struct.unpack(">LLL", f.read(0xc))
         if version != 1:
             msg = "sharefile %s had version %d but we wanted 1" % \
                   (filename, version)
             raise UnknownImmutableContainerVersionError(msg)
         self._num_leases = num_leases
         self._lease_offset = filesize - (num_leases * self.LEASE_SIZE)
     self._data_offset = 0xc
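
For reference, the header written above is twelve bytes: a version field, the
(possibly saturated) four-byte share data length, and a lease-count field. A
small sketch of reading it back, mirroring the else branch (sharefile_path is a
placeholder name, not from the example):

import struct

with open(sharefile_path, "rb") as f:
    version, saturated_length, num_leases = struct.unpack(">LLL", f.read(0xc))
assert version == 1
# saturated_length is min(2**32 - 1, max_size), so it understates shares larger
# than about 4 GiB; the usable data length is bounded instead by the lease
# area, which occupies the last num_leases * LEASE_SIZE bytes of the file.
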
Example 4
    def read(self, consumer, offset, size):
        """I am the main entry point, from which FileNode.read() can get
        data. I feed the consumer with the desired range of ciphertext. I
        return a Deferred that fires (with the consumer) when the read is
        finished.

        Note that there is no notion of a 'file pointer': each call to read()
        uses an independent offset= value.
        """
        # for concurrent operations: each gets its own Segmentation manager
        if size is None:
            size = self._verifycap.size
        # ignore overruns: clip size so offset+size does not go past EOF, and
        # so size is not negative (which indicates that offset >= EOF)
        size = max(0, min(size, self._verifycap.size - offset))

        read_ev = self._download_status.add_read_event(offset, size, now())
        if IDownloadStatusHandlingConsumer.providedBy(consumer):
            consumer.set_download_status_read_event(read_ev)
            consumer.set_download_status(self._download_status)

        lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
                     si=base32.b2a(self._verifycap.storage_index)[:8],
                     offset=offset,
                     size=size,
                     level=log.OPERATIONAL,
                     parent=self._lp,
                     umid="l3j3Ww")
        if self._history:
            sp = self._history.stats_provider
            sp.count("downloader.files_downloaded", 1)  # really read() calls
            sp.count("downloader.bytes_downloaded", size)
        if size == 0:
            read_ev.finished(now())
            # no data, so no producer, so no register/unregisterProducer
            return defer.succeed(consumer)

        # for concurrent operations, each read() gets its own Segmentation
        # manager
        s = Segmentation(self, offset, size, consumer, read_ev, lp)

        # this raises an interesting question: what segments to fetch? if
        # offset=0, always fetch the first segment, and then allow
        # Segmentation to be responsible for pulling the subsequent ones if
        # the first wasn't large enough. If offset>0, we're going to need an
        # extra roundtrip to get the UEB (and therefore the segment size)
        # before we can figure out which segment to get. TODO: allow the
        # offset-table-guessing code (which starts by guessing the segsize)
        # to assist the offset>0 process.
        d = s.start()

        def _done(res):
            read_ev.finished(now())
            return res

        d.addBoth(_done)
        return d
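
The clipping near the top, size = max(0, min(size, self._verifycap.size -
offset)), guarantees that a read never runs past EOF and that an offset at or
beyond EOF yields a zero-length read, which the size == 0 branch then
short-circuits. A few worked values, using a hypothetical 100-byte file:

filesize = 100
clip = lambda offset, size: max(0, min(size, filesize - offset))
assert clip(90, 50) == 10    # overrun trimmed back to EOF
assert clip(100, 10) == 0    # offset exactly at EOF reads nothing
assert clip(120, 10) == 0    # offset past EOF: negative span clamped to zero
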
Example 5
 def _build_guessed_tables(self, max_segment_size):
     size = min(self._verifycap.size, max_segment_size)
     s = mathutil.next_multiple(size, self._verifycap.needed_shares)
     self.guessed_segment_size = s
     r = self._calculate_sizes(self.guessed_segment_size)
     self.guessed_num_segments = r["num_segments"]
     # as with CommonShare, our ciphertext_hash_tree is a stub until we
     # get the real num_segments
     self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
     self.ciphertext_hash_tree_leaves = self.guessed_num_segments
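
mathutil.next_multiple rounds its first argument up to the nearest multiple of
the second, so the guessed segment size is always divisible by needed_shares
and each segment can be split evenly into k blocks. A rough stand-in, shown
only to make the rounding explicit (the real helper lives in
allmydata.util.mathutil):

def next_multiple(n, k):
    # Smallest multiple of k that is greater than or equal to n.
    div, mod = divmod(n, k)
    return n if mod == 0 else (div + 1) * k

assert next_multiple(1000, 3) == 1002
assert next_multiple(999, 3) == 999
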
Example 6
    def test_previous_upload_failed(self):
        self.basedir = "helper/AssistedUpload/test_previous_upload_failed"
        self.setUpHelper(self.basedir)

        # we want to make sure that an upload which fails (leaving the
        # ciphertext in the CHK_encoding/ directory) does not prevent a later
        # attempt to upload that file from working. We simulate this by
        # populating the directory manually. The hardest part is guessing the
        # storage index.

        k = FakeClient.DEFAULT_ENCODING_PARAMETERS["k"]
        n = FakeClient.DEFAULT_ENCODING_PARAMETERS["n"]
        max_segsize = FakeClient.DEFAULT_ENCODING_PARAMETERS[
            "max_segment_size"]
        segsize = min(max_segsize, len(DATA))
        # this must be a multiple of 'required_shares'==k
        segsize = mathutil.next_multiple(segsize, k)

        key = hashutil.convergence_hash(k, n, segsize, DATA,
                                        b"test convergence string")
        assert len(key) == 16
        encryptor = aes.create_encryptor(key)
        SI = hashutil.storage_index_hash(key)
        SI_s = str(si_b2a(SI), "utf-8")
        encfile = os.path.join(self.basedir, "CHK_encoding", SI_s)
        f = open(encfile, "wb")
        f.write(aes.encrypt_data(encryptor, DATA))
        f.close()

        u = upload.Uploader(self.helper_furl)
        u.setServiceParent(self.s)

        d = wait_a_few_turns()

        def _ready(res):
            assert u._helper
            return upload_data(u, DATA, convergence=b"test convergence string")

        d.addCallback(_ready)

        def _uploaded(results):
            the_uri = results.get_uri()
            assert b"CHK" in the_uri

        d.addCallback(_uploaded)

        def _check_empty(res):
            files = os.listdir(os.path.join(self.basedir, "CHK_encoding"))
            self.failUnlessEqual(files, [])
            files = os.listdir(os.path.join(self.basedir, "CHK_incoming"))
            self.failUnlessEqual(files, [])

        d.addCallback(_check_empty)

        return d
Example 7
    def check_directory(self, contents):
        """I will tell you if a new directory needs to be created for a given
        set of directory contents, or if I know of an existing (immutable)
        directory that can be used instead.

        'contents' should be a dictionary that maps from child name (a single
        unicode string) to immutable childcap (filecap or dircap).

        I return a DirectoryResult object, synchronously. If r.was_created()
        returns False, you should create the directory (with
        t=mkdir-immutable). When you are finished, call r.did_create(dircap)
        so I can update my database.

        If was_created() returns a dircap, you might be able to avoid the
        mkdir. Call r.should_check(), and if it says False, you can skip the
        mkdir and use the dircap returned by was_created().

        If should_check() returns True, you should perform a check operation
        on the dircap returned by was_created(). If the check indicates the
        directory is healthy, please call
        r.did_check_healthy(checker_results) so I can update the database,
        using the de-JSONized response from the webapi t=check call for
        'checker_results'. If the check indicates the directory is not
        healthy, please repair or re-create the directory and call
        r.did_create(dircap) when you're done.
        """

        now = time.time()
        entries = []
        for name in contents:
            entries.append([name.encode("utf-8"), contents[name]])
        entries.sort()
        data = b"".join([
            netstring(name_utf8) + netstring(cap)
            for (name_utf8, cap) in entries
        ])
        dirhash = backupdb_dirhash(data)
        dirhash_s = base32.b2a(dirhash)
        c = self.cursor
        c.execute(
            "SELECT dircap, last_checked"
            " FROM directories WHERE dirhash=?", (dirhash_s, ))
        row = c.fetchone()
        if not row:
            return DirectoryResult(self, dirhash_s, None, False)
        (dircap, last_checked) = row
        age = now - last_checked

        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)

        return DirectoryResult(self, dirhash_s, to_bytes(dircap), should_check)
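
A condensed sketch of the caller protocol this docstring describes; the
mkdir_immutable, check_dircap, and results_say_healthy helpers are hypothetical
stand-ins for the real webapi t=mkdir-immutable and t=check calls and are not
part of the example:

r = backupdb.check_directory(contents)
if not r.was_created():
    dircap = mkdir_immutable(contents)         # create it, then record it
    r.did_create(dircap)
elif r.should_check():
    results = check_dircap(r.was_created())    # de-JSONized t=check response
    if results_say_healthy(results):
        r.did_check_healthy(results)
    else:
        dircap = mkdir_immutable(contents)     # repair / re-create
        r.did_create(dircap)
else:
    dircap = r.was_created()                   # reuse the existing directory
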
Example 8
 def read_share_data(self, offset, length):
     precondition(offset >= 0)
     # reads beyond the end of the data are truncated. Reads that start
     # beyond the end of the data return an empty string.
     seekpos = self._data_offset + offset
     actuallength = max(0, min(length, self._lease_offset - seekpos))
     if actuallength == 0:
         return b""
     with open(self.home, 'rb') as f:
         f.seek(seekpos)
         return f.read(actuallength)
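
The actuallength computation enforces both rules from the comment, and the
readable region is bounded by the lease area beginning at _lease_offset rather
than by the end of the file: a read that crosses that boundary is truncated,
and one that starts at or beyond it returns b"". A worked illustration with
made-up offsets:

_data_offset, _lease_offset = 0xc, 0xc + 100    # 100 bytes of share data
def actual_length(offset, length):
    seekpos = _data_offset + offset
    return max(0, min(length, _lease_offset - seekpos))

assert actual_length(90, 20) == 10    # truncated at the lease boundary
assert actual_length(100, 20) == 0    # starts past the data: empty read
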
Example 9
 def create_data(self):
     fileutil.make_dirs(self.basedir)
     for i in range(self.count):
         s = self.size
         fn = os.path.join(self.basedir, str(i))
         if os.path.exists(fn):
             os.unlink(fn)
         f = open(fn, "wb")
         f.write(os.urandom(8))
         s -= 8
         while s > 0:
             chunk = min(s, 4096)
             f.write(b"\x00" * chunk)
             s -= chunk
         f.close()
Example 10
    def _check_chk(self, storage_index, lp):
        res = upload.HelperUploadResults()
        res.uri_extension_hash = hashutil.uri_extension_hash(b"")

        # we're pretending that the file they're trying to upload was already
        # present in the grid. We return some information about the file, so
        # the client can decide if they like the way it looks. The parameters
        # used here are chosen to match the defaults.
        PARAMS = FakeClient.DEFAULT_ENCODING_PARAMETERS
        ueb_data = {
            "needed_shares": PARAMS["k"],
            "total_shares": PARAMS["n"],
            "segment_size": min(PARAMS["max_segment_size"], len(DATA)),
            "size": len(DATA),
        }
        res.uri_extension_data = ueb_data
        return defer.succeed(res)
Example 11
    def test_already_uploaded(self):
        """
        If enough shares to satisfy the needed parameter already exist, the upload
        succeeds without pushing any shares.
        """
        params = FakeClient.DEFAULT_ENCODING_PARAMETERS
        chk_checker = partial(
            FakeCHKCheckerAndUEBFetcher,
            sharemap=dictutil.DictOfSets({
                0: {b"server0"},
                1: {b"server1"},
            }),
            ueb_data={
                "size": len(DATA),
                "segment_size": min(params["max_segment_size"], len(DATA)),
                "needed_shares": params["k"],
                "total_shares": params["n"],
            },
        )
        self.basedir = "helper/AssistedUpload/test_already_uploaded"
        self.setUpHelper(
            self.basedir,
            chk_checker=chk_checker,
        )
        u = make_uploader(self.helper_furl, self.s)

        yield wait_a_few_turns()

        assert u._helper

        results = yield upload_data(u,
                                    DATA,
                                    convergence=b"some convergence string")
        the_uri = results.get_uri()
        assert b"CHK" in the_uri

        files = os.listdir(os.path.join(self.basedir, "CHK_encoding"))
        self.failUnlessEqual(files, [])
        files = os.listdir(os.path.join(self.basedir, "CHK_incoming"))
        self.failUnlessEqual(files, [])

        self.assertEqual(
            results.get_pushed_shares(),
            0,
        )
Example 12
    def header(self, max_size):
        # type: (int) -> bytes
        """
        Construct a container header.

        :param max_size: the maximum size the container can hold

        :return: the header bytes
        """
        # The second field -- the four-byte share data length -- is no longer
        # used as of Tahoe v1.3.0, but we continue to write it in there in
        # case someone downgrades a storage server from >= Tahoe-1.3.0 to <
        # Tahoe-1.3.0, or moves a share file from one server to another,
        # etc. We do saturation -- a share data length larger than 2**32-1
        # (what can fit into the field) is marked as the largest length that
        # can fit into the field. That way, even if this does happen, the old
        # < v1.3.0 server will still allow clients to read the first part of
        # the share.
        return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0)
Example 13
    def remote_upload_random_data_from_file(self, size, convergence):
        tempdir = tempfile.mkdtemp()
        filename = os.path.join(tempdir, "data")
        f = open(filename, "wb")
        block = b"a" * 8192
        while size > 0:
            l = min(size, 8192)
            f.write(block[:l])
            size -= l
        f.close()
        uploader = self.parent.getServiceNamed("uploader")
        u = upload.FileName(filename, convergence=convergence)
        # XXX should pass reactor arg
        d = uploader.upload(u)
        d.addCallback(lambda results: results.get_uri())

        def _done(uri):
            os.remove(filename)
            os.rmdir(tempdir)
            return uri

        d.addCallback(_done)
        return d
Example 14
    def render(self, req):
        gte = static.getTypeAndEncoding
        ctype, encoding = gte(self.filename,
                              static.File.contentTypes,
                              static.File.contentEncodings,
                              defaultType="text/plain")
        req.setHeader("content-type", ctype)
        if encoding:
            req.setHeader("content-encoding", encoding)

        if boolean_of_arg(get_arg(req, "save", "False")):
            # tell the browser to save the file rather than display it. We
            # don't try to encode the filename; instead we echo back the
            # exact same bytes we were given in the URL. See the comment in
            # FileNodeHandler.render_GET for the sad details.
            req.setHeader("content-disposition",
                          b'attachment; filename="%s"' % self.filename)

        filesize = self.filenode.get_size()
        assert isinstance(filesize, (int,long)), filesize
        first, size = 0, None
        contentsize = filesize
        req.setHeader("accept-ranges", "bytes")

        # TODO: for mutable files, use the roothash. For LIT, hash the data.
        # or maybe just use the URI for CHK and LIT.
        rangeheader = req.getHeader('range')
        if rangeheader:
            ranges = self.parse_range_header(rangeheader)

            # ranges = None means the header didn't parse, so ignore
            # the header as if it didn't exist.  If is more than one
            # range, then just return the first for now, until we can
            # generate multipart/byteranges.
            if ranges is not None:
                first, last = ranges[0]

                if first >= filesize:
                    raise WebError('First beyond end of file',
                                   http.REQUESTED_RANGE_NOT_SATISFIABLE)
                else:
                    first = max(0, first)
                    last = min(filesize-1, last)

                    req.setResponseCode(http.PARTIAL_CONTENT)
                    req.setHeader('content-range',"bytes %s-%s/%s" %
                                  (str(first), str(last),
                                   str(filesize)))
                    contentsize = last - first + 1
                    size = contentsize

        req.setHeader("content-length", b"%d" % contentsize)
        if req.method == b"HEAD":
            return b""

        d = self.filenode.read(req, first, size)

        def _error(f):
            if f.check(defer.CancelledError):
                # The HTTP connection was lost and we no longer have anywhere
                # to send our result.  Let this pass through.
                return f
            if req.startedWriting:
                # The content-type is already set, and the response code has
                # already been sent, so we can't provide a clean error
                # indication. We can emit text (which a browser might
                # interpret as something else), and if we sent a Size header,
                # they might notice that we've truncated the data. Keep the
                # error message small to improve the chances of having our
                # error response be shorter than the intended results.
                #
                # We don't have a lot of options, unfortunately.
                return b"problem during download\n"
            else:
                # We haven't written anything yet, so we can provide a
                # sensible error message.
                return f
        d.addCallbacks(
            lambda ignored: None,
            _error,
        )
        return d
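
As a concrete example of the Range handling above: for a 1000-byte file and a
header of "Range: bytes=990-2000", the code clamps last to filesize - 1 and
answers 206 Partial Content, while a range starting at or beyond EOF raises a
416 (REQUESTED_RANGE_NOT_SATISFIABLE). A sketch of the arithmetic only:

filesize = 1000
first, last = 990, 2000                # as parsed from "bytes=990-2000"
first = max(0, first)
last = min(filesize - 1, last)         # -> 999
contentsize = last - first + 1         # -> 10
content_range = "bytes %d-%d/%d" % (first, last, filesize)  # "bytes 990-999/1000"
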
Example 15
    def check_file(self, path, use_timestamps=True):
        """I will tell you if a given local file needs to be uploaded or not,
        by looking in a database and seeing if I have a record of this file
        having been uploaded earlier.

        I return a FileResults object, synchronously. If r.was_uploaded()
        returns False, you should upload the file. When you are finished
        uploading it, call r.did_upload(filecap), so I can update my
        database.

        If was_uploaded() returns a filecap, you might be able to avoid an
        upload. Call r.should_check(), and if it says False, you can skip the
        upload and use the filecap returned by was_uploaded().

        If should_check() returns True, you should perform a filecheck on the
        filecap returned by was_uploaded(). If the check indicates the file
        is healthy, please call r.did_check_healthy(checker_results) so I can
        update the database, using the de-JSONized response from the webapi
        t=check call for 'checker_results'. If the check indicates the file
        is not healthy, please upload the file and call r.did_upload(filecap)
        when you're done.

        If use_timestamps=True (the default), I will compare ctime and mtime
        of the local file against an entry in my database, and consider the
        file to be unchanged if ctime, mtime, and filesize are all the same
        as the earlier version. If use_timestamps=False, I will not trust the
        timestamps, so more files (perhaps all) will be marked as needing
        upload. A future version of this database may hash the file to make
        equality decisions, in which case use_timestamps=False will not
        always imply r.must_upload()==True.

        'path' points to a local file on disk, possibly relative to the
        current working directory. The database stores absolute pathnames.
        """

        path = abspath_expanduser_unicode(path)

        # TODO: consider using get_pathinfo.
        s = os.stat(path)
        size = s[stat.ST_SIZE]
        ctime = s[stat.ST_CTIME]
        mtime = s[stat.ST_MTIME]

        now = time.time()
        c = self.cursor

        c.execute(
            "SELECT size,mtime,ctime,fileid"
            " FROM local_files"
            " WHERE path=?", (path, ))
        row = self.cursor.fetchone()
        if not row:
            return FileResult(self, None, False, path, mtime, ctime, size)
        (last_size, last_mtime, last_ctime, last_fileid) = row

        c.execute(
            "SELECT caps.filecap, last_upload.last_checked"
            " FROM caps,last_upload"
            " WHERE caps.fileid=? AND last_upload.fileid=?",
            (last_fileid, last_fileid))
        row2 = c.fetchone()

        if ((last_size != size or not use_timestamps or last_mtime != mtime
             or last_ctime != ctime)  # the file has been changed
                or
            (not row2)  # we somehow forgot where we put the file last time
            ):
            c.execute("DELETE FROM local_files WHERE path=?", (path, ))
            self.connection.commit()
            return FileResult(self, None, False, path, mtime, ctime, size)

        # at this point, we're allowed to assume the file hasn't been changed
        (filecap, last_checked) = row2
        age = now - last_checked

        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)

        return FileResult(self, to_bytes(filecap), should_check, path, mtime,
                          ctime, size)
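
The should_check decision at the end is a linear ramp on the age of the last
check: probability 0.0 at or below NO_CHECK_BEFORE, probability 1.0 at or above
ALWAYS_CHECK_AFTER, linear in between. A worked example with hypothetical
thresholds:

NO_CHECK_BEFORE, ALWAYS_CHECK_AFTER = 0.0, 100.0   # hypothetical, in seconds
def check_probability(age):
    p = (age - NO_CHECK_BEFORE) / (ALWAYS_CHECK_AFTER - NO_CHECK_BEFORE)
    return min(max(p, 0.0), 1.0)

assert check_probability(-10) == 0.0   # checked very recently: never re-check
assert check_probability(50) == 0.5    # halfway up the ramp
assert check_probability(500) == 1.0   # stale: always check
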
Example 16
def _describe_mutable_share(abs_sharefile, f, now, si_s, out):
    # mutable share
    m = MutableShareFile(abs_sharefile)
    WE, nodeid = m._read_write_enabler_and_nodeid(f)
    data_length = m._read_data_length(f)
    expiration_time = min(
        [lease.get_expiration_time() for (i, lease) in m._enumerate_leases(f)])
    expiration = max(0, expiration_time - now)

    share_type = "unknown"
    f.seek(m.DATA_OFFSET)
    version = f.read(1)
    if version == b"\x00":
        # this slot contains an SDMF share
        share_type = "SDMF"
    elif version == b"\x01":
        share_type = "MDMF"

    if share_type == "SDMF":
        f.seek(m.DATA_OFFSET)

        # Read at least the mutable header length, if possible.  If there's
        # less data than that in the share, don't try to read more (we won't
        # be able to unpack the header in this case but we surely don't want
        # to try to unpack bytes *following* the data section as if they were
        # header data).  Rather than 2000 we could use HEADER_LENGTH from
        # allmydata/mutable/layout.py, probably.
        data = f.read(min(data_length, 2000))

        try:
            pieces = unpack_share(data)
        except NeedMoreDataError as e:
            # retry once with the larger size
            size = e.needed_bytes
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, size))
            pieces = unpack_share(data)
        (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
         share_hash_chain, block_hash_tree, share_data, enc_privkey) = pieces

        print("SDMF %s %d/%d %d #%d:%s %d %s" % \
              (si_s, k, N, datalen,
               seqnum, str(base32.b2a(root_hash), "utf-8"),
               expiration, quote_output(abs_sharefile)), file=out)
    elif share_type == "MDMF":
        fake_shnum = 0

        # TODO: factor this out with dump_MDMF_share()
        class ShareDumper(MDMFSlotReadProxy):
            def _read(self, readvs, force_remote=False, queue=False):
                data = []
                for (where, length) in readvs:
                    f.seek(m.DATA_OFFSET + where)
                    data.append(f.read(length))
                return defer.succeed({fake_shnum: data})

        p = ShareDumper(None, "fake-si", fake_shnum)

        def extract(func):
            stash = []
            # these methods return Deferreds, but we happen to know that
            # they run synchronously when not actually talking to a
            # remote server
            d = func()
            d.addCallback(stash.append)
            return stash[0]

        verinfo = extract(p.get_verinfo)
        (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
         offsets) = verinfo
        print("MDMF %s %d/%d %d #%d:%s %d %s" % \
              (si_s, k, N, datalen,
               seqnum, str(base32.b2a(root_hash), "utf-8"),
               expiration, quote_output(abs_sharefile)), file=out)
    else:
        print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)
Example 17
def dump_SDMF_share(m, length, options):
    from allmydata.mutable.layout import unpack_share, unpack_header
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32, hashutil
    from allmydata.uri import SSKVerifierURI
    from allmydata.util.encodingutil import quote_output, to_bytes

    offset = m.DATA_OFFSET

    out = options.stdout

    f = open(options['filename'], "rb")
    f.seek(offset)
    data = f.read(min(length, 2000))
    f.close()

    try:
        pieces = unpack_share(data)
    except NeedMoreDataError as e:
        # retry once with the larger size
        size = e.needed_bytes
        f = open(options['filename'], "rb")
        f.seek(offset)
        data = f.read(min(length, size))
        f.close()
        pieces = unpack_share(data)

    (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
     share_hash_chain, block_hash_tree, share_data, enc_privkey) = pieces
    (ig_version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
     ig_datalen, offsets) = unpack_header(data)

    print(" SDMF contents:", file=out)
    print("  seqnum: %d" % seqnum, file=out)
    print("  root_hash: %s" % str(base32.b2a(root_hash), "utf-8"), file=out)
    print("  IV: %s" % str(base32.b2a(IV), "utf-8"), file=out)
    print("  required_shares: %d" % k, file=out)
    print("  total_shares: %d" % N, file=out)
    print("  segsize: %d" % segsize, file=out)
    print("  datalen: %d" % datalen, file=out)
    print("  enc_privkey: %d bytes" % len(enc_privkey), file=out)
    print("  pubkey: %d bytes" % len(pubkey), file=out)
    print("  signature: %d bytes" % len(signature), file=out)
    share_hash_ids = ",".join(
        sorted([str(hid) for hid in share_hash_chain.keys()]))
    print("  share_hash_chain: %s" % share_hash_ids, file=out)
    print("  block_hash_tree: %d nodes" % len(block_hash_tree), file=out)

    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2:
        piece = to_bytes(pieces[-2])
        if base32.could_be_base32_encoded(piece):
            storage_index = base32.a2b(piece)
            fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey)
            u = SSKVerifierURI(storage_index, fingerprint)
            verify_cap = u.to_string()
            print("  verify-cap:",
                  quote_output(verify_cap, quotemarks=False),
                  file=out)

    if options['offsets']:
        # NOTE: this offset-calculation code is fragile, and needs to be
        # merged with MutableShareFile's internals.
        print(file=out)
        print(" Section Offsets:", file=out)

        def printoffset(name, value, shift=0):
            print("%s%20s: %s   (0x%x)" % (" " * shift, name, value, value),
                  file=out)

        printoffset("first lease", m.HEADER_SIZE)
        printoffset("share data", m.DATA_OFFSET)
        o_seqnum = m.DATA_OFFSET + struct.calcsize(">B")
        printoffset("seqnum", o_seqnum, 2)
        o_root_hash = m.DATA_OFFSET + struct.calcsize(">BQ")
        printoffset("root_hash", o_root_hash, 2)
        for k in [
                "signature", "share_hash_chain", "block_hash_tree",
                "share_data", "enc_privkey", "EOF"
        ]:
            name = {
                "share_data": "block data",
                "EOF": "end of share data"
            }.get(k, k)
            offset = m.DATA_OFFSET + offsets[k]
            printoffset(name, offset, 2)
        f = open(options['filename'], "rb")
        printoffset("extra leases", m._read_extra_lease_offset(f) + 4)
        f.close()

    print(file=out)
Example 18
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
    from allmydata import uri
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util import base32
    from allmydata.util.encodingutil import quote_output
    import struct

    f = open(abs_sharefile, "rb")
    prefix = f.read(32)

    if prefix == MutableShareFile.MAGIC:
        # mutable share
        m = MutableShareFile(abs_sharefile)
        WE, nodeid = m._read_write_enabler_and_nodeid(f)
        data_length = m._read_data_length(f)
        expiration_time = min(
            [lease.expiration_time for (i, lease) in m._enumerate_leases(f)])
        expiration = max(0, expiration_time - now)

        share_type = "unknown"
        f.seek(m.DATA_OFFSET)
        version = f.read(1)
        if version == b"\x00":
            # this slot contains an SDMF share
            share_type = "SDMF"
        elif version == b"\x01":
            share_type = "MDMF"

        if share_type == "SDMF":
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, 2000))

            try:
                pieces = unpack_share(data)
            except NeedMoreDataError as e:
                # retry once with the larger size
                size = e.needed_bytes
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, size))
                pieces = unpack_share(data)
            (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
             share_hash_chain, block_hash_tree, share_data,
             enc_privkey) = pieces

            print("SDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, str(base32.b2a(root_hash), "utf-8"),
                   expiration, quote_output(abs_sharefile)), file=out)
        elif share_type == "MDMF":
            from allmydata.mutable.layout import MDMFSlotReadProxy
            fake_shnum = 0

            # TODO: factor this out with dump_MDMF_share()
            class ShareDumper(MDMFSlotReadProxy):
                def _read(self, readvs, force_remote=False, queue=False):
                    data = []
                    for (where, length) in readvs:
                        f.seek(m.DATA_OFFSET + where)
                        data.append(f.read(length))
                    return defer.succeed({fake_shnum: data})

            p = ShareDumper(None, "fake-si", fake_shnum)

            def extract(func):
                stash = []
                # these methods return Deferreds, but we happen to know that
                # they run synchronously when not actually talking to a
                # remote server
                d = func()
                d.addCallback(stash.append)
                return stash[0]

            verinfo = extract(p.get_verinfo)
            (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
             offsets) = verinfo
            print("MDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, str(base32.b2a(root_hash), "utf-8"),
                   expiration, quote_output(abs_sharefile)), file=out)
        else:
            print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)

    elif struct.unpack(">L", prefix[:4]) == (1, ):
        # immutable

        class ImmediateReadBucketProxy(ReadBucketProxy):
            def __init__(self, sf):
                self.sf = sf
                ReadBucketProxy.__init__(self, None, None, "")

            def __repr__(self):
                return "<ImmediateReadBucketProxy>"

            def _read(self, offset, size):
                return defer.succeed(sf.read_share_data(offset, size))

        # use a ReadBucketProxy to parse the bucket and find the uri extension
        sf = ShareFile(abs_sharefile)
        bp = ImmediateReadBucketProxy(sf)

        expiration_time = min(
            [lease.expiration_time for lease in sf.get_leases()])
        expiration = max(0, expiration_time - now)

        UEB_data = call(bp.get_uri_extension)
        unpacked = uri.unpack_extension_readable(UEB_data)

        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]

        print("CHK %s %d/%d %d %s %d %s" %
              (si_s, k, N, filesize, str(
                  ueb_hash, "utf-8"), expiration, quote_output(abs_sharefile)),
              file=out)

    else:
        print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile),
              file=out)

    f.close()