Example 1
 def _satisfy_block_hash_tree(self, needed_hashes):
     o_bh = self.actual_offsets["block_hashes"]
     block_hashes = {}
     for hashnum in needed_hashes:
         hashdata = self._received.get(o_bh + hashnum * HASH_SIZE,
                                       HASH_SIZE)
         if hashdata:
             block_hashes[hashnum] = hashdata
         else:
             return False  # missing some hashes
     # note that we don't submit any hashes to the block_hash_tree until
     # we've gotten them all, because the hash tree will throw an
     # exception if we only give it a partial set (which it therefore
     # cannot validate)
     try:
         self._commonshare.process_block_hashes(block_hashes)
     except (BadHashError, NotEnoughHashesError) as e:
         f = Failure(e)
         hashnums = ",".join([str(n) for n in sorted(block_hashes.keys())])
         log.msg(format="hash failure in block_hashes=(%(hashnums)s),"
                 " from %(share)s",
                 hashnums=hashnums,
                 shnum=self._shnum,
                 share=repr(self),
                 failure=f,
                 level=log.WEIRD,
                 parent=self._lp,
                 umid="yNyFdA")
         hsize = max(0, max(needed_hashes)) * HASH_SIZE
         self._signal_corruption(f, o_bh, hsize)
         self.had_corruption = True
         raise
     for hashnum in needed_hashes:
         self._received.remove(o_bh + hashnum * HASH_SIZE, HASH_SIZE)
     return True
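The comment above points at a general pattern: when a validator refuses partial input, buffer everything first and hand it over only once the set is complete. A minimal stand-alone sketch of that gather-then-submit shape (the names here are hypothetical, not the Tahoe-LAFS classes):

def satisfy(needed, received, submit_all):
    """Collect every needed entry from `received` (a dict); only when all of
    them are present call submit_all() with the complete set."""
    gathered = {}
    for n in needed:
        if n not in received:
            return False  # keep waiting; never submit a partial set
        gathered[n] = received[n]
    submit_all(gathered)  # may raise if the complete set fails validation
    return True

# e.g. satisfy([0, 1, 2], {0: b"a", 1: b"b", 2: b"c"}, print) -> True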
Example 2
 def _satisfy_ciphertext_hash_tree(self, needed_hashes):
     start = self.actual_offsets["crypttext_hash_tree"]
     hashes = {}
     for hashnum in needed_hashes:
         hashdata = self._received.get(start + hashnum * HASH_SIZE,
                                       HASH_SIZE)
         if hashdata:
             hashes[hashnum] = hashdata
         else:
             return False  # missing some hashes
     # we don't submit any hashes to the ciphertext_hash_tree until we've
     # gotten them all
     try:
         self._node.process_ciphertext_hashes(hashes)
     except (BadHashError, NotEnoughHashesError) as e:
         f = Failure(e)
         hashnums = ",".join([str(n) for n in sorted(hashes.keys())])
         log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s),"
                 " from %(share)s",
                 hashnums=hashnums,
                 share=repr(self),
                 failure=f,
                 level=log.WEIRD,
                 parent=self._lp,
                 umid="iZI0TA")
         hsize = max(0, max(needed_hashes)) * HASH_SIZE
         self._signal_corruption(f, start, hsize)
         self.had_corruption = True
         raise
     for hashnum in needed_hashes:
         self._received.remove(start + hashnum * HASH_SIZE, HASH_SIZE)
     return True
Example 3
 def write_results(self, data):
     stdout = self.options.stdout
     keys = (
         "count-immutable-files",
         "count-mutable-files",
         "count-literal-files",
         "count-files",
         "count-directories",
         "size-immutable-files",
         "size-mutable-files",
         "size-literal-files",
         "size-directories",
         "largest-directory",
         "largest-immutable-file",
     )
     width = max([len(k) for k in keys])
     print("Counts and Total Sizes:", file=stdout)
     for k in keys:
         fmt = "%" + str(width) + "s: %d"
         if k in data:
             value = data[k]
             if not k.startswith("count-") and value > 1000:
                 absize = abbreviate_space_both(value)
                 print(fmt % (k, data[k]), "  ", absize, file=stdout)
             else:
                 print(fmt % (k, data[k]), file=stdout)
     if data["size-files-histogram"]:
         print("Size Histogram:", file=stdout)
         prevmax = None
         maxlen = max([
             len(str(maxsize))
             for (minsize, maxsize, count) in data["size-files-histogram"]
         ])
         maxcountlen = max([
             len(str(count))
             for (minsize, maxsize, count) in data["size-files-histogram"]
         ])
         minfmt = "%" + str(maxlen) + "d"
         maxfmt = "%-" + str(maxlen) + "d"
         countfmt = "%-" + str(maxcountlen) + "d"
         linefmt = minfmt + "-" + maxfmt + " : " + countfmt + "    %s"
         for (minsize, maxsize, count) in data["size-files-histogram"]:
             if prevmax is not None and minsize != prevmax + 1:
                 print(" " * (maxlen - 1) + "...", file=stdout)
             prevmax = maxsize
             print(
                 linefmt %
                 (minsize, maxsize, count, abbreviate_space_both(maxsize)),
                 file=stdout)
Example 4
def list_aliases(options):
    """
    Show aliases that exist.
    """
    data = _get_alias_details(options['node-directory'])

    if options['json']:
        dumped = json.dumps(data, indent=4)
        if isinstance(dumped, bytes):
            dumped = dumped.decode("utf-8")
        output = _escape_format(dumped)
    else:

        def dircap(details):
            return (details['readonly'] if options['readonly-uri'] else
                    details['readwrite']).decode("utf-8")

        def format_dircap(name, details):
            return fmt % (name, dircap(details))

        max_width = max([len(quote_output(name))
                         for name in data.keys()] + [0])
        fmt = "%" + str(max_width) + "s: %s"
        output = "\n".join(
            list(
                format_dircap(name, details)
                for name, details in data.items()))

    if output:
        # Show whatever we computed.  Skip this if there is no output to avoid
        # a spurious blank line.
        show_output(options.stdout, output)

    return 0
Example 5
def _describe_immutable_share(abs_sharefile, now, si_s, out):
    class ImmediateReadBucketProxy(ReadBucketProxy):
        def __init__(self, sf):
            self.sf = sf
            ReadBucketProxy.__init__(self, None, None, "")

        def __repr__(self):
            return "<ImmediateReadBucketProxy>"

        def _read(self, offset, size):
            return defer.succeed(sf.read_share_data(offset, size))

    # use a ReadBucketProxy to parse the bucket and find the uri extension
    sf = ShareFile(abs_sharefile)
    bp = ImmediateReadBucketProxy(sf)

    expiration_time = min(lease.get_expiration_time()
                          for lease in sf.get_leases())
    expiration = max(0, expiration_time - now)

    UEB_data = call(bp.get_uri_extension)
    unpacked = uri.unpack_extension_readable(UEB_data)

    k = unpacked["needed_shares"]
    N = unpacked["total_shares"]
    filesize = unpacked["size"]
    ueb_hash = unpacked["UEB_hash"]

    print("CHK %s %d/%d %d %s %d %s" %
          (si_s, k, N, filesize, str(
              ueb_hash, "utf-8"), expiration, quote_output(abs_sharefile)),
          file=out)
Example 6
 def _operation_complete(self, res, ophandle):
     if ophandle in self.handles:
         if ophandle not in self.timers:
             # the client has not provided a retain-for= value for this
             # handle, so we set our own.
             now = time.time()
             added = self.handles[ophandle][WHEN_ADDED]
             when = max(self.UNCOLLECTED_HANDLE_LIFETIME, now - added)
             self._set_timer(ophandle, when)
Example 7
    def read(self, consumer, offset, size):
        """I am the main entry point, from which FileNode.read() can get
        data. I feed the consumer with the desired range of ciphertext. I
        return a Deferred that fires (with the consumer) when the read is
        finished.

        Note that there is no notion of a 'file pointer': each call to read()
        uses an independent offset= value.
        """
        # for concurrent operations: each gets its own Segmentation manager
        if size is None:
            size = self._verifycap.size
        # ignore overruns: clip size so offset+size does not go past EOF, and
        # so size is not negative (which indicates that offset >= EOF)
        size = max(0, min(size, self._verifycap.size - offset))

        read_ev = self._download_status.add_read_event(offset, size, now())
        if IDownloadStatusHandlingConsumer.providedBy(consumer):
            consumer.set_download_status_read_event(read_ev)
            consumer.set_download_status(self._download_status)

        lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
                     si=base32.b2a(self._verifycap.storage_index)[:8],
                     offset=offset,
                     size=size,
                     level=log.OPERATIONAL,
                     parent=self._lp,
                     umid="l3j3Ww")
        if self._history:
            sp = self._history.stats_provider
            sp.count("downloader.files_downloaded", 1)  # really read() calls
            sp.count("downloader.bytes_downloaded", size)
        if size == 0:
            read_ev.finished(now())
            # no data, so no producer, so no register/unregisterProducer
            return defer.succeed(consumer)

        # for concurrent operations, each read() gets its own Segmentation
        # manager
        s = Segmentation(self, offset, size, consumer, read_ev, lp)

        # this raises an interesting question: what segments to fetch? if
        # offset=0, always fetch the first segment, and then allow
        # Segmentation to be responsible for pulling the subsequent ones if
        # the first wasn't large enough. If offset>0, we're going to need an
        # extra roundtrip to get the UEB (and therefore the segment size)
        # before we can figure out which segment to get. TODO: allow the
        # offset-table-guessing code (which starts by guessing the segsize)
        # to assist the offset>0 process.
        d = s.start()

        def _done(res):
            read_ev.finished(now())
            return res

        d.addBoth(_done)
        return d
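The clipping rule documented above can be checked on its own: the effective size is the requested size, limited so that offset+size never passes EOF and never goes negative. A small stand-alone sketch of just that rule (not the Tahoe-LAFS API):

def clip_read(offset, size, file_size):
    """Number of bytes a read(offset, size) may actually cover."""
    if size is None:
        size = file_size  # "read everything" default
    return max(0, min(size, file_size - offset))

# against a 1000-byte file:
assert clip_read(0, None, 1000) == 1000   # whole file
assert clip_read(900, 200, 1000) == 100   # clipped at EOF
assert clip_read(1200, 50, 1000) == 0     # offset past EOF -> empty read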
Example 8
 def read_share_data(self, offset, length):
     precondition(offset >= 0)
     # reads beyond the end of the data are truncated. Reads that start
     # beyond the end of the data return an empty string.
     seekpos = self._data_offset + offset
     actuallength = max(0, min(length, self._lease_offset - seekpos))
     if actuallength == 0:
         return b""
     with open(self.home, 'rb') as f:
         f.seek(seekpos)
         return f.read(actuallength)
Example 9
    def check_directory(self, contents):
        """I will tell you if a new directory needs to be created for a given
        set of directory contents, or if I know of an existing (immutable)
        directory that can be used instead.

        'contents' should be a dictionary that maps from child name (a single
        unicode string) to immutable childcap (filecap or dircap).

        I return a DirectoryResult object, synchronously. If r.was_created()
        returns False, you should create the directory (with
        t=mkdir-immutable). When you are finished, call r.did_create(dircap)
        so I can update my database.

        If was_created() returns a dircap, you might be able to avoid the
        mkdir. Call r.should_check(), and if it says False, you can skip the
        mkdir and use the dircap returned by was_created().

        If should_check() returns True, you should perform a check operation
        on the dircap returned by was_created(). If the check indicates the
        directory is healthy, please call
        r.did_check_healthy(checker_results) so I can update the database,
        using the de-JSONized response from the webapi t=check call for
        'checker_results'. If the check indicates the directory is not
        healthy, please repair or re-create the directory and call
        r.did_create(dircap) when you're done.
        """

        now = time.time()
        entries = []
        for name in contents:
            entries.append([name.encode("utf-8"), contents[name]])
        entries.sort()
        data = b"".join([
            netstring(name_utf8) + netstring(cap)
            for (name_utf8, cap) in entries
        ])
        dirhash = backupdb_dirhash(data)
        dirhash_s = base32.b2a(dirhash)
        c = self.cursor
        c.execute(
            "SELECT dircap, last_checked"
            " FROM directories WHERE dirhash=?", (dirhash_s, ))
        row = c.fetchone()
        if not row:
            return DirectoryResult(self, dirhash_s, None, False)
        (dircap, last_checked) = row
        age = now - last_checked

        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)

        return DirectoryResult(self, dirhash_s, to_bytes(dircap), should_check)
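The docstring above describes a small protocol around DirectoryResult. Here is a sketch of a caller that follows it, with the webapi operations passed in as stand-ins (mkdir_immutable and check_dircap are hypothetical parameters, not Tahoe-LAFS functions):

def ensure_immutable_directory(backupdb, contents, mkdir_immutable, check_dircap):
    """mkdir_immutable(contents) -> dircap and check_dircap(dircap) -> dict
    stand in for the webapi t=mkdir-immutable and t=check calls."""
    r = backupdb.check_directory(contents)
    dircap = r.was_created()
    if not dircap:
        dircap = mkdir_immutable(contents)
        r.did_create(dircap)
        return dircap
    if r.should_check():
        results = check_dircap(dircap)
        # the shape of the de-JSONized t=check response is assumed here
        if results.get("results", {}).get("healthy"):
            r.did_check_healthy(results)
        else:
            dircap = mkdir_immutable(contents)  # repair by re-creating
            r.did_create(dircap)
    return dircap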
Example 10
 def _read_share_data(self, f, offset, length):
     precondition(offset >= 0)
     data_length = self._read_data_length(f)
     if offset + length > data_length:
         # reads beyond the end of the data are truncated. Reads that
         # start beyond the end of the data return an empty string.
         length = max(0, data_length - offset)
     if length == 0:
         return b""
     precondition(offset + length <= data_length)
     f.seek(self.DATA_OFFSET + offset)
     data = f.read(length)
     return data
Example 11
    def lease_last_cycle_results(self, req, tag):
        lc = self._storage.lease_checker
        h = lc.get_state()["history"]
        if not h:
            return ""
        biggest = str(max(int(k) for k in h.keys()))
        last = h[biggest]

        start, end = last["cycle-start-finish-times"]
        tag(
            "Last complete cycle (which took %s and finished %s ago)"
            " recovered: " %
            (abbreviate_time(end - start), abbreviate_time(time.time() - end)),
            self.format_recovered(last["space-recovered"], "actual"))

        p = T.ul()

        def add(*pieces):
            p(T.li(pieces))

        saw = self.format_recovered(last["space-recovered"], "examined")
        add("and saw a total of ", saw)

        if not last["expiration-enabled"]:
            rec = self.format_recovered(last["space-recovered"], "configured")
            add(
                "but expiration was not enabled. If it had been, "
                "it would have recovered: ", rec)

        if last["corrupt-shares"]:
            add(
                "Corrupt shares:",
                T.ul((T.li([
                    "SI %s shnum %d" % (si, shnum)
                    for si, shnum in last["corrupt-shares"]
                ]))))

        return tag(p)
Example 12
def ls(options):
    nodeurl = options['node-url']
    aliases = options.aliases
    where = options.where
    stdout = options.stdout
    stderr = options.stderr

    if not nodeurl.endswith("/"):
        nodeurl += "/"
    if where.endswith("/"):
        where = where[:-1]
    try:
        rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS)
    except UnknownAliasError as e:
        e.display(stderr)
        return 1

    path = str(path, "utf-8")
    url = nodeurl + "uri/%s" % url_quote(rootcap)
    if path:
        # move where.endswith check here?
        url += "/" + escape_path(path)
    assert not url.endswith("/")
    url += "?t=json"
    resp = do_http("GET", url)
    if resp.status == 404:
        print("No such file or directory", file=stderr)
        return 2
    if resp.status != 200:
        print(format_http_error("Error during GET", resp), file=stderr)
        if resp.status == 0:
            return 3
        else:
            return resp.status

    data = resp.read()
    if options['json']:
        # The webapi server should always output printable ASCII.
        if is_printable_ascii(data):
            data = str(data, "ascii")
            print(data, file=stdout)
            return 0
        else:
            print("The JSON response contained unprintable characters:",
                  file=stderr)
            print(quote_output(data, quotemarks=False), file=stderr)
            return 1

    try:
        parsed = json.loads(data)
    except Exception as e:
        print("error: %s" % quote_output(e.args[0], quotemarks=False),
              file=stderr)
        print("Could not parse JSON response:", file=stderr)
        print(quote_output(data, quotemarks=False), file=stderr)
        return 1

    nodetype, d = parsed
    children = {}
    if nodetype == "dirnode":
        children = d['children']
    else:
        # paths returned from get_alias are always valid UTF-8
        childname = path.split("/")[-1]
        children = {childname: (nodetype, d)}
        if "metadata" not in d:
            d["metadata"] = {}
    childnames = sorted(children.keys())
    now = time.time()

    # we build up a series of rows, then we loop through them to compute a
    # maxwidth so we can format them tightly. Size, filename, and URI are the
    # variable-width ones.
    rows = []
    has_unknowns = False

    for name in childnames:
        child = children[name]
        name = str(name)
        childtype = child[0]

        # See webapi.txt for a discussion of the meanings of unix local
        # filesystem mtime and ctime, Tahoe mtime and ctime, and Tahoe
        # linkmotime and linkcrtime.
        ctime = child[1].get("metadata", {}).get('tahoe', {}).get("linkcrtime")
        if not ctime:
            ctime = child[1]["metadata"].get("ctime")

        mtime = child[1].get("metadata", {}).get('tahoe', {}).get("linkmotime")
        if not mtime:
            mtime = child[1]["metadata"].get("mtime")
        rw_uri = to_bytes(child[1].get("rw_uri"))
        ro_uri = to_bytes(child[1].get("ro_uri"))
        if ctime:
            # match the formatting that GNU 'ls' does
            if (now - ctime) > 6 * 30 * 24 * 60 * 60:
                # old files
                fmt = "%b %d  %Y"
            else:
                fmt = "%b %d %H:%M"
            ctime_s = time.strftime(fmt, time.localtime(ctime))
        else:
            ctime_s = "-"
        if childtype == "dirnode":
            t0 = "d"
            size = "-"
            classify = "/"
        elif childtype == "filenode":
            t0 = "-"
            size = str(child[1].get("size", "?"))
            classify = ""
            if rw_uri:
                classify = "*"
        else:
            has_unknowns = True
            t0 = "?"
            size = "?"
            classify = "?"
        t1 = "-"
        if ro_uri:
            t1 = "r"
        t2 = "-"
        if rw_uri:
            t2 = "w"
        t3 = "-"
        if childtype == "dirnode":
            t3 = "x"

        uri = rw_uri or ro_uri

        line = []
        if options["long"]:
            line.append(t0 + t1 + t2 + t3)
            line.append(size)
            line.append(ctime_s)
        if not options["classify"]:
            classify = ""

        line.append(name + classify)

        if options["uri"]:
            line.append(ensure_text(uri))
        if options["readonly-uri"]:
            line.append(
                quote_output(ensure_text(ro_uri) or "-", quotemarks=False))

        rows.append(line)

    max_widths = []
    left_justifys = []
    for row in rows:
        for i, cell in enumerate(row):
            while len(max_widths) <= i:
                max_widths.append(0)
            while len(left_justifys) <= i:
                left_justifys.append(False)
            max_widths[i] = max(max_widths[i], len(cell))
            if ensure_text(cell).startswith("URI"):
                left_justifys[i] = True
    if len(left_justifys) == 1:
        left_justifys[0] = True
    fmt_pieces = []
    for i in range(len(max_widths)):
        piece = "%"
        if left_justifys[i]:
            piece += "-"
        piece += str(max_widths[i])
        piece += "s"
        fmt_pieces.append(piece)
    fmt = " ".join(fmt_pieces)

    rc = 0
    for row in rows:
        row = (fmt % tuple(row)).rstrip()
        encoding_error = False
        try:
            row = unicode_to_output(row)
        except UnicodeEncodeError:
            encoding_error = True
            row = quote_output(row)
        if encoding_error:
            print(row, file=stderr)
            rc = 1
        else:
            print(row, file=stdout)

    if rc == 1:
        print("\nThis listing included files whose names could not be converted to the terminal" \
                        "\noutput encoding. Their names are shown using backslash escapes and in quotes.", file=stderr)
    if has_unknowns:
        print("\nThis listing included unknown objects. Using a webapi server that supports" \
                        "\na later version of Tahoe may help.", file=stderr)

    return rc
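The column-alignment strategy used above (collect every row first, then size each column to its widest cell) can be shown in a few lines, independent of the Tahoe-specific fields. A stand-alone sketch, assuming all rows have the same number of cells:

def format_table(rows):
    """Right-align each column to the width of its widest cell."""
    if not rows:
        return []
    widths = [0] * len(rows[0])
    for row in rows:
        for i, cell in enumerate(row):
            widths[i] = max(widths[i], len(cell))
    fmt = " ".join("%%%ds" % w for w in widths)
    return [(fmt % tuple(row)).rstrip() for row in rows]

# e.g. format_table([["drwx", "-", "docs/"], ["-r--", "1234", "notes.txt"]])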
Example 13
def _describe_mutable_share(abs_sharefile, f, now, si_s, out):
    # mutable share
    m = MutableShareFile(abs_sharefile)
    WE, nodeid = m._read_write_enabler_and_nodeid(f)
    data_length = m._read_data_length(f)
    expiration_time = min(
        [lease.get_expiration_time() for (i, lease) in m._enumerate_leases(f)])
    expiration = max(0, expiration_time - now)

    share_type = "unknown"
    f.seek(m.DATA_OFFSET)
    version = f.read(1)
    if version == b"\x00":
        # this slot contains an SDMF share
        share_type = "SDMF"
    elif version == b"\x01":
        share_type = "MDMF"

    if share_type == "SDMF":
        f.seek(m.DATA_OFFSET)

        # Read at least the mutable header length, if possible.  If there's
        # less data than that in the share, don't try to read more (we won't
        # be able to unpack the header in this case but we surely don't want
        # to try to unpack bytes *following* the data section as if they were
        # header data).  Rather than 2000 we could use HEADER_LENGTH from
        # allmydata/mutable/layout.py, probably.
        data = f.read(min(data_length, 2000))

        try:
            pieces = unpack_share(data)
        except NeedMoreDataError as e:
            # retry once with the larger size
            size = e.needed_bytes
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, size))
            pieces = unpack_share(data)
        (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
         share_hash_chain, block_hash_tree, share_data, enc_privkey) = pieces

        print("SDMF %s %d/%d %d #%d:%s %d %s" % \
              (si_s, k, N, datalen,
               seqnum, str(base32.b2a(root_hash), "utf-8"),
               expiration, quote_output(abs_sharefile)), file=out)
    elif share_type == "MDMF":
        fake_shnum = 0

        # TODO: factor this out with dump_MDMF_share()
        class ShareDumper(MDMFSlotReadProxy):
            def _read(self, readvs, force_remote=False, queue=False):
                data = []
                for (where, length) in readvs:
                    f.seek(m.DATA_OFFSET + where)
                    data.append(f.read(length))
                return defer.succeed({fake_shnum: data})

        p = ShareDumper(None, "fake-si", fake_shnum)

        def extract(func):
            stash = []
            # these methods return Deferreds, but we happen to know that
            # they run synchronously when not actually talking to a
            # remote server
            d = func()
            d.addCallback(stash.append)
            return stash[0]

        verinfo = extract(p.get_verinfo)
        (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
         offsets) = verinfo
        print("MDMF %s %d/%d %d #%d:%s %d %s" % \
              (si_s, k, N, datalen,
               seqnum, str(base32.b2a(root_hash), "utf-8"),
               expiration, quote_output(abs_sharefile)), file=out)
    else:
        print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)
Example 14
def get_disk_stats(whichdir, reserved_space=0):
    """Return disk statistics for the storage disk, in the form of a dict
    with the following fields.
      total:            total bytes on disk
      free_for_root:    bytes actually free on disk
      free_for_nonroot: bytes free for "a non-privileged user" [Unix] or
                          the current user [Windows]; might take into
                          account quotas depending on platform
      used:             bytes used on disk
      avail:            bytes available excluding reserved space
    An AttributeError can occur if the OS has no API to get disk information.
    An EnvironmentError can occur if the OS call fails.

    whichdir is a directory on the filesystem in question -- the
    answer is about the filesystem, not about the directory, so the
    directory is used only to specify which filesystem.

    reserved_space is how many bytes to subtract from the answer, so
    you can pass how many bytes you would like to leave unused on this
    filesystem as reserved_space.
    """

    if have_GetDiskFreeSpaceExW:
        # If this is a Windows system and GetDiskFreeSpaceExW is available, use it.
        # (This might put up an error dialog unless
        # SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) has been called,
        # which we do in allmydata.windows.fixups.initialize().)

        n_free_for_nonroot = c_ulonglong(0)
        n_total            = c_ulonglong(0)
        n_free_for_root    = c_ulonglong(0)
        retval = GetDiskFreeSpaceExW(whichdir, byref(n_free_for_nonroot),
                                               byref(n_total),
                                               byref(n_free_for_root))
        if retval == 0:
            raise OSError("WinError: %s\n attempting to get disk statistics for %r"
                          % (WinError(get_last_error()), whichdir))
        free_for_nonroot = n_free_for_nonroot.value
        total            = n_total.value
        free_for_root    = n_free_for_root.value
    else:
        # For Unix-like systems.
        # <http://docs.python.org/library/os.html#os.statvfs>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/fstatvfs.html>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/sysstatvfs.h.html>
        s = os.statvfs(whichdir)

        # on my mac laptop:
        #  statvfs(2) is a wrapper around statfs(2).
        #    statvfs.f_frsize = statfs.f_bsize :
        #     "minimum unit of allocation" (statvfs)
        #     "fundamental file system block size" (statfs)
        #    statvfs.f_bsize = statfs.f_iosize = stat.st_blocks : preferred IO size
        # on an encrypted home directory ("FileVault"), it gets f_blocks
        # wrong, and s.f_blocks*s.f_frsize is twice the size of my disk,
        # but s.f_bavail*s.f_frsize is correct

        total = s.f_frsize * s.f_blocks
        free_for_root = s.f_frsize * s.f_bfree
        free_for_nonroot = s.f_frsize * s.f_bavail

    # valid for all platforms:
    used = total - free_for_root
    avail = max(free_for_nonroot - reserved_space, 0)

    return { 'total': total,
             'free_for_root': free_for_root,
             'free_for_nonroot': free_for_nonroot,
             'used': used,
             'avail': avail,
           }
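The two derived fields follow directly from the documented ones: used is what the filesystem reports as consumed, and avail is what a non-root user may still use after honoring reserved_space, floored at zero. A quick worked example (the byte counts are invented for illustration):

total            = 500 * 10**9   # 500 GB filesystem
free_for_root    = 120 * 10**9   # includes blocks reserved for root
free_for_nonroot = 110 * 10**9   # what an ordinary user may allocate
reserved_space   =  10 * 10**9   # operator wants 10 GB left unused

used  = total - free_for_root                      # 380 GB
avail = max(free_for_nonroot - reserved_space, 0)  # 100 GB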
Example 15
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
    from allmydata import uri
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util import base32
    from allmydata.util.encodingutil import quote_output
    import struct

    f = open(abs_sharefile, "rb")
    prefix = f.read(32)

    if prefix == MutableShareFile.MAGIC:
        # mutable share
        m = MutableShareFile(abs_sharefile)
        WE, nodeid = m._read_write_enabler_and_nodeid(f)
        data_length = m._read_data_length(f)
        expiration_time = min(
            [lease.expiration_time for (i, lease) in m._enumerate_leases(f)])
        expiration = max(0, expiration_time - now)

        share_type = "unknown"
        f.seek(m.DATA_OFFSET)
        version = f.read(1)
        if version == b"\x00":
            # this slot contains an SDMF share
            share_type = "SDMF"
        elif version == b"\x01":
            share_type = "MDMF"

        if share_type == "SDMF":
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, 2000))

            try:
                pieces = unpack_share(data)
            except NeedMoreDataError as e:
                # retry once with the larger size
                size = e.needed_bytes
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, size))
                pieces = unpack_share(data)
            (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
             share_hash_chain, block_hash_tree, share_data,
             enc_privkey) = pieces

            print("SDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, str(base32.b2a(root_hash), "utf-8"),
                   expiration, quote_output(abs_sharefile)), file=out)
        elif share_type == "MDMF":
            from allmydata.mutable.layout import MDMFSlotReadProxy
            fake_shnum = 0

            # TODO: factor this out with dump_MDMF_share()
            class ShareDumper(MDMFSlotReadProxy):
                def _read(self, readvs, force_remote=False, queue=False):
                    data = []
                    for (where, length) in readvs:
                        f.seek(m.DATA_OFFSET + where)
                        data.append(f.read(length))
                    return defer.succeed({fake_shnum: data})

            p = ShareDumper(None, "fake-si", fake_shnum)

            def extract(func):
                stash = []
                # these methods return Deferreds, but we happen to know that
                # they run synchronously when not actually talking to a
                # remote server
                d = func()
                d.addCallback(stash.append)
                return stash[0]

            verinfo = extract(p.get_verinfo)
            (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
             offsets) = verinfo
            print("MDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, str(base32.b2a(root_hash), "utf-8"),
                   expiration, quote_output(abs_sharefile)), file=out)
        else:
            print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)

    elif struct.unpack(">L", prefix[:4]) == (1, ):
        # immutable

        class ImmediateReadBucketProxy(ReadBucketProxy):
            def __init__(self, sf):
                self.sf = sf
                ReadBucketProxy.__init__(self, None, None, "")

            def __repr__(self):
                return "<ImmediateReadBucketProxy>"

            def _read(self, offset, size):
                return defer.succeed(sf.read_share_data(offset, size))

        # use a ReadBucketProxy to parse the bucket and find the uri extension
        sf = ShareFile(abs_sharefile)
        bp = ImmediateReadBucketProxy(sf)

        expiration_time = min(
            [lease.expiration_time for lease in sf.get_leases()])
        expiration = max(0, expiration_time - now)

        UEB_data = call(bp.get_uri_extension)
        unpacked = uri.unpack_extension_readable(UEB_data)

        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]

        print("CHK %s %d/%d %d %s %d %s" %
              (si_s, k, N, filesize, str(
                  ueb_hash, "utf-8"), expiration, quote_output(abs_sharefile)),
              file=out)

    else:
        print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile),
              file=out)

    f.close()
Example 16
        """
        # The second field -- the four-byte share data length -- is no longer
        # used as of Tahoe v1.3.0, but we continue to write it in there in
        # case someone downgrades a storage server from >= Tahoe-1.3.0 to <
        # Tahoe-1.3.0, or moves a share file from one server to another,
        # etc. We do saturation -- a share data length larger than 2**32-1
        # (what can fit into the field) is marked as the largest length that
        # can fit into the field. That way, even if this does happen, the old
        # < v1.3.0 server will still allow clients to read the first part of
        # the share.
        return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0)


ALL_SCHEMAS = {
    _Schema(version=2, lease_serializer=v2_immutable),
    _Schema(version=1, lease_serializer=v1_immutable),
}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)


def schema_from_version(version):
    # (int) -> Optional[_Schema]
    """
    Find the schema object that corresponds to a certain version number.
    """
    for schema in ALL_SCHEMAS:
        if schema.version == version:
            return schema
    return None
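The saturation rule described in the docstring can be exercised on its own: any max_size that does not fit in the four-byte field is written as 2**32-1, so an old server still sees a valid (if understated) length. A stand-alone check of just that field (not the full three-field header):

import struct

def packed_data_length(max_size):
    # saturate at the largest value a big-endian ">L" field can hold
    return struct.pack(">L", min(2**32 - 1, max_size))

assert struct.unpack(">L", packed_data_length(1000))[0] == 1000
assert struct.unpack(">L", packed_data_length(2**40))[0] == 2**32 - 1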
Example 17
 def max(self, key, value):
     self.stats[key] = max(self.stats[key], value)
Example 18
    def check_file(self, path, use_timestamps=True):
        """I will tell you if a given local file needs to be uploaded or not,
        by looking in a database and seeing if I have a record of this file
        having been uploaded earlier.

        I return a FileResults object, synchronously. If r.was_uploaded()
        returns False, you should upload the file. When you are finished
        uploading it, call r.did_upload(filecap), so I can update my
        database.

        If was_uploaded() returns a filecap, you might be able to avoid an
        upload. Call r.should_check(), and if it says False, you can skip the
        upload and use the filecap returned by was_uploaded().

        If should_check() returns True, you should perform a filecheck on the
        filecap returned by was_uploaded(). If the check indicates the file
        is healthy, please call r.did_check_healthy(checker_results) so I can
        update the database, using the de-JSONized response from the webapi
        t=check call for 'checker_results'. If the check indicates the file
        is not healthy, please upload the file and call r.did_upload(filecap)
        when you're done.

        If use_timestamps=True (the default), I will compare ctime and mtime
        of the local file against an entry in my database, and consider the
        file to be unchanged if ctime, mtime, and filesize are all the same
        as the earlier version. If use_timestamps=False, I will not trust the
        timestamps, so more files (perhaps all) will be marked as needing
        upload. A future version of this database may hash the file to make
        equality decisions, in which case use_timestamps=False will not
        always imply r.must_upload()==True.

        'path' points to a local file on disk, possibly relative to the
        current working directory. The database stores absolute pathnames.
        """

        path = abspath_expanduser_unicode(path)

        # TODO: consider using get_pathinfo.
        s = os.stat(path)
        size = s[stat.ST_SIZE]
        ctime = s[stat.ST_CTIME]
        mtime = s[stat.ST_MTIME]

        now = time.time()
        c = self.cursor

        c.execute(
            "SELECT size,mtime,ctime,fileid"
            " FROM local_files"
            " WHERE path=?", (path, ))
        row = self.cursor.fetchone()
        if not row:
            return FileResult(self, None, False, path, mtime, ctime, size)
        (last_size, last_mtime, last_ctime, last_fileid) = row

        c.execute(
            "SELECT caps.filecap, last_upload.last_checked"
            " FROM caps,last_upload"
            " WHERE caps.fileid=? AND last_upload.fileid=?",
            (last_fileid, last_fileid))
        row2 = c.fetchone()

        if ((last_size != size or not use_timestamps or last_mtime != mtime
             or last_ctime != ctime)  # the file has been changed
                or
            (not row2)  # we somehow forgot where we put the file last time
            ):
            c.execute("DELETE FROM local_files WHERE path=?", (path, ))
            self.connection.commit()
            return FileResult(self, None, False, path, mtime, ctime, size)

        # at this point, we're allowed to assume the file hasn't been changed
        (filecap, last_checked) = row2
        age = now - last_checked

        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)

        return FileResult(self, to_bytes(filecap), should_check, path, mtime,
                          ctime, size)
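The should_check decision is a linear ramp: never check before NO_CHECK_BEFORE seconds of age, always check after ALWAYS_CHECK_AFTER, with the probability rising linearly in between. A stand-alone sketch with illustrative thresholds (the 30/60-day values are made up, not the real defaults):

import random

NO_CHECK_BEFORE    = 30 * 24 * 3600  # illustrative: 30 days
ALWAYS_CHECK_AFTER = 60 * 24 * 3600  # illustrative: 60 days

def should_check(age_seconds):
    p = (age_seconds - NO_CHECK_BEFORE) / (ALWAYS_CHECK_AFTER - NO_CHECK_BEFORE)
    p = min(max(p, 0.0), 1.0)  # clamp to [0, 1]
    return random.random() < p

# age 30 days -> p = 0.0 (never), 45 days -> p = 0.5, 60+ days -> p = 1.0 (always)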
Example 19
    def render(self, req):
        gte = static.getTypeAndEncoding
        ctype, encoding = gte(self.filename,
                              static.File.contentTypes,
                              static.File.contentEncodings,
                              defaultType="text/plain")
        req.setHeader("content-type", ctype)
        if encoding:
            req.setHeader("content-encoding", encoding)

        if boolean_of_arg(get_arg(req, "save", "False")):
            # Tell the browser to save the file rather than display it. We
            # don't try to encode the filename; instead we echo back the
            # exact same bytes we were given in the URL. See the comment in
            # FileNodeHandler.render_GET for the sad details.
            req.setHeader("content-disposition",
                          b'attachment; filename="%s"' % self.filename)

        filesize = self.filenode.get_size()
        assert isinstance(filesize, int), filesize
        first, size = 0, None
        contentsize = filesize
        req.setHeader("accept-ranges", "bytes")

        # TODO: for mutable files, use the roothash. For LIT, hash the data.
        # or maybe just use the URI for CHK and LIT.
        rangeheader = req.getHeader('range')
        if rangeheader:
            ranges = self.parse_range_header(rangeheader)

            # ranges = None means the header didn't parse, so ignore
            # the header as if it didn't exist.  If there is more than one
            # range, then just return the first for now, until we can
            # generate multipart/byteranges.
            if ranges is not None:
                first, last = ranges[0]

                if first >= filesize:
                    raise WebError('First beyond end of file',
                                   http.REQUESTED_RANGE_NOT_SATISFIABLE)
                else:
                    first = max(0, first)
                    last = min(filesize-1, last)

                    req.setResponseCode(http.PARTIAL_CONTENT)
                    req.setHeader('content-range',"bytes %s-%s/%s" %
                                  (str(first), str(last),
                                   str(filesize)))
                    contentsize = last - first + 1
                    size = contentsize

        req.setHeader("content-length", b"%d" % contentsize)
        if req.method == b"HEAD":
            return b""

        d = self.filenode.read(req, first, size)

        def _error(f):
            if f.check(defer.CancelledError):
                # The HTTP connection was lost and we no longer have anywhere
                # to send our result.  Let this pass through.
                return f
            if req.startedWriting:
                # The content-type is already set, and the response code has
                # already been sent, so we can't provide a clean error
                # indication. We can emit text (which a browser might
                # interpret as something else), and if we sent a Size header,
                # they might notice that we've truncated the data. Keep the
                # error message small to improve the chances of having our
                # error response be shorter than the intended results.
                #
                # We don't have a lot of options, unfortunately.
                return b"problem during download\n"
            else:
                # We haven't written anything yet, so we can provide a
                # sensible error message.
                return f
        d.addCallbacks(
            lambda ignored: None,
            _error,
        )
        return d
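The single-range handling above reduces to a small clamping rule: reject a range that starts at or past EOF, otherwise clamp it to [0, filesize-1] and serve last-first+1 bytes. A stand-alone sketch of that rule (parse_range_header itself is not reproduced here):

def clamp_range(first, last, filesize):
    """Return (first, last, content_length) for one parsed Range pair, or
    None if the range cannot be satisfied (caller answers 416)."""
    if first >= filesize:
        return None  # Requested Range Not Satisfiable
    first = max(0, first)
    last = min(filesize - 1, last)
    return first, last, last - first + 1

assert clamp_range(0, 499, 1000) == (0, 499, 500)
assert clamp_range(900, 5000, 1000) == (900, 999, 100)
assert clamp_range(2000, 2100, 1000) is None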