def _describe_immutable_share(abs_sharefile, now, si_s, out):
    class ImmediateReadBucketProxy(ReadBucketProxy):
        def __init__(self, sf):
            self.sf = sf
            ReadBucketProxy.__init__(self, None, None, "")
        def __repr__(self):
            return "<ImmediateReadBucketProxy>"
        def _read(self, offset, size):
            # read synchronously from the stored share, instead of a remote
            # server (attribute access instead of the enclosing-scope name,
            # so the class does not rely on a closure variable)
            return defer.succeed(self.sf.read_share_data(offset, size))

    # use a ReadBucketProxy to parse the bucket and find the uri extension
    sf = ShareFile(abs_sharefile)
    bp = ImmediateReadBucketProxy(sf)

    expiration_time = min(lease.get_expiration_time()
                          for lease in sf.get_leases())
    expiration = max(0, expiration_time - now)

    UEB_data = call(bp.get_uri_extension)
    unpacked = uri.unpack_extension_readable(UEB_data)

    k = unpacked["needed_shares"]
    N = unpacked["total_shares"]
    filesize = unpacked["size"]
    ueb_hash = unpacked["UEB_hash"]

    print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
                                        str(ueb_hash, "utf-8"), expiration,
                                        quote_output(abs_sharefile)),
          file=out)
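# A minimal invocation sketch for _describe_immutable_share(), assuming the
# module-level names it relies on (uri, ShareFile, quote_output, defer) and
# the synchronous call() helper from the same debug module. The share path
# and storage-index string are hypothetical placeholders; in the real CLI
# flow si_s comes from the share's parent directory name.
import sys, time

_describe_immutable_share(
    "/var/tahoe/storage/shares/aa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/0",
    now=time.time(),
    si_s="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    out=sys.stdout)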
def _parse_and_store_UEB(self, UEB_s):
    # Note: the UEB contains needed_shares and total_shares. These are
    # redundant and inferior (the filecap contains the authoritative
    # values). However, because it is possible to encode the same file in
    # multiple ways, and the encoders might choose (poorly) to use the
    # same key for both (therefore getting the same SI), we might
    # encounter shares for both types. The UEB hashes will be different,
    # however, and we'll disregard the "other" encoding's shares as
    # corrupted.

    # therefore, we ignore d['total_shares'] and d['needed_shares'].

    d = uri.unpack_extension(UEB_s)

    log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
            ueb=repr(uri.unpack_extension_readable(UEB_s)),
            vcap=self._verifycap.to_string(),
            level=log.NOISY, parent=self._lp, umid="cVqZnA")

    k, N = self._verifycap.needed_shares, self._verifycap.total_shares

    self.segment_size = d['segment_size']
    self._segsize_observers.fire(self.segment_size)

    r = self._calculate_sizes(self.segment_size)
    self.tail_segment_size = r["tail_segment_size"]
    self.tail_segment_padded = r["tail_segment_padded"]
    self.num_segments = r["num_segments"]
    self.block_size = r["block_size"]
    self.tail_block_size = r["tail_block_size"]
    log.msg("actual sizes: %s" % (r,),
            level=log.NOISY, parent=self._lp, umid="PY6P5Q")
    if (self.segment_size == self.guessed_segment_size
        and self.num_segments == self.guessed_num_segments):
        log.msg("my guess was right!",
                level=log.NOISY, parent=self._lp, umid="x340Ow")
    else:
        log.msg("my guess was wrong! Extra round trips for me.",
                level=log.NOISY, parent=self._lp, umid="tb7RJw")

    # zfec.Decode() instantiation is fast, but still, let's use the same
    # codec instance for all but the last segment. 3-of-10 takes 15us on
    # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
    # 2.5ms, worst-case 254-of-255 is 9.3ms
    self._codec = CRSDecoder()
    self._codec.set_params(self.segment_size, k, N)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B. self.ciphertext_hash_tree was a guess before:
    # this is where we create it for real.
    self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
    self.ciphertext_hash_tree_leaves = self.num_segments
    self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

    self.share_hash_tree.set_hashes({0: d['share_root_hash']})
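# A hedged sketch of the arithmetic _calculate_sizes() is expected to
# perform, reconstructed only from the fields consumed above: the ciphertext
# is split into fixed-size segments, each segment is erasure-coded into
# blocks of segment_size/k bytes, and the tail segment is padded up to a
# multiple of k so it splits evenly. The real method may differ in detail.
import math

def calculate_sizes_sketch(file_size, segment_size, k):
    # assumes segment_size is already a multiple of k, as the downloader
    # arranges
    num_segments = max(1, int(math.ceil(file_size / float(segment_size))))
    tail_segment_size = file_size - (num_segments - 1) * segment_size
    tail_segment_padded = int(math.ceil(tail_segment_size / float(k))) * k
    return {"num_segments": num_segments,
            "block_size": segment_size // k,
            "tail_segment_size": tail_segment_size,
            "tail_segment_padded": tail_segment_padded,
            "tail_block_size": tail_segment_padded // k}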
def test_pack(self): data = {"stuff": "value", "size": 12, "needed_shares": 3, "big_hash": hashutil.tagged_hash("foo", "bar")} ext = uri.pack_extension(data) d = uri.unpack_extension(ext) self.failUnlessReallyEqual(d["stuff"], "value") self.failUnlessReallyEqual(d["size"], 12) self.failUnlessReallyEqual(d["big_hash"], hashutil.tagged_hash("foo", "bar")) readable = uri.unpack_extension_readable(ext) self.failUnlessReallyEqual(readable["needed_shares"], 3) self.failUnlessReallyEqual(readable["stuff"], "value") self.failUnlessReallyEqual(readable["size"], 12) self.failUnlessReallyEqual(readable["big_hash"], base32.b2a(hashutil.tagged_hash("foo", "bar"))) self.failUnlessReallyEqual(readable["UEB_hash"], base32.b2a(hashutil.uri_extension_hash(ext)))
def test_pack(self): data = {"stuff": "value", "size": 12, "needed_shares": 3, "big_hash": hashutil.tagged_hash("foo", "bar"), } ext = uri.pack_extension(data) d = uri.unpack_extension(ext) self.failUnlessReallyEqual(d["stuff"], "value") self.failUnlessReallyEqual(d["size"], 12) self.failUnlessReallyEqual(d["big_hash"], hashutil.tagged_hash("foo", "bar")) readable = uri.unpack_extension_readable(ext) self.failUnlessReallyEqual(readable["needed_shares"], 3) self.failUnlessReallyEqual(readable["stuff"], "value") self.failUnlessReallyEqual(readable["size"], 12) self.failUnlessReallyEqual(readable["big_hash"], base32.b2a(hashutil.tagged_hash("foo", "bar"))) self.failUnlessReallyEqual(readable["UEB_hash"], base32.b2a(hashutil.uri_extension_hash(ext)))
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
    from allmydata import uri
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util import base32
    from allmydata.util.encodingutil import quote_output
    import struct

    f = open(abs_sharefile, "rb")
    prefix = f.read(32)

    if prefix == MutableShareFile.MAGIC:
        # mutable share
        m = MutableShareFile(abs_sharefile)
        WE, nodeid = m._read_write_enabler_and_nodeid(f)
        data_length = m._read_data_length(f)
        expiration_time = min([lease.expiration_time
                               for (i, lease) in m._enumerate_leases(f)])
        expiration = max(0, expiration_time - now)

        share_type = "unknown"
        f.seek(m.DATA_OFFSET)
        version = f.read(1)
        if version == b"\x00":
            # this slot contains an SDMF share
            share_type = "SDMF"
        elif version == b"\x01":
            share_type = "MDMF"

        if share_type == "SDMF":
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, 2000))

            try:
                pieces = unpack_share(data)
            except NeedMoreDataError as e:
                # retry once with the larger size
                size = e.needed_bytes
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, size))
                pieces = unpack_share(data)
            (seqnum, root_hash, IV, k, N, segsize, datalen,
             pubkey, signature, share_hash_chain, block_hash_tree,
             share_data, enc_privkey) = pieces

            print("SDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, str(base32.b2a(root_hash), "utf-8"),
                   expiration, quote_output(abs_sharefile)), file=out)
        elif share_type == "MDMF":
            from allmydata.mutable.layout import MDMFSlotReadProxy
            fake_shnum = 0
            # TODO: factor this out with dump_MDMF_share()
            class ShareDumper(MDMFSlotReadProxy):
                def _read(self, readvs, force_remote=False, queue=False):
                    data = []
                    for (where, length) in readvs:
                        f.seek(m.DATA_OFFSET + where)
                        data.append(f.read(length))
                    return defer.succeed({fake_shnum: data})

            p = ShareDumper(None, "fake-si", fake_shnum)
            def extract(func):
                stash = []
                # these methods return Deferreds, but we happen to know that
                # they run synchronously when not actually talking to a
                # remote server
                d = func()
                d.addCallback(stash.append)
                return stash[0]

            verinfo = extract(p.get_verinfo)
            (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
             offsets) = verinfo
            print("MDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, str(base32.b2a(root_hash), "utf-8"),
                   expiration, quote_output(abs_sharefile)), file=out)
        else:
            print("UNKNOWN mutable %s" % quote_output(abs_sharefile),
                  file=out)

    elif struct.unpack(">L", prefix[:4]) == (1,):
        # immutable

        class ImmediateReadBucketProxy(ReadBucketProxy):
            def __init__(self, sf):
                self.sf = sf
                ReadBucketProxy.__init__(self, None, None, "")
            def __repr__(self):
                return "<ImmediateReadBucketProxy>"
            def _read(self, offset, size):
                return defer.succeed(self.sf.read_share_data(offset, size))

        # use a ReadBucketProxy to parse the bucket and find the uri extension
        sf = ShareFile(abs_sharefile)
        bp = ImmediateReadBucketProxy(sf)

        expiration_time = min([lease.expiration_time
                               for lease in sf.get_leases()])
        expiration = max(0, expiration_time - now)

        UEB_data = call(bp.get_uri_extension)
        unpacked = uri.unpack_extension_readable(UEB_data)

        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]

        print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
                                            str(ueb_hash, "utf-8"),
                                            expiration,
                                            quote_output(abs_sharefile)),
              file=out)
    else:
        print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile),
              file=out)

    f.close()
def dump_immutable_chk_share(f, out, options):
    from allmydata import uri
    from allmydata.util import base32
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util.encodingutil import quote_output, to_bytes

    # use a ReadBucketProxy to parse the bucket and find the uri extension
    bp = ReadBucketProxy(None, None, '')
    offsets = bp._parse_offsets(f.read_share_data(0, 0x44))
    print("%20s: %d" % ("version", bp._version), file=out)
    seek = offsets['uri_extension']
    length = struct.unpack(bp._fieldstruct,
                           f.read_share_data(seek, bp._fieldsize))[0]
    seek += bp._fieldsize
    UEB_data = f.read_share_data(seek, length)

    unpacked = uri.unpack_extension_readable(UEB_data)
    keys1 = ("size", "num_segments", "segment_size",
             "needed_shares", "total_shares")
    keys2 = ("codec_name", "codec_params", "tail_codec_params")
    keys3 = ("plaintext_hash", "plaintext_root_hash",
             "crypttext_hash", "crypttext_root_hash",
             "share_root_hash", "UEB_hash")
    display_keys = {"size": "file_size"}

    def to_string(v):
        if isinstance(v, bytes):
            return str(v, "utf-8")
        else:
            return str(v)

    for k in keys1:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print("%20s: %s" % (dk, to_string(unpacked[k])), file=out)
    print(file=out)
    for k in keys2:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print("%20s: %s" % (dk, to_string(unpacked[k])), file=out)
    print(file=out)
    for k in keys3:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print("%20s: %s" % (dk, to_string(unpacked[k])), file=out)

    leftover = set(unpacked.keys()) - set(keys1 + keys2 + keys3)
    if leftover:
        print(file=out)
        print("LEFTOVER:", file=out)
        for k in sorted(leftover):
            print("%20s: %s" % (k, to_string(unpacked[k])), file=out)

    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2:
        piece = to_bytes(pieces[-2])
        if base32.could_be_base32_encoded(piece):
            storage_index = base32.a2b(piece)
            uri_extension_hash = base32.a2b(unpacked["UEB_hash"])
            u = uri.CHKFileVerifierURI(storage_index, uri_extension_hash,
                                       unpacked["needed_shares"],
                                       unpacked["total_shares"],
                                       unpacked["size"])
            verify_cap = u.to_string()
            print("%20s: %s" % ("verify-cap",
                                quote_output(verify_cap, quotemarks=False)),
                  file=out)

    sizes = {}
    sizes['data'] = (offsets['plaintext_hash_tree'] - offsets['data'])
    sizes['validation'] = (offsets['uri_extension'] -
                           offsets['plaintext_hash_tree'])
    sizes['uri-extension'] = len(UEB_data)
    print(file=out)
    print(" Size of data within the share:", file=out)
    for k in sorted(sizes):
        print("%20s: %s" % (k, sizes[k]), file=out)

    if options['offsets']:
        print(file=out)
        print(" Section Offsets:", file=out)
        print("%20s: %s" % ("share data", f._data_offset), file=out)
        for k in ["data", "plaintext_hash_tree", "crypttext_hash_tree",
                  "block_hashes", "share_hashes", "uri_extension"]:
            name = {"data": "block data"}.get(k, k)
            offset = f._data_offset + offsets[k]
            print(" %20s: %s (0x%x)" % (name, offset, offset), file=out)
        print("%20s: %s" % ("leases", f._lease_offset), file=out)
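# A hedged invocation sketch for dump_immutable_chk_share(), assuming 'f' is
# the ShareFile wrapper from allmydata.storage.immutable (which provides
# read_share_data, _data_offset, and _lease_offset) and that 'options' is a
# plain mapping standing in for the CLI's options object. The path is a
# hypothetical placeholder.
import sys
from allmydata.storage.immutable import ShareFile

share_path = "/var/tahoe/storage/shares/aa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/0"
dump_immutable_chk_share(ShareFile(share_path), sys.stdout,
                         {"filename": share_path, "offsets": True})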
def dump_immutable_chk_share(f, out, options):
    from allmydata import uri
    from allmydata.util import base32
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util.encodingutil import quote_output, to_str

    # use a ReadBucketProxy to parse the bucket and find the uri extension
    bp = ReadBucketProxy(None, '', '')
    offsets = bp._parse_offsets(f.read_share_data(0, 0x44))
    print >>out, "%20s: %d" % ("version", bp._version)
    seek = offsets['uri_extension']
    length = struct.unpack(bp._fieldstruct,
                           f.read_share_data(seek, bp._fieldsize))[0]
    seek += bp._fieldsize
    UEB_data = f.read_share_data(seek, length)

    unpacked = uri.unpack_extension_readable(UEB_data)
    keys1 = ("size", "num_segments", "segment_size",
             "needed_shares", "total_shares")
    keys2 = ("codec_name", "codec_params", "tail_codec_params")
    keys3 = ("plaintext_hash", "plaintext_root_hash",
             "crypttext_hash", "crypttext_root_hash",
             "share_root_hash", "UEB_hash")
    display_keys = {"size": "file_size"}
    for k in keys1:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print >>out, "%20s: %s" % (dk, unpacked[k])
    print >>out
    for k in keys2:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print >>out, "%20s: %s" % (dk, unpacked[k])
    print >>out
    for k in keys3:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print >>out, "%20s: %s" % (dk, unpacked[k])

    leftover = set(unpacked.keys()) - set(keys1 + keys2 + keys3)
    if leftover:
        print >>out
        print >>out, "LEFTOVER:"
        for k in sorted(leftover):
            print >>out, "%20s: %s" % (k, unpacked[k])

    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2:
        piece = to_str(pieces[-2])
        if base32.could_be_base32_encoded(piece):
            storage_index = base32.a2b(piece)
            uri_extension_hash = base32.a2b(unpacked["UEB_hash"])
            u = uri.CHKFileVerifierURI(storage_index, uri_extension_hash,
                                       unpacked["needed_shares"],
                                       unpacked["total_shares"],
                                       unpacked["size"])
            verify_cap = u.to_string()
            print >>out, "%20s: %s" % ("verify-cap",
                                       quote_output(verify_cap,
                                                    quotemarks=False))

    sizes = {}
    sizes['data'] = (offsets['plaintext_hash_tree'] - offsets['data'])
    sizes['validation'] = (offsets['uri_extension'] -
                           offsets['plaintext_hash_tree'])
    sizes['uri-extension'] = len(UEB_data)
    print >>out
    print >>out, " Size of data within the share:"
    for k in sorted(sizes):
        print >>out, "%20s: %s" % (k, sizes[k])

    if options['offsets']:
        print >>out
        print >>out, " Section Offsets:"
        print >>out, "%20s: %s" % ("share data", f._data_offset)
        for k in ["data", "plaintext_hash_tree", "crypttext_hash_tree",
                  "block_hashes", "share_hashes", "uri_extension"]:
            name = {"data": "block data"}.get(k, k)
            offset = f._data_offset + offsets[k]
            print >>out, " %20s: %s (0x%x)" % (name, offset, offset)
        print >>out, "%20s: %s" % ("leases", f._lease_offset)
ReadBucketProxy.__init__(self, "", "", "") def __repr__(self): return "<ImmediateReadBucketProxy>" def _read(self, offset, size): return defer.succeed(sf.read_share_data(offset, size)) # use a ReadBucketProxy to parse the bucket and find the uri extension sf = ShareFile(abs_sharefile) bp = ImmediateReadBucketProxy(sf) expiration_time = min( [lease.expiration_time for lease in sf.get_leases()] ) expiration = max(0, expiration_time - now) UEB_data = call(bp.get_uri_extension) unpacked = uri.unpack_extension_readable(UEB_data) k = unpacked["needed_shares"] N = unpacked["total_shares"] filesize = unpacked["size"] ueb_hash = unpacked["UEB_hash"] print >>out, "CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize, ueb_hash, expiration, quote_output(abs_sharefile)) else: print >>out, "UNKNOWN really-unknown %s" % quote_output(abs_sharefile) f.close()
        class ImmediateReadBucketProxy(ReadBucketProxy):
            def __init__(self, sf):
                self.sf = sf
                ReadBucketProxy.__init__(self, None, None, "")
            def __repr__(self):
                return "<ImmediateReadBucketProxy>"
            def _read(self, offset, size):
                return defer.succeed(self.sf.read_share_data(offset, size))

        # use a ReadBucketProxy to parse the bucket and find the uri extension
        sf = ShareFile(abs_sharefile)
        bp = ImmediateReadBucketProxy(sf)

        expiration_time = min([lease.expiration_time
                               for lease in sf.get_leases()])
        expiration = max(0, expiration_time - now)

        UEB_data = call(bp.get_uri_extension)
        unpacked = uri.unpack_extension_readable(UEB_data)

        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]

        print >>out, "CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
                                                   ueb_hash, expiration,
                                                   quote_output(abs_sharefile))

    else:
        print >>out, "UNKNOWN really-unknown %s" % quote_output(abs_sharefile)

    f.close()
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
    from allmydata import uri
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.immutable.layout import ReadBucketProxy
    from allmydata.util import base32
    from allmydata.util.encodingutil import quote_output
    import struct

    f = open(abs_sharefile, "rb")
    prefix = f.read(32)

    if prefix == MutableShareFile.MAGIC:
        # mutable share
        m = MutableShareFile(abs_sharefile)
        WE, nodeid = m._read_write_enabler_and_nodeid(f)
        data_length = m._read_data_length(f)
        expiration_time = min([lease.expiration_time
                               for (i, lease) in m._enumerate_leases(f)])
        expiration = max(0, expiration_time - now)

        share_type = "unknown"
        f.seek(m.DATA_OFFSET)
        version = f.read(1)
        if version == "\x00":
            # this slot contains an SDMF share
            share_type = "SDMF"
        elif version == "\x01":
            share_type = "MDMF"

        if share_type == "SDMF":
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, 2000))

            try:
                pieces = unpack_share(data)
            except NeedMoreDataError as e:
                # retry once with the larger size
                size = e.needed_bytes
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, size))
                pieces = unpack_share(data)
            (seqnum, root_hash, IV, k, N, segsize, datalen,
             pubkey, signature, share_hash_chain, block_hash_tree,
             share_data, enc_privkey) = pieces

            print("SDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, base32.b2a(root_hash),
                   expiration, quote_output(abs_sharefile)), file=out)
        elif share_type == "MDMF":
            from allmydata.mutable.layout import MDMFSlotReadProxy
            fake_shnum = 0
            # TODO: factor this out with dump_MDMF_share()
            class ShareDumper(MDMFSlotReadProxy):
                def _read(self, readvs, force_remote=False, queue=False):
                    data = []
                    for (where, length) in readvs:
                        f.seek(m.DATA_OFFSET + where)
                        data.append(f.read(length))
                    return defer.succeed({fake_shnum: data})

            p = ShareDumper(None, "fake-si", fake_shnum)
            def extract(func):
                stash = []
                # these methods return Deferreds, but we happen to know that
                # they run synchronously when not actually talking to a
                # remote server
                d = func()
                d.addCallback(stash.append)
                return stash[0]

            verinfo = extract(p.get_verinfo)
            (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
             offsets) = verinfo
            print("MDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, base32.b2a(root_hash),
                   expiration, quote_output(abs_sharefile)), file=out)
        else:
            print("UNKNOWN mutable %s" % quote_output(abs_sharefile),
                  file=out)

    elif struct.unpack(">L", prefix[:4]) == (1,):
        # immutable

        class ImmediateReadBucketProxy(ReadBucketProxy):
            def __init__(self, sf):
                self.sf = sf
                ReadBucketProxy.__init__(self, None, None, "")
            def __repr__(self):
                return "<ImmediateReadBucketProxy>"
            def _read(self, offset, size):
                return defer.succeed(self.sf.read_share_data(offset, size))

        # use a ReadBucketProxy to parse the bucket and find the uri extension
        sf = ShareFile(abs_sharefile)
        bp = ImmediateReadBucketProxy(sf)

        expiration_time = min([lease.expiration_time
                               for lease in sf.get_leases()])
        expiration = max(0, expiration_time - now)

        UEB_data = call(bp.get_uri_extension)
        unpacked = uri.unpack_extension_readable(UEB_data)

        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]

        print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize, ueb_hash,
                                            expiration,
                                            quote_output(abs_sharefile)),
              file=out)

    else:
        print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile),
              file=out)

    f.close()