Beispiel #1
0
    def _parse_and_store_UEB(self, UEB_s):
        """Unpack the serialized UEB and install what it tells us on self.

        Sets segment_size (and notifies self._segsize_observers), copies the
        derived size fields returned by self._calculate_sizes(), builds the
        shared CRSDecoder, replaces the guessed ciphertext hash tree with a
        real one, and seeds both hash trees with their root hashes.

        The encoding parameters (needed/total shares) are deliberately taken
        from the verifycap, never from the UEB: the filecap holds the
        authoritative values. The same file can be encoded in more than one
        way, and if an encoder (unwisely) reuses the key it also reuses the
        storage index, so shares of a different encoding may show up. Their
        UEB hashes differ, so those shares end up treated as corrupted. That
        is why the UEB's 'needed_shares' and 'total_shares' entries are
        ignored here.
        """
        ueb = uri.unpack_extension(UEB_s)

        readable_ueb = repr(uri.unpack_extension_readable(UEB_s))
        vcap_string = self._verifycap.to_string()
        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
                ueb=readable_ueb, vcap=vcap_string,
                level=log.NOISY, parent=self._lp, umid="cVqZnA")

        needed_shares = self._verifycap.needed_shares
        total_shares = self._verifycap.total_shares

        self.segment_size = ueb['segment_size']
        self._segsize_observers.fire(self.segment_size)

        # Copy every derived size onto self, in a fixed order.
        sizes = self._calculate_sizes(self.segment_size)
        for field in ("tail_segment_size", "tail_segment_padded",
                      "num_segments", "block_size", "tail_block_size"):
            setattr(self, field, sizes[field])
        log.msg("actual sizes: %s" % (sizes,),
                level=log.NOISY, parent=self._lp, umid="PY6P5Q")

        guess_was_right = (self.segment_size == self.guessed_segment_size
                           and self.num_segments == self.guessed_num_segments)
        if not guess_was_right:
            log.msg("my guess was wrong! Extra round trips for me.",
                    level=log.NOISY, parent=self._lp, umid="tb7RJw")
        else:
            log.msg("my guess was right!",
                    level=log.NOISY, parent=self._lp, umid="x340Ow")

        # Building a zfec decoder is cheap (3-of-10 takes 15us on the
        # original author's laptop, 25-of-100 is 900us, 3-of-255 is 97us,
        # 25-of-255 is 2.5ms, worst-case 254-of-255 is 9.3ms), but one
        # instance is still shared by every segment except the last.
        self._codec = decoder = CRSDecoder()
        decoder.set_params(self.segment_size, needed_shares, total_shares)

        # The ciphertext hash tree root is mandatory: it guarantees that at
        # most one ciphertext matches this read-cap or verify-cap. Share
        # integrity checks alone cannot stop the original encoder from
        # producing some shares of file A and other shares of file B. Until
        # now self.ciphertext_hash_tree was only a guess; this is the real
        # one.
        segment_count = self.num_segments
        self.ciphertext_hash_tree = IncompleteHashTree(segment_count)
        self.ciphertext_hash_tree_leaves = self.num_segments
        crypttext_root = {0: ueb['crypttext_root_hash']}
        self.ciphertext_hash_tree.set_hashes(crypttext_root)

        share_root = {0: ueb['share_root_hash']}
        self.share_hash_tree.set_hashes(share_root)
Beispiel #2
0
    def _parse_and_store_UEB(self, UEB_s):
        """Unpack the serialized UEB and install what it tells us on self.

        Sets segment_size (and notifies self._segsize_observers), copies the
        derived size fields returned by self._calculate_sizes(), builds the
        shared CRSDecoder, replaces the guessed ciphertext hash tree with a
        real one, and seeds both hash trees with their root hashes.

        The encoding parameters (needed/total shares) are deliberately taken
        from the verifycap, never from the UEB: the filecap holds the
        authoritative values. The same file can be encoded in more than one
        way, and if an encoder (unwisely) reuses the key it also reuses the
        storage index, so shares of a different encoding may show up. Their
        UEB hashes differ, so those shares end up treated as corrupted. That
        is why the UEB's 'needed_shares' and 'total_shares' entries are
        ignored here.
        """
        ueb = uri.unpack_extension(UEB_s)

        readable_ueb = repr(uri.unpack_extension_readable(UEB_s))
        vcap_string = self._verifycap.to_string()
        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
                ueb=readable_ueb, vcap=vcap_string,
                level=log.NOISY, parent=self._lp, umid="cVqZnA")

        needed_shares = self._verifycap.needed_shares
        total_shares = self._verifycap.total_shares

        self.segment_size = ueb['segment_size']
        self._segsize_observers.fire(self.segment_size)

        # Copy every derived size onto self, in a fixed order.
        sizes = self._calculate_sizes(self.segment_size)
        for field in ("tail_segment_size", "tail_segment_padded",
                      "num_segments", "block_size", "tail_block_size"):
            setattr(self, field, sizes[field])
        log.msg("actual sizes: %s" % (sizes,),
                level=log.NOISY, parent=self._lp, umid="PY6P5Q")

        guess_was_right = (self.segment_size == self.guessed_segment_size
                           and self.num_segments == self.guessed_num_segments)
        if not guess_was_right:
            log.msg("my guess was wrong! Extra round trips for me.",
                    level=log.NOISY, parent=self._lp, umid="tb7RJw")
        else:
            log.msg("my guess was right!",
                    level=log.NOISY, parent=self._lp, umid="x340Ow")

        # Building a zfec decoder is cheap (3-of-10 takes 15us on the
        # original author's laptop, 25-of-100 is 900us, 3-of-255 is 97us,
        # 25-of-255 is 2.5ms, worst-case 254-of-255 is 9.3ms), but one
        # instance is still shared by every segment except the last.
        self._codec = decoder = CRSDecoder()
        decoder.set_params(self.segment_size, needed_shares, total_shares)

        # The ciphertext hash tree root is mandatory: it guarantees that at
        # most one ciphertext matches this read-cap or verify-cap. Share
        # integrity checks alone cannot stop the original encoder from
        # producing some shares of file A and other shares of file B. Until
        # now self.ciphertext_hash_tree was only a guess; this is the real
        # one.
        segment_count = self.num_segments
        self.ciphertext_hash_tree = IncompleteHashTree(segment_count)
        self.ciphertext_hash_tree_leaves = self.num_segments
        crypttext_root = {0: ueb['crypttext_root_hash']}
        self.ciphertext_hash_tree.set_hashes(crypttext_root)

        share_root = {0: ueb['share_root_hash']}
        self.share_hash_tree.set_hashes(share_root)
Beispiel #3
0
    def test_pack(self):
        """Round-trip a dict through uri.pack_extension and both unpackers.

        unpack_extension must hand back the original values; the readable
        variant must hand back the same values with the hash base32-encoded,
        plus a 'UEB_hash' entry equal to the base32 uri_extension_hash of
        the packed string.
        """
        big_hash = hashutil.tagged_hash("foo", "bar")
        original = {
            "stuff": "value",
            "size": 12,
            "needed_shares": 3,
            "big_hash": big_hash,
        }
        packed = uri.pack_extension(original)

        # Raw unpacking returns the values unchanged.
        unpacked = uri.unpack_extension(packed)
        for key, expected in (("stuff", "value"),
                              ("size", 12),
                              ("big_hash", big_hash)):
            self.failUnlessReallyEqual(unpacked[key], expected)

        # Readable unpacking keeps plain values and base32-encodes hashes.
        readable = uri.unpack_extension_readable(packed)
        for key, expected in (("needed_shares", 3),
                              ("stuff", "value"),
                              ("size", 12)):
            self.failUnlessReallyEqual(readable[key], expected)
        encoded_big_hash = base32.b2a(big_hash)
        self.failUnlessReallyEqual(readable["big_hash"], encoded_big_hash)
        encoded_ueb_hash = base32.b2a(hashutil.uri_extension_hash(packed))
        self.failUnlessReallyEqual(readable["UEB_hash"], encoded_ueb_hash)
Beispiel #4
0
    def test_pack(self):
        """Round-trip a dict through uri.pack_extension and both unpackers.

        unpack_extension must hand back the original values; the readable
        variant must hand back the same values with the hash base32-encoded,
        plus a 'UEB_hash' entry equal to the base32 uri_extension_hash of
        the packed string.
        """
        big_hash = hashutil.tagged_hash("foo", "bar")
        original = {"stuff": "value", "size": 12, "needed_shares": 3,
                    "big_hash": big_hash}
        packed = uri.pack_extension(original)

        # Raw unpacking returns the values unchanged.
        unpacked = uri.unpack_extension(packed)
        raw_expectations = [
            ("stuff", "value"),
            ("size", 12),
            ("big_hash", big_hash),
        ]
        for key, expected in raw_expectations:
            self.failUnlessReallyEqual(unpacked[key], expected)

        # Readable unpacking keeps plain values and base32-encodes hashes.
        readable = uri.unpack_extension_readable(packed)
        readable_expectations = [
            ("needed_shares", 3),
            ("stuff", "value"),
            ("size", 12),
        ]
        for key, expected in readable_expectations:
            self.failUnlessReallyEqual(readable[key], expected)
        encoded_big_hash = base32.b2a(big_hash)
        self.failUnlessReallyEqual(readable["big_hash"], encoded_big_hash)
        encoded_ueb_hash = base32.b2a(hashutil.uri_extension_hash(packed))
        self.failUnlessReallyEqual(readable["UEB_hash"], encoded_ueb_hash)
Beispiel #5
0
    def _parse_and_validate(self, data):
        """Unpack a serialized UEB and validate it against self._verifycap.

        Stores on self: share_size, segment_size, block_size, num_segments,
        tail_data_size, tail_segment_size, crypttext_root_hash,
        share_root_hash and, when present in the UEB, crypttext_hash.

        :param data: the serialized URI extension block, as accepted by
            uri.unpack_extension().
        :returns: self.
        :raises BadURIExtension: if an optional-but-redundant field
            (codec_params, tail_codec_params, num_segments, size,
            needed_shares, total_shares) disagrees with the verify-cap, or
            if crypttext_hash has the wrong length.
        :raises UnsupportedErasureCodec: if codec_name is present and is not
            "crs".

        The mandatory fields (segment_size, crypttext_root_hash,
        share_root_hash) are read with plain indexing, so a UEB lacking one
        of them fails at that lookup.
        """
        vcap = self._verifycap
        self.share_size = mathutil.div_ceil(vcap.size, vcap.needed_shares)

        d = uri.unpack_extension(data)

        # There are several kinds of things that can be found in a UEB.
        # First, things that we really need to learn from the UEB in order to
        # do this download. Next: things which are optional but not redundant
        # -- if they are present in the UEB they will get used. Next, things
        # that are optional and redundant. These things are required to be
        # consistent: they don't have to be in the UEB, but if they are in
        # the UEB then they will be checked for consistency with the
        # already-known facts, and if they are inconsistent then an exception
        # will be raised. These things aren't actually used -- they are just
        # tested for consistency and ignored. Finally: things which are
        # deprecated -- they ought not be in the UEB at all, and if they are
        # present then a warning will be logged but they are otherwise
        # ignored.

        # First, things that we really need to learn from the UEB:
        # segment_size, crypttext_root_hash, and share_root_hash.
        self.segment_size = d['segment_size']

        self.block_size = mathutil.div_ceil(self.segment_size,
                                            vcap.needed_shares)
        self.num_segments = mathutil.div_ceil(vcap.size, self.segment_size)

        # A file whose size is an exact multiple of segment_size has a
        # full-sized tail segment rather than an empty one.
        self.tail_data_size = vcap.size % self.segment_size
        if not self.tail_data_size:
            self.tail_data_size = self.segment_size
        # padding for erasure code
        self.tail_segment_size = mathutil.next_multiple(self.tail_data_size,
                                                        vcap.needed_shares)

        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B.
        self.crypttext_root_hash = d['crypttext_root_hash']

        self.share_root_hash = d['share_root_hash']

        # Next: things that are optional and not redundant: crypttext_hash
        if 'crypttext_hash' in d:
            self.crypttext_hash = d['crypttext_hash']
            if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
                raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),))

        # Next: things that are optional, redundant, and required to be
        # consistent: codec_name, codec_params, tail_codec_params,
        # num_segments, size, needed_shares, total_shares
        if 'codec_name' in d and d['codec_name'] != "crs":
            raise UnsupportedErasureCodec(d['codec_name'])

        if 'codec_params' in d:
            ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
            if ucpss != self.segment_size:
                raise BadURIExtension("inconsistent erasure code params: "
                                      "ucpss: %s != self.segment_size: %s" %
                                      (ucpss, self.segment_size))
            if ucpns != vcap.needed_shares:
                raise BadURIExtension("inconsistent erasure code params: ucpns: %s != "
                                      "self._verifycap.needed_shares: %s" %
                                      (ucpns, vcap.needed_shares))
            if ucpts != vcap.total_shares:
                raise BadURIExtension("inconsistent erasure code params: ucpts: %s != "
                                      "self._verifycap.total_shares: %s" %
                                      (ucpts, vcap.total_shares))

        if 'tail_codec_params' in d:
            utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
            if utcpss != self.tail_segment_size:
                raise BadURIExtension("inconsistent erasure code params: utcpss: %s != "
                                      "self.tail_segment_size: %s, self._verifycap.size: %s, "
                                      "self.segment_size: %s, self._verifycap.needed_shares: %s"
                                      % (utcpss, self.tail_segment_size, vcap.size,
                                         self.segment_size, vcap.needed_shares))
            if utcpns != vcap.needed_shares:
                raise BadURIExtension("inconsistent erasure code params: utcpns: %s != "
                                      "self._verifycap.needed_shares: %s" %
                                      (utcpns, vcap.needed_shares))
            if utcpts != vcap.total_shares:
                raise BadURIExtension("inconsistent erasure code params: utcpts: %s != "
                                      "self._verifycap.total_shares: %s" %
                                      (utcpts, vcap.total_shares))

        if 'num_segments' in d and d['num_segments'] != self.num_segments:
            raise BadURIExtension("inconsistent num_segments: size: %s, "
                                  "segment_size: %s, computed_num_segments: %s, "
                                  "ueb_num_segments: %s" %
                                  (vcap.size, self.segment_size,
                                   self.num_segments, d['num_segments']))

        if 'size' in d and d['size'] != vcap.size:
            raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" %
                                  (vcap.size, d['size']))

        if 'needed_shares' in d and d['needed_shares'] != vcap.needed_shares:
            # Report the URI's needed_shares (the value actually compared),
            # not total_shares.
            raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB "
                                  "needed shares: %s" %
                                  (vcap.needed_shares, d['needed_shares']))

        if 'total_shares' in d and d['total_shares'] != vcap.total_shares:
            raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB "
                                  "total shares: %s" %
                                  (vcap.total_shares, d['total_shares']))

        # Finally, things that are deprecated and ignored: plaintext_hash,
        # plaintext_root_hash
        if d.get('plaintext_hash'):
            log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons "
                    "and is no longer used.  Ignoring.  %s" % (self,))
        if d.get('plaintext_root_hash'):
            log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security "
                    "reasons and is no longer used.  Ignoring.  %s" % (self,))

        return self
Beispiel #6
0
 def _got_uri_extension(self, ueb):
     """Record the hash and the unpacked contents of a serialized UEB.

     Stores hashutil.uri_extension_hash(ueb) in self._ueb_hash and the
     result of uri.unpack_extension(ueb) in self._ueb_data. Returns None.
     """
     self.log("_got_uri_extension", level=log.NOISY)
     # The hash is taken over the serialized form, before unpacking.
     self._ueb_hash = hashutil.uri_extension_hash(ueb)
     self._ueb_data = uri.unpack_extension(ueb)
Beispiel #7
0
    def _parse_and_validate(self, data):
        """Unpack a serialized UEB and validate it against self._verifycap.

        Stores on self: share_size, segment_size, block_size, num_segments,
        tail_data_size, tail_segment_size, crypttext_root_hash,
        share_root_hash and, when present in the UEB, crypttext_hash.

        :param data: the serialized URI extension block, as accepted by
            uri.unpack_extension().
        :returns: self.
        :raises BadURIExtension: if an optional-but-redundant field
            (codec_params, tail_codec_params, num_segments, size,
            needed_shares, total_shares) disagrees with the verify-cap, or
            if crypttext_hash has the wrong length.
        :raises UnsupportedErasureCodec: if codec_name is present and is not
            "crs".

        The mandatory fields (segment_size, crypttext_root_hash,
        share_root_hash) are read with plain indexing, so a UEB lacking one
        of them fails at that lookup.
        """
        vcap = self._verifycap
        self.share_size = mathutil.div_ceil(vcap.size, vcap.needed_shares)

        d = uri.unpack_extension(data)

        # There are several kinds of things that can be found in a UEB.
        # First, things that we really need to learn from the UEB in order to
        # do this download. Next: things which are optional but not redundant
        # -- if they are present in the UEB they will get used. Next, things
        # that are optional and redundant. These things are required to be
        # consistent: they don't have to be in the UEB, but if they are in
        # the UEB then they will be checked for consistency with the
        # already-known facts, and if they are inconsistent then an exception
        # will be raised. These things aren't actually used -- they are just
        # tested for consistency and ignored. Finally: things which are
        # deprecated -- they ought not be in the UEB at all, and if they are
        # present then a warning will be logged but they are otherwise
        # ignored.

        # First, things that we really need to learn from the UEB:
        # segment_size, crypttext_root_hash, and share_root_hash.
        self.segment_size = d['segment_size']

        self.block_size = mathutil.div_ceil(self.segment_size,
                                            vcap.needed_shares)
        self.num_segments = mathutil.div_ceil(vcap.size, self.segment_size)

        # A file whose size is an exact multiple of segment_size has a
        # full-sized tail segment rather than an empty one.
        self.tail_data_size = vcap.size % self.segment_size
        if not self.tail_data_size:
            self.tail_data_size = self.segment_size
        # padding for erasure code
        self.tail_segment_size = mathutil.next_multiple(
            self.tail_data_size, vcap.needed_shares)

        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B.
        self.crypttext_root_hash = d['crypttext_root_hash']

        self.share_root_hash = d['share_root_hash']

        # Next: things that are optional and not redundant: crypttext_hash
        if 'crypttext_hash' in d:
            self.crypttext_hash = d['crypttext_hash']
            if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
                raise BadURIExtension(
                    'crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes'
                    % (len(self.crypttext_hash), ))

        # Next: things that are optional, redundant, and required to be
        # consistent: codec_name, codec_params, tail_codec_params,
        # num_segments, size, needed_shares, total_shares
        if 'codec_name' in d and d['codec_name'] != "crs":
            raise UnsupportedErasureCodec(d['codec_name'])

        if 'codec_params' in d:
            ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
            if ucpss != self.segment_size:
                raise BadURIExtension("inconsistent erasure code params: "
                                      "ucpss: %s != self.segment_size: %s" %
                                      (ucpss, self.segment_size))
            if ucpns != vcap.needed_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: ucpns: %s != "
                    "self._verifycap.needed_shares: %s" %
                    (ucpns, vcap.needed_shares))
            if ucpts != vcap.total_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: ucpts: %s != "
                    "self._verifycap.total_shares: %s" %
                    (ucpts, vcap.total_shares))

        if 'tail_codec_params' in d:
            utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
            if utcpss != self.tail_segment_size:
                raise BadURIExtension(
                    "inconsistent erasure code params: utcpss: %s != "
                    "self.tail_segment_size: %s, self._verifycap.size: %s, "
                    "self.segment_size: %s, self._verifycap.needed_shares: %s"
                    % (utcpss, self.tail_segment_size, vcap.size,
                       self.segment_size, vcap.needed_shares))
            if utcpns != vcap.needed_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: utcpns: %s != "
                    "self._verifycap.needed_shares: %s" %
                    (utcpns, vcap.needed_shares))
            if utcpts != vcap.total_shares:
                raise BadURIExtension(
                    "inconsistent erasure code params: utcpts: %s != "
                    "self._verifycap.total_shares: %s" %
                    (utcpts, vcap.total_shares))

        if 'num_segments' in d and d['num_segments'] != self.num_segments:
            raise BadURIExtension(
                "inconsistent num_segments: size: %s, "
                "segment_size: %s, computed_num_segments: %s, "
                "ueb_num_segments: %s" %
                (vcap.size, self.segment_size,
                 self.num_segments, d['num_segments']))

        if 'size' in d and d['size'] != vcap.size:
            raise BadURIExtension(
                "inconsistent size: URI size: %s, UEB size: %s" %
                (vcap.size, d['size']))

        if 'needed_shares' in d and d['needed_shares'] != vcap.needed_shares:
            # Report the URI's needed_shares (the value actually compared),
            # not total_shares.
            raise BadURIExtension(
                "inconsistent needed shares: URI needed shares: %s, UEB "
                "needed shares: %s" %
                (vcap.needed_shares, d['needed_shares']))

        if 'total_shares' in d and d['total_shares'] != vcap.total_shares:
            raise BadURIExtension(
                "inconsistent total shares: URI total shares: %s, UEB "
                "total shares: %s" %
                (vcap.total_shares, d['total_shares']))

        # Finally, things that are deprecated and ignored: plaintext_hash,
        # plaintext_root_hash
        if d.get('plaintext_hash'):
            log.msg(
                "Found plaintext_hash in UEB. This field is deprecated for security reasons "
                "and is no longer used.  Ignoring.  %s" % (self, ))
        if d.get('plaintext_root_hash'):
            log.msg(
                "Found plaintext_root_hash in UEB. This field is deprecated for security "
                "reasons and is no longer used.  Ignoring.  %s" % (self, ))

        return self
Beispiel #8
0
 def _got_uri_extension(self, ueb):
     """Record the hash and the unpacked contents of a serialized UEB.

     Stores hashutil.uri_extension_hash(ueb) in self._ueb_hash and the
     result of uri.unpack_extension(ueb) in self._ueb_data. Returns None.
     """
     self.log("_got_uri_extension", level=log.NOISY)
     # The hash is taken over the serialized form, before unpacking.
     self._ueb_hash = hashutil.uri_extension_hash(ueb)
     self._ueb_data = uri.unpack_extension(ueb)