def do_test(self, size, required_shares, max_shares, fewer_shares=None):
    """Round-trip `size` bytes through the CRS erasure codec.

    Encodes `required_shares` random data pieces into `max_shares` shares,
    then decodes from several minimal subsets of shares (first-k, random-k,
    and twice through one reused decoder) and checks the recovered data
    matches the original. If `fewer_shares` is given, also verifies that
    encode()'s desired_shareids= parameter yields exactly those shares.

    Returns the Deferred that fires when all checks have run.
    """
    data0s = [os.urandom(mathutil.div_ceil(size, required_shares))
              for i in range(required_shares)]
    enc = CRSEncoder()
    enc.set_params(size, required_shares, max_shares)
    params = enc.get_params()
    assert params == (size, required_shares, max_shares)
    serialized_params = enc.get_serialized_params()
    self.assertEqual(parse_params(serialized_params), params)
    log.msg("params: %s" % (params,))
    d = enc.encode(data0s)

    def _done_encoding_all(shares_and_shareids):
        # stash the full share set for the later decode passes
        (shares, shareids) = shares_and_shareids
        self.failUnlessEqual(len(shares), max_shares)
        self.shares = shares
        self.shareids = shareids
    d.addCallback(_done_encoding_all)

    if fewer_shares is not None:
        # also validate that the desired_shareids= parameter works
        desired_shareids = random.sample(list(range(max_shares)), fewer_shares)
        d.addCallback(lambda res: enc.encode(data0s, desired_shareids))
        def _check_fewer_shares(some_shares_and_their_shareids):
            (some_shares, their_shareids) = some_shares_and_their_shareids
            self.failUnlessEqual(tuple(their_shareids), tuple(desired_shareids))
        d.addCallback(_check_fewer_shares)

    def _decode(shares_and_shareids):
        # decode one (shares, shareids) pair with a fresh decoder
        (shares, shareids) = shares_and_shareids
        dec = CRSDecoder()
        dec.set_params(*params)
        d1 = dec.decode(shares, shareids)
        return d1

    def _check_data(decoded_shares):
        # decoded pieces must equal the original input pieces exactly
        self.failUnlessEqual(len(b''.join(decoded_shares)),
                             len(b''.join(data0s)))
        self.failUnlessEqual(len(decoded_shares), len(data0s))
        for (i, (x, y)) in enumerate(zip(data0s, decoded_shares)):
            self.failUnlessEqual(x, y, "%s: %r != %r....\nfirst share was %r"
                                 % (str(i), x, y, data0s[0],))
        self.failUnless(b''.join(decoded_shares) == b''.join(data0s),
                        "%s" % ("???",))

    def _decode_some(res):
        log.msg("_decode_some")
        # decode with a minimal subset of the shares
        some_shares = self.shares[:required_shares]
        some_shareids = self.shareids[:required_shares]
        return _decode((some_shares, some_shareids))
    d.addCallback(_decode_some)
    d.addCallback(_check_data)

    def _decode_some_random(res):
        log.msg("_decode_some_random")
        # use a randomly-selected minimal subset
        l = random.sample(list(zip(self.shares, self.shareids)),
                          required_shares)
        some_shares = [ x[0] for x in l ]
        some_shareids = [ x[1] for x in l ]
        return _decode((some_shares, some_shareids))
    d.addCallback(_decode_some_random)
    d.addCallback(_check_data)

    def _decode_multiple(res):
        log.msg("_decode_multiple")
        # make sure we can re-use the decoder object
        # (the original had a dead `shares1 = random.sample(...)` here that
        # was immediately overwritten; it has been removed)
        sharesl1 = random.sample(list(zip(self.shares, self.shareids)),
                                 required_shares)
        shares1 = [ x[0] for x in sharesl1 ]
        shareids1 = [ x[1] for x in sharesl1 ]
        sharesl2 = random.sample(list(zip(self.shares, self.shareids)),
                                 required_shares)
        shares2 = [ x[0] for x in sharesl2 ]
        shareids2 = [ x[1] for x in sharesl2 ]
        dec = CRSDecoder()
        dec.set_params(*params)
        d1 = dec.decode(shares1, shareids1)
        d1.addCallback(_check_data)
        d1.addCallback(lambda res: dec.decode(shares2, shareids2))
        d1.addCallback(_check_data)
        return d1
    d.addCallback(_decode_multiple)

    return d
def _parse_and_validate(self, data):
    """Parse a serialized URI Extension Block (UEB) and validate it.

    Learns the segment/block geometry and the mandatory hash-tree roots
    from the UEB, checks every optional-but-redundant field for
    consistency with self._verifycap, and logs (then ignores) deprecated
    fields. Returns self.

    Raises BadURIExtension on any inconsistency, UnsupportedErasureCodec
    for a codec other than "crs".
    """
    self.share_size = mathutil.div_ceil(self._verifycap.size,
                                        self._verifycap.needed_shares)

    d = uri.unpack_extension(data)

    # There are several kinds of things that can be found in a UEB.
    # First, things that we really need to learn from the UEB in order to
    # do this download. Next: things which are optional but not redundant
    # -- if they are present in the UEB they will get used. Next, things
    # that are optional and redundant. These things are required to be
    # consistent: they don't have to be in the UEB, but if they are in
    # the UEB then they will be checked for consistency with the
    # already-known facts, and if they are inconsistent then an exception
    # will be raised. These things aren't actually used -- they are just
    # tested for consistency and ignored. Finally: things which are
    # deprecated -- they ought not be in the UEB at all, and if they are
    # present then a warning will be logged but they are otherwise
    # ignored.

    # First, things that we really need to learn from the UEB:
    # segment_size, crypttext_root_hash, and share_root_hash.
    self.segment_size = d['segment_size']

    self.block_size = mathutil.div_ceil(self.segment_size,
                                        self._verifycap.needed_shares)
    self.num_segments = mathutil.div_ceil(self._verifycap.size,
                                          self.segment_size)

    self.tail_data_size = self._verifycap.size % self.segment_size
    if not self.tail_data_size:
        self.tail_data_size = self.segment_size
    # padding for erasure code
    self.tail_segment_size = mathutil.next_multiple(self.tail_data_size,
                                                    self._verifycap.needed_shares)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B.
    self.crypttext_root_hash = d['crypttext_root_hash']

    self.share_root_hash = d['share_root_hash']

    # Next: things that are optional and not redundant: crypttext_hash
    # NOTE: dict.has_key() was removed in Python 3; use `in` instead.
    if 'crypttext_hash' in d:
        self.crypttext_hash = d['crypttext_hash']
        if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
            raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),))

    # Next: things that are optional, redundant, and required to be
    # consistent: codec_name, codec_params, tail_codec_params,
    # num_segments, size, needed_shares, total_shares
    if 'codec_name' in d:
        if d['codec_name'] != "crs":
            raise UnsupportedErasureCodec(d['codec_name'])

    if 'codec_params' in d:
        ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
        if ucpss != self.segment_size:
            raise BadURIExtension("inconsistent erasure code params: "
                                  "ucpss: %s != self.segment_size: %s" %
                                  (ucpss, self.segment_size))
        if ucpns != self._verifycap.needed_shares:
            raise BadURIExtension("inconsistent erasure code params: ucpns: %s != "
                                  "self._verifycap.needed_shares: %s" %
                                  (ucpns, self._verifycap.needed_shares))
        if ucpts != self._verifycap.total_shares:
            raise BadURIExtension("inconsistent erasure code params: ucpts: %s != "
                                  "self._verifycap.total_shares: %s" %
                                  (ucpts, self._verifycap.total_shares))

    if 'tail_codec_params' in d:
        utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
        if utcpss != self.tail_segment_size:
            raise BadURIExtension("inconsistent erasure code params: utcpss: %s != "
                                  "self.tail_segment_size: %s, self._verifycap.size: %s, "
                                  "self.segment_size: %s, self._verifycap.needed_shares: %s"
                                  % (utcpss, self.tail_segment_size,
                                     self._verifycap.size, self.segment_size,
                                     self._verifycap.needed_shares))
        if utcpns != self._verifycap.needed_shares:
            raise BadURIExtension("inconsistent erasure code params: utcpns: %s != "
                                  "self._verifycap.needed_shares: %s" %
                                  (utcpns, self._verifycap.needed_shares))
        if utcpts != self._verifycap.total_shares:
            raise BadURIExtension("inconsistent erasure code params: utcpts: %s != "
                                  "self._verifycap.total_shares: %s" %
                                  (utcpts, self._verifycap.total_shares))

    if 'num_segments' in d:
        if d['num_segments'] != self.num_segments:
            raise BadURIExtension("inconsistent num_segments: size: %s, "
                                  "segment_size: %s, computed_num_segments: %s, "
                                  "ueb_num_segments: %s" %
                                  (self._verifycap.size, self.segment_size,
                                   self.num_segments, d['num_segments']))

    if 'size' in d:
        if d['size'] != self._verifycap.size:
            raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" %
                                  (self._verifycap.size, d['size']))

    if 'needed_shares' in d:
        if d['needed_shares'] != self._verifycap.needed_shares:
            # BUGFIX: the original formatted total_shares into this
            # message; report the URI's needed_shares instead.
            raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB "
                                  "needed shares: %s" %
                                  (self._verifycap.needed_shares,
                                   d['needed_shares']))

    if 'total_shares' in d:
        if d['total_shares'] != self._verifycap.total_shares:
            raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB "
                                  "total shares: %s" %
                                  (self._verifycap.total_shares,
                                   d['total_shares']))

    # Finally, things that are deprecated and ignored: plaintext_hash,
    # plaintext_root_hash
    if d.get('plaintext_hash'):
        log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons "
                "and is no longer used. Ignoring. %s" % (self,))
    if d.get('plaintext_root_hash'):
        log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security "
                "reasons and is no longer used. Ignoring. %s" % (self,))

    return self
def _parse_and_validate(self, data):
    """Parse a serialized URI Extension Block (UEB) and validate it.

    Learns the segment/block geometry and the mandatory hash-tree roots
    from the UEB, checks every optional-but-redundant field for
    consistency with self._verifycap, and logs (then ignores) deprecated
    fields. Returns self.

    Raises BadURIExtension on any inconsistency, UnsupportedErasureCodec
    for a codec other than "crs".
    """
    self.share_size = mathutil.div_ceil(self._verifycap.size,
                                        self._verifycap.needed_shares)

    d = uri.unpack_extension(data)

    # There are several kinds of things that can be found in a UEB.
    # First, things that we really need to learn from the UEB in order to
    # do this download. Next: things which are optional but not redundant
    # -- if they are present in the UEB they will get used. Next, things
    # that are optional and redundant. These things are required to be
    # consistent: they don't have to be in the UEB, but if they are in
    # the UEB then they will be checked for consistency with the
    # already-known facts, and if they are inconsistent then an exception
    # will be raised. These things aren't actually used -- they are just
    # tested for consistency and ignored. Finally: things which are
    # deprecated -- they ought not be in the UEB at all, and if they are
    # present then a warning will be logged but they are otherwise
    # ignored.

    # First, things that we really need to learn from the UEB:
    # segment_size, crypttext_root_hash, and share_root_hash.
    self.segment_size = d['segment_size']

    self.block_size = mathutil.div_ceil(self.segment_size,
                                        self._verifycap.needed_shares)
    self.num_segments = mathutil.div_ceil(self._verifycap.size,
                                          self.segment_size)

    self.tail_data_size = self._verifycap.size % self.segment_size
    if not self.tail_data_size:
        self.tail_data_size = self.segment_size
    # padding for erasure code
    self.tail_segment_size = mathutil.next_multiple(
        self.tail_data_size, self._verifycap.needed_shares)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B.
    self.crypttext_root_hash = d['crypttext_root_hash']

    self.share_root_hash = d['share_root_hash']

    # Next: things that are optional and not redundant: crypttext_hash
    # NOTE: dict.has_key() was removed in Python 3; use `in` instead.
    if 'crypttext_hash' in d:
        self.crypttext_hash = d['crypttext_hash']
        if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
            raise BadURIExtension(
                'crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash), ))

    # Next: things that are optional, redundant, and required to be
    # consistent: codec_name, codec_params, tail_codec_params,
    # num_segments, size, needed_shares, total_shares
    if 'codec_name' in d:
        if d['codec_name'] != "crs":
            raise UnsupportedErasureCodec(d['codec_name'])

    if 'codec_params' in d:
        ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
        if ucpss != self.segment_size:
            raise BadURIExtension("inconsistent erasure code params: "
                                  "ucpss: %s != self.segment_size: %s" %
                                  (ucpss, self.segment_size))
        if ucpns != self._verifycap.needed_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: ucpns: %s != "
                "self._verifycap.needed_shares: %s" %
                (ucpns, self._verifycap.needed_shares))
        if ucpts != self._verifycap.total_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: ucpts: %s != "
                "self._verifycap.total_shares: %s" %
                (ucpts, self._verifycap.total_shares))

    if 'tail_codec_params' in d:
        utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
        if utcpss != self.tail_segment_size:
            raise BadURIExtension(
                "inconsistent erasure code params: utcpss: %s != "
                "self.tail_segment_size: %s, self._verifycap.size: %s, "
                "self.segment_size: %s, self._verifycap.needed_shares: %s"
                % (utcpss, self.tail_segment_size, self._verifycap.size,
                   self.segment_size, self._verifycap.needed_shares))
        if utcpns != self._verifycap.needed_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: utcpns: %s != "
                "self._verifycap.needed_shares: %s" %
                (utcpns, self._verifycap.needed_shares))
        if utcpts != self._verifycap.total_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: utcpts: %s != "
                "self._verifycap.total_shares: %s" %
                (utcpts, self._verifycap.total_shares))

    if 'num_segments' in d:
        if d['num_segments'] != self.num_segments:
            raise BadURIExtension(
                "inconsistent num_segments: size: %s, "
                "segment_size: %s, computed_num_segments: %s, "
                "ueb_num_segments: %s" %
                (self._verifycap.size, self.segment_size,
                 self.num_segments, d['num_segments']))

    if 'size' in d:
        if d['size'] != self._verifycap.size:
            raise BadURIExtension(
                "inconsistent size: URI size: %s, UEB size: %s" %
                (self._verifycap.size, d['size']))

    if 'needed_shares' in d:
        if d['needed_shares'] != self._verifycap.needed_shares:
            # BUGFIX: the original formatted total_shares into this
            # message; report the URI's needed_shares instead.
            raise BadURIExtension(
                "inconsistent needed shares: URI needed shares: %s, UEB "
                "needed shares: %s" %
                (self._verifycap.needed_shares, d['needed_shares']))

    if 'total_shares' in d:
        if d['total_shares'] != self._verifycap.total_shares:
            raise BadURIExtension(
                "inconsistent total shares: URI total shares: %s, UEB "
                "total shares: %s" %
                (self._verifycap.total_shares, d['total_shares']))

    # Finally, things that are deprecated and ignored: plaintext_hash,
    # plaintext_root_hash
    if d.get('plaintext_hash'):
        log.msg(
            "Found plaintext_hash in UEB. This field is deprecated for security reasons "
            "and is no longer used. Ignoring. %s" % (self, ))
    if d.get('plaintext_root_hash'):
        log.msg(
            "Found plaintext_root_hash in UEB. This field is deprecated for security "
            "reasons and is no longer used. Ignoring. %s" % (self, ))

    return self