def _hashsplit_iter(files, progress):
    """Yield ``(chunk, level)`` pairs for the data read from *files*.

    Blocks produced by ``readfile_iter(files, progress)`` are appended to a
    single ``Buf``; after each append, everything ``_splitbuf`` yields for
    that buffer is passed straight through.  Once the input is exhausted,
    whatever is still held in the buffer is yielded as one last chunk with
    level 0.
    """
    # The splitter relies on a read block being larger than the blob limit.
    assert BLOB_READ_SIZE > BLOB_MAX

    base_bits = _helpers.blobbits()
    # Use a fanout of 128 when the module-level fanout is unset/zero.
    effective_fanout = fanout or 128
    fan_bits = int(math.log(effective_fanout, 2))

    pending = Buf()
    for block in readfile_iter(files, progress):
        pending.put(block)
        for piece in _splitbuf(pending, base_bits, fan_bits):
            yield piece

    # Nothing left over: the generator simply ends.
    if not pending.used():
        return
    yield pending.get(pending.used()), 0
def _hashsplit_iter(files, progress):
    """Generate ``(data, level)`` tuples by splitting the content of *files*.

    Input arrives in blocks from ``readfile_iter(files, progress)``.  Each
    block is added to an accumulating ``Buf`` and the results of
    ``_splitbuf`` on that buffer are re-yielded unchanged.  Any bytes still
    buffered at end of input are emitted as a final tuple whose level is 0.
    """
    # Invariant the splitting loop depends on: reads exceed the blob limit.
    assert BLOB_READ_SIZE > BLOB_MAX

    blob_bits = _helpers.blobbits()
    # A falsy module-level fanout falls back to 128.
    level_bits = int(math.log(fanout or 128, 2))

    accum = Buf()
    for chunk in readfile_iter(files, progress):
        accum.put(chunk)
        for split_result in _splitbuf(accum, blob_bits, level_bits):
            yield split_result

    # Flush the tail that never hit a split point.
    if accum.used():
        tail = accum.get(accum.used())
        yield tail, 0
def test_fanout_behaviour():
    """Check the chunk sizes and levels reported by ``hashsplit_iter``.

    ``_helpers.splitbuf`` is swapped for a deterministic stand-in and the
    ``hashsplit`` tunables (``BLOB_MAX``, ``BLOB_READ_SIZE``, ``fanout``) are
    shrunk so that small hand-built inputs produce predictable results.  The
    patched module attributes are restored in a ``finally`` clause so that a
    failure inside this test cannot leak the patched values into later tests.
    """
    # Drop in replacement for bupsplit, but splitting if the int value of a
    # byte >= BUP_BLOBBITS
    basebits = _helpers.blobbits()

    def splitbuf(buf):
        # Return (offset just past the first splitting byte, that byte's
        # value), or (0, 0) when no byte in buf triggers a split.
        for ofs, b in enumerate(buf, 1):
            b = byte_int(b)
            if b >= basebits:
                return ofs, b
        return 0, 0

    def levels(f):
        # (chunk length, level) for every pair hashsplit_iter yields for f.
        return [(len(b), l)
                for b, l in hashsplit.hashsplit_iter([f], True, None)]

    def z(n):
        # Return a string of n null bytes
        return b'\x00' * n

    def sb(n):
        # Return a byte which will be split with a level of n
        return bytes_from_uint(basebits + n)

    with no_lingering_errors():
        # Remember the real values before patching anything.
        old_splitbuf = _helpers.splitbuf
        old_BLOB_MAX = hashsplit.BLOB_MAX
        old_BLOB_READ_SIZE = hashsplit.BLOB_READ_SIZE
        old_fanout = hashsplit.fanout
        try:
            _helpers.splitbuf = splitbuf
            hashsplit.BLOB_MAX = 4
            hashsplit.BLOB_READ_SIZE = 10
            hashsplit.fanout = 2

            split_never = BytesIO(z(16))
            split_first = BytesIO(z(1) + sb(3) + z(14))
            split_end = BytesIO(z(13) + sb(1) + z(2))
            split_many = BytesIO(sb(1) + z(3) + sb(2) + z(4)
                                 + sb(0) + z(4) + sb(5) + z(1))

            WVPASSEQ(levels(split_never),
                     [(4, 0), (4, 0), (4, 0), (4, 0)])
            WVPASSEQ(levels(split_first),
                     [(2, 3), (4, 0), (4, 0), (4, 0), (2, 0)])
            WVPASSEQ(levels(split_end),
                     [(4, 0), (4, 0), (4, 0), (2, 1), (2, 0)])
            WVPASSEQ(levels(split_many),
                     [(1, 1), (4, 2), (4, 0), (1, 0), (4, 0), (1, 5), (1, 0)])
        finally:
            # Always undo the monkeypatching, even if something above raised.
            _helpers.splitbuf = old_splitbuf
            hashsplit.BLOB_MAX = old_BLOB_MAX
            hashsplit.BLOB_READ_SIZE = old_BLOB_READ_SIZE
            hashsplit.fanout = old_fanout
def test_fanout_behaviour():
    """Check the chunk sizes and levels reported by ``hashsplit_iter``.

    ``_helpers.splitbuf`` is swapped for a deterministic stand-in and the
    ``hashsplit`` tunables (``BLOB_MAX``, ``BLOB_READ_SIZE``, ``fanout``) are
    shrunk so that small hand-built inputs produce predictable results.

    Test data is built from bytes literals and ``bytearray`` so the test
    behaves the same on Python 2 and Python 3 (``BytesIO`` rejects ``str`` on
    Python 3, and iterating bytes there yields ints, which ``ord`` rejects).
    The patched module attributes are restored in a ``finally`` clause so a
    failure here cannot leak the patched values into later tests.
    """
    # Drop in replacement for bupsplit, but splitting if the int value of a
    # byte >= BUP_BLOBBITS
    basebits = _helpers.blobbits()

    def splitbuf(buf):
        # bytearray() iterates as ints on both Python 2 and 3, whatever
        # bytes-like object buf happens to be.
        for ofs, value in enumerate(bytearray(buf), 1):
            if value >= basebits:
                return ofs, value
        return 0, 0

    def levels(f):
        # (chunk length, level) for every pair hashsplit_iter yields for f.
        return [(len(b), l)
                for b, l in hashsplit.hashsplit_iter([f], True, None)]

    def z(n):
        # Return a string of n null bytes
        return b'\x00' * n

    def sb(n):
        # Return a byte which will be split with a level of n
        return bytes(bytearray([basebits + n]))

    with no_lingering_errors():
        # Remember the real values before patching anything.
        old_splitbuf = _helpers.splitbuf
        old_BLOB_MAX = hashsplit.BLOB_MAX
        old_BLOB_READ_SIZE = hashsplit.BLOB_READ_SIZE
        old_fanout = hashsplit.fanout
        try:
            _helpers.splitbuf = splitbuf
            hashsplit.BLOB_MAX = 4
            hashsplit.BLOB_READ_SIZE = 10
            hashsplit.fanout = 2

            split_never = BytesIO(z(16))
            split_first = BytesIO(z(1) + sb(3) + z(14))
            split_end = BytesIO(z(13) + sb(1) + z(2))
            split_many = BytesIO(sb(1) + z(3) + sb(2) + z(4)
                                 + sb(0) + z(4) + sb(5) + z(1))

            WVPASSEQ(levels(split_never),
                     [(4, 0), (4, 0), (4, 0), (4, 0)])
            WVPASSEQ(levels(split_first),
                     [(2, 3), (4, 0), (4, 0), (4, 0), (2, 0)])
            WVPASSEQ(levels(split_end),
                     [(4, 0), (4, 0), (4, 0), (2, 1), (2, 0)])
            WVPASSEQ(levels(split_many),
                     [(1, 1), (4, 2), (4, 0), (1, 0), (4, 0), (1, 5), (1, 0)])
        finally:
            # Always undo the monkeypatching, even if something above raised.
            _helpers.splitbuf = old_splitbuf
            hashsplit.BLOB_MAX = old_BLOB_MAX
            hashsplit.BLOB_READ_SIZE = old_BLOB_READ_SIZE
            hashsplit.fanout = old_fanout
def split_to_shalist(w, files, keep_boundaries, progress=None):
    """Split *files* into blobs and return the resulting shalist.

    The ``(sha, size, bits)`` triples produced by
    ``_split_to_blobs(w, files, keep_boundaries, progress)`` are each turned
    into a ``('100644', sha, size)`` entry.

    * When the module-level ``fanout`` is falsy, all entries go into one flat
      list.
    * Otherwise entries are collected on ``stacks[0]`` and, whenever a blob's
      ``bits`` exceeds the base blob bits, ``_squish`` is called with the
      stack index derived from ``bits``.  A final ``_squish`` is applied up
      to the highest existing stack.

    Returns the first element of what ``_make_shalist`` returns for the flat
    list (no fanout) or for the last stack (fanout).
    """
    sl = _split_to_blobs(w, files, keep_boundaries, progress)
    if not fanout:
        shal = [('100644', sha, size) for (sha, size, _bits) in sl]
        return _make_shalist(shal)[0]

    base_bits = _helpers.blobbits()
    fanout_bits = int(math.log(fanout, 2))

    def bits_to_idx(n):
        assert n >= base_bits
        # Floor division: the result is used as a stack level, so it has to
        # stay an int on Python 3, where "/" would produce a float.
        return (n - base_bits) // fanout_bits

    stacks = [[]]
    for (sha, size, bits) in sl:
        assert bits <= 32
        stacks[0].append(('100644', sha, size))
        if bits > base_bits:
            _squish(w, stacks, bits_to_idx(bits))
    # Fold everything that is still pending up to the top-most stack.
    _squish(w, stacks, len(stacks) - 1)
    return _make_shalist(stacks[-1])[0]