Esempio n. 1
0
def test_hashsplit_files(tmpdir):
    """Check HashSplitter output when reading from real files.

    A file made of ten 32 KiB runs of zero bytes is written under
    ``tmpdir`` and then fed to HashSplitter, alone and mixed with
    in-memory samples from ``split_test_objs``.  Each case compares the
    resulting ``(blob length, level)`` pairs against the expected list.
    """
    fn = os.path.join(tmpdir, b'f1')
    # Use a context manager so the file is flushed and closed even if a
    # write fails part-way through.
    with open(fn, 'wb') as f:
        for _ in range(10):
            f.write(b'\x00' * 8192 * 4)

    # Every handle given to HashSplitter is remembered here so that it
    # can be closed once the test is done, rather than being leaked.
    opened = []

    def o():
        # Fresh read handle on the zero-filled file.
        handle = open(fn, 'rb')
        opened.append(handle)
        return handle

    def bio(n):
        # In-memory stream over sample number n.
        return BytesIO(split_test_objs[n])

    def ex(n):
        # Expected single (length, level) pair for sample number n.
        return [(len(split_test_objs[n]), (n - 14) // 4)]

    try:
        res = [(len(b), lvl)
               for b, lvl in HashSplitter([o(), o(), o()], bits=BUP_BLOBBITS)]
        WVPASSEQ(res, [(32 * 1024, 0)] * 10 * 3)

        res = [(len(b), lvl)
               for b, lvl in HashSplitter([o(), bio(14), o()],
                                          bits=BUP_BLOBBITS)]
        WVPASSEQ(res, 10 * [(32 * 1024, 0)] + ex(14) + 10 * [(32 * 1024, 0)])

        res = [(len(b), lvl)
               for b, lvl in HashSplitter([bio(14), bio(15)],
                                          bits=BUP_BLOBBITS)]
        WVPASSEQ(res, ex(14) + ex(15))

        res = [(len(b), lvl)
               for b, lvl in HashSplitter([bio(14), bio(27)],
                                          bits=BUP_BLOBBITS)]
        WVPASSEQ(res, ex(14) + ex(27))
    finally:
        # Closing an already-closed file is a no-op, so this is safe
        # regardless of what the splitter did with the handles.
        for handle in opened:
            handle.close()
Esempio n. 2
0
 def hslevels(data):
     """Return the (blob length, level) pairs HashSplitter yields for data.

     The splitter is configured from the module-level ``hashbits`` and
     ``fanout`` values; ``fanout`` is converted to a bit count with a
     base-2 logarithm truncated to an int.
     """
     global hashbits, fanout
     splitter = HashSplitter([BytesIO(data)],
                             bits=hashbits,
                             fanbits=int(math.log(fanout, 2)))
     levels = []
     for blob, level in splitter:
         levels.append((len(blob), level))
     return levels
Esempio n. 3
0
 def _splitbuf(data):
     """Yield (size, bits) pairs for the split points found in data.

     Blobs of exactly ``4 << 13`` bytes at level 0 are treated as
     forced maximum-size cuts rather than real split points: their
     length is carried over and added to the next yielded size.  For
     every other blob the accumulated size is yielded together with
     ``13 + level``.
     """
     splitter = HashSplitter([BytesIO(data[:])], bits=BUP_BLOBBITS, fanbits=1)
     carried = 0
     for chunk, level in splitter:
         chunk_len = len(chunk)
         # This isn't necessarily _quite_ right, but try to
         # reconstruct from a max blob to not having split.
         if level == 0 and chunk_len == 4 << 13:
             carried += chunk_len
         else:
             yield carried + chunk_len, 13 + level
             carried = 0
Esempio n. 4
0
def test_hashsplit_boundaries():
    """Check HashSplitter with and without keep_boundaries.

    Three situations are covered: inputs with no split point at all,
    inputs that each end in a split point, and a single sample cut in
    half so that its split point lies across the input boundary.
    """
    with no_lingering_errors():

        def split_sizes(streams, **options):
            # Drain a splitter over the streams into (length, level) pairs.
            splitter = HashSplitter(streams, bits=BUP_BLOBBITS, **options)
            return [(len(chunk), level) for chunk, level in splitter]

        def zero_streams():
            # Four separate 8 KiB all-zero inputs.
            return [BytesIO(b'\x00' * 8192) for _ in range(4)]

        def sample_streams():
            # Fresh streams over samples 13, 14 and 15.
            return [BytesIO(split_test_objs[n]) for n in (13, 14, 15)]

        def ex(n):
            # Expected (length, level) pair for sample number n.
            shifted = n - 1 if n > 13 else n
            return (len(split_test_objs[n]), (shifted - 13) // 4)

        # check with/without boundaries and not finding any split points
        WVPASSEQ(split_sizes(zero_streams(), keep_boundaries=False),
                 [(4 * 8192, 0)])
        WVPASSEQ(split_sizes(zero_streams()), 4 * [(8192, 0)])

        # check with/without boundaries with split points
        exp = [ex(13), ex(14), ex(15)]
        WVPASSEQ(split_sizes(sample_streams()), exp)
        WVPASSEQ(split_sizes(sample_streams(), keep_boundaries=False), exp)

        # check with/without boundaries with found across boundary
        data = split_test_objs[27]
        middle = len(data) // 2
        d1 = data[:middle]
        d2 = data[middle:]

        WVPASSEQ(split_sizes([BytesIO(d1), BytesIO(d2)]),
                 [(len(d1), 0), (len(d2), 0)])

        joined = split_sizes([BytesIO(d1), BytesIO(d2)],
                             keep_boundaries=False,
                             fanbits=1)
        WVPASSEQ(joined, [(len(data), 27 - 13 - 1)])
Esempio n. 5
0
def test_samples():
    """Validate every entry of split_test_objs against the splitter.

    For keys up to 21 the rollsum of the sample must have its low
    ``k`` bits set and bit ``k`` clear.  For every key, the first blob
    produced by HashSplitter must span the whole sample and carry the
    expected level.
    """
    for k in split_test_objs:
        sample = split_test_objs[k]

        if k <= 21:
            # First check that they have the right number of bits.
            low_bits = _helpers.rollsum(sample) & ((1 << (k + 1)) - 1)
            WVPASSEQ(low_bits, (1 << k) - 1)

        # then also check that again, with the default (bits=13);
        # the algorithm ignores 1 bit after the split bits
        above_default = k - 13
        expected = above_default - 1 if above_default > 0 else above_default

        splitter = HashSplitter([BytesIO(sample)],
                                bits=BUP_BLOBBITS,
                                fanbits=1)
        blob, level = next(splitter)
        WVPASSEQ((k, len(blob), level), (k, len(sample), expected))