Ejemplo n.º 1
0
def test_read_bitpacked():
    """Decode recorded bit-packed samples and compare them with expectations.

    Each line of the ``bitpack`` fixture is evaluated into a
    ``(raw, head, wid, res)`` tuple: ``raw`` is wrapped in a ``Numpy8``
    reader, decoded by ``encoding.read_bitpacked`` with ``head`` and ``wid``,
    and the decoded values must equal ``res`` element-wise.
    """
    scratch = np.empty(1000000, dtype=np.int32)
    fixture_path = os.path.join(here, 'bitpack')
    with open(fixture_path) as fixture:
        for line_no, line in enumerate(fixture):
            # Lines with index 0..count (inclusive) are exercised.
            if line_no > count:
                break
            # NOTE(review): eval() is only reasonable here on the assumption
            # that the fixture is a trusted file kept with the tests.
            raw, head, wid, res = eval(line)
            byte_view = np.frombuffer(memoryview(raw), dtype=np.uint8)
            source = encoding.Numpy8(byte_view)
            # A fresh writer over the shared scratch buffer for every sample.
            sink = encoding.Numpy32(scratch)
            encoding.read_bitpacked(source, head, wid, sink)
            decoded = sink.so_far()
            assert (res == decoded).all()
Ejemplo n.º 2
0
def test_make_definitions_with_nulls():
    """Round-trip definition levels for a series that contains nulls.

    For ten random series of ``True``/``None`` values, the bytes produced by
    ``writer.make_definitions`` are decoded with
    ``encoding.read_rle_bit_packed_hybrid`` (bit width 1) and every decoded
    level must match the non-null mask of the input.
    """
    for _ in range(10):
        sink = encoding.Numpy32(np.empty(1000, dtype=np.int32))
        data = pd.Series(
            np.random.choice([True, None], size=np.random.randint(1, 1000)))
        encoded, d2 = writer.make_definitions(data, False)
        source = encoding.Numpy8(np.frombuffer(encoded, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(source, 1, length=None, o=sink)
        # The decoder may emit more values than the series holds, so only the
        # first len(data) entries are compared.
        decoded = sink.so_far()[:len(data)]
        # Every level must agree with the mask; the previous ``.sum()`` check
        # passed as soon as a single element happened to match.
        assert (decoded == ~data.isnull()).all()
Ejemplo n.º 3
0
def test_hybrid():
    """Decode recorded RLE/bit-packed hybrid samples and check the output.

    Each line of the ``hybrid`` fixture is evaluated into a
    ``(data, width, length, res)`` tuple; ``data`` is decoded with
    ``encoding.read_rle_bit_packed_hybrid`` and the result must equal ``res``
    element-wise.
    """
    scratch = np.empty(1000000, dtype=np.int32)
    fixture_path = os.path.join(here, 'hybrid')
    # Only a twentieth of the usual sample budget is used for this fixture.
    limit = count // 20
    with open(fixture_path) as fixture:
        for line_no, line in enumerate(fixture):
            if line_no > limit:
                break
            # NOTE(review): eval() is only reasonable here on the assumption
            # that the fixture is a trusted file kept with the tests.
            data, width, length, res = eval(line)
            byte_view = np.frombuffer(memoryview(data), dtype=np.uint8)
            source = encoding.Numpy8(byte_view)
            sink = encoding.Numpy32(scratch)
            encoding.read_rle_bit_packed_hybrid(source, width, length, sink)
            decoded = sink.so_far()
            assert (res == decoded).all()
Ejemplo n.º 4
0
def test_rle():
    """Decode recorded RLE samples and compare them with expectations.

    Each line of the ``rle`` fixture is evaluated into a
    ``(data, head, width, res)`` tuple; ``data`` is passed through
    ``util.byte_buffer``, decoded with ``encoding.read_rle`` and the result
    must equal ``res`` element-wise.
    """
    scratch = np.empty(1000000, dtype=np.int32)
    fixture_path = os.path.join(here, 'rle')
    with open(fixture_path) as fixture:
        for line_no, line in enumerate(fixture):
            # Lines with index 0..count (inclusive) are exercised.
            if line_no > count:
                break
            # NOTE(review): eval() is only reasonable here on the assumption
            # that the fixture is a trusted file kept with the tests.
            data, head, width, res = eval(line)
            byte_view = np.frombuffer(util.byte_buffer(data), dtype=np.uint8)
            source = encoding.Numpy8(byte_view)
            sink = encoding.Numpy32(scratch)
            encoding.read_rle(source, head, width, sink)
            decoded = sink.so_far()
            assert (res == decoded).all()
Ejemplo n.º 5
0
def test_bitpack():
    """Round-trip random integers through the bit-packed writer and reader.

    Ten times over, a random int32 array is encoded with
    ``writer.encode_bitpacked``, the buffer is rewound, the header is read
    with ``encoding.read_unsigned_var_int`` and the payload is decoded with
    ``encoding.read_bitpacked``.  The decoded prefix must equal the input,
    anything after it must be zero, and fewer than eight extra values may
    have been produced.
    """
    for _ in range(10):
        n_values = np.random.randint(10, 100)
        values = np.random.randint(0, 15000, size=n_values, dtype=np.int32)
        width = encoding.width_from_max_int(values.max())

        buf = encoding.Numpy8(np.zeros(900, dtype=np.uint8))
        writer.encode_bitpacked(values, width, buf)

        # Rewind so the same buffer object can be read back from the start.
        buf.loc = 0
        head = encoding.read_unsigned_var_int(buf)
        decoded = encoding.Numpy32(np.zeros(300, dtype=np.int32))
        encoding.read_bitpacked(buf, head, width, decoded)

        n = len(values)
        assert (values == decoded.so_far()[:n]).all()
        assert decoded.so_far()[n:].sum() == 0  # zero padding
        assert decoded.loc - n < 8
Ejemplo n.º 6
0
def test_rle_bp():
    """Round-trip random integers through the RLE/bit-packed hybrid codec.

    Ten times over, a random int32 array is written with
    ``writer.encode_rle_bp`` and read back with
    ``encoding.read_rle_bit_packed_hybrid``; the first ``len(values)``
    decoded entries must equal the input.
    """
    for _ in range(10):
        n_values = np.random.randint(10, 100)
        values = np.random.randint(0, 15000, size=n_values, dtype=np.int32)
        # Five spare output slots beyond the number of input values.
        decoded = encoding.Numpy32(np.empty(len(values) + 5, dtype=np.int32))
        buf = encoding.Numpy8(np.zeros(900, dtype=np.uint8))
        width = encoding.width_from_max_int(values.max())

        # without length
        # NOTE(review): presumably means the encoder writes no length prefix;
        # the number of bytes written is passed to the reader explicitly.
        writer.encode_rle_bp(values, width, buf)
        written = buf.loc
        buf.loc = 0

        encoding.read_rle_bit_packed_hybrid(buf, width, length=written,
                                            o=decoded)
        head = decoded.so_far()[:len(values)]
        assert (head == values).all()
Ejemplo n.º 7
0
def test_hybrid_extra_bytes():
    """Check that the hybrid reader ignores bytes after the declared payload.

    Samples from the ``hybrid`` fixture that carry an explicit ``length`` get
    ``b'extra bytes'`` appended before decoding.  The decoded prefix must
    still equal ``res`` and the reader position must stop at the original
    data length.
    """
    scratch = np.empty(1000000, dtype=np.int32)
    fixture_path = os.path.join(TEST_DATA, 'hybrid')
    limit = count // 20
    with open(fixture_path) as fixture:
        for line_no, line in enumerate(fixture):
            if line_no > limit:
                break
            # NOTE(review): eval() is only reasonable here on the assumption
            # that the fixture is a trusted file kept with the tests.
            data, width, length, res = eval(line)
            # Samples without an explicit length are not exercised here.
            if length is None:
                continue
            padded = data + b'extra bytes'
            byte_view = np.frombuffer(memoryview(padded), dtype=np.uint8)
            source = encoding.Numpy8(byte_view)
            sink = encoding.Numpy32(scratch)
            encoding.read_rle_bit_packed_hybrid(source, width, length, sink)
            assert (res == sink.so_far()[:len(res)]).all()
            # The trailing bytes must be left unread.
            assert source.loc == len(data)
Ejemplo n.º 8
0
def test_make_definitions_without_nulls():
    """Round-trip definition levels for a series that has no nulls.

    For one hundred all-``True`` series of random length, the bytes produced
    by ``writer.make_definitions`` must have the expected size and must
    decode, via ``encoding.read_rle_bit_packed_hybrid`` (bit width 1), to
    levels that match the non-null mask of the input everywhere.
    """
    for _ in range(100):
        sink = encoding.Numpy32(np.empty(10000, dtype=np.int32))
        data = pd.Series([True] * np.random.randint(1, 10000))
        encoded, d2 = writer.make_definitions(data, True)

        # Count the 7-bit groups needed to hold ``len(data) << 1``.
        remaining = len(data) << 1
        header_bytes = 1
        while remaining > 127:
            remaining >>= 7
            header_bytes += 1
        # "length", num_count, value
        assert len(encoded) == 4 + header_bytes + 1

        source = encoding.Numpy8(np.frombuffer(encoded, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(source, 1, length=None, o=sink)
        decoded = sink.so_far()
        # Every level must agree with the mask; the previous ``.sum()`` check
        # passed as soon as a single element happened to match.
        assert (decoded == ~data.isnull()).all()