예제 #1
0
def test_make_definitions_with_nulls():
    for _ in range(10):
        out = np.empty(1000, dtype=np.int32)
        o = encoding.Numpy32(out)
        data = pd.Series(np.random.choice([True, None], size=np.random.randint(1, 1000)))
        out, d2 = writer.make_definitions(data, False)
        i = encoding.Numpy8(np.fromstring(out, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        out = o.so_far()[: len(data)]
        assert (out == ~data.isnull()).sum()
예제 #2
0
def test_make_definitions_with_nulls():
    for _ in range(10):
        out = np.empty(1000, dtype=np.int32)
        o = encoding.Numpy32(out)
        data = pd.Series(
            np.random.choice([True, None], size=np.random.randint(1, 1000)))
        out, d2 = writer.make_definitions(data, False)
        i = encoding.Numpy8(np.frombuffer(out, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        out = o.so_far()[:len(data)]
        assert (out == ~data.isnull()).sum()
예제 #3
0
def test_hybrid():
    results = np.empty(1000000, dtype=np.int32)
    with open(os.path.join(here, 'hybrid')) as f:
        for i, l in enumerate(f):
            if i > count // 20:
                break
            (data, width, length, res) = eval(l)
            i = encoding.Numpy8(np.frombuffer(util.byte_buffer(data), dtype=np.uint8))
            o = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(i, width, length, o)
            assert (res == o.so_far()).all()
예제 #4
0
def test_hybrid():
    results = np.empty(1000000, dtype=np.int32)
    with open(os.path.join(here, 'hybrid')) as f:
        for i, l in enumerate(f):
            if i > count // 20:
                break
            (data, width, length, res) = eval(l)
            i = encoding.Numpy8(np.frombuffer(memoryview(data), dtype=np.uint8))
            o = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(i, width, length, o)
            assert (res == o.so_far()).all()
예제 #5
0
def test_rle_bp():
    for _ in range(10):
        values = np.random.randint(0, 15000, size=np.random.randint(10, 100), dtype=np.int32)
        out = encoding.Numpy32(np.empty(len(values) + 5, dtype=np.int32))
        o = encoding.Numpy8(np.zeros(900, dtype=np.uint8))
        width = encoding.width_from_max_int(values.max())

        # without length
        writer.encode_rle_bp(values, width, o)
        l = o.loc
        o.loc = 0

        encoding.read_rle_bit_packed_hybrid(o, width, length=l, o=out)
        assert (out.so_far()[: len(values)] == values).all()
예제 #6
0
def test_rle_bp():
    for _ in range(10):
        values = np.random.randint(0, 15000, size=np.random.randint(10, 100),
                                   dtype=np.int32)
        out = encoding.Numpy32(np.empty(len(values) + 5, dtype=np.int32))
        o = encoding.Numpy8(np.zeros(900, dtype=np.uint8))
        width = encoding.width_from_max_int(values.max())

        # without length
        writer.encode_rle_bp(values, width, o)
        l = o.loc
        o.loc = 0

        encoding.read_rle_bit_packed_hybrid(o, width, length=l, o=out)
        assert (out.so_far()[:len(values)] == values).all()
예제 #7
0
def test_hybrid_extra_bytes():
    results = np.empty(1000000, dtype=np.int32)
    with open(os.path.join(TEST_DATA, 'hybrid')) as f:
        for i, l in enumerate(f):
            if i > count // 20:
                break
            (data, width, length, res) = eval(l)
            if length is not None:
                data2 = data + b'extra bytes'
            else:
                continue
            i = encoding.Numpy8(np.frombuffer(memoryview(data2), dtype=np.uint8))
            o = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(i, width, length, o)
            assert (res == o.so_far()[:len(res)]).all()
            assert i.loc == len(data)
예제 #8
0
def test_hybrid_extra_bytes():
    results = np.empty(1000000, dtype=np.int32)
    with open(os.path.join(here, 'hybrid')) as f:
        for i, l in enumerate(f):
            if i > count // 20:
                break
            (data, width, length, res) = eval(l)
            if length is not None:
                data = data + b'extra bytes'
                length += len(b'extra bytes')
            else:
                continue
            i = encoding.Numpy8(np.frombuffer(util.byte_buffer(data), dtype=np.uint8))
            o = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(i, width, length, o)
            assert (res == o.so_far()[:len(res)]).all()
            assert i.loc == len(data)
예제 #9
0
def test_hybrid():
    results = np.empty(1000000, dtype=np.int32)
    with open(os.path.join(TEST_DATA, 'hybrid')) as f:
        for counter, l in enumerate(f):
            if counter > count // 20:
                break
            (data, width, length, res) = eval(l)
            i = encoding.NumpyIO(data)
            o = encoding.NumpyIO(results.view('uint8'))
            encoding.read_rle_bit_packed_hybrid(i,
                                                width,
                                                length or 0,
                                                o,
                                                itemsize=4)
            out = np.frombuffer(o.so_far(), dtype="int32")
            cond = (res == out).all()
            assert cond
예제 #10
0
def test_make_definitions_without_nulls():
    for _ in range(100):
        out = np.empty(10000, dtype=np.int32)
        o = encoding.Numpy32(out)
        data = pd.Series([True] * np.random.randint(1, 10000))
        out, d2 = writer.make_definitions(data, True)

        l = len(data) << 1
        p = 1
        while l > 127:
            l >>= 7
            p += 1
        assert len(out) == 4 + p + 1  # "length", num_count, value

        i = encoding.Numpy8(np.frombuffer(out, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        out = o.so_far()
        assert (out == ~data.isnull()).sum()
예제 #11
0
def test_make_definitions_without_nulls():
    for _ in range(100):
        out = np.empty(10000, dtype=np.int32)
        o = encoding.Numpy32(out)
        data = pd.Series([True] * np.random.randint(1, 10000))
        out, d2 = writer.make_definitions(data, True)

        l = len(data) << 1
        p = 1
        while l > 127:
            l >>= 7
            p += 1
        assert len(out) == 4 + p + 1  # "length", num_count, value

        i = encoding.Numpy8(np.fromstring(out, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        out = o.so_far()
        assert (out == ~data.isnull()).sum()
예제 #12
0
def test_hybrid_extra_bytes():
    results = np.empty(1000000, dtype=np.int32)
    with open(os.path.join(TEST_DATA, 'hybrid')) as f:
        for i, l in enumerate(f):
            if i > count // 20:
                break
            (data, width, length, res) = eval(l)
            if length is not None:
                data2 = data + b'extra bytes'
            else:
                continue
            i = encoding.NumpyIO(data2)
            o = encoding.NumpyIO(results.view("uint8"))
            encoding.read_rle_bit_packed_hybrid(i,
                                                width,
                                                length,
                                                o,
                                                itemsize=4)
            out = np.frombuffer(o.so_far(), dtype="int32")
            cond = (res == out).all()
            assert cond
            assert i.tell() == len(data)