def test_make_definitions_with_nulls():
    """Round-trip the definition levels of a nullable column.

    Ten times over, builds a random Series of ``True``/``None`` values,
    encodes it with ``writer.make_definitions`` and decodes the result with
    ``encoding.read_rle_bit_packed_hybrid`` at bit width 1, then compares the
    decoded values with the Series' non-null mask.
    """
    for _ in range(10):
        decoded = encoding.Numpy32(np.empty(1000, dtype=np.int32))
        data = pd.Series(
            np.random.choice([True, None], size=np.random.randint(1, 1000)))
        out, _ = writer.make_definitions(data, False)
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        reader = encoding.Numpy8(np.frombuffer(out, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(reader, 1, length=None, o=decoded)
        # The decoder may emit more values than were encoded; keep only the
        # first len(data) of them.
        levels = decoded.so_far()[:len(data)]
        # NOTE(review): .sum() only requires at least one matching element;
        # .all() would be the stricter check -- confirm before tightening.
        assert (levels == ~data.isnull()).sum()
def test_make_definitions_with_nulls():
    """Encode and decode definition levels for Series containing nulls.

    Each of the ten iterations draws a random-length Series of ``True`` and
    ``None``, runs it through ``writer.make_definitions`` and reads the bytes
    back with ``encoding.read_rle_bit_packed_hybrid`` (bit width 1).
    """
    for _ in range(10):
        sink = encoding.Numpy32(np.empty(1000, dtype=np.int32))
        n_rows = np.random.randint(1, 1000)
        data = pd.Series(np.random.choice([True, None], size=n_rows))
        encoded, _unused = writer.make_definitions(data, False)
        source = encoding.Numpy8(np.frombuffer(encoded, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(source, 1, length=None, o=sink)
        # Only the first len(data) decoded values are compared.
        levels = sink.so_far()[:len(data)]
        not_null = ~data.isnull()
        assert (levels == not_null).sum()
def test_hybrid():
    """Decode recorded hybrid-encoded samples and compare to stored results.

    Each line of the ``hybrid`` fixture file evaluates to a
    ``(data, width, length, res)`` tuple; only lines up to index
    ``count // 20`` are processed.
    """
    results = np.empty(1000000, dtype=np.int32)
    fixture = os.path.join(here, 'hybrid')
    with open(fixture) as f:
        for line_no, line in enumerate(f):
            if line_no > count // 20:
                break
            # NOTE: eval() executes fixture text; the fixture must be trusted.
            data, width, length, res = eval(line)
            raw = np.frombuffer(util.byte_buffer(data), dtype=np.uint8)
            reader = encoding.Numpy8(raw)
            sink = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(reader, width, length, sink)
            assert (res == sink.so_far()).all()
def test_hybrid():
    """Check the hybrid decoder against samples stored in the fixture file.

    Every fixture line evaluates to ``(data, width, length, res)``; the bytes
    in ``data`` are decoded and the output is compared with ``res``. Lines
    past index ``count // 20`` are not read.
    """
    results = np.empty(1000000, dtype=np.int32)
    fixture = os.path.join(here, 'hybrid')
    with open(fixture) as f:
        for line_no, line in enumerate(f):
            if line_no > count // 20:
                break
            # NOTE: eval() executes fixture text; the fixture must be trusted.
            data, width, length, res = eval(line)
            raw = np.frombuffer(memoryview(data), dtype=np.uint8)
            reader = encoding.Numpy8(raw)
            sink = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(reader, width, length, sink)
            decoded = sink.so_far()
            assert (res == decoded).all()
def test_rle_bp():
    """Round-trip random int32 arrays through the RLE/bit-packed writer.

    Ten random arrays are encoded with ``writer.encode_rle_bp`` into a byte
    buffer and decoded again with ``encoding.read_rle_bit_packed_hybrid``.
    """
    for _ in range(10):
        n_values = np.random.randint(10, 100)
        values = np.random.randint(0, 15000, size=n_values, dtype=np.int32)
        decoded = encoding.Numpy32(np.empty(len(values) + 5, dtype=np.int32))
        buf = encoding.Numpy8(np.zeros(900, dtype=np.uint8))
        width = encoding.width_from_max_int(values.max())

        # The writer emits no length prefix, so the number of bytes written
        # is taken from the buffer position and passed to the reader.
        writer.encode_rle_bp(values, width, buf)
        n_written = buf.loc
        buf.loc = 0  # rewind so the same buffer can be read back

        encoding.read_rle_bit_packed_hybrid(
            buf, width, length=n_written, o=decoded)
        round_tripped = decoded.so_far()[:len(values)]
        assert (round_tripped == values).all()
def test_rle_bp():
    """Encode random values with ``encode_rle_bp`` and decode them again.

    Repeats ten times with arrays of random length and content; the decoded
    prefix of ``len(values)`` entries must equal the input.
    """
    for _ in range(10):
        size = np.random.randint(10, 100)
        values = np.random.randint(0, 15000, size=size, dtype=np.int32)
        target = encoding.Numpy32(np.empty(len(values) + 5, dtype=np.int32))
        stream = encoding.Numpy8(np.zeros(900, dtype=np.uint8))
        bit_width = encoding.width_from_max_int(values.max())

        # Encoded without a length header: remember how far the writer got.
        writer.encode_rle_bp(values, bit_width, stream)
        encoded_len = stream.loc
        stream.loc = 0  # reset the position before reading

        encoding.read_rle_bit_packed_hybrid(
            stream, bit_width, length=encoded_len, o=target)
        assert (target.so_far()[:len(values)] == values).all()
def test_hybrid_extra_bytes():
    """Decode fixture samples that have trailing bytes appended.

    Only fixture entries with an explicit ``length`` are used. After
    decoding, the output prefix must equal ``res`` and the reader position
    must equal the length of the original (unpadded) payload.
    """
    results = np.empty(1000000, dtype=np.int32)
    fixture = os.path.join(TEST_DATA, 'hybrid')
    with open(fixture) as f:
        for line_no, line in enumerate(f):
            if line_no > count // 20:
                break
            # NOTE: eval() executes fixture text; the fixture must be trusted.
            data, width, length, res = eval(line)
            if length is None:
                continue
            padded = data + b'extra bytes'
            raw = np.frombuffer(memoryview(padded), dtype=np.uint8)
            reader = encoding.Numpy8(raw)
            sink = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(reader, width, length, sink)
            assert (res == sink.so_far()[:len(res)]).all()
            assert reader.loc == len(data)
def test_hybrid_extra_bytes():
    """Decode fixture samples after appending bytes and extending ``length``.

    Entries without an explicit ``length`` are skipped. For the rest, the
    payload is extended with extra bytes and ``length`` is increased by the
    same amount; the reader position is then compared with the length of the
    extended payload.
    """
    results = np.empty(1000000, dtype=np.int32)
    fixture = os.path.join(here, 'hybrid')
    with open(fixture) as f:
        for line_no, line in enumerate(f):
            if line_no > count // 20:
                break
            # NOTE: eval() executes fixture text; the fixture must be trusted.
            data, width, length, res = eval(line)
            if length is None:
                continue
            data = data + b'extra bytes'
            length += len(b'extra bytes')
            raw = np.frombuffer(util.byte_buffer(data), dtype=np.uint8)
            reader = encoding.Numpy8(raw)
            sink = encoding.Numpy32(results)
            encoding.read_rle_bit_packed_hybrid(reader, width, length, sink)
            assert (res == sink.so_far()[:len(res)]).all()
            assert reader.loc == len(data)
def test_hybrid():
    """Decode fixture samples through ``NumpyIO`` and compare with ``res``.

    Each fixture line evaluates to ``(data, width, length, res)``. A missing
    ``length`` is passed to the decoder as ``0``. Output is written into a
    uint8 view of an int32 buffer and read back as int32.
    """
    results = np.empty(1000000, dtype=np.int32)
    fixture = os.path.join(TEST_DATA, 'hybrid')
    with open(fixture) as f:
        for line_no, line in enumerate(f):
            if line_no > count // 20:
                break
            # NOTE: eval() executes fixture text; the fixture must be trusted.
            data, width, length, res = eval(line)
            reader = encoding.NumpyIO(data)
            sink = encoding.NumpyIO(results.view('uint8'))
            encoding.read_rle_bit_packed_hybrid(
                reader, width, length or 0, sink, itemsize=4)
            decoded = np.frombuffer(sink.so_far(), dtype="int32")
            assert (res == decoded).all()
def test_make_definitions_without_nulls():
    """Check size and content of definition levels for all-``True`` Series.

    For 100 Series of random length, the encoded output must be
    ``4 + p + 1`` bytes long, where ``p`` is the number of 7-bit groups
    needed for ``len(data) << 1``, and must decode at bit width 1.
    """
    for _ in range(100):
        sink = encoding.Numpy32(np.empty(10000, dtype=np.int32))
        data = pd.Series([True] * np.random.randint(1, 10000))
        encoded, _unused = writer.make_definitions(data, True)

        # Count the 7-bit groups needed to hold len(data) << 1.
        remaining = len(data) << 1
        n_groups = 1
        while remaining > 127:
            remaining >>= 7
            n_groups += 1
        # "length", num_count, value
        assert len(encoded) == 4 + n_groups + 1

        source = encoding.Numpy8(np.frombuffer(encoded, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(source, 1, length=None, o=sink)
        levels = sink.so_far()
        assert (levels == ~data.isnull()).sum()
def test_make_definitions_without_nulls():
    """Round-trip the definition levels of a column without nulls.

    For 100 all-``True`` Series of random length, checks that
    ``writer.make_definitions`` produces ``4 + p + 1`` bytes (``p`` being the
    number of 7-bit groups needed for ``len(data) << 1``) and that the bytes
    decode with ``encoding.read_rle_bit_packed_hybrid`` at bit width 1.
    """
    for _ in range(100):
        decoded = encoding.Numpy32(np.empty(10000, dtype=np.int32))
        data = pd.Series([True] * np.random.randint(1, 10000))
        out, _ = writer.make_definitions(data, True)

        # Count the 7-bit groups needed to hold len(data) << 1.
        l = len(data) << 1
        p = 1
        while l > 127:
            l >>= 7
            p += 1
        assert len(out) == 4 + p + 1  # "length", num_count, value

        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        reader = encoding.Numpy8(np.frombuffer(out, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(reader, 1, length=None, o=decoded)
        levels = decoded.so_far()
        # NOTE(review): .sum() only requires at least one matching element;
        # .all() would be the stricter check -- confirm before tightening.
        assert (levels == ~data.isnull()).sum()
def test_hybrid_extra_bytes():
    """Decode padded fixture samples through ``NumpyIO``.

    Fixture entries without an explicit ``length`` are skipped. Extra bytes
    are appended to the payload; after decoding, the output must equal
    ``res`` and ``tell()`` on the reader must equal the length of the
    original (unpadded) payload.
    """
    results = np.empty(1000000, dtype=np.int32)
    fixture = os.path.join(TEST_DATA, 'hybrid')
    with open(fixture) as f:
        for line_no, line in enumerate(f):
            if line_no > count // 20:
                break
            # NOTE: eval() executes fixture text; the fixture must be trusted.
            data, width, length, res = eval(line)
            if length is None:
                continue
            padded = data + b'extra bytes'
            reader = encoding.NumpyIO(padded)
            sink = encoding.NumpyIO(results.view("uint8"))
            encoding.read_rle_bit_packed_hybrid(
                reader, width, length, sink, itemsize=4)
            decoded = np.frombuffer(sink.so_far(), dtype="int32")
            assert (res == decoded).all()
            assert reader.tell() == len(data)