def test_attributes(schema, attrs):
    """Check that attributes set on a schema can be read back from a file.

    ``attrs`` is applied to ``schema`` via ``set_attributes``, a file is
    written with that schema and no rows, and the reader must then report
    zero rows and expose the same attributes on ``reader.schema``.
    """
    schema.set_attributes(attrs)

    buffer = io.BytesIO()
    # The writer is closed straight away: no rows are written.
    orc_writer = Writer(buffer, schema)
    orc_writer.close()

    # The buffer is handed to the reader as-is (no explicit rewind here).
    orc_reader = Reader(buffer)
    row_count = len(orc_reader)
    assert row_count == 0
    assert orc_reader.schema.attributes == attrs
def test_write_complex_type(orc_type, values):
    """Round-trip ``values`` of type ``orc_type`` with dict struct representation.

    Each item of ``values`` is written individually; both the writer and
    the reader are created with ``struct_repr=StructRepr.DICT``, and the
    full read result must equal ``values``.
    """
    buffer = io.BytesIO()
    orc_writer = Writer(buffer, orc_type, struct_repr=StructRepr.DICT)
    for record in values:
        orc_writer.write(record)
    orc_writer.close()

    # Rewind so the reader starts from the beginning of the written bytes.
    buffer.seek(0)
    orc_reader = Reader(buffer, struct_repr=StructRepr.DICT)
    read_back = orc_reader.read()
    assert read_back == values
def test_write():
    """Write three struct rows one at a time and check they read back equal."""
    schema = "struct<col0:int,col1:string,col2:double>"
    records = [
        (1, "Test A", 2.13),
        (2, "Test B", 0.123213),
        (3, "Test C", 123.011234),
    ]

    buffer = io.BytesIO()
    orc_writer = Writer(buffer, schema)
    for record in records:
        orc_writer.write(record)
    orc_writer.close()

    # Rewind before handing the buffer to the reader.
    buffer.seek(0)
    orc_reader = Reader(buffer)
    read_back = orc_reader.read()
    assert read_back == records
def test_writerows():
    """Check ``writerows`` returns the row count and the rows read back equal.

    Ten integers are passed to ``Writer.writerows`` as a tuple; the return
    value must equal the number of rows, and reading the file back must
    yield the same values as a list.
    """
    rows = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

    buffer = io.BytesIO()
    orc_writer = Writer(buffer, "int")
    written = orc_writer.writerows(rows)
    orc_writer.close()
    assert written == len(rows)

    buffer.seek(0)
    orc_reader = Reader(buffer)
    # The input is a tuple, so it is converted to a list for the comparison.
    expected = list(rows)
    assert expected == orc_reader.read()
def test_current_row():
    """Check ``Writer.current_row`` tracks the number of rows written.

    The counter is asserted at 0 before any write, 1 after the first write,
    11 after ten more, and finally compared with the length reported by a
    reader opened on the closed file.
    """
    schema = "struct<col0:int,col1:string,col2:double>"
    buffer = io.BytesIO()
    orc_writer = Writer(buffer, schema)
    assert orc_writer.current_row == 0

    orc_writer.write((0, "Test A", 0.0001))
    assert orc_writer.current_row == 1

    for idx in range(10):
        orc_writer.write((idx, "Test A", 0.0001))
    assert orc_writer.current_row == 11

    orc_writer.close()
    buffer.seek(0)
    orc_reader = Reader(buffer)
    # current_row is read after close() and compared with the reader's length.
    assert orc_writer.current_row == len(orc_reader)
def test_write_primitive_type(orc_type, values):
    """Round-trip ``values`` of a primitive ``orc_type`` through a file.

    For every type except ``"float"`` the read result must equal ``values``
    exactly.  For ``"float"`` the lengths must match and each non-``None``
    value is compared with ``math.isclose`` using a relative tolerance of
    1e-07; ``None`` entries must come back as ``None``.
    """
    buffer = io.BytesIO()
    orc_writer = Writer(buffer, orc_type)
    for record in values:
        orc_writer.write(record)
    orc_writer.close()

    buffer.seek(0)
    orc_reader = Reader(buffer)

    if orc_type != "float":
        assert orc_reader.read() == values
        return

    # "float" values are compared with a tolerance instead of exact equality
    # (presumably because of single-precision rounding on write).
    read_back = orc_reader.read()
    assert len(read_back) == len(values)
    for actual, expected in zip(read_back, values):
        if expected is not None:
            assert math.isclose(actual, expected, rel_tol=1e-07, abs_tol=0.0)
        else:
            assert actual is None
def test_write_nones(orc_type, value):
    """Write 100 copies of ``value`` followed by 100 ``None`` rows and read them back.

    The writer uses ``batch_size=20`` and the reader ``batch_size=30``. The
    file is read back in two ``read(100)`` calls: the first chunk must hold
    only non-``None`` rows whose first and last entries match ``value``
    (within a relative tolerance of 1e-07 for ``"float"``/``"double"``),
    and the second chunk must hold only ``None`` rows.
    """
    data = io.BytesIO()
    writer = Writer(data, orc_type, batch_size=20)
    for _ in range(100):
        writer.write(value)
    for _ in range(100):
        writer.write(None)
    writer.close()

    data.seek(0)
    reader = Reader(data, batch_size=30)
    non_nones = reader.read(100)
    nones = reader.read(100)

    assert len(reader) == 200
    # Check each chunk's size explicitly: ``all()`` over an empty sequence is
    # True, so without this a short or empty second read would pass silently.
    assert len(non_nones) == 100
    assert len(nones) == 100

    if orc_type in ("float", "double"):
        assert math.isclose(non_nones[0], value, rel_tol=1e-07, abs_tol=0.0)
        assert math.isclose(non_nones[-1], value, rel_tol=1e-07, abs_tol=0.0)
    else:
        assert non_nones[0] == value
        assert non_nones[-1] == value
    assert all(row is not None for row in non_nones)
    assert all(row is None for row in nones)