def test_union_to_arrow():
    """Check the Arrow type string and values produced from union arrays.

    Three inputs are passed through ``ak.to_arrow``: a union whose data
    contains ``None``, a union wrapped in an ``UnmaskedArray``, and a bare
    union. The first two are expected to report union members without
    ``not null``; the bare union is expected to mark every member
    ``not null``.
    """
    optional_members = "dense_union<0: double=0, 1: large_list<item: int64 not null>=1, 2: string=2>"
    required_members = "dense_union<0: double not null=0, 1: large_list<item: int64 not null> not null=1, 2: string not null=2>"

    # Case 1: the input data itself contains a missing value.
    with_missing = ak.Array([1.1, 2.2, None, [1, 2, 3], "hello"])
    arrow_with_missing = ak.to_arrow(with_missing)
    assert str(arrow_with_missing.type) == optional_members
    assert arrow_with_missing.to_pylist() == [1.1, 2.2, None, [1, 2, 3], "hello"]

    # Case 2: no missing values, but the union layout is wrapped in an
    # UnmaskedArray before conversion.
    bare_layout = ak.Array([1.1, 2.2, [1, 2, 3], "hello"]).layout
    unmasked = ak.Array(ak.layout.UnmaskedArray(bare_layout))
    arrow_unmasked = ak.to_arrow(unmasked)
    assert str(arrow_unmasked.type) == optional_members
    assert arrow_unmasked.to_pylist() == [1.1, 2.2, [1, 2, 3], "hello"]

    # Case 3: a bare union with no option wrapper at all.
    bare = ak.Array([1.1, 2.2, [1, 2, 3], "hello"])
    arrow_bare = ak.to_arrow(bare)
    assert str(arrow_bare.type) == required_members
    assert arrow_bare.to_pylist() == [1.1, 2.2, [1, 2, 3], "hello"]
def test_union_from_arrow():
    """Round-trip union arrays through Arrow and check type and values.

    Each case is converted with ``ak.to_arrow`` and read back with
    ``ak.from_arrow``; the reconstituted array must report the expected
    type string and list the same values that went in.
    """
    # (input values, expected type string after the round trip)
    cases = [
        (
            [1.1, 2.2, [1, 2, 3], "hello"],
            "4 * union[float64, var * int64, string]",
        ),
        (
            [1.1, 2.2, None, [1, 2, 3], "hello"],
            "5 * union[?float64, option[var * int64], option[string]]",
        ),
    ]

    for values, expected_type in cases:
        source = ak.Array(values)
        as_arrow = ak.to_arrow(source)
        restored = ak.from_arrow(as_arrow)
        assert str(restored.type) == expected_type
        assert restored.tolist() == values
def test_list_to_arrow():
    """Check the Arrow type string and values produced from list arrays.

    Three inputs are passed through ``ak.to_arrow``: a plain list of
    floats, the same list with its content wrapped in an ``UnmaskedArray``,
    and a list whose content includes ``None``. Only the plain list is
    expected to report its items as ``not null``.
    """
    # Case 1: plain variable-length lists of floats.
    plain = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    arrow_plain = ak.to_arrow(plain)
    assert str(arrow_plain.type) == "large_list<item: double not null>"
    assert arrow_plain.to_pylist() == [[1.1, 2.2, 3.3], [], [4.4, 5.5]]

    # Case 2: reuse the offsets and content of case 1, but wrap the content
    # in an UnmaskedArray before rebuilding the list layout.
    list_layout = plain.layout
    wrapped_content = ak.layout.UnmaskedArray(list_layout.content)
    rebuilt = ak.Array(
        ak.layout.ListOffsetArray64(list_layout.offsets, wrapped_content)
    )
    arrow_rebuilt = ak.to_arrow(rebuilt)
    assert str(arrow_rebuilt.type) == "large_list<item: double>"
    assert arrow_rebuilt.to_pylist() == [[1.1, 2.2, 3.3], [], [4.4, 5.5]]

    # Case 3: the list items themselves include a missing value.
    with_missing = ak.Array([[1.1, 2.2, None], [], [4.4, 5.5]])
    arrow_with_missing = ak.to_arrow(with_missing)
    assert str(arrow_with_missing.type) == "large_list<item: double>"
    assert arrow_with_missing.to_pylist() == [[1.1, 2.2, None], [], [4.4, 5.5]]
def test_list_from_arrow():
    """Round-trip list arrays through Arrow and check type and values.

    The cases cover every combination of missing items inside a list and
    missing lists themselves. Each is converted with ``ak.to_arrow``, read
    back with ``ak.from_arrow``, and compared against the expected type
    string and the values that went in.
    """
    # (input values, expected type string after the round trip)
    cases = [
        ([[1.1, 2.2, 3.3], [], [4.4, 5.5]], "3 * var * float64"),
        ([[1.1, 2.2, None], [], [4.4, 5.5]], "3 * var * ?float64"),
        ([[1.1, 2.2, 3.3], [], None, [4.4, 5.5]], "4 * option[var * float64]"),
        ([[1.1, 2.2, None], [], None, [4.4, 5.5]], "4 * option[var * ?float64]"),
    ]

    for values, expected_type in cases:
        source = ak.Array(values)
        as_arrow = ak.to_arrow(source)
        restored = ak.from_arrow(as_arrow)
        assert str(restored.type) == expected_type
        assert restored.tolist() == values
def test_record_to_arrow():
    """Check the Arrow struct type and values produced from a record array.

    The record has three fields: ``x`` (plain floats), ``y`` (the same
    floats wrapped in an ``UnmaskedArray``) and ``z`` (integers with one
    ``None``). Only ``x`` is expected to be reported as ``not null``.
    """
    x_values = [1.1, 2.2, 3.3, 4.4, 5.5]
    z_values = [1, 2, 3, None, 5]

    x_layout = ak.Array(x_values).layout
    z_layout = ak.Array(z_values).layout
    fields = [x_layout, ak.layout.UnmaskedArray(x_layout), z_layout]
    records = ak.Array(ak.layout.RecordArray(fields, ["x", "y", "z"]))

    as_arrow = ak.to_arrow(records)
    assert str(as_arrow.type) == "struct<x: double not null, y: double, z: int64>"

    # "y" shares its values with "x", so each expected row repeats the x value.
    expected_rows = [
        {"x": x, "y": x, "z": z} for x, z in zip(x_values, z_values)
    ]
    assert as_arrow.to_pylist() == expected_rows
def test():
    """Check that a repartitioned array converts to a ``pyarrow.ChunkedArray``.

    The integers 0-9 are repartitioned with ``[3, 3, 3, 1]`` and passed to
    ``ak.to_arrow``; the result must be a chunked array holding the same
    ten values in order.
    """
    partitioned = ak.repartition(ak.Array(range(10)), [3, 3, 3, 1])
    chunked = ak.to_arrow(partitioned)

    assert isinstance(chunked, pyarrow.ChunkedArray)
    assert chunked.to_pylist() == list(range(10))
def test_record_from_arrow():
    """Round-trip record arrays through Arrow and check type and values.

    First a record with fields ``x`` (plain floats), ``y`` (the same floats
    in an ``UnmaskedArray``) and ``z`` (integers with one ``None``) is
    round-tripped. Then the same record layout is wrapped in a
    ``ByteMaskedArray`` and round-tripped again; the expected output has
    ``None`` in place of the second record.
    """
    x_values = [1.1, 2.2, 3.3, 4.4, 5.5]
    z_values = [1, 2, 3, None, 5]
    # "y" shares its values with "x", so each expected row repeats the x value.
    expected_rows = [
        {"x": x, "y": x, "z": z} for x, z in zip(x_values, z_values)
    ]

    x_layout = ak.Array(x_values).layout
    z_layout = ak.Array(z_values).layout
    records = ak.Array(
        ak.layout.RecordArray(
            [x_layout, ak.layout.UnmaskedArray(x_layout), z_layout],
            ["x", "y", "z"],
        )
    )

    restored = ak.from_arrow(ak.to_arrow(records))
    assert str(restored.type) == '5 * {"x": float64, "y": ?float64, "z": ?int64}'
    assert restored.tolist() == expected_rows

    # Wrap the whole record layout in a byte mask; with valid_when=False the
    # single True entry (position 1) is the one expected to come back as None.
    byte_mask = ak.layout.Index8(
        np.array([False, True, False, False, False], np.int8)
    )
    masked_records = ak.Array(
        ak.layout.ByteMaskedArray(byte_mask, records.layout, valid_when=False)
    )

    restored_masked = ak.from_arrow(ak.to_arrow(masked_records))
    assert str(restored_masked.type) == '5 * ?{"x": float64, "y": ?float64, "z": ?int64}'

    expected_masked_rows = [
        None if position == 1 else row
        for position, row in enumerate(expected_rows)
    ]
    assert restored_masked.tolist() == expected_masked_rows
def test_arrow_nomask():
    """Check that the option type survives an Arrow round trip.

    The full array ends in ``None``; the slice ``[:-1]`` drops that last
    entry. Both are expected to come back from Arrow typed as ``?float64``.
    """
    values = ak.Array([1.1, 2.2, 3.3, 4.4, None])

    full_round_trip = ak.from_arrow(ak.to_arrow(values))
    assert str(ak.type(full_round_trip)) == "5 * ?float64"

    # Slice off the trailing None before converting.
    without_last = values[:-1]
    sliced_round_trip = ak.from_arrow(ak.to_arrow(without_last))
    assert str(ak.type(sliced_round_trip)) == "4 * ?float64"