def test_categorical_from_arrow_ChunkedArray():
    """Check dictionary indices after ``ak.from_arrow`` on multi-batch tables.

    One table repeats the same dictionary-encoded batch three times; the other
    mixes two batches whose dictionaries differ. In both cases the index of
    the converted "year" field must equal the per-batch indices concatenated.
    """
    codes = [0, 1, 0, 1, 2, 0, 2]
    codes_new_schema = [0, 1, 0, 1, 0]

    def make_batch(batch_codes, categories):
        # Wrap one dictionary-encoded column in a single-field record batch.
        encoded = pyarrow.DictionaryArray.from_arrays(
            pyarrow.array(batch_codes), pyarrow.array(categories)
        )
        return pyarrow.RecordBatch.from_arrays([encoded], ["year"])

    first_batch = make_batch(codes, [2019, 2020, 2021])
    second_batch = make_batch(codes_new_schema, [2019, 2020])

    uniform = ak.from_arrow(pyarrow.Table.from_batches([first_batch] * 3))
    mixed = ak.from_arrow(
        pyarrow.Table.from_batches([first_batch, second_batch])
    )

    uniform_index = np.asarray(uniform.layout.field(0).content.index)
    assert uniform_index.tolist() == codes * 3

    mixed_index = np.asarray(mixed.layout.field(0).content.index)
    assert mixed_index.tolist() == codes + codes_new_schema
def test_union_from_arrow():
    """Round-trip union-typed arrays through Arrow, without and with a None."""
    # Union of float, list-of-int and string with no missing values.
    plain = [1.1, 2.2, [1, 2, 3], "hello"]
    roundtrip = ak.from_arrow(ak.to_arrow(ak.Array(plain)))
    assert str(roundtrip.type) == "4 * union[float64, var * int64, string]"
    assert roundtrip.tolist() == plain

    # Same union with one None inserted: every branch is expected to come
    # back option-typed.
    with_none = [1.1, 2.2, None, [1, 2, 3], "hello"]
    roundtrip = ak.from_arrow(ak.to_arrow(ak.Array(with_none)))
    expected_type = "5 * union[?float64, option[var * int64], option[string]]"
    assert str(roundtrip.type) == expected_type
    assert roundtrip.tolist() == with_none
def test_optional_categorical_from_arrow():
    """Check DictionaryArrays convert to layouts tagged as "categorical".

    Covers an index without missing values and an index containing a None
    (an option-typed DictionaryArray).
    """
    categories = pyarrow.array([2019, 2020, 2021])
    index_variants = (
        [0, 1, 0, 1, 2, 0, 2],
        [0, 1, 0, 1, 2, None, 0, 2],  # option-typed: one missing index
    )
    for codes in index_variants:
        encoded = pyarrow.DictionaryArray.from_arrays(
            pyarrow.array(codes), categories
        )
        converted = ak.from_arrow(encoded)
        assert converted.layout.parameter("__array__") == "categorical"
def test_to_table_2():
    """Round-trip a jagged array of optional records through an Arrow table."""
    source = ak.Array(
        [
            [
                {"x": 0.0, "y": []},
                {"x": 1.1, "y": [1]},
                {"x": 2.2, "y": None},
            ],
            [],
            [
                {"x": 3.3, "y": [1, 2, 3]},
                None,
                {"x": 4.4, "y": [1, 2, 3, 4]},
            ],
        ]
    )
    expected_type = '3 * var * ?{"x": float64, "y": option[var * int64]}'
    assert str(source.type) == expected_type

    restored = ak.from_arrow(ak.to_arrow_table(source))

    # Both the type and the values must survive the conversion.
    assert str(restored.type) == str(source.type)
    assert restored.tolist() == source.tolist()
def test_to_arrow_table():
    """Check ``ak.to_arrow_table`` with and without ``explode_records``."""
    # Lists of records, exploded: each field becomes its own column of lists.
    nested = ak.Array(
        [[{"x": 1.1, "y": [1]}], [], [{"x": 2.2, "y": [1, 2]}]]
    )
    exploded_table = ak.to_arrow_table(nested, explode_records=True)
    assert ak.from_arrow(exploded_table).tolist() == [
        {"x": [1.1], "y": [[1]]},
        {"x": [], "y": []},
        {"x": [2.2], "y": [[1, 2]]},
    ]

    # Flat records with default options round-trip unchanged.
    flat = ak.Array([{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}])
    flat_table = ak.to_arrow_table(flat)
    assert ak.from_arrow(flat_table).tolist() == [
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [1, 2]},
    ]
def test():
    """Convert Arrow tables whose only column holds nothing but nulls."""
    # NOTE(review): another function named `test` is defined later in this
    # file; the later definition replaces this one at import time, so this
    # test is likely never collected. Consider giving it a unique name.

    # Column made entirely of None values.
    all_null = pyarrow.Table.from_pydict({"x": [None, None, None]})
    assert ak.from_arrow(all_null).tolist() == [
        {"x": None},
        {"x": None},
        {"x": None},
    ]

    # Column of lists whose items are all None.
    lists_of_null = pyarrow.Table.from_pydict({"x": [[None, None], [], [None]]})
    assert ak.from_arrow(lists_of_null).tolist() == [
        {"x": [None, None]},
        {"x": []},
        {"x": [None]},
    ]
def test_list_from_arrow():
    """Round-trip jagged float lists through Arrow for each None placement."""
    # (input values, expected type string after the round trip)
    cases = [
        # no missing values
        ([[1.1, 2.2, 3.3], [], [4.4, 5.5]], "3 * var * float64"),
        # missing item inside a list
        ([[1.1, 2.2, None], [], [4.4, 5.5]], "3 * var * ?float64"),
        # missing list
        (
            [[1.1, 2.2, 3.3], [], None, [4.4, 5.5]],
            "4 * option[var * float64]",
        ),
        # missing item and missing list
        (
            [[1.1, 2.2, None], [], None, [4.4, 5.5]],
            "4 * option[var * ?float64]",
        ),
    ]
    for values, type_string in cases:
        roundtrip = ak.from_arrow(ak.to_arrow(ak.Array(values)))
        assert str(roundtrip.type) == type_string
        assert roundtrip.tolist() == values
def test():
    """Convert an Arrow struct of fixed-size (length 2) float lists."""
    # NOTE(review): an earlier function in this file is also named `test`;
    # this definition shadows it. Consider giving each a unique name.
    struct_type = pyarrow.struct(
        [
            pyarrow.field(name, pyarrow.list_(pyarrow.float64(), 2))
            for name in ("x", "y")
        ]
    )
    rows = [
        {"x": [1.1, 2.1], "y": [3.1, 4.1]},
        {"x": [1.2, 2.2], "y": [3.2, 4.2]},
        {"x": [1.3, 2.3], "y": [3.3, 4.3]},
    ]

    converted = ak.from_arrow(pyarrow.array(rows, type=struct_type))

    assert ak.to_list(converted) == rows
    expected_type = '3 * {"x": option[2 * ?float64], "y": option[2 * ?float64]}'
    assert str(converted.type) == expected_type
def test_from_buffers():
    """Rebuild an Arrow-derived array lazily from its buffers and read a row."""
    source = ak.Array(
        [
            [
                {"x": 0.0, "y": []},
                {"x": 1.1, "y": [1]},
                {"x": 2.2, "y": None},
            ],
            [],
            [
                {"x": 3.3, "y": [1, 2, 3]},
                None,
                {"x": 4.4, "y": [1, 2, 3, 4]},
            ],
        ]
    )
    expected_type = '3 * var * ?{"x": float64, "y": option[var * int64]}'
    assert str(source.type) == expected_type

    # Go through an Arrow table first so the buffers come from Arrow data.
    from_table = ak.from_arrow(ak.to_arrow_table(source))
    form, length, container = ak.to_buffers(from_table)
    lazy_copy = ak.from_buffers(form, length, container, lazy=True)

    assert lazy_copy[2].tolist() == [
        {"x": 3.3, "y": [1, 2, 3]},
        None,
        {"x": 4.4, "y": [1, 2, 3, 4]},
    ]
def test_record_from_arrow():
    """Round-trip record arrays through Arrow, plain and byte-masked."""
    floats = ak.Array([1.1, 2.2, 3.3, 4.4, 5.5]).layout
    optional_ints = ak.Array([1, 2, 3, None, 5]).layout

    # "y" reuses the "x" content but wrapped in an UnmaskedArray.
    record_layout = ak.layout.RecordArray(
        [floats, ak.layout.UnmaskedArray(floats), optional_ints],
        ["x", "y", "z"],
    )
    records = ak.Array(record_layout)

    roundtrip = ak.from_arrow(ak.to_arrow(records))
    expected_type = '5 * {"x": float64, "y": ?float64, "z": ?int64}'
    assert str(roundtrip.type) == expected_type
    rows = [
        {"x": 1.1, "y": 1.1, "z": 1},
        {"x": 2.2, "y": 2.2, "z": 2},
        {"x": 3.3, "y": 3.3, "z": 3},
        {"x": 4.4, "y": 4.4, "z": None},
        {"x": 5.5, "y": 5.5, "z": 5},
    ]
    assert roundtrip.tolist() == rows

    # Mask out the second record (valid_when=False, so True marks it missing).
    mask = ak.layout.Index8(
        np.array([False, True, False, False, False], np.int8)
    )
    masked_records = ak.Array(
        ak.layout.ByteMaskedArray(mask, records.layout, valid_when=False)
    )

    roundtrip = ak.from_arrow(ak.to_arrow(masked_records))
    expected_type = '5 * ?{"x": float64, "y": ?float64, "z": ?int64}'
    assert str(roundtrip.type) == expected_type
    assert roundtrip.tolist() == rows[:1] + [None] + rows[2:]
def test_from_arrow():
    """Convert Arrow date, time, timestamp and duration arrays via from_arrow.

    Every Arrow temporal type and unit exercised here must yield the same
    NumPy datetime64 / timedelta64 values from ``tolist()``.
    """
    moments = [datetime.datetime(2002, 1, 23), datetime.datetime(2019, 2, 20)]
    expected_moments = [
        np.datetime64("2002-01-23T00:00:00.000"),
        np.datetime64("2019-02-20T00:00:00.000"),
    ]

    clock_times = [datetime.time(1, 0, 0), datetime.time(2, 30, 0)]
    expected_clock_times = [
        np.datetime64("1970-01-01T01:00:00.000"),
        np.datetime64("1970-01-01T02:30:00.000"),
    ]

    spans = [datetime.timedelta(5), datetime.timedelta(10)]
    expected_spans = [
        np.timedelta64(5, "D"),
        np.timedelta64(10, "D"),
    ]

    def check(values, arrow_type, expected):
        # Build the Arrow array with an explicit type, convert, and compare.
        converted = ak.from_arrow(pyarrow.array(values, type=arrow_type))
        assert converted.tolist() == expected

    # Calendar dates: millisecond- and day-based Arrow date types.
    for make_date_type in (pyarrow.date64, pyarrow.date32):
        check(moments, make_date_type(), expected_moments)

    # Times of day, in every unit allowed by each Arrow time type.
    for unit in ("us", "ns"):
        check(clock_times, pyarrow.time64(unit), expected_clock_times)
    for unit in ("s", "ms"):
        check(clock_times, pyarrow.time32(unit), expected_clock_times)

    # Timestamps and durations in all four units.
    for unit in ("s", "ms", "us", "ns"):
        check(moments, pyarrow.timestamp(unit), expected_moments)
    for unit in ("s", "ms", "us", "ns"):
        check(spans, pyarrow.duration(unit), expected_spans)
def test_arrow_nomask():
    """Check the option type survives Arrow even when no value is missing."""
    with_none = ak.Array([1.1, 2.2, 3.3, 4.4, None])

    full_roundtrip = ak.from_arrow(ak.to_arrow(with_none))
    assert str(ak.type(full_roundtrip)) == "5 * ?float64"

    # Dropping the trailing None leaves an option-typed array with no nulls.
    trimmed_roundtrip = ak.from_arrow(ak.to_arrow(with_none[:-1]))
    assert str(ak.type(trimmed_roundtrip)) == "4 * ?float64"