def test_unknown():
    """Arrays made only of empty lists report ``unknown`` as their item type.

    The same expectations are applied to a layout parsed from JSON, to a
    ``FillableArray`` that was only ever given empty lists, and to the
    snapshot taken from that builder. A doubly nested JSON input is checked
    separately.
    """

    def check_list_of_unknown(array):
        # Three empty lists, typed "var * unknown" and not equal to float64.
        assert awkward1.tolist(array) == [[], [], []]
        assert str(awkward1.typeof(array)) == "var * unknown"
        assert awkward1.typeof(array) == awkward1.layout.ListType(
            awkward1.layout.UnknownType())
        assert not awkward1.typeof(array) == awkward1.layout.PrimitiveType(
            "float64")

    # One level of nesting, parsed from JSON.
    flat = awkward1.fromjson("[[], [], []]").layout
    check_list_of_unknown(flat)

    # Two levels of nesting, parsed from JSON.
    nested = awkward1.fromjson("[[], [[], []], [[], [], []]]").layout
    assert awkward1.tolist(nested) == [[], [[], []], [[], [], []]]
    assert str(awkward1.typeof(nested)) == "var * var * unknown"
    inner_type = awkward1.layout.ListType(awkward1.layout.UnknownType())
    assert awkward1.typeof(nested) == awkward1.layout.ListType(inner_type)

    # Built by hand: three lists opened and closed without any content.
    builder = awkward1.layout.FillableArray()
    for _ in range(3):
        builder.beginlist()
        builder.endlist()
    check_list_of_unknown(builder)

    # The snapshot must agree with the builder it came from.
    check_list_of_unknown(builder.snapshot())
def test_fromiter():
    """Mixed integers, lists of reals and strings convert to and from Python/JSON.

    Exercises a hand-driven ``FillableArray``, ``fromiter`` on Python objects,
    and ``fromjson``/``tojson`` on the equivalent JSON text.
    """
    numbers_and_lists = [0, 1, 2, [], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3]]
    with_strings = [
        0, 1, 2, [], "zero", [1.1], "one", [1.1, 2.2], "two",
        [1.1, 2.2, 3.3], "three",
    ]

    # Feed the builder three integers followed by four lists of reals.
    builder = awkward1.layout.FillableArray()
    for whole in (0, 1, 2):
        builder.integer(whole)
    for reals in ([], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3]):
        builder.beginlist()
        for value in reals:
            builder.real(value)
        builder.endlist()
    assert awkward1.tolist(builder.snapshot()) == numbers_and_lists

    # The same data through fromiter, without and with strings mixed in.
    converted = awkward1.fromiter(numbers_and_lists)
    assert awkward1.tolist(converted) == numbers_and_lists

    converted = awkward1.fromiter(with_strings)
    assert awkward1.tolist(converted) == with_strings

    # JSON text with spaces parses to the same Python objects.
    spaced_json = '[0, 1, 2, [], "zero", [1.1], "one", [1.1, 2.2], "two", [1.1, 2.2, 3.3], "three"]'
    assert awkward1.tolist(awkward1.fromjson(spaced_json)) == with_strings

    # Compact JSON text is reproduced exactly by tojson.
    compact_json = '[0,1,2,[],"zero",[1.1],"one",[1.1,2.2],"two",[1.1,2.2,3.3],"three"]'
    assert awkward1.tojson(awkward1.fromjson(compact_json)) == compact_json
def test_getitem():
    """Indexing into empty sublists either yields ``[]`` or a specific ValueError.

    Each failing selection is checked for both the exception type and the
    exact error message.
    """
    a = awkward1.fromjson("[[], [[], []], [[], [], []]]")
    no_indices = numpy.array([], dtype=int)

    out_of_range_list = "in ListArray64 attempting to get 0, index out of range"
    out_of_range_empty = "in EmptyArray attempting to get 0, index out of range"
    too_many_dims = "in EmptyArray, too many dimensions in slice"

    def expect_value_error(select, message):
        # Run the selection lazily so the error is raised inside the context.
        with pytest.raises(ValueError) as excinfo:
            select()
        assert str(excinfo.value) == message

    assert awkward1.tolist(a[2]) == [[], [], []]

    assert awkward1.tolist(a[2, 1]) == []
    expect_value_error(lambda: a[2, 1, 0], out_of_range_list)
    assert awkward1.tolist(a[2, 1][()]) == []
    expect_value_error(lambda: a[2, 1][0], out_of_range_empty)

    # Slices and empty index arrays are accepted on the empty sublist.
    assert awkward1.tolist(a[2, 1][100:200]) == []
    assert awkward1.tolist(a[2, 1, 100:200]) == []
    assert awkward1.tolist(a[2, 1][no_indices]) == []
    assert awkward1.tolist(a[2, 1, no_indices]) == []
    expect_value_error(
        lambda: a[2, 1, numpy.array([0], dtype=int)], out_of_range_list)

    # Any second dimension applied to the empty array is rejected.
    expect_value_error(lambda: a[2, 1][100:200, 0], too_many_dims)
    expect_value_error(lambda: a[2, 1][100:200, 200:300], too_many_dims)
    expect_value_error(lambda: a[2, 1][100:200, no_indices], too_many_dims)

    assert awkward1.tolist(a[1:, 1:]) == [[[]], [[], []]]
    expect_value_error(lambda: a[1:, 1:, 0], out_of_range_list)
def test_fromiter_fromjson():
    """Strings, flat or grouped in lists, come back unchanged from both readers."""
    words = ["one", "two", "three"]
    grouped = [["one", "two", "three"], [], ["four", "five"]]

    # From Python objects.
    from_python = awkward1.fromiter(words)
    assert awkward1.tolist(from_python) == words

    from_python = awkward1.fromiter(grouped)
    assert awkward1.tolist(from_python) == grouped

    # From JSON text describing the same data.
    from_text = awkward1.fromjson('["one", "two", "three"]')
    assert awkward1.tolist(from_text) == words

    from_text = awkward1.fromjson(
        '[["one", "two", "three"], [], ["four", "five"]]')
    assert awkward1.tolist(from_text) == grouped
def test_json():
    """Record-bearing JSON documents are reproduced exactly by fromjson/tojson."""
    documents = (
        # flat records
        '[{"one":1,"two":1.1},{"one":2,"two":2.2},{"one":3,"two":3.3}]',
        # records holding lists
        '[{"one":1,"two":[1.1,2.2,3.3]},{"one":2,"two":[]},{"one":3,"two":[4.4,5.5]}]',
        # lists of records
        '[[{"one":1,"two":1.1},{"one":2,"two":2.2},{"one":3,"two":3.3}],[],[{"one":4,"two":4.4},{"one":5,"two":5.5}]]',
        # records nested in records
        '[{"one":{"x":1,"y":1},"two":1.1},{"one":{"x":2,"y":2},"two":2.2},{"one":{"x":3,"y":3},"two":3.3}]',
    )
    for text in documents:
        round_tripped = awkward1.tojson(awkward1.fromjson(text))
        assert round_tripped == text
def test_empty_array_slice():
    """Selections that end in an empty index array or list produce ``[]``."""
    # inspired by PR021::test_getitem
    nested = awkward1.fromjson("[[], [[], []], [[], [], []]]")
    picked = nested[2, 1, numpy.array([], dtype=int)]
    assert awkward1.tolist(picked) == []
    picked = nested[2, numpy.array([1], dtype=int), numpy.array([], dtype=int)]
    assert awkward1.tolist(picked) == []

    # inspired by PR015::test_deep_numpy
    content = awkward1.layout.NumpyArray(
        numpy.array([[0.0, 1.1], [2.2, 3.3], [4.4, 5.5], [6.6, 7.7],
                     [8.8, 9.9]]))
    starts = awkward1.layout.Index64(numpy.array([0, 3, 3]))
    stops = awkward1.layout.Index64(numpy.array([3, 3, 5]))
    listarray = awkward1.layout.ListArray64(starts, stops, content)

    # Sanity check with non-empty advanced indexes in all three dimensions.
    advanced = listarray[[2, 0, 0, -1], [1, -1, 0, 0], [0, 1, 0, 1]]
    assert awkward1.tolist(advanced) == [8.8, 5.5, 0.0, 7.7]

    # Each of these selections is expected to come back empty.
    empty_selections = (
        (2, 1, numpy.array([], dtype=int)),
        (2, 1, []),
        (2, [1], []),
        (2, [], []),
    )
    for where in empty_selections:
        assert awkward1.tolist(listarray[where]) == []
import zarr import pickle import base64 import pyarrow as pa import awkward1 as ak arr = ak.fromjson("/Users/mdurant/Downloads/bikeroutes.json") parr = ak.to_arrow(arr) def pa_to_zarr(arr, path): z = zarr.open_group(path, mode='w') z.attrs['none'] = [b is None for b in arr.buffers()] z.attrs['length'] = len(arr) for i, buf in enumerate(arr.buffers()): if buf is None: continue z.empty(name=i, dtype='uint8', shape=(len(buf), )) z[:] = buf z.attrs['type'] = base64.b64encode(pickle.dumps(arr.type)).decode() def zarr_to_pa(path): z = zarr.open_group(path, mode='r') buffers = [None if n else pa.py_buffer(z[i][:]) for i, n in enumerate(z.attrs['none'])] typ = pickle.loads(base64.b64decode(z.attrs['type'].encode())) return pa.Array.from_buffers(typ, length=z.attrs['length'], buffers=buffers) def ak_to_zarr(arr, path):
def test_numba():
    """Index an empty sublist from inside numba-compiled functions.

    Every selection is wrapped in its own ``numba.njit`` function. Each one is
    expected either to return something that converts to ``[]`` or to fail
    with ``numba.errors.TypingError`` when called.
    """
    layout = awkward1.fromjson("[[], [[], []], [[], [], []]]").layout
    empty_index = numpy.array([], dtype=int)

    def expect_empty(compiled, *args):
        assert awkward1.tolist(compiled(*args)) == []

    def expect_typing_error(compiled, *args):
        with pytest.raises(numba.errors.TypingError):
            compiled(*args)

    @numba.njit
    def pick_sublist(array):
        return array[2, 1]
    expect_empty(pick_sublist, layout)

    @numba.njit
    def pick_then_empty_tuple(array):
        return array[2, 1][()]
    expect_empty(pick_then_empty_tuple, layout)

    @numba.njit
    def pick_then_slice(array):
        return array[2, 1][100:200]
    expect_empty(pick_then_slice, layout)

    @numba.njit
    def pick_item_too_deep(array):
        return array[2, 1, 0]
    expect_typing_error(pick_item_too_deep, layout)

    @numba.njit
    def pick_with_slice(array):
        return array[2, 1, 100:200]
    expect_empty(pick_with_slice, layout)

    @numba.njit
    def slice_then_item_in_tuple(array):
        return array[2, 1, 100:200, 0]
    expect_typing_error(slice_then_item_in_tuple, layout)

    @numba.njit
    def slice_then_item_chained(array):
        return array[2, 1, 100:200][0]
    expect_typing_error(slice_then_item_chained, layout)

    @numba.njit
    def slice_then_slice_in_tuple(array):
        return array[2, 1, 100:200, 200:300]
    expect_typing_error(slice_then_slice_in_tuple, layout)

    @numba.njit
    def slice_then_slice_chained(array):
        return array[2, 1, 100:200][200:300]
    expect_empty(slice_then_slice_chained, layout)

    @numba.njit
    def slice_then_empty_tuple(array):
        return array[2, 1, 100:200][()]
    expect_empty(slice_then_empty_tuple, layout)

    @numba.njit
    def slice_then_literal_index_array(array):
        return array[2, 1, 100:200, numpy.array([], dtype=numpy.int64)]
    expect_typing_error(slice_then_literal_index_array, layout)

    @numba.njit
    def pick_with_index_array(array, index):
        return array[2, 1, index]
    expect_empty(pick_with_index_array, layout, empty_index)

    @numba.njit
    def pick_with_two_index_arrays(array, index):
        return array[2, 1, index, index]
    expect_typing_error(pick_with_two_index_arrays, layout, empty_index)

    @numba.njit
    def index_array_then_empty_tuple(array, index):
        return array[2, 1, index][()]
    expect_empty(index_array_then_empty_tuple, layout, empty_index)
jobj0 = json.dumps(pyobj0) FRAC = 1 REPS = 2 starttime = time.time() for i in range(REPS): q = awkward.fromiter(json.loads(jobj0)) walltime = (time.time() - starttime) * FRAC / REPS print("awkward.fromiter(json.loads(jobj0))\t", walltime, "sec;\t", sizejobj0 / walltime / 1e6, "million floats/sec") FRAC = 1 REPS = 2 starttime = time.time() for i in range(REPS): q = awkward1.fromjson(jobj0, initial=sizejobj0 + 1) walltime = (time.time() - starttime) * FRAC / REPS print("awkward1.fromjson(jobj0)\t", walltime, "sec;\t", sizejobj0 / walltime / 1e6, "million floats/sec") pyobj1 = awkward1.tolist(array1.content.content[:200000]) # 200000 takes 1 sec sizejobj1 = sum(len(x) for x in pyobj1) jobj1 = json.dumps(pyobj1) FRAC = 1 REPS = 2 starttime = time.time() for i in range(REPS): q = awkward.fromiter(json.loads(jobj1)) walltime = (time.time() - starttime) * FRAC / REPS print("awkward.fromiter(json.loads(jobj1))\t", walltime, "sec;\t",