def test_take():
    """Exercise ``take`` with unsorted indices on 1-d and 2-d chunk layouts.

    Every case expects a single output block along the indexed axis whose
    task is a ``getitem`` applied to an ``np.concatenate`` of per-block
    ``getitem`` tasks.
    """
    whole = slice(None, None, None)

    # 1-d chunks, indexing axis 0.
    out_chunks, graph = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    gathered = (
        np.concatenate,
        [(getitem, ('x', 0), ([1, 3, 5],)), (getitem, ('x', 2), ([7],))],
        0,
    )
    wanted = {('y', 0): (getitem, gathered, ([2, 0, 3, 1],))}
    assert graph == wanted
    assert out_chunks == ((4,),)

    # 2-d chunks, indexing axis 0: one output block per block of axis 1.
    out_chunks, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], axis=0
    )
    wanted = {}
    for col in range(2):
        gathered = (
            np.concatenate,
            [
                (getitem, ('x', 0, col), ([1, 3, 5], whole)),
                (getitem, ('x', 2, col), ([7], whole)),
            ],
            0,
        )
        wanted[('y', 0, col)] = (getitem, gathered, ([2, 0, 3, 1], whole))
    assert graph == wanted
    assert out_chunks == ((4,), (20, 20))

    # 2-d chunks, indexing axis 1: one output block per block of axis 0.
    out_chunks, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 37, 3], axis=1
    )
    wanted = {}
    for row in range(4):
        gathered = (
            np.concatenate,
            [
                (getitem, ('x', row, 0), (whole, [1, 3, 5])),
                (getitem, ('x', row, 1), (whole, [17])),
            ],
            1,
        )
        wanted[('y', row, 0)] = (getitem, gathered, (whole, [2, 0, 3, 1]))
    assert graph == wanted
    assert out_chunks == ((20, 20, 20, 20), (4,))
def test_take():
    """Exercise ``take`` with unsorted indices, comparing graphs item by item.

    Graphs are compared with ``np.testing.assert_equal`` on their sorted
    items. The 1-d case spells its expected indices as NumPy arrays; the two
    2-d cases spell them as plain lists.
    """
    whole = slice(None, None, None)

    # 1-d chunks, indexing axis 0.
    out_chunks, graph = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    gathered = (
        np.concatenate,
        [
            (getitem, ('x', 0), (np.array([1, 3, 5]),)),
            (getitem, ('x', 2), (np.array([7]),)),
        ],
        0,
    )
    wanted = {('y', 0): (getitem, gathered, (np.array([2, 0, 3, 1]),))}
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((4,),)

    # 2-d chunks, indexing axis 0.
    out_chunks, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], axis=0
    )
    wanted = {}
    for col in range(2):
        gathered = (
            np.concatenate,
            [
                (getitem, ('x', 0, col), ([1, 3, 5], whole)),
                (getitem, ('x', 2, col), ([7], whole)),
            ],
            0,
        )
        wanted[('y', 0, col)] = (getitem, gathered, ([2, 0, 3, 1], whole))
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((4,), (20, 20))

    # 2-d chunks, indexing axis 1.
    out_chunks, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 37, 3], axis=1
    )
    wanted = {}
    for row in range(4):
        gathered = (
            np.concatenate,
            [
                (getitem, ('x', row, 0), (whole, [1, 3, 5])),
                (getitem, ('x', row, 1), (whole, [17])),
            ],
            1,
        )
        wanted[('y', row, 0)] = (getitem, gathered, (whole, [2, 0, 3, 1]))
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((20, 20, 20, 20), (4,))
def test_take():
    """Exercise ``take`` (with ``itemsize=8``) on the unsorted index ``[5, 1, 47, 3]``.

    The expected graph has three output blocks along axis 0, each a single
    ``getitem`` on one source block: ``[5, 1]`` from block 0, ``[7]`` from
    block 2, then ``[3]`` from block 0 again.
    """
    # (source block along axis 0, positions inside that block), in output order.
    runs = [(0, [5, 1]), (2, [7]), (0, [3])]

    # 1-d chunks.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20)], [5, 1, 47, 3], itemsize=8, axis=0
    )
    wanted = {
        ("y", out): (getitem, ("x", src), (np.array(local),))
        for out, (src, local) in enumerate(runs)
    }
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((2, 1, 1),)

    # 2-d chunks: the same runs repeated for each block of axis 1.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], itemsize=8, axis=0
    )
    wanted = {}
    for out, (src, local) in enumerate(runs):
        for col in range(2):
            wanted[("y", out, col)] = (
                getitem,
                ("x", src, col),
                (np.array(local), slice(None, None, None)),
            )
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((2, 1, 1), (20, 20))
def test_take_sorted():
    """Exercise ``take`` (with ``itemsize=8``) on already-sorted indices.

    The expected graph has one plain ``getitem`` task per source block that
    is touched, for a 1-d layout on axis 0 and a 2-d layout on axis 1.
    """
    # 1-d chunks, indexing axis 0.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20)], [1, 3, 5, 47], itemsize=8, axis=0
    )
    wanted = {
        ("y", 0): (getitem, ("x", 0), ([1, 3, 5],)),
        ("y", 1): (getitem, ("x", 2), ([7],)),
    }
    np.testing.assert_equal(graph, wanted)
    assert out_chunks == ((3, 1),)

    # 2-d chunks, indexing axis 1.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20), (20, 20)], [1, 3, 5, 37], itemsize=8, axis=1
    )
    first_block = {}
    second_block = {}
    for row in range(4):
        first_block[("y", row, 0)] = (
            getitem,
            ("x", row, 0),
            (slice(None, None, None), [1, 3, 5]),
        )
        second_block[("y", row, 1)] = (
            getitem,
            ("x", row, 1),
            (slice(None, None, None), [17]),
        )
    wanted = merge(first_block, second_block)
    np.testing.assert_equal(graph, wanted)
    assert out_chunks == ((20, 20, 20, 20), (3, 1))
def test_take_avoids_large_chunks():
    """Check the output chunks of ``take`` for heavily repeated index values.

    With ``array.slicing.split-large-chunks`` enabled, an index value repeated
    101 times is expected to produce two output chunks (51 and 50) rather
    than one, whatever its position in the index and for axis 0 or axis 1.
    """
    # unit test for https://github.com/dask/dask/issues/6270
    with dask.config.set({"array.slicing.split-large-chunks": True}):
        itemsize = 8
        along_first = ((1, 1, 1, 1), (500,), (500,))
        along_second = ((500,), (1, 1, 1, 1), (500,))

        # (input chunks, index values, extra keyword arguments, expected chunks)
        cases = [
            (
                along_first,
                [0, 1] + [2] * 101 + [3],
                {},
                ((1, 1, 51, 50, 1), (500,), (500,)),
            ),
            (
                along_first,
                [0] * 101 + [1, 2, 3],
                {},
                ((51, 50, 1, 1, 1), (500,), (500,)),
            ),
            (
                along_first,
                [0, 1, 2] + [3] * 101,
                {},
                ((1, 1, 1, 51, 50), (500,), (500,)),
            ),
            (
                along_second,
                [0, 1, 2] + [3] * 101,
                {"axis": 1},
                ((500,), (1, 1, 1, 51, 50), (500,)),
            ),
        ]

        for chunks, positions, extra, want in cases:
            index = np.array(positions)
            chunks2, dsk = take("a", "b", chunks, index, itemsize, **extra)
            assert chunks2 == want
            assert len(dsk) == 5
def test_take_sorted():
    """Exercise ``take`` on already-sorted indices.

    The expected graph has one plain ``getitem`` task per source block that
    is touched, for a 1-d layout on axis 0 and a 2-d layout on axis 1.
    """
    # 1-d chunks, indexing axis 0.
    out_chunks, graph = take("y", "x", [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0)
    wanted = {
        ("y", 0): (getitem, ("x", 0), ([1, 3, 5],)),
        ("y", 1): (getitem, ("x", 2), ([7],)),
    }
    assert graph == wanted
    assert out_chunks == ((3, 1),)

    # 2-d chunks, indexing axis 1.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20), (20, 20)], [1, 3, 5, 37], axis=1
    )
    first_block = {
        ("y", row, 0): (getitem, ("x", row, 0), (slice(None, None, None), [1, 3, 5]))
        for row in range(4)
    }
    second_block = {
        ("y", row, 1): (getitem, ("x", row, 1), (slice(None, None, None), [17]))
        for row in range(4)
    }
    wanted = merge(first_block, second_block)
    assert graph == wanted
    assert out_chunks == ((20, 20, 20, 20), (3, 1))
def test_take_sorted():
    """Exercise ``take`` on already-sorted indices.

    Graphs are compared with ``np.testing.assert_equal``. The expected graph
    has one plain ``getitem`` task per source block that is touched.
    """
    # 1-d chunks, indexing axis 0.
    out_chunks, graph = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0)
    wanted = {
        ('y', 0): (getitem, ('x', 0), ([1, 3, 5],)),
        ('y', 1): (getitem, ('x', 2), ([7],)),
    }
    np.testing.assert_equal(graph, wanted)
    assert out_chunks == ((3, 1),)

    # 2-d chunks, indexing axis 1.
    out_chunks, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [1, 3, 5, 37], axis=1
    )
    first_block = {}
    second_block = {}
    for row in range(4):
        first_block[('y', row, 0)] = (
            getitem,
            ('x', row, 0),
            (slice(None, None, None), [1, 3, 5]),
        )
        second_block[('y', row, 1)] = (
            getitem,
            ('x', row, 1),
            (slice(None, None, None), [17]),
        )
    wanted = merge(first_block, second_block)
    np.testing.assert_equal(graph, wanted)
    assert out_chunks == ((20, 20, 20, 20), (3, 1))
def test_take_uses_config():
    """Check ``take`` under an ``array.chunk-size`` override of ``"10GB"``.

    With that override in place, the 101 repeated index values are expected
    to stay in a single output chunk.
    """
    itemsize = 8
    chunks = ((1, 1, 1, 1), (500,), (500,))
    index = np.array([0, 1] + [2] * 101 + [3])

    override = {"array.chunk-size": "10GB"}
    with config.set(**override):
        chunks2, dsk = take("a", "b", chunks, index, itemsize)

    assert chunks2 == ((1, 1, 101, 1), (500,), (500,))
    assert len(dsk) == 4
def test_take_sorted():
    """Exercise ``take`` on already-sorted indices.

    The expected graph has one plain ``getitem`` task per source block that
    is touched, for a 1-d layout on axis 0 and a 2-d layout on axis 1.
    """
    # 1-d block dimensions, indexing axis 0.
    out_dims, graph = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0)
    wanted = {
        ('y', 0): (getitem, ('x', 0), ([1, 3, 5],)),
        ('y', 1): (getitem, ('x', 2), ([7],)),
    }
    assert graph == wanted
    assert out_dims == ((3, 1),)

    # 2-d block dimensions, indexing axis 1.
    out_dims, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [1, 3, 5, 37], axis=1
    )
    first_block = {
        ('y', row, 0): (getitem, ('x', row, 0), (slice(None, None, None), [1, 3, 5]))
        for row in range(4)
    }
    second_block = {
        ('y', row, 1): (getitem, ('x', row, 1), (slice(None, None, None), [17]))
        for row in range(4)
    }
    wanted = merge(first_block, second_block)
    assert graph == wanted
    assert out_dims == ((20, 20, 20, 20), (3, 1))
def test_take_uses_config():
    """Check ``take`` under an ``array.chunk-size`` override of ``"10GB"``.

    ``array.slicing.split-large-chunks`` is enabled for the whole test. With
    the chunk-size override in place, the 101 repeated index values are
    expected to stay in a single output chunk.
    """
    split_large = {"array.slicing.split-large-chunks": True}
    with dask.config.set(split_large):
        itemsize = 8
        chunks = ((1, 1, 1, 1), (500,), (500,))
        index = np.array([0, 1] + [2] * 101 + [3])

        with config.set({"array.chunk-size": "10GB"}):
            chunks2, dsk = take("a", "b", chunks, index, itemsize)

        assert chunks2 == ((1, 1, 101, 1), (500,), (500,))
        assert len(dsk) == 4
def test_take():
    """Exercise ``take`` on the unsorted index ``[5, 1, 47, 3]`` along axis 0.

    The expected graph has three output blocks along axis 0, each a single
    ``getitem`` on one source block: ``[5, 1]`` from block 0, ``[7]`` from
    block 2, then ``[3]`` from block 0 again.
    """
    # (source block along axis 0, positions inside that block), in output order.
    runs = [(0, [5, 1]), (2, [7]), (0, [3])]

    # 1-d chunks.
    out_chunks, graph = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    wanted = {
        ('y', out): (getitem, ('x', src), (np.array(local),))
        for out, (src, local) in enumerate(runs)
    }
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((2, 1, 1),)

    # 2-d chunks: the same runs repeated for each block of axis 1.
    out_chunks, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], axis=0
    )
    wanted = {}
    for out, (src, local) in enumerate(runs):
        for col in range(2):
            wanted[('y', out, col)] = (
                getitem,
                ('x', src, col),
                (np.array(local), slice(None, None, None)),
            )
    np.testing.assert_equal(sorted(graph.items()), sorted(wanted.items()))
    assert out_chunks == ((2, 1, 1), (20, 20))
def test_take_sorted():
    """Exercise ``take`` on already-sorted indices.

    The expected graph has one plain ``getitem`` task per source block that
    is touched, for a 1-d layout on axis 0 and a 2-d layout on axis 1.
    """
    # 1-d block dimensions, indexing axis 0.
    out_dims, graph = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0)
    wanted = {}
    wanted[('y', 0)] = (getitem, ('x', 0), ([1, 3, 5],))
    wanted[('y', 1)] = (getitem, ('x', 2), ([7],))
    assert graph == wanted
    assert out_dims == ((3, 1),)

    # 2-d block dimensions, indexing axis 1.
    out_dims, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [1, 3, 5, 37], axis=1
    )
    first_block = {}
    second_block = {}
    for row in range(4):
        first_block[('y', row, 0)] = (
            getitem,
            ('x', row, 0),
            (slice(None, None, None), [1, 3, 5]),
        )
        second_block[('y', row, 1)] = (
            getitem,
            ('x', row, 1),
            (slice(None, None, None), [17]),
        )
    wanted = merge(first_block, second_block)
    assert graph == wanted
    assert out_dims == ((20, 20, 20, 20), (3, 1))
def test_take():
    """Exercise ``take`` with unsorted indices on 1-d and 2-d block layouts.

    Every case expects a single output block along the indexed axis whose
    task is a ``getitem`` applied to an ``np.concatenate`` of a
    ``(list, [...])`` group of per-block ``getitem`` tasks.
    """
    whole = slice(None, None, None)

    # 1-d block dimensions, indexing axis 0.
    out_dims, graph = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    parts = (list, [(getitem, ('x', 0), ([1, 3, 5],)),
                    (getitem, ('x', 2), ([7],))])
    wanted = {('y', 0): (getitem, (np.concatenate, parts, 0), ([2, 0, 3, 1],))}
    assert graph == wanted
    assert out_dims == ((4,),)

    # 2-d block dimensions, indexing axis 0.
    out_dims, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], axis=0
    )
    wanted = {}
    for col in range(2):
        parts = (list, [(getitem, ('x', 0, col), ([1, 3, 5], whole)),
                        (getitem, ('x', 2, col), ([7], whole))])
        wanted[('y', 0, col)] = (
            getitem,
            (np.concatenate, parts, 0),
            ([2, 0, 3, 1], whole),
        )
    assert graph == wanted
    assert out_dims == ((4,), (20, 20))

    # 2-d block dimensions, indexing axis 1.
    out_dims, graph = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 37, 3], axis=1
    )
    wanted = {}
    for row in range(4):
        parts = (list, [(getitem, ('x', row, 0), (whole, [1, 3, 5])),
                        (getitem, ('x', row, 1), (whole, [17]))])
        wanted[('y', row, 0)] = (
            getitem,
            (np.concatenate, parts, 1),
            (whole, [2, 0, 3, 1]),
        )
    assert graph == wanted
    assert out_dims == ((20, 20, 20, 20), (4,))
def test_take():
    """Exercise ``take`` with unsorted indices on 1-d and 2-d block layouts.

    Every case expects a single output block along the indexed axis: the
    per-block ``getitem`` tasks are grouped as ``(list, [...])``, joined with
    ``np.concatenate``, and indexed once more by an outer ``getitem``.
    """
    everything = slice(None, None, None)

    # 1-d block dimensions, indexing axis 0.
    dims, tasks = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    inner = [(getitem, ('x', 0), ([1, 3, 5],)), (getitem, ('x', 2), ([7],))]
    joined = (np.concatenate, (list, inner), 0)
    wanted = {('y', 0): (getitem, joined, ([2, 0, 3, 1],))}
    assert tasks == wanted
    assert dims == ((4,),)

    # 2-d block dimensions, indexing axis 0.
    dims, tasks = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], axis=0
    )
    wanted = {}
    for j in range(2):
        inner = [
            (getitem, ('x', 0, j), ([1, 3, 5], everything)),
            (getitem, ('x', 2, j), ([7], everything)),
        ]
        joined = (np.concatenate, (list, inner), 0)
        wanted[('y', 0, j)] = (getitem, joined, ([2, 0, 3, 1], everything))
    assert tasks == wanted
    assert dims == ((4,), (20, 20))

    # 2-d block dimensions, indexing axis 1.
    dims, tasks = take(
        'y', 'x', [(20, 20, 20, 20), (20, 20)], [5, 1, 37, 3], axis=1
    )
    wanted = {}
    for i in range(4):
        inner = [
            (getitem, ('x', i, 0), (everything, [1, 3, 5])),
            (getitem, ('x', i, 1), (everything, [17])),
        ]
        joined = (np.concatenate, (list, inner), 1)
        wanted[('y', i, 0)] = (getitem, joined, (everything, [2, 0, 3, 1]))
    assert tasks == wanted
    assert dims == ((20, 20, 20, 20), (4,))
def test_take():
    """Exercise ``take`` with unsorted indices on 1-d and 2-d chunk layouts.

    Every case expects a single output block along the indexed axis: the
    per-block ``getitem`` tasks are grouped as ``(list, [...])``, joined with
    ``np.concatenate``, and indexed once more by an outer ``getitem``.
    """
    whole = slice(None, None, None)

    # 1-d chunks, indexing axis 0.
    out_chunks, graph = take("y", "x", [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    pieces = [(getitem, ("x", 0), ([1, 3, 5],)), (getitem, ("x", 2), ([7],))]
    wanted = {
        ("y", 0): (getitem, (np.concatenate, (list, pieces), 0), ([2, 0, 3, 1],))
    }
    assert graph == wanted
    assert out_chunks == ((4,),)

    # 2-d chunks, indexing axis 0.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20), (20, 20)], [5, 1, 47, 3], axis=0
    )
    wanted = {}
    for col in range(2):
        pieces = [
            (getitem, ("x", 0, col), ([1, 3, 5], whole)),
            (getitem, ("x", 2, col), ([7], whole)),
        ]
        wanted[("y", 0, col)] = (
            getitem,
            (np.concatenate, (list, pieces), 0),
            ([2, 0, 3, 1], whole),
        )
    assert graph == wanted
    assert out_chunks == ((4,), (20, 20))

    # 2-d chunks, indexing axis 1.
    out_chunks, graph = take(
        "y", "x", [(20, 20, 20, 20), (20, 20)], [5, 1, 37, 3], axis=1
    )
    wanted = {}
    for row in range(4):
        pieces = [
            (getitem, ("x", row, 0), (whole, [1, 3, 5])),
            (getitem, ("x", row, 1), (whole, [17])),
        ]
        wanted[("y", row, 0)] = (
            getitem,
            (np.concatenate, (list, pieces), 1),
            (whole, [2, 0, 3, 1]),
        )
    assert graph == wanted
    assert out_chunks == ((20, 20, 20, 20), (4,))