class Test(unittest.TestCase): def setUp(self): self.executor = ExecutorForTest('numpy') def testRechunkExecution(self): raw = np.random.random((11, 8)) arr = tensor(raw, chunk_size=3) arr2 = arr.rechunk(4) res = self.executor.execute_tensor(arr2) self.assertTrue(np.array_equal(res[0], raw[:4, :4])) self.assertTrue(np.array_equal(res[1], raw[:4, 4:])) self.assertTrue(np.array_equal(res[2], raw[4:8, :4])) self.assertTrue(np.array_equal(res[3], raw[4:8, 4:])) self.assertTrue(np.array_equal(res[4], raw[8:, :4])) self.assertTrue(np.array_equal(res[5], raw[8:, 4:])) def testCopytoExecution(self): a = ones((2, 3), chunk_size=1) b = tensor([3, -1, 3], chunk_size=2) copyto(a, b, where=b > 1) res = self.executor.execute_tensor(a, concat=True)[0] expected = np.array([[3, 1, 3], [3, 1, 3]]) np.testing.assert_equal(res, expected) a = ones((2, 3), chunk_size=1) b = tensor(np.asfortranarray(np.random.rand(2, 3)), chunk_size=2) copyto(b, a) res = self.executor.execute_tensor(b, concat=True)[0] expected = np.asfortranarray(np.ones((2, 3))) np.testing.assert_array_equal(res, expected) self.assertTrue(res.flags['F_CONTIGUOUS']) self.assertFalse(res.flags['C_CONTIGUOUS']) def testAstypeExecution(self): raw = np.random.random((10, 5)) arr = tensor(raw, chunk_size=3) arr2 = arr.astype('i8') res = self.executor.execute_tensor(arr2, concat=True) np.testing.assert_array_equal(res[0], raw.astype('i8')) raw = sps.random(10, 5, density=.2) arr = tensor(raw, chunk_size=3) arr2 = arr.astype('i8') res = self.executor.execute_tensor(arr2, concat=True) self.assertTrue( np.array_equal(res[0].toarray(), raw.astype('i8').toarray())) raw = np.asfortranarray(np.random.random((10, 5))) arr = tensor(raw, chunk_size=3) arr2 = arr.astype('i8', order='C') res = self.executor.execute_tensor(arr2, concat=True)[0] np.testing.assert_array_equal(res, raw.astype('i8')) self.assertTrue(res.flags['C_CONTIGUOUS']) self.assertFalse(res.flags['F_CONTIGUOUS']) def testTransposeExecution(self): raw = 
np.random.random((11, 8, 5)) arr = tensor(raw, chunk_size=3) arr2 = transpose(arr) res = self.executor.execute_tensor(arr2, concat=True) np.testing.assert_array_equal(res[0], raw.T) arr3 = transpose(arr, axes=(-2, -1, -3)) res = self.executor.execute_tensor(arr3, concat=True) np.testing.assert_array_equal(res[0], raw.transpose(1, 2, 0)) raw = sps.random(11, 8) arr = tensor(raw, chunk_size=3) arr2 = transpose(arr) self.assertTrue(arr2.issparse()) res = self.executor.execute_tensor(arr2, concat=True) np.testing.assert_array_equal(res[0].toarray(), raw.T.toarray()) # test order raw = np.asfortranarray(np.random.random((11, 8, 5))) arr = tensor(raw, chunk_size=3) arr2 = transpose(arr) res = self.executor.execute_tensor(arr2, concat=True)[0] expected = np.transpose(raw).copy(order='A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) arr = tensor(raw, chunk_size=3) arr2 = transpose(arr, (1, 2, 0)) res = self.executor.execute_tensor(arr2, concat=True)[0] expected = np.transpose(raw, (1, 2, 0)).copy(order='A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testSwapaxesExecution(self): raw = np.random.random((11, 8, 5)) arr = tensor(raw, chunk_size=3) arr2 = arr.swapaxes(2, 0) res = self.executor.execute_tensor(arr2, concat=True) np.testing.assert_array_equal(res[0], raw.swapaxes(2, 0)) raw = sps.random(11, 8, density=.2) arr = tensor(raw, chunk_size=3) arr2 = arr.swapaxes(1, 0) res = self.executor.execute_tensor(arr2, concat=True) np.testing.assert_array_equal(res[0].toarray(), raw.toarray().swapaxes(1, 0)) # test order raw = np.asfortranarray(np.random.rand(11, 8, 5)) arr = tensor(raw, chunk_size=3) arr2 = arr.swapaxes(2, 0) res = self.executor.execute_tensor(arr2, 
concat=True)[0] expected = raw.swapaxes(2, 0).copy(order='A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) arr = tensor(raw, chunk_size=3) arr2 = arr.swapaxes(0, 2) res = self.executor.execute_tensor(arr2, concat=True)[0] expected = raw.swapaxes(0, 2).copy(order='A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) arr = tensor(raw, chunk_size=3) arr2 = arr.swapaxes(1, 0) res = self.executor.execute_tensor(arr2, concat=True)[0] expected = raw.swapaxes(1, 0).copy(order='A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testMoveaxisExecution(self): x = zeros((3, 4, 5), chunk_size=2) t = moveaxis(x, 0, -1) res = self.executor.execute_tensor(t, concat=True)[0] self.assertEqual(res.shape, (4, 5, 3)) t = moveaxis(x, -1, 0) res = self.executor.execute_tensor(t, concat=True)[0] self.assertEqual(res.shape, (5, 3, 4)) t = moveaxis(x, [0, 1], [-1, -2]) res = self.executor.execute_tensor(t, concat=True)[0] self.assertEqual(res.shape, (5, 4, 3)) t = moveaxis(x, [0, 1, 2], [-1, -2, -3]) res = self.executor.execute_tensor(t, concat=True)[0] self.assertEqual(res.shape, (5, 4, 3)) def testBroadcastToExecution(self): raw = np.random.random((10, 5, 1)) arr = tensor(raw, chunk_size=2) arr2 = broadcast_to(arr, (5, 10, 5, 6)) res = self.executor.execute_tensor(arr2, concat=True)[0] np.testing.assert_array_equal(res, np.broadcast_to(raw, (5, 10, 5, 6))) # test chunk with unknown shape arr1 = mt.random.rand(3, 4, chunk_size=2) arr2 = mt.random.permutation(arr1) arr3 = broadcast_to(arr2, (2, 3, 4)) res = self.executor.execute_tensor(arr3, 
concat=True)[0] self.assertEqual(res.shape, (2, 3, 4)) def testBroadcastArraysExecutions(self): x_data = [[1, 2, 3]] x = tensor(x_data, chunk_size=1) y_data = [[1], [2], [3]] y = tensor(y_data, chunk_size=2) a = broadcast_arrays(x, y) res = [self.executor.execute_tensor(arr, concat=True)[0] for arr in a] expected = np.broadcast_arrays(x_data, y_data) for r, e in zip(res, expected): np.testing.assert_equal(r, e) def testWhereExecution(self): raw_cond = np.random.randint(0, 2, size=(4, 4), dtype='?') raw_x = np.random.rand(4, 1) raw_y = np.random.rand(4, 4) cond, x, y = tensor(raw_cond, chunk_size=2), tensor( raw_x, chunk_size=2), tensor(raw_y, chunk_size=2) arr = where(cond, x, y) res = self.executor.execute_tensor(arr, concat=True) self.assertTrue( np.array_equal(res[0], np.where(raw_cond, raw_x, raw_y))) raw_cond = sps.csr_matrix( np.random.randint(0, 2, size=(4, 4), dtype='?')) raw_x = sps.random(4, 1, density=.1) raw_y = sps.random(4, 4, density=.1) cond, x, y = tensor(raw_cond, chunk_size=2), tensor( raw_x, chunk_size=2), tensor(raw_y, chunk_size=2) arr = where(cond, x, y) res = self.executor.execute_tensor(arr, concat=True)[0] self.assertTrue( np.array_equal( res.toarray(), np.where(raw_cond.toarray(), raw_x.toarray(), raw_y.toarray()))) def testReshapeExecution(self): raw_data = np.random.rand(10, 20, 30) x = tensor(raw_data, chunk_size=6) y = x.reshape(-1, 30) res = self.executor.execute_tensor(y, concat=True) np.testing.assert_array_equal(res[0], raw_data.reshape(-1, 30)) y2 = x.reshape(10, -1) res = self.executor.execute_tensor(y2, concat=True) np.testing.assert_array_equal(res[0], raw_data.reshape(10, -1)) y3 = x.reshape(-1) res = self.executor.execute_tensor(y3, concat=True) np.testing.assert_array_equal(res[0], raw_data.reshape(-1)) y4 = x.ravel() res = self.executor.execute_tensor(y4, concat=True) np.testing.assert_array_equal(res[0], raw_data.ravel()) raw_data = np.random.rand(30, 100, 20) x = tensor(raw_data, chunk_size=6) y = x.reshape(-1, 20, 5, 5, 
4) res = self.executor.execute_tensor(y, concat=True) np.testing.assert_array_equal(res[0], raw_data.reshape(-1, 20, 5, 5, 4)) y2 = x.reshape(3000, 10, 2) res = self.executor.execute_tensor(y2, concat=True) np.testing.assert_array_equal(res[0], raw_data.reshape(3000, 10, 2)) y3 = x.reshape(60, 25, 40) res = self.executor.execute_tensor(y3, concat=True) np.testing.assert_array_equal(res[0], raw_data.reshape(60, 25, 40)) y4 = x.reshape(60, 25, 40) y4.op.extra_params['_reshape_with_shuffle'] = True size_res = self.executor.execute_tensor(y4, mock=True) res = self.executor.execute_tensor(y4, concat=True) self.assertEqual(res[0].nbytes, sum(v[0] for v in size_res)) self.assertTrue(np.array_equal(res[0], raw_data.reshape(60, 25, 40))) y5 = x.ravel(order='F') res = self.executor.execute_tensor(y5, concat=True)[0] expected = raw_data.ravel(order='F') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testExpandDimsExecution(self): raw_data = np.random.rand(10, 20, 30) x = tensor(raw_data, chunk_size=6) y = expand_dims(x, 1) res = self.executor.execute_tensor(y, concat=True) self.assertTrue(np.array_equal(res[0], np.expand_dims(raw_data, 1))) y = expand_dims(x, 0) res = self.executor.execute_tensor(y, concat=True) self.assertTrue(np.array_equal(res[0], np.expand_dims(raw_data, 0))) y = expand_dims(x, 3) res = self.executor.execute_tensor(y, concat=True) self.assertTrue(np.array_equal(res[0], np.expand_dims(raw_data, 3))) y = expand_dims(x, -1) res = self.executor.execute_tensor(y, concat=True) self.assertTrue(np.array_equal(res[0], np.expand_dims(raw_data, -1))) y = expand_dims(x, -4) res = self.executor.execute_tensor(y, concat=True) self.assertTrue(np.array_equal(res[0], np.expand_dims(raw_data, -4))) with self.assertRaises(np.AxisError): expand_dims(x, -5) with self.assertRaises(np.AxisError): expand_dims(x, 4) def 
testRollAxisExecution(self): x = ones((3, 4, 5, 6), chunk_size=1) y = rollaxis(x, 3, 1) res = self.executor.execute_tensor(y, concat=True) self.assertTrue( np.array_equal(res[0], np.rollaxis(np.ones((3, 4, 5, 6)), 3, 1))) def testAtleast1dExecution(self): x = 1 y = ones(3, chunk_size=2) z = ones((3, 4), chunk_size=2) t = atleast_1d(x, y, z) res = [self.executor.execute_tensor(i, concat=True)[0] for i in t] self.assertTrue(np.array_equal(res[0], np.array([1]))) self.assertTrue(np.array_equal(res[1], np.ones(3))) self.assertTrue(np.array_equal(res[2], np.ones((3, 4)))) def testAtleast2dExecution(self): x = 1 y = ones(3, chunk_size=2) z = ones((3, 4), chunk_size=2) t = atleast_2d(x, y, z) res = [self.executor.execute_tensor(i, concat=True)[0] for i in t] self.assertTrue(np.array_equal(res[0], np.array([[1]]))) self.assertTrue(np.array_equal(res[1], np.atleast_2d(np.ones(3)))) self.assertTrue(np.array_equal(res[2], np.ones((3, 4)))) def testAtleast3dExecution(self): x = 1 y = ones(3, chunk_size=2) z = ones((3, 4), chunk_size=2) t = atleast_3d(x, y, z) res = [self.executor.execute_tensor(i, concat=True)[0] for i in t] self.assertTrue(np.array_equal(res[0], np.atleast_3d(x))) self.assertTrue(np.array_equal(res[1], np.atleast_3d(np.ones(3)))) self.assertTrue(np.array_equal(res[2], np.atleast_3d(np.ones((3, 4))))) def testArgwhereExecution(self): x = arange(6, chunk_size=2).reshape(2, 3) t = argwhere(x > 1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.argwhere(np.arange(6).reshape(2, 3) > 1) np.testing.assert_array_equal(res, expected) data = np.asfortranarray(np.random.rand(10, 20)) x = tensor(data, chunk_size=10) t = argwhere(x > 0.5) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.argwhere(data > 0.5) np.testing.assert_array_equal(res, expected) self.assertTrue(res.flags['F_CONTIGUOUS']) self.assertFalse(res.flags['C_CONTIGUOUS']) def testArraySplitExecution(self): x = arange(48, chunk_size=3).reshape(2, 3, 8) ss = 
array_split(x, 3, axis=2) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.array_split(np.arange(48).reshape(2, 3, 8), 3, axis=2) self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] ss = array_split(x, [3, 5, 6, 10], axis=2) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.array_split(np.arange(48).reshape(2, 3, 8), [3, 5, 6, 10], axis=2) self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] def testSplitExecution(self): x = arange(48, chunk_size=3).reshape(2, 3, 8) ss = split(x, 4, axis=2) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.split(np.arange(48).reshape(2, 3, 8), 4, axis=2) self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] ss = split(x, [3, 5, 6, 10], axis=2) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.split(np.arange(48).reshape(2, 3, 8), [3, 5, 6, 10], axis=2) self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] # hsplit x = arange(120, chunk_size=3).reshape(2, 12, 5) ss = hsplit(x, 4) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.hsplit(np.arange(120).reshape(2, 12, 5), 4) self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] # vsplit x = arange(48, chunk_size=3).reshape(8, 3, 2) ss = vsplit(x, 4) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.vsplit(np.arange(48).reshape(8, 3, 2), 4) self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] # dsplit x = arange(48, chunk_size=3).reshape(2, 3, 8) ss = dsplit(x, 4) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.dsplit(np.arange(48).reshape(2, 3, 8), 4) 
self.assertEqual(len(res), len(expected)) [np.testing.assert_equal(r, e) for r, e in zip(res, expected)] x_data = sps.random(12, 8, density=.1) x = tensor(x_data, chunk_size=3) ss = split(x, 4, axis=0) res = [self.executor.execute_tensor(i, concat=True)[0] for i in ss] expected = np.split(x_data.toarray(), 4, axis=0) self.assertEqual(len(res), len(expected)) [ np.testing.assert_equal(r.toarray(), e) for r, e in zip(res, expected) ] def testRollExecution(self): x = arange(10, chunk_size=2) t = roll(x, 2) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.roll(np.arange(10), 2) np.testing.assert_equal(res, expected) x2 = x.reshape(2, 5) t = roll(x2, 1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.roll(np.arange(10).reshape(2, 5), 1) np.testing.assert_equal(res, expected) t = roll(x2, 1, axis=0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.roll(np.arange(10).reshape(2, 5), 1, axis=0) np.testing.assert_equal(res, expected) t = roll(x2, 1, axis=1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.roll(np.arange(10).reshape(2, 5), 1, axis=1) np.testing.assert_equal(res, expected) def testSqueezeExecution(self): data = np.array([[[0], [1], [2]]]) x = tensor(data, chunk_size=1) t = squeeze(x) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.squeeze(data) np.testing.assert_equal(res, expected) t = squeeze(x, axis=2) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.squeeze(data, axis=2) np.testing.assert_equal(res, expected) def testDiffExecution(self): data = np.array([1, 2, 4, 7, 0]) x = tensor(data, chunk_size=2) t = diff(x) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.diff(data) np.testing.assert_equal(res, expected) t = diff(x, n=2) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.diff(data, n=2) np.testing.assert_equal(res, expected) data = np.array([[1, 3, 6, 10], [0, 5, 6, 8]]) x = tensor(data, chunk_size=2) 
t = diff(x) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.diff(data) np.testing.assert_equal(res, expected) t = diff(x, axis=0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.diff(data, axis=0) np.testing.assert_equal(res, expected) x = mt.arange('1066-10-13', '1066-10-16', dtype=mt.datetime64) t = diff(x) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.diff( np.arange('1066-10-13', '1066-10-16', dtype=np.datetime64)) np.testing.assert_equal(res, expected) def testEdiff1d(self): data = np.array([1, 2, 4, 7, 0]) x = tensor(data, chunk_size=2) t = ediff1d(x) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.ediff1d(data) np.testing.assert_equal(res, expected) to_begin = tensor(-99, chunk_size=2) to_end = tensor([88, 99], chunk_size=2) t = ediff1d(x, to_begin=to_begin, to_end=to_end) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.ediff1d(data, to_begin=-99, to_end=np.array([88, 99])) np.testing.assert_equal(res, expected) data = [[1, 2, 4], [1, 6, 24]] t = ediff1d(tensor(data, chunk_size=2)) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.ediff1d(data) np.testing.assert_equal(res, expected) def testFlipExecution(self): a = arange(8, chunk_size=2).reshape((2, 2, 2)) t = flip(a, 0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.flip(np.arange(8).reshape(2, 2, 2), 0) np.testing.assert_equal(res, expected) t = flip(a, 1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.flip(np.arange(8).reshape(2, 2, 2), 1) np.testing.assert_equal(res, expected) t = flipud(a) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.flipud(np.arange(8).reshape(2, 2, 2)) np.testing.assert_equal(res, expected) t = fliplr(a) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.fliplr(np.arange(8).reshape(2, 2, 2)) np.testing.assert_equal(res, expected) def testRepeatExecution(self): a = repeat(3, 4) res = 
self.executor.execute_tensor(a)[0] expected = np.repeat(3, 4) np.testing.assert_equal(res, expected) x_data = np.random.randn(20, 30) x = tensor(x_data, chunk_size=(3, 4)) t = repeat(x, 2) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.repeat(x_data, 2) np.testing.assert_equal(res, expected) t = repeat(x, 3, axis=1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.repeat(x_data, 3, axis=1) np.testing.assert_equal(res, expected) t = repeat(x, np.arange(20), axis=0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.repeat(x_data, np.arange(20), axis=0) np.testing.assert_equal(res, expected) t = repeat(x, arange(20, chunk_size=5), axis=0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.repeat(x_data, np.arange(20), axis=0) np.testing.assert_equal(res, expected) x_data = sps.random(20, 30, density=.1) x = tensor(x_data, chunk_size=(3, 4)) t = repeat(x, 2, axis=1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.repeat(x_data.toarray(), 2, axis=1) np.testing.assert_equal(res.toarray(), expected) def testTileExecution(self): a_data = np.array([0, 1, 2]) a = tensor(a_data, chunk_size=2) t = tile(a, 2) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.tile(a_data, 2) np.testing.assert_equal(res, expected) t = tile(a, (2, 2)) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.tile(a_data, (2, 2)) np.testing.assert_equal(res, expected) t = tile(a, (2, 1, 2)) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.tile(a_data, (2, 1, 2)) np.testing.assert_equal(res, expected) b_data = np.array([[1, 2], [3, 4]]) b = tensor(b_data, chunk_size=1) t = tile(b, 2) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.tile(b_data, 2) np.testing.assert_equal(res, expected) t = tile(b, (2, 1)) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.tile(b_data, (2, 1)) np.testing.assert_equal(res, expected) c_data = 
np.array([1, 2, 3, 4]) c = tensor(c_data, chunk_size=3) t = tile(c, (4, 1)) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.tile(c_data, (4, 1)) np.testing.assert_equal(res, expected) def testIsInExecution(self): element = 2 * arange(4, chunk_size=1).reshape((2, 2)) test_elements = [1, 2, 4, 8] mask = isin(element, test_elements) res = self.executor.execute_tensor(mask, concat=True)[0] expected = np.isin(2 * np.arange(4).reshape((2, 2)), test_elements) np.testing.assert_equal(res, expected) res = self.executor.execute_tensor(element[mask], concat=True)[0] expected = np.array([2, 4]) np.testing.assert_equal(res, expected) mask = isin(element, test_elements, invert=True) res = self.executor.execute_tensor(mask, concat=True)[0] expected = np.isin(2 * np.arange(4).reshape((2, 2)), test_elements, invert=True) np.testing.assert_equal(res, expected) res = self.executor.execute_tensor(element[mask], concat=True)[0] expected = np.array([0, 6]) np.testing.assert_equal(res, expected) test_set = {1, 2, 4, 8} mask = isin(element, test_set) res = self.executor.execute_tensor(mask, concat=True)[0] expected = np.isin(2 * np.arange(4).reshape((2, 2)), test_set) np.testing.assert_equal(res, expected) def testRavelExecution(self): arr = ones((10, 5), chunk_size=2) flat_arr = mt.ravel(arr) res = self.executor.execute_tensor(flat_arr, concat=True)[0] self.assertEqual(len(res), 50) np.testing.assert_equal(res, np.ones(50)) def testSearchsortedExecution(self): raw = np.sort(np.random.randint(100, size=(16, ))) # test different chunk_size, 3 will have combine, 6 will skip combine for chunk_size in (3, 6): arr = tensor(raw, chunk_size=chunk_size) # test scalar, with value in the middle t1 = searchsorted(arr, 20) res = self.executor.execute_tensor(t1, concat=True)[0] expected = np.searchsorted(raw, 20) np.testing.assert_array_equal(res, expected) # test scalar, with value larger than 100 t2 = searchsorted(arr, 200) res = self.executor.execute_tensor(t2, concat=True)[0] 
expected = np.searchsorted(raw, 200) np.testing.assert_array_equal(res, expected) # test scalar, side left, with value exact in the middle of the array t3 = searchsorted(arr, raw[10], side='left') res = self.executor.execute_tensor(t3, concat=True)[0] expected = np.searchsorted(raw, raw[10], side='left') np.testing.assert_array_equal(res, expected) # test scalar, side right, with value exact in the middle of the array t4 = searchsorted(arr, raw[10], side='right') res = self.executor.execute_tensor(t4, concat=True)[0] expected = np.searchsorted(raw, raw[10], side='right') np.testing.assert_array_equal(res, expected) # test scalar, side left, with value exact in the end of the array t5 = searchsorted(arr, raw[15], side='left') res = self.executor.execute_tensor(t5, concat=True)[0] expected = np.searchsorted(raw, raw[15], side='left') np.testing.assert_array_equal(res, expected) # test scalar, side right, with value exact in the end of the array t6 = searchsorted(arr, raw[15], side='right') res = self.executor.execute_tensor(t6, concat=True)[0] expected = np.searchsorted(raw, raw[15], side='right') np.testing.assert_array_equal(res, expected) # test scalar, side left, with value exact in the start of the array t7 = searchsorted(arr, raw[0], side='left') res = self.executor.execute_tensor(t7, concat=True)[0] expected = np.searchsorted(raw, raw[0], side='left') np.testing.assert_array_equal(res, expected) # test scalar, side right, with value exact in the start of the array t8 = searchsorted(arr, raw[0], side='right') res = self.executor.execute_tensor(t8, concat=True)[0] expected = np.searchsorted(raw, raw[0], side='right') np.testing.assert_array_equal(res, expected) raw2 = np.random.randint(100, size=(3, 4)) # test tensor, side left t9 = searchsorted(arr, tensor(raw2, chunk_size=2), side='left') res = self.executor.execute_tensor(t9, concat=True)[0] expected = np.searchsorted(raw, raw2, side='left') np.testing.assert_array_equal(res, expected) # test tensor, side 
right t10 = searchsorted(arr, tensor(raw2, chunk_size=2), side='right') res = self.executor.execute_tensor(t10, concat=True)[0] expected = np.searchsorted(raw, raw2, side='right') np.testing.assert_array_equal(res, expected) # test one chunk arr = tensor(raw, chunk_size=16) # test scalar, tensor to search has 1 chunk t11 = searchsorted(arr, 20) res = self.executor.execute_tensor(t11, concat=True)[0] expected = np.searchsorted(raw, 20) np.testing.assert_array_equal(res, expected) # test tensor with 1 chunk, tensor to search has 1 chunk t12 = searchsorted(arr, tensor(raw2, chunk_size=4)) res = self.executor.execute_tensor(t12, concat=True)[0] expected = np.searchsorted(raw, raw2) np.testing.assert_array_equal(res, expected) # test tensor with more than 1 chunk, tensor to search has 1 chunk t13 = searchsorted(arr, tensor(raw2, chunk_size=2)) res = self.executor.execute_tensor(t13, concat=True)[0] expected = np.searchsorted(raw, raw2) np.testing.assert_array_equal(res, expected) # test sorter raw3 = np.random.randint(100, size=(16, )) arr = tensor(raw3, chunk_size=3) order = np.argsort(raw3) order_arr = tensor(order, chunk_size=4) t14 = searchsorted(arr, 20, sorter=order_arr) res = self.executor.execute_tensor(t14, concat=True)[0] expected = np.searchsorted(raw3, 20, sorter=order) np.testing.assert_array_equal(res, expected) def testUniqueExecution(self): rs = np.random.RandomState(0) raw = rs.randint(10, size=(10, )) for chunk_size in (10, 3): x = tensor(raw, chunk_size=chunk_size) y = unique(x) res = self.executor.execute_tensor(y, concat=True)[0] expected = np.unique(raw) np.testing.assert_array_equal(res, expected) y, indices = unique(x, return_index=True) res = self.executor.execute_tensors([y, indices]) expected = np.unique(raw, return_index=True) self.assertEqual(len(res), 2) self.assertEqual(len(expected), 2) np.testing.assert_array_equal(res[0], expected[0]) np.testing.assert_array_equal(res[1], expected[1]) y, inverse = unique(x, return_inverse=True) res = 
self.executor.execute_tensors([y, inverse]) expected = np.unique(raw, return_inverse=True) self.assertEqual(len(res), 2) self.assertEqual(len(expected), 2) np.testing.assert_array_equal(res[0], expected[0]) np.testing.assert_array_equal(res[1], expected[1]) y, counts = unique(x, return_counts=True) res = self.executor.execute_tensors([y, counts]) expected = np.unique(raw, return_counts=True) self.assertEqual(len(res), 2) self.assertEqual(len(expected), 2) np.testing.assert_array_equal(res[0], expected[0]) np.testing.assert_array_equal(res[1], expected[1]) y, indices, inverse, counts = unique(x, return_index=True, return_inverse=True, return_counts=True) res = self.executor.execute_tensors([y, indices, inverse, counts]) expected = np.unique(raw, return_index=True, return_inverse=True, return_counts=True) self.assertEqual(len(res), 4) self.assertEqual(len(expected), 4) np.testing.assert_array_equal(res[0], expected[0]) np.testing.assert_array_equal(res[1], expected[1]) np.testing.assert_array_equal(res[2], expected[2]) np.testing.assert_array_equal(res[3], expected[3]) y, indices, counts = unique(x, return_index=True, return_counts=True) res = self.executor.execute_tensors([y, indices, counts]) expected = np.unique(raw, return_index=True, return_counts=True) self.assertEqual(len(res), 3) self.assertEqual(len(expected), 3) np.testing.assert_array_equal(res[0], expected[0]) np.testing.assert_array_equal(res[1], expected[1]) np.testing.assert_array_equal(res[2], expected[2]) raw2 = rs.randint(10, size=(4, 5, 6)) x2 = tensor(raw2, chunk_size=chunk_size) y2 = unique(x2) res = self.executor.execute_tensor(y2, concat=True)[0] expected = np.unique(raw2) np.testing.assert_array_equal(res, expected) y2 = unique(x2, axis=1) res = self.executor.execute_tensor(y2, concat=True)[0] expected = np.unique(raw2, axis=1) np.testing.assert_array_equal(res, expected) y2 = unique(x2, axis=2) res = self.executor.execute_tensor(y2, concat=True)[0] expected = np.unique(raw2, axis=2) 
np.testing.assert_array_equal(res, expected) raw = rs.randint(10, size=(10, 20)) raw[:, 0] = raw[:, 11] = rs.randint(10, size=(10, )) x = tensor(raw, chunk_size=2) y, ind, inv, counts = unique(x, aggregate_size=3, axis=1, return_index=True, return_inverse=True, return_counts=True) res_unique, res_ind, res_inv, res_counts = self.executor.execute_tensors( (y, ind, inv, counts)) exp_unique, exp_ind, exp_counts = np.unique(raw, axis=1, return_index=True, return_counts=True) raw_res_unique = res_unique res_unique_df = pd.DataFrame(res_unique) res_unique_ind = np.asarray( res_unique_df.sort_values(list(range(res_unique.shape[0])), axis=1).columns) res_unique = res_unique[:, res_unique_ind] res_ind = res_ind[res_unique_ind] res_counts = res_counts[res_unique_ind] np.testing.assert_array_equal(res_unique, exp_unique) np.testing.assert_array_equal(res_ind, exp_ind) np.testing.assert_array_equal(raw_res_unique[:, res_inv], raw) np.testing.assert_array_equal(res_counts, exp_counts) x = (mt.random.RandomState(0).rand(1000, chunk_size=20) > 0.5).astype( np.int32) y = unique(x) res = np.sort(self.executor.execute_tensor(y, concat=True)[0]) np.testing.assert_array_equal(res, np.array([0, 1])) @require_cupy def testToGPUExecution(self): raw = np.random.rand(10, 10) x = tensor(raw, chunk_size=3) gx = to_gpu(x) res = self.executor.execute_tensor(gx, concat=True)[0] np.testing.assert_array_equal(res.get(), raw) @require_cupy def testToCPUExecution(self): raw = np.random.rand(10, 10) x = tensor(raw, chunk_size=3, gpu=True) cx = to_cpu(x) res = self.executor.execute_tensor(cx, concat=True)[0] np.testing.assert_array_equal(res, raw) def testSortExecution(self): # only 1 chunk when axis = -1 raw = np.random.rand(100, 10) x = tensor(raw, chunk_size=10) sx = sort(x) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) # 1-d chunk raw = np.random.rand(100) x = tensor(raw, chunk_size=10) sx = sort(x) res = self.executor.execute_tensor(sx, 
concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) # test force need_align=True sx = sort(x) sx.op._need_align = True res = self.executor.execute_tensor(sx, concat=True)[0] self.assertEqual(get_tiled(sx).nsplits, get_tiled(x).nsplits) np.testing.assert_array_equal(res, np.sort(raw)) # test psrs_kinds sx = sort(x, psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) # structured dtype raw = np.empty(100, dtype=[('id', np.int32), ('size', np.int64)]) raw['id'] = np.random.randint(1000, size=100, dtype=np.int32) raw['size'] = np.random.randint(1000, size=100, dtype=np.int64) x = tensor(raw, chunk_size=10) sx = sort(x, order=['size', 'id']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, order=['size', 'id'])) # test psrs_kinds with structured dtype sx = sort(x, order=['size', 'id'], psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, order=['size', 'id'])) # test flatten case raw = np.random.rand(10, 10) x = tensor(raw, chunk_size=5) sx = sort(x, axis=None) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, axis=None)) # test multi-dimension raw = np.random.rand(10, 100) x = tensor(raw, chunk_size=(2, 10)) sx = sort(x, psrs_kinds=['quicksort'] * 3) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) sx = sort(x, psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) raw = np.random.rand(10, 99) x = tensor(raw, chunk_size=(2, 10)) sx = sort(x) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) # test 3-d raw = np.random.rand(20, 25, 28) x = tensor(raw, chunk_size=(10, 5, 7)) sx = sort(x) 
res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) sx = sort(x, psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) sx = sort(x, axis=0) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, axis=0)) sx = sort(x, axis=0, psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, axis=0)) sx = sort(x, axis=1) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, axis=1)) sx = sort(x, axis=1, psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, axis=1)) # test multi-dimension with structured type raw = np.empty((10, 100), dtype=[('id', np.int32), ('size', np.int64)]) raw['id'] = np.random.randint(1000, size=(10, 100), dtype=np.int32) raw['size'] = np.random.randint(1000, size=(10, 100), dtype=np.int64) x = tensor(raw, chunk_size=(3, 10)) sx = sort(x) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw)) sx = sort(x, order=['size', 'id']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, order=['size', 'id'])) sx = sort(x, order=['size']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal(res, np.sort(raw, order=['size'])) sx = sort(x, axis=0, order=['size', 'id']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal( res, np.sort(raw, axis=0, order=['size', 'id'])) sx = sort(x, axis=0, order=['size', 'id'], psrs_kinds=[None, None, 'quicksort']) res = self.executor.execute_tensor(sx, concat=True)[0] np.testing.assert_array_equal( res, np.sort(raw, axis=0, order=['size', 'id'])) raw = 
np.random.rand(10, 12)
        # Tail of the in-place sort test: Tensor.sort mutates `a`, so the
        # second sort(axis=0) operates on data already sorted along axis 1.
        a = tensor(raw, chunk_size=(5, 4))
        a.sort(axis=1)
        res = self.executor.execute_tensor(a, concat=True)[0]
        np.testing.assert_array_equal(res, np.sort(raw, axis=1))

        a.sort(axis=0)
        res = self.executor.execute_tensor(a, concat=True)[0]
        np.testing.assert_array_equal(res, np.sort(np.sort(raw, axis=1), axis=0))

    def testPartitionExecution(self):
        """Run mt.partition against np.partition.

        Covers: single-chunk axis=-1, 1-d chunked input, structured dtypes
        with ``order=``, axis=None (flatten), multi-dimensional and 3-d
        inputs, in-place ``Tensor.partition`` and a tensor-valued ``kth``.

        Only the elements at the ``kth`` positions are compared, since
        partition leaves the remaining positions in an unspecified order.
        """
        # only 1 chunk when axis = -1
        raw = np.random.rand(100, 10)
        x = tensor(raw, chunk_size=10)

        px = partition(x, [1, 8])
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res, np.partition(raw, [1, 8]))

        # 1-d chunk
        raw = np.random.rand(100)
        x = tensor(raw, chunk_size=10)

        # fixed seed so kth is deterministic across runs
        kth = np.random.RandomState(0).randint(-100, 100, size=(10, ))
        px = partition(x, kth)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[kth], np.partition(raw, kth)[kth])

        # structured dtype
        raw = np.empty(100, dtype=[('id', np.int32), ('size', np.int64)])
        raw['id'] = np.random.randint(1000, size=100, dtype=np.int32)
        raw['size'] = np.random.randint(1000, size=100, dtype=np.int64)
        x = tensor(raw, chunk_size=10)

        px = partition(x, kth, order=['size', 'id'])
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(
            res[kth], np.partition(raw, kth, order=['size', 'id'])[kth])

        # test flatten case
        raw = np.random.rand(10, 10)
        x = tensor(raw, chunk_size=5)

        px = partition(x, kth, axis=None)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[kth], np.partition(raw, kth, axis=None)[kth])

        # test multi-dimension
        raw = np.random.rand(10, 100)
        x = tensor(raw, chunk_size=(2, 10))

        kth = np.random.RandomState(0).randint(-10, 10, size=(3, ))
        px = partition(x, kth)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth], np.partition(raw, kth)[:, kth])

        # axis length (99) not divisible by the chunk size
        raw = np.random.rand(10, 99)
        x = tensor(raw, chunk_size=(2, 10))

        px = partition(x, kth)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth], np.partition(raw, kth)[:, kth])

        # test 3-d
        raw = np.random.rand(20, 25, 28)
        x = tensor(raw, chunk_size=(10, 5, 7))

        kth = np.random.RandomState(0).randint(-28, 28, size=(3, ))
        px = partition(x, kth)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[:, :, kth], np.partition(raw, kth)[:, :, kth])

        kth = np.random.RandomState(0).randint(-20, 20, size=(3, ))
        px = partition(x, kth, axis=0)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[kth], np.partition(raw, kth, axis=0)[kth])

        kth = np.random.RandomState(0).randint(-25, 25, size=(3, ))
        px = partition(x, kth, axis=1)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth], np.partition(raw, kth, axis=1)[:, kth])

        # test multi-dimension with structured type
        raw = np.empty((10, 100), dtype=[('id', np.int32), ('size', np.int64)])
        raw['id'] = np.random.randint(1000, size=(10, 100), dtype=np.int32)
        raw['size'] = np.random.randint(1000, size=(10, 100), dtype=np.int64)
        x = tensor(raw, chunk_size=(3, 10))

        kth = np.random.RandomState(0).randint(-100, 100, size=(10, ))
        px = partition(x, kth)
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth], np.partition(raw, kth)[:, kth])

        px = partition(x, kth, order=['size', 'id'])
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(
            res[:, kth], np.partition(raw, kth, order=['size', 'id'])[:, kth])

        px = partition(x, kth, order=['size'])
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(
            res[:, kth], np.partition(raw, kth, order=['size'])[:, kth])

        kth = np.random.RandomState(0).randint(-10, 10, size=(5, ))
        px = partition(x, kth, axis=0, order=['size', 'id'])
        res = self.executor.execute_tensor(px, concat=True)[0]
        np.testing.assert_array_equal(
            res[kth], np.partition(raw, kth, axis=0, order=['size', 'id'])[kth])

        # in-place Tensor.partition: the second partition acts on the result
        # of the first, so the expected value is derived from raw_base
        raw = np.random.rand(10, 12)
        a = tensor(raw, chunk_size=(5, 4))
        kth = np.random.RandomState(0).randint(-12, 12, size=(2, ))
        a.partition(kth, axis=1)
        res = self.executor.execute_tensor(a, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth], np.partition(raw, kth, axis=1)[:, kth])

        kth = np.random.RandomState(0).randint(-10, 10, size=(2, ))
        a.partition(kth, axis=0)
        raw_base = res
        res = self.executor.execute_tensor(a, concat=True)[0]
        np.testing.assert_array_equal(res[kth], np.partition(raw_base, kth, axis=0)[kth])

        # test kth which is tensor: positions kth of the partitioned result
        # must match the fully sorted result at the same positions
        raw = np.random.rand(10, 12)
        a = tensor(raw, chunk_size=(3, 5))
        kth = (mt.random.rand(5) * 24 - 12).astype(int)

        px = partition(a, kth)
        sx = sort(a)
        res = self.executor.execute_tensor(px, concat=True)[0]
        kth_res = self.executor.execute_tensor(kth, concat=True)[0]
        sort_res = self.executor.execute_tensor(sx, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth_res], sort_res[:, kth_res])

        # same check with a single chunk
        a = tensor(raw, chunk_size=(10, 12))
        kth = (mt.random.rand(5) * 24 - 12).astype(int)

        px = partition(a, kth)
        sx = sort(a)
        res = self.executor.execute_tensor(px, concat=True)[0]
        kth_res = self.executor.execute_tensor(kth, concat=True)[0]
        sort_res = self.executor.execute_tensor(sx, concat=True)[0]
        np.testing.assert_array_equal(res[:, kth_res], sort_res[:, kth_res])
class Test(unittest.TestCase):
    """Execution tests for mars tensor arithmetic / ufunc operations.

    Each test builds a mars tensor expression, executes it with the
    numpy-backed test executor and compares the result against the
    corresponding numpy (or densified scipy.sparse) computation.
    """

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def _nan_equal(self, a, b):
        """Return True if a equals b, treating NaNs at matching positions
        as equal (np.testing.assert_equal semantics); False otherwise."""
        try:
            np.testing.assert_equal(a, b)
        except AssertionError:
            return False
        return True

    def testBaseExecution(self):
        """Elementwise add on a lazily-created and a concrete tensor."""
        arr = ones((10, 8), chunk_size=2)
        arr2 = arr + 1

        res = self.executor.execute_tensor(arr2)
        # res holds per-chunk results; each chunk is 2x2 here
        self.assertTrue((res[0] == np.ones((2, 2)) + 1).all())

        data = np.random.random((10, 8, 3))
        arr = tensor(data, chunk_size=2)
        arr2 = arr + 1

        res = self.executor.execute_tensor(arr2)
        self.assertTrue((res[0] == data[:2, :2, :2] + 1).all())

    def testBaseOrderExecution(self):
        """Memory order of binary-op results: F input stays F; order='C'
        forces a C-contiguous result."""
        raw = np.asfortranarray(np.random.rand(5, 6))
        arr = tensor(raw, chunk_size=3)

        res = self.executor.execute_tensor(arr + 1, concat=True)[0]
        np.testing.assert_array_equal(res, raw + 1)
        self.assertFalse(res.flags['C_CONTIGUOUS'])
        self.assertTrue(res.flags['F_CONTIGUOUS'])

        res2 = self.executor.execute_tensor(add(arr, 1, order='C'), concat=True)[0]
        np.testing.assert_array_equal(res2, np.add(raw, 1, order='C'))
        self.assertTrue(res2.flags['C_CONTIGUOUS'])
        self.assertFalse(res2.flags['F_CONTIGUOUS'])

    @staticmethod
    def _get_func(op):
        # op is either a numpy function name (str) or a callable
        if isinstance(op, str):
            return getattr(np, op)
        return op

    def testUfuncExecution(self):
        """Sweep all unary/binary ufuncs against their numpy counterparts,
        using float data for the general set and small positive ints for
        the integer-only set (mod, shifts, bit ops, ...)."""
        from mars.tensor.arithmetic import UNARY_UFUNC, BIN_UFUNC, arccosh, \
            invert, mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp

        # ufuncs that need integer (or otherwise special) inputs
        _sp_unary_ufunc = {arccosh, invert}
        _sp_bin_ufunc = {mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp}

        data1 = np.random.random((5, 9, 4))
        data2 = np.random.random((5, 9, 4))
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)

        _new_unary_ufunc = UNARY_UFUNC - _sp_unary_ufunc
        for func in _new_unary_ufunc:
            res_tensor = func(arr1)
            res = self.executor.execute_tensor(res_tensor, concat=True)
            # op._func_name maps the mars op back to the numpy function
            expected = self._get_func(res_tensor.op._func_name)(data1)
            self.assertTrue(np.allclose(res[0], expected))

        _new_bin_ufunc = BIN_UFUNC - _sp_bin_ufunc
        for func in _new_bin_ufunc:
            # tensor-tensor, tensor-scalar and scalar-tensor variants
            res_tensor1 = func(arr1, arr2)
            res_tensor2 = func(arr1, rand)
            res_tensor3 = func(rand, arr1)

            res1 = self.executor.execute_tensor(res_tensor1, concat=True)
            res2 = self.executor.execute_tensor(res_tensor2, concat=True)
            res3 = self.executor.execute_tensor(res_tensor3, concat=True)

            expected1 = self._get_func(res_tensor1.op._func_name)(data1, data2)
            expected2 = self._get_func(res_tensor1.op._func_name)(data1, rand)
            expected3 = self._get_func(res_tensor1.op._func_name)(rand, data1)

            self.assertTrue(np.allclose(res1[0], expected1))
            self.assertTrue(np.allclose(res2[0], expected2))
            self.assertTrue(np.allclose(res3[0], expected3))

        # integer inputs for the special set
        data1 = np.random.randint(2, 10, size=(10, 10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=6)
        arr2 = tensor(data2, chunk_size=6)

        for func in _sp_unary_ufunc:
            res_tensor = func(arr1)
            res = self.executor.execute_tensor(res_tensor, concat=True)
            expected = self._get_func(res_tensor.op._func_name)(data1)
            self.assertTrue(np.allclose(res[0], expected))

        for func in _sp_bin_ufunc:
            res_tensor1 = func(arr1, arr2)
            res_tensor2 = func(arr1, rand)
            res_tensor3 = func(rand, arr1)

            res1 = self.executor.execute_tensor(res_tensor1, concat=True)
            res2 = self.executor.execute_tensor(res_tensor2, concat=True)
            res3 = self.executor.execute_tensor(res_tensor3, concat=True)

            expected1 = self._get_func(res_tensor1.op._func_name)(data1, data2)
            expected2 = self._get_func(res_tensor1.op._func_name)(data1, rand)
            expected3 = self._get_func(res_tensor1.op._func_name)(rand, data1)

            self.assertTrue(np.allclose(res1[0], expected1))
            self.assertTrue(np.allclose(res2[0], expected2))
            self.assertTrue(np.allclose(res3[0], expected3))

    @staticmethod
    def _get_sparse_func(op):
        """Wrap a numpy ufunc (or its name) so sparse arguments are
        densified with toarray() before the ufunc is applied."""
        from mars.lib.sparse.core import issparse

        if isinstance(op, str):
            op = getattr(np, op)

        def func(*args):
            new_args = []
            for arg in args:
                if issparse(arg):
                    new_args.append(arg.toarray())
                else:
                    new_args.append(arg)
            return op(*new_args)

        return func

    @staticmethod
    def toarray(x):
        # densify if sparse; pass dense arrays through unchanged
        if hasattr(x, 'toarray'):
            return x.toarray()
        return x

    @ignore_warning
    def testSparseUfuncExexution(self):
        """Same ufunc sweep as testUfuncExecution, on sparse inputs.

        NOTE(review): method name has a typo ('Exexution'); kept so the
        test id stays stable. Also, the boolean results of _nan_equal
        below are discarded rather than asserted — presumably an
        oversight; confirm before tightening.
        """
        from mars.tensor.arithmetic import UNARY_UFUNC, BIN_UFUNC, arccosh, \
            invert, mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp

        _sp_unary_ufunc = {arccosh, invert}
        _sp_bin_ufunc = {mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp}

        data1 = sps.random(5, 9, density=.1)
        data2 = sps.random(5, 9, density=.2)
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)

        _new_unary_ufunc = UNARY_UFUNC - _sp_unary_ufunc
        for func in _new_unary_ufunc:
            res_tensor = func(arr1)
            res = self.executor.execute_tensor(res_tensor, concat=True)
            expected = self._get_sparse_func(res_tensor.op._func_name)(data1)
            self._nan_equal(self.toarray(res[0]), expected)

        _new_bin_ufunc = BIN_UFUNC - _sp_bin_ufunc
        for func in _new_bin_ufunc:
            res_tensor1 = func(arr1, arr2)
            res_tensor2 = func(arr1, rand)
            res_tensor3 = func(rand, arr1)

            res1 = self.executor.execute_tensor(res_tensor1, concat=True)
            res2 = self.executor.execute_tensor(res_tensor2, concat=True)
            res3 = self.executor.execute_tensor(res_tensor3, concat=True)

            expected1 = self._get_sparse_func(res_tensor1.op._func_name)(data1, data2)
            expected2 = self._get_sparse_func(res_tensor1.op._func_name)(data1, rand)
            expected3 = self._get_sparse_func(res_tensor1.op._func_name)(rand, data1)

            self._nan_equal(self.toarray(res1[0]), expected1)
            self._nan_equal(self.toarray(res2[0]), expected2)
            self._nan_equal(self.toarray(res3[0]), expected3)

        # integer inputs for the special set, converted to sparse tensors
        data1 = np.random.randint(2, 10, size=(10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=3).tosparse()
        arr2 = tensor(data2, chunk_size=3).tosparse()

        for func in _sp_unary_ufunc:
            res_tensor = func(arr1)
            res = self.executor.execute_tensor(res_tensor, concat=True)
            expected = self._get_sparse_func(res_tensor.op._func_name)(data1)
            self._nan_equal(self.toarray(res[0]), expected)

        for func in _sp_bin_ufunc:
            res_tensor1 = func(arr1, arr2)
            res_tensor2 = func(arr1, rand)
            res_tensor3 = func(rand, arr1)

            res1 = self.executor.execute_tensor(res_tensor1, concat=True)
            res2 = self.executor.execute_tensor(res_tensor2, concat=True)
            res3 = self.executor.execute_tensor(res_tensor3, concat=True)

            expected1 = self._get_sparse_func(res_tensor1.op._func_name)(data1, data2)
            expected2 = self._get_sparse_func(res_tensor1.op._func_name)(data1, rand)
            expected3 = self._get_sparse_func(res_tensor1.op._func_name)(rand, data1)

            self._nan_equal(self.toarray(res1[0]), expected1)
            self._nan_equal(self.toarray(res2[0]), expected2)
            self._nan_equal(self.toarray(res3[0]), expected3)

    def testAddWithOutExecution(self):
        """add/truediv with out= targets, casting, and where= masks."""
        data1 = np.random.random((5, 9, 4))
        data2 = np.random.random((9, 4))

        arr1 = tensor(data1.copy(), chunk_size=3)
        arr2 = tensor(data2.copy(), chunk_size=3)

        # out= aliases the first input
        add(arr1, arr2, out=arr1)
        res = self.executor.execute_tensor(arr1, concat=True)[0]
        self.assertTrue(np.array_equal(res, data1 + data2))

        arr1 = tensor(data1.copy(), chunk_size=3)
        arr2 = tensor(data2.copy(), chunk_size=3)

        # out with a narrower dtype requires casting='unsafe'
        arr3 = add(arr1, arr2, out=arr1.astype('i4'), casting='unsafe')
        res = self.executor.execute_tensor(arr3, concat=True)[0]
        np.testing.assert_array_equal(res, (data1 + data2).astype('i4'))

        arr1 = tensor(data1.copy(), chunk_size=3)
        arr2 = tensor(data2.copy(), chunk_size=3)

        arr3 = truediv(arr1, arr2, out=arr1, where=arr2 > .5)
        res = self.executor.execute_tensor(arr3, concat=True)[0]
        self.assertTrue(np.array_equal(
            res, np.true_divide(data1, data2, out=data1.copy(), where=data2 > .5)))

        arr1 = tensor(data1.copy(), chunk_size=4)
        arr2 = tensor(data2.copy(), chunk_size=4)

        # where= without out: only the masked positions are defined
        arr3 = add(arr1, arr2, where=arr1 > .5)
        res = self.executor.execute_tensor(arr3, concat=True)[0]
        expected = np.add(data1, data2, where=data1 > .5)
        self.assertTrue(np.array_equal(res[data1 > .5], expected[data1 > .5]))

        arr1 = tensor(data1.copy(), chunk_size=4)

        arr3 = add(arr1, 1, where=arr1 > .5)
        res = self.executor.execute_tensor(arr3, concat=True)[0]
        expected = np.add(data1, 1, where=data1 > .5)
        self.assertTrue(np.array_equal(res[data1 > .5], expected[data1 > .5]))

        # out= pointing at a slice of the input tensor
        arr1 = tensor(data2.copy(), chunk_size=3)

        arr3 = add(arr1[:5, :], 1, out=arr1[-5:, :])
        res = self.executor.execute_tensor(arr3, concat=True)[0]
        expected = np.add(data2[:5, :], 1)
        self.assertTrue(np.array_equal(res, expected))

    def testArctan2Execution(self):
        """arctan2 sparsity rules: the result is sparse only when the
        first argument is sparse (or zero)."""
        x = tensor(1)  # scalar
        y = arctan2(x, x)

        self.assertFalse(y.issparse())
        result = self.executor.execute_tensor(y, concat=True)[0]
        np.testing.assert_equal(result, np.arctan2(1, 1))

        y = arctan2(0, x)

        self.assertFalse(y.issparse())
        result = self.executor.execute_tensor(y, concat=True)[0]
        np.testing.assert_equal(result, np.arctan2(0, 1))

        raw1 = np.array([[0, 1, 2]])
        raw2 = sps.csr_matrix([[0, 1, 0]])
        y = arctan2(raw1, raw2)

        self.assertFalse(y.issparse())
        result = self.executor.execute_tensor(y, concat=True)[0]
        np.testing.assert_equal(result, np.arctan2(raw1, raw2.A))

        y = arctan2(raw2, raw2)

        self.assertTrue(y.issparse())
        result = self.executor.execute_tensor(y, concat=True)[0]
        np.testing.assert_equal(result, np.arctan2(raw2.A, raw2.A))

        y = arctan2(0, raw2)

        self.assertTrue(y.issparse())
        result = self.executor.execute_tensor(y, concat=True)[0]
        np.testing.assert_equal(result, np.arctan2(0, raw2.A))

    def testFrexpExecution(self):
        """frexp with tuple results, explicit out tensors, and sparse input.

        The two outputs are summed so a single tensor can be compared."""
        data1 = np.random.random((5, 9, 4))

        arr1 = tensor(data1.copy(), chunk_size=3)

        o1, o2 = frexp(arr1)
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.frexp(data1))
        self.assertTrue(np.allclose(res, expected))

        arr1 = tensor(data1.copy(), chunk_size=3)
        o1 = zeros(data1.shape, chunk_size=3)
        o2 = zeros(data1.shape, dtype='i8', chunk_size=3)
        frexp(arr1, o1, o2)
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.frexp(data1))
        self.assertTrue(np.allclose(res, expected))

        data1 = sps.random(5, 9, density=.1)

        arr1 = tensor(data1.copy(), chunk_size=3)

        o1, o2 = frexp(arr1)
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.frexp(data1.toarray()))
        np.testing.assert_equal(res.toarray(), expected)

    def testFrexpOrderExecution(self):
        """frexp with order='F' yields F-contiguous outputs."""
        data1 = np.random.random((5, 9))
        t = tensor(data1, chunk_size=3)

        o1, o2 = frexp(t, order='F')
        res1, res2 = self.executor.execute_tileables([o1, o2])
        expected1, expected2 = np.frexp(data1, order='F')
        np.testing.assert_allclose(res1, expected1)
        self.assertTrue(res1.flags['F_CONTIGUOUS'])
        self.assertFalse(res1.flags['C_CONTIGUOUS'])
        np.testing.assert_allclose(res2, expected2)
        self.assertTrue(res2.flags['F_CONTIGUOUS'])
        self.assertFalse(res2.flags['C_CONTIGUOUS'])

    def testModfExecution(self):
        """modf with tuple results, a raw-list input, explicit out tensors
        and sparse input; outputs summed for comparison."""
        data1 = np.random.random((5, 9))

        arr1 = tensor(data1.copy(), chunk_size=3)

        o1, o2 = modf(arr1)
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.modf(data1))
        self.assertTrue(np.allclose(res, expected))

        o1, o2 = modf([0, 3.5])
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.modf([0, 3.5]))
        self.assertTrue(np.allclose(res, expected))

        arr1 = tensor(data1.copy(), chunk_size=3)
        o1 = zeros(data1.shape, chunk_size=3)
        o2 = zeros(data1.shape, chunk_size=3)
        modf(arr1, o1, o2)
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.modf(data1))
        self.assertTrue(np.allclose(res, expected))

        data1 = sps.random(5, 9, density=.1)

        arr1 = tensor(data1.copy(), chunk_size=3)

        o1, o2 = modf(arr1)
        o = o1 + o2

        res = self.executor.execute_tensor(o, concat=True)[0]
        expected = sum(np.modf(data1.toarray()))
        np.testing.assert_equal(res.toarray(), expected)

    def testModfOrderExecution(self):
        """modf with order='F' yields F-contiguous outputs."""
        data1 = np.random.random((5, 9))
        t = tensor(data1, chunk_size=3)

        o1, o2 = modf(t, order='F')
        res1, res2 = self.executor.execute_tileables([o1, o2])
        expected1, expected2 = np.modf(data1, order='F')
        np.testing.assert_allclose(res1, expected1)
        self.assertTrue(res1.flags['F_CONTIGUOUS'])
        self.assertFalse(res1.flags['C_CONTIGUOUS'])
        np.testing.assert_allclose(res2, expected2)
        self.assertTrue(res2.flags['F_CONTIGUOUS'])
        self.assertFalse(res2.flags['C_CONTIGUOUS'])

    def testClipExecution(self):
        """clip with scalar, out=, tensor bounds, list bounds under a
        chunk_size option, and sparse bounds."""
        a_data = np.arange(10)

        a = tensor(a_data.copy(), chunk_size=3)

        b = clip(a, 1, 8)

        res = self.executor.execute_tensor(b, concat=True)[0]
        expected = np.clip(a_data, 1, 8)
        self.assertTrue(np.array_equal(res, expected))

        a = tensor(a_data.copy(), chunk_size=3)
        clip(a, 3, 6, out=a)

        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.clip(a_data, 3, 6)
        self.assertTrue(np.array_equal(res, expected))

        a = tensor(a_data.copy(), chunk_size=3)
        a_min_data = np.random.randint(1, 10, size=(10,))
        a_max_data = np.random.randint(1, 10, size=(10,))
        a_min = tensor(a_min_data)
        a_max = tensor(a_max_data)
        clip(a, a_min, a_max, out=a)

        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.clip(a_data, a_min_data, a_max_data)
        self.assertTrue(np.array_equal(res, expected))

        with option_context() as options:
            options.chunk_size = 3

            a = tensor(a_data.copy(), chunk_size=3)
            b = clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)

            res = self.executor.execute_tensor(b, concat=True)[0]
            expected = np.clip(a_data, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
            self.assertTrue(np.array_equal(res, expected))

        # test sparse clip
        a_data = sps.csr_matrix([[0, 2, 8], [0, 0, -1]])
        a = tensor(a_data, chunk_size=3)
        b_data = sps.csr_matrix([[0, 3, 0], [1, 0, -2]])

        c = clip(a, b_data, 4)

        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.clip(a_data.toarray(), b_data.toarray(), 4)
        self.assertTrue(np.array_equal(res.toarray(), expected))

    def testClipOrderExecution(self):
        """clip keeps the F order of an F-contiguous input."""
        a_data = np.asfortranarray(np.random.rand(4, 8))

        a = tensor(a_data, chunk_size=3)

        b = clip(a, 0.2, 0.8)

        res = self.executor.execute_tensor(b, concat=True)[0]
        expected = np.clip(a_data, 0.2, 0.8)

        np.testing.assert_allclose(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

    def testAroundExecution(self):
        """Tensor.round against np.around, dense and sparse."""
        data = np.random.randn(10, 20)
        x = tensor(data, chunk_size=3)

        t = x.round(2)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.around(data, decimals=2)

        np.testing.assert_allclose(res, expected)

        data = sps.random(10, 20, density=.2)
        x = tensor(data, chunk_size=3)

        t = x.round(2)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.around(data.toarray(), decimals=2)

        np.testing.assert_allclose(res.toarray(), expected)

    def testAroundOrderExecution(self):
        """Tensor.round keeps the F order of an F-contiguous input."""
        data = np.asfortranarray(np.random.rand(10, 20))
        x = tensor(data, chunk_size=3)

        t = x.round(2)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.around(data, decimals=2)

        np.testing.assert_allclose(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

    def testCosOrderExecution(self):
        """cos keeps the input order by default; order='C' overrides it."""
        data = np.asfortranarray(np.random.rand(3, 5))
        x = tensor(data, chunk_size=2)

        t = cos(x)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, np.cos(data))
        self.assertFalse(res.flags['C_CONTIGUOUS'])
        self.assertTrue(res.flags['F_CONTIGUOUS'])

        t2 = cos(x, order='C')

        res2 = self.executor.execute_tensor(t2, concat=True)[0]
        np.testing.assert_allclose(res2, np.cos(data, order='C'))
        self.assertTrue(res2.flags['C_CONTIGUOUS'])
        self.assertFalse(res2.flags['F_CONTIGUOUS'])

    def testIsCloseExecution(self):
        """isclose with tolerance, equal_nan, scalar operands and sparse
        inputs, against np.isclose."""
        data = np.array([1.05, 1.0, 1.01, np.nan])
        data2 = np.array([1.04, 1.0, 1.03, np.nan])

        x = tensor(data, chunk_size=2)
        y = tensor(data2, chunk_size=3)

        z = isclose(x, y, atol=.01)

        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(data, data2, atol=.01)

        np.testing.assert_equal(res, expected)

        z = isclose(x, y, atol=.01, equal_nan=True)

        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(data, data2, atol=.01, equal_nan=True)

        np.testing.assert_equal(res, expected)

        # test tensor with scalar
        z = isclose(x, 1.0, atol=.01)
        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(data, 1.0, atol=.01)
        np.testing.assert_equal(res, expected)

        z = isclose(1.0, y, atol=.01)
        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(1.0, data2, atol=.01)
        np.testing.assert_equal(res, expected)

        z = isclose(1.0, 2.0, atol=.01)
        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(1.0, 2.0, atol=.01)
        np.testing.assert_equal(res, expected)

        # test sparse
        data = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        data2 = sps.csr_matrix(np.array([0, 1.0, 1.03, np.nan]))

        x = tensor(data, chunk_size=2)
        y = tensor(data2, chunk_size=3)

        z = isclose(x, y, atol=.01)

        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(data.toarray(), data2.toarray(), atol=.01)

        np.testing.assert_equal(res, expected)

        z = isclose(x, y, atol=.01, equal_nan=True)

        res = self.executor.execute_tensor(z, concat=True)[0]
        expected = np.isclose(data.toarray(), data2.toarray(), atol=.01,
                              equal_nan=True)

        np.testing.assert_equal(res, expected)

    @ignore_warning
    def testDtypeExecution(self):
        """Explicit dtype= on truediv, including inf on divide-by-zero and
        FloatingPointError under np.errstate(divide='raise')."""
        a = ones((10, 20), dtype='f4', chunk_size=5)

        c = truediv(a, 2, dtype='f8')

        res = self.executor.execute_tensor(c, concat=True)[0]
        self.assertEqual(res.dtype, np.float64)

        c = truediv(a, 0, dtype='f8')
        res = self.executor.execute_tensor(c, concat=True)[0]
        self.assertTrue(np.isinf(res[0, 0]))

        with self.assertRaises(FloatingPointError):
            with np.errstate(divide='raise'):
                c = truediv(a, 0, dtype='f8')
                _ = self.executor.execute_tensor(c, concat=True)[0]  # noqa: F841

    def testSetGetRealExecution(self):
        """Read and assign Tensor.real, on dense and sparse complex data."""
        a_data = np.array([1+2j, 3+4j, 5+6j])
        a = tensor(a_data, chunk_size=2)

        res = self.executor.execute_tensor(a.real, concat=True)[0]
        expected = a_data.real

        np.testing.assert_equal(res, expected)

        a.real = 9

        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = a_data.copy()
        expected.real = 9

        np.testing.assert_equal(res, expected)

        a.real = np.array([9, 8, 7])

        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = a_data.copy()
        expected.real = np.array([9, 8, 7])

        np.testing.assert_equal(res, expected)

        # test sparse
        a_data = np.array([[1+2j, 3+4j, 0], [0, 0, 0]])
        a = tensor(sps.csr_matrix(a_data))

        res = self.executor.execute_tensor(a.real, concat=True)[0].toarray()
        expected = a_data.real

        np.testing.assert_equal(res, expected)

        a.real = 9

        res = self.executor.execute_tensor(a, concat=True)[0].toarray()
        expected = a_data.copy()
        expected.real = 9

        np.testing.assert_equal(res, expected)

        a.real = np.array([9, 8, 7])

        res = self.executor.execute_tensor(a, concat=True)[0].toarray()
        expected = a_data.copy()
        expected.real = np.array([9, 8, 7])

        np.testing.assert_equal(res, expected)

    def testSetGetImagExecution(self):
        """Read and assign Tensor.imag, on dense and sparse complex data."""
        a_data = np.array([1+2j, 3+4j, 5+6j])
        a = tensor(a_data, chunk_size=2)

        res = self.executor.execute_tensor(a.imag, concat=True)[0]
        expected = a_data.imag

        np.testing.assert_equal(res, expected)

        a.imag = 9

        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = a_data.copy()
        expected.imag = 9

        np.testing.assert_equal(res, expected)

        a.imag = np.array([9, 8, 7])

        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = a_data.copy()
        expected.imag = np.array([9, 8, 7])

        np.testing.assert_equal(res, expected)

        # test sparse
        a_data = np.array([[1+2j, 3+4j, 0], [0, 0, 0]])
        a = tensor(sps.csr_matrix(a_data))

        res = self.executor.execute_tensor(a.imag, concat=True)[0].toarray()
        expected = a_data.imag

        np.testing.assert_equal(res, expected)

        a.imag = 9

        res = self.executor.execute_tensor(a, concat=True)[0].toarray()
        expected = a_data.copy()
        expected.imag = 9

        np.testing.assert_equal(res, expected)

        a.imag = np.array([9, 8, 7])

        res = self.executor.execute_tensor(a, concat=True)[0].toarray()
        expected = a_data.copy()
        expected.imag = np.array([9, 8, 7])

        np.testing.assert_equal(res, expected)

    @require_cupy
    def testCupyExecution(self):
        """Binary and unary ops on gpu=True tensors; .get() copies the
        cupy result back to host for comparison."""
        a_data = np.random.rand(10, 10)
        b_data = np.random.rand(10, 10)

        a = tensor(a_data, gpu=True, chunk_size=3)
        b = tensor(b_data, gpu=True, chunk_size=3)
        res_binary = self.executor.execute_tensor((a + b), concat=True)[0]
        np.testing.assert_array_equal(res_binary.get(), (a_data + b_data))

        res_unary = self.executor.execute_tensor(cos(a), concat=True)[0]
        np.testing.assert_array_almost_equal(res_unary.get(), np.cos(a_data))
def testTensordotExecution(self):
    """tensordot / dot / inner execution against numpy, plus a mock-run
    size estimation check.

    Per-chunk results (no concat) are compared slice by slice against the
    corresponding numpy result.
    """
    size_executor = ExecutorForTest(
        sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

    a_data = np.arange(60).reshape(3, 4, 5)
    a = tensor(a_data, chunk_size=2)
    b_data = np.arange(24).reshape(4, 3, 2)
    b = tensor(b_data, chunk_size=2)

    axes = ([1, 0], [0, 1])
    c = tensordot(a, b, axes=axes)
    size_res = size_executor.execute_tensor(c, mock=True)
    # mock run yields per-chunk size pairs; both components should sum to
    # the result tensor's nbytes (presumably store/calc sizes — confirm)
    self.assertEqual(sum(s[0] for s in size_res), c.nbytes)
    self.assertEqual(sum(s[1] for s in size_res), c.nbytes)

    res = self.executor.execute_tensor(c)
    expected = np.tensordot(a_data, b_data, axes=axes)
    self.assertTrue(np.array_equal(res[0], expected[:2, :]))
    self.assertTrue(np.array_equal(res[1], expected[2:4, :]))
    self.assertTrue(np.array_equal(res[2], expected[4:, :]))

    a = ones((1000, 2000), chunk_size=500)
    b = ones((2000, 100), chunk_size=500)
    c = dot(a, b)
    res = self.executor.execute_tensor(c)
    expected = np.dot(np.ones((1000, 2000)), np.ones((2000, 100)))
    self.assertEqual(len(res), 2)
    self.assertTrue(np.array_equal(res[0], expected[:500, :]))
    self.assertTrue(np.array_equal(res[1], expected[500:, :]))

    a = ones((10, 8), chunk_size=2)
    b = ones((8, 10), chunk_size=2)
    c = a.dot(b)
    res = self.executor.execute_tensor(c)
    self.assertEqual(len(res), 25)
    for r in res:
        # every 2x2 output chunk is the all-8 matrix (inner dim is 8)
        self.assertTrue(np.array_equal(r, np.tile([8], [2, 2])))

    a = ones((500, 500), chunk_size=500)
    b = ones((500, 100), chunk_size=500)
    c = a.dot(b)
    res = self.executor.execute_tensor(c)
    self.assertTrue(np.array_equal(res[0], np.tile([500], [500, 100])))

    raw_a = np.random.random((100, 200, 50))
    raw_b = np.random.random((200, 10, 100))
    a = tensor(raw_a, chunk_size=50)
    b = tensor(raw_b, chunk_size=33)
    c = tensordot(a, b, axes=((0, 1), (2, 0)))
    res = self.executor.execute_tensor(c, concat=True)
    # compare with the axes the op actually recorded
    expected = np.tensordot(raw_a, raw_b, axes=(c.op.a_axes, c.op.b_axes))
    self.assertTrue(np.allclose(res[0], expected))

    a = ones((1000, 2000), chunk_size=500)
    b = ones((100, 2000), chunk_size=500)
    c = inner(a, b)
    res = self.executor.execute_tensor(c)
    expected = np.inner(np.ones((1000, 2000)), np.ones((100, 2000)))
    self.assertEqual(len(res), 2)
    self.assertTrue(np.array_equal(res[0], expected[:500, :]))
    self.assertTrue(np.array_equal(res[1], expected[500:, :]))

    a = ones((100, 100), chunk_size=30)
    b = ones((100, 100), chunk_size=30)
    c = a.dot(b)
    res = self.executor.execute_tensor(c, concat=True)[0]
    np.testing.assert_array_equal(res, np.ones((100, 100)) * 100)
class Test(TestBase):
    """Execution tests for tensor creation routines.

    Covers sparse creation, zeros/empty/full (+ *_like and memory order),
    arange, diag/diagflat, eye, linspace, meshgrid, indices, triu/tril,
    nd_grid, and ingestion from TileDB, pandas/Mars DataFrame, HDF5 and Zarr.
    ``execute_tensor`` returns one ndarray per chunk; with ``concat=True``
    the chunks are concatenated and returned as a single-element list.
    """

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    def testCreateSparseExecution(self):
        """Create tensors from scipy sparse matrices and via ``tosparse``."""
        mat = sps.csr_matrix([[0, 0, 2], [2, 0, 0]])
        t = tensor(mat, dtype='f8', chunk_size=2)

        res = self.executor.execute_tensor(t)
        self.assertIsInstance(res[0], SparseNDArray)
        self.assertEqual(res[0].dtype, np.float64)
        # chunk_size=2 splits the 3 columns into [:2] and [2:]
        np.testing.assert_array_equal(res[0].toarray(), mat[..., :2].toarray())
        np.testing.assert_array_equal(res[1].toarray(), mat[..., 2:].toarray())

        # ones_like on a sparse tensor keeps sparsity, changes dtype
        t2 = ones_like(t, dtype='f4')
        res = self.executor.execute_tensor(t2)
        expected = sps.csr_matrix([[0, 0, 1], [1, 0, 0]])
        self.assertIsInstance(res[0], SparseNDArray)
        self.assertEqual(res[0].dtype, np.float32)
        np.testing.assert_array_equal(res[0].toarray(), expected[..., :2].toarray())
        np.testing.assert_array_equal(res[1].toarray(), expected[..., 2:].toarray())

        # dense tensor converted via .tosparse() keeps the integer dtype
        t3 = tensor(np.array([[0, 0, 2], [2, 0, 0]]), chunk_size=2).tosparse()
        res = self.executor.execute_tensor(t3)
        self.assertIsInstance(res[0], SparseNDArray)
        self.assertEqual(res[0].dtype, np.int_)
        np.testing.assert_array_equal(res[0].toarray(), mat[..., :2].toarray())
        np.testing.assert_array_equal(res[1].toarray(), mat[..., 2:].toarray())

    def testZerosExecution(self):
        """zeros / zeros_like, including sparse and Fortran-order variants."""
        t = zeros((20, 30), dtype='i8', chunk_size=5)
        res = self.executor.execute_tensor(t, concat=True)
        np.testing.assert_array_equal(res[0], np.zeros((20, 30), dtype='i8'))
        self.assertEqual(res[0].dtype, np.int64)

        t2 = zeros_like(t)
        res = self.executor.execute_tensor(t2, concat=True)
        np.testing.assert_array_equal(res[0], np.zeros((20, 30), dtype='i8'))
        self.assertEqual(res[0].dtype, np.int64)

        # sparse zeros: result holds no stored elements
        t = zeros((20, 30), dtype='i4', chunk_size=5, sparse=True)
        res = self.executor.execute_tensor(t, concat=True)
        self.assertEqual(res[0].nnz, 0)

        # order='F' must survive execution and concatenation
        t = zeros((20, 30), dtype='i8', chunk_size=6, order='F')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.zeros((20, 30), dtype='i8', order='F')
        self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

    def testEmptyExecution(self):
        """empty / empty_like: only shape, dtype and order are checkable."""
        t = empty((20, 30), dtype='i8', chunk_size=5)
        res = self.executor.execute_tensor(t, concat=True)
        self.assertEqual(res[0].shape, (20, 30))
        self.assertEqual(res[0].dtype, np.int64)

        # default dtype is float64
        t = empty((20, 30), chunk_size=5)
        res = self.executor.execute_tensor(t, concat=True)
        self.assertEqual(res[0].shape, (20, 30))
        self.assertEqual(res[0].dtype, np.float64)

        t2 = empty_like(t)
        res = self.executor.execute_tensor(t2, concat=True)
        self.assertEqual(res[0].shape, (20, 30))
        self.assertEqual(res[0].dtype, np.float64)

        t = empty((20, 30), dtype='i8', chunk_size=5, order='F')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.empty((20, 30), dtype='i8', order='F')
        self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

    def testFullExecution(self):
        """full / full_like with scalar and broadcastable fill values."""
        t = full((2, 2), 1, dtype='f4', chunk_size=1)
        res = self.executor.execute_tensor(t, concat=True)
        np.testing.assert_array_equal(res[0], np.full((2, 2), 1, dtype='f4'))

        # fill value broadcast across rows, like np.full
        t = full((2, 2), [1, 2], dtype='f8', chunk_size=1)
        res = self.executor.execute_tensor(t, concat=True)
        np.testing.assert_array_equal(res[0], np.full((2, 2), [1, 2], dtype='f8'))

        t = full((2, 2), 1, dtype='f4', chunk_size=1, order='F')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.full((2, 2), 1, dtype='f4', order='F')
        self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

        t2 = full_like(t, 10, order='F')
        res = self.executor.execute_tensor(t2, concat=True)[0]
        expected = np.full((2, 2), 10, dtype='f4', order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

    def testArangeExecution(self):
        """arange with int, float and datetime64 steps."""
        t = arange(1, 20, 3, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.array_equal(res, np.arange(1, 20, 3)))

        # float step: allclose, not exact equality
        t = arange(1, 20, .3, chunk_size=4)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.arange(1, 20, .3)
        self.assertTrue(np.allclose(res, expected))

        t = arange(1.0, 1.8, .3, chunk_size=4)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.arange(1.0, 1.8, .3)
        self.assertTrue(np.allclose(res, expected))

        t = arange('1066-10-13', '1066-10-31', dtype=np.datetime64, chunk_size=3)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.arange('1066-10-13', '1066-10-31', dtype=np.datetime64)
        self.assertTrue(np.array_equal(res, expected))

    def testDiagExecution(self):
        """diag: extract from 2-d (square/rectangular, dense/sparse) and
        build from 1-d, across positive and negative ``k`` offsets."""
        # 2-d 6 * 6
        a = arange(36, chunk_size=2).reshape(6, 6)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(6, 6))
        np.testing.assert_equal(res, expected)

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(6, 6), k=1)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(6, 6), k=3)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(6, 6), k=-2)
        np.testing.assert_equal(res, expected)

        # k=-5 leaves a single-element diagonal, so no concat needed
        d = diag(a, k=-5)
        res = self.executor.execute_tensor(d)[0]
        expected = np.diag(np.arange(36).reshape(6, 6), k=-5)
        np.testing.assert_equal(res, expected)

        # 2-d 6 * 6 sparse, no tensor
        a = sps.rand(6, 6, density=.1)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray())
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray(), k=1)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray(), k=3)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray(), k=-2)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-5)
        res = self.executor.execute_tensor(d)[0]
        expected = np.diag(a.toarray(), k=-5)
        np.testing.assert_equal(res.toarray(), expected)

        # 2-d 6 * 6 sparse, from tensor
        raw_a = sps.rand(6, 6, density=.1)
        a = tensor(raw_a, chunk_size=2)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray())
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray(), k=1)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray(), k=3)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray(), k=-2)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-5)
        res = self.executor.execute_tensor(d)[0]
        expected = np.diag(raw_a.toarray(), k=-5)
        np.testing.assert_equal(res.toarray(), expected)

        # 2-d 4 * 9
        a = arange(36, chunk_size=2).reshape(4, 9)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(4, 9))
        np.testing.assert_equal(res, expected)

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(4, 9), k=1)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(4, 9), k=3)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(36).reshape(4, 9), k=-2)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=-3)
        res = self.executor.execute_tensor(d)[0]
        expected = np.diag(np.arange(36).reshape(4, 9), k=-3)
        np.testing.assert_equal(res, expected)

        # 2-d 4 * 9 sparse, no tensor
        a = sps.rand(4, 9, density=.1)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray())
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray(), k=1)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray(), k=3)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(a.toarray(), k=-2)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-3)
        res = self.executor.execute_tensor(d)[0]
        expected = np.diag(a.toarray(), k=-3)
        np.testing.assert_equal(res.toarray(), expected)

        # 2-d 4 * 9 sparse, from tensor
        raw_a = sps.rand(4, 9, density=.1)
        a = tensor(raw_a, chunk_size=2)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray())
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray(), k=1)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray(), k=3)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(raw_a.toarray(), k=-2)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-3)
        res = self.executor.execute_tensor(d)[0]
        expected = np.diag(raw_a.toarray(), k=-3)
        np.testing.assert_equal(res.toarray(), expected)

        # 1-d
        a = arange(5, chunk_size=2)
        d = diag(a)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5))
        np.testing.assert_equal(res, expected)
        # the constructed matrix is dense C-order
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])

        d = diag(a, k=1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=1)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=3)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=-2)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=-2)
        np.testing.assert_equal(res, expected)

        d = diag(a, k=-3)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=-3)
        np.testing.assert_equal(res, expected)

        # 1-d input with sparse=True builds a sparse diagonal matrix
        d = diag(a, sparse=True)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5))
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=1, sparse=True)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=2, sparse=True)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-2, sparse=True)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=-2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        d = diag(a, k=-3, sparse=True)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.diag(np.arange(5), k=-3)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

    def testDiagflatExecution(self):
        """diagflat from raw lists and from tensors, with and without ``k``."""
        a = diagflat([[1, 2], [3, 4]], chunk_size=1)
        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.diagflat([[1, 2], [3, 4]])
        np.testing.assert_equal(res, expected)

        d = tensor([[1, 2], [3, 4]], chunk_size=1)
        a = diagflat(d)
        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.diagflat([[1, 2], [3, 4]])
        np.testing.assert_equal(res, expected)

        a = diagflat([1, 2], 1, chunk_size=1)
        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.diagflat([1, 2], 1)
        np.testing.assert_equal(res, expected)

        d = tensor([[1, 2]], chunk_size=1)
        a = diagflat(d, 1)
        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.diagflat([1, 2], 1)
        np.testing.assert_equal(res, expected)

    def testEyeExecution(self):
        """eye across offsets, rectangular ``M``, dtype, sparse and order."""
        t = eye(5, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5)
        np.testing.assert_equal(res, expected)

        t = eye(5, k=1, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=1)
        np.testing.assert_equal(res, expected)

        t = eye(5, k=2, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=2)
        np.testing.assert_equal(res, expected)

        t = eye(5, k=-1, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=-1)
        np.testing.assert_equal(res, expected)

        t = eye(5, k=-3, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=-3)
        np.testing.assert_equal(res, expected)

        t = eye(5, M=3, k=1, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=3, k=1)
        np.testing.assert_equal(res, expected)

        t = eye(5, M=3, k=-3, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=3, k=-3)
        np.testing.assert_equal(res, expected)

        t = eye(5, M=7, k=1, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=7, k=1)
        np.testing.assert_equal(res, expected)

        t = eye(5, M=8, k=-3, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=8, k=-3)
        np.testing.assert_equal(res, expected)

        t = eye(2, dtype=int)
        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertEqual(res.dtype, np.int_)

        # test sparse
        t = eye(5, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, k=1, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, k=2, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, k=-1, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=-1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, k=-3, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, k=-3)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, M=3, k=1, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=3, k=1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, M=3, k=-3, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=3, k=-3)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, M=7, k=1, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=7, k=1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        t = eye(5, M=8, k=-3, sparse=True, chunk_size=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.eye(5, M=8, k=-3)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res.toarray(), expected)

        # NOTE: asserts C-order even with order='F' — presumably intentional
        # for the concatenated result of this op; confirm against eye's impl
        t = eye(5, M=9, k=-3, chunk_size=2, order='F')
        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])

    def testLinspaceExecution(self):
        """linspace with endpoint on/off and dtype coercion."""
        a = linspace(2.0, 9.0, num=11, chunk_size=3)
        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.linspace(2.0, 9.0, num=11)
        np.testing.assert_allclose(res, expected)

        a = linspace(2.0, 9.0, num=11, endpoint=False, chunk_size=3)
        res = self.executor.execute_tensor(a, concat=True)[0]
        expected = np.linspace(2.0, 9.0, num=11, endpoint=False)
        np.testing.assert_allclose(res, expected)

        a = linspace(2.0, 9.0, num=11, chunk_size=3, dtype=int)
        res = self.executor.execute_tensor(a, concat=True)[0]
        self.assertEqual(res.dtype, np.int_)

    def testMeshgridExecution(self):
        """meshgrid in 'xy'/'ij' indexing, dense and sparse output."""
        a = arange(5, chunk_size=2)
        b = arange(6, 12, chunk_size=3)
        c = arange(12, 19, chunk_size=4)

        A, B, C = meshgrid(a, b, c)
        A_res = self.executor.execute_tensor(A, concat=True)[0]
        A_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19))[0]
        np.testing.assert_equal(A_res, A_expected)
        B_res = self.executor.execute_tensor(B, concat=True)[0]
        B_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19))[1]
        np.testing.assert_equal(B_res, B_expected)
        C_res = self.executor.execute_tensor(C, concat=True)[0]
        C_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19))[2]
        np.testing.assert_equal(C_res, C_expected)

        A, B, C = meshgrid(a, b, c, indexing='ij')
        A_res = self.executor.execute_tensor(A, concat=True)[0]
        A_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), indexing='ij')[0]
        np.testing.assert_equal(A_res, A_expected)
        B_res = self.executor.execute_tensor(B, concat=True)[0]
        B_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), indexing='ij')[1]
        np.testing.assert_equal(B_res, B_expected)
        C_res = self.executor.execute_tensor(C, concat=True)[0]
        C_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), indexing='ij')[2]
        np.testing.assert_equal(C_res, C_expected)

        A, B, C = meshgrid(a, b, c, sparse=True)
        A_res = self.executor.execute_tensor(A, concat=True)[0]
        A_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), sparse=True)[0]
        np.testing.assert_equal(A_res, A_expected)
        B_res = self.executor.execute_tensor(B, concat=True)[0]
        B_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), sparse=True)[1]
        np.testing.assert_equal(B_res, B_expected)
        C_res = self.executor.execute_tensor(C, concat=True)[0]
        C_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), sparse=True)[2]
        np.testing.assert_equal(C_res, C_expected)

        A, B, C = meshgrid(a, b, c, indexing='ij', sparse=True)
        A_res = self.executor.execute_tensor(A, concat=True)[0]
        A_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), indexing='ij', sparse=True)[0]
        np.testing.assert_equal(A_res, A_expected)
        B_res = self.executor.execute_tensor(B, concat=True)[0]
        B_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), indexing='ij', sparse=True)[1]
        np.testing.assert_equal(B_res, B_expected)
        C_res = self.executor.execute_tensor(C, concat=True)[0]
        C_expected = np.meshgrid(np.arange(5), np.arange(6, 12), np.arange(12, 19), indexing='ij', sparse=True)[2]
        np.testing.assert_equal(C_res, C_expected)

    def testIndicesExecution(self):
        """indices: full grid plus indexing into individual axes."""
        grid = indices((2, 3), chunk_size=1)
        res = self.executor.execute_tensor(grid, concat=True)[0]
        expected = np.indices((2, 3))
        np.testing.assert_equal(res, expected)

        res = self.executor.execute_tensor(grid[0], concat=True)[0]
        np.testing.assert_equal(res, expected[0])

        res = self.executor.execute_tensor(grid[1], concat=True)[0]
        np.testing.assert_equal(res, expected[1])

    def testTriuExecution(self):
        """triu on 3-d dense, 2-d sparse, and Fortran-ordered input."""
        a = arange(24, chunk_size=2).reshape(2, 3, 4)
        t = triu(a)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(24).reshape(2, 3, 4))
        np.testing.assert_equal(res, expected)

        t = triu(a, k=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(24).reshape(2, 3, 4), k=1)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(24).reshape(2, 3, 4), k=2)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=-1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(24).reshape(2, 3, 4), k=-1)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=-2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(24).reshape(2, 3, 4), k=-2)
        np.testing.assert_equal(res, expected)

        # test sparse
        a = arange(12, chunk_size=2).reshape(3, 4).tosparse()
        t = triu(a)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(12).reshape(3, 4))
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(12).reshape(3, 4), k=1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(12).reshape(3, 4), k=2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=-1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(12).reshape(3, 4), k=-1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = triu(a, k=-2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(np.arange(12).reshape(3, 4), k=-2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        # Fortran-ordered input: contiguity flags must match NumPy's result
        raw = np.asfortranarray(np.random.rand(10, 7))
        a = tensor(raw, chunk_size=3)
        t = triu(a, k=-2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(raw, k=-2)
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

    def testTrilExecution(self):
        """tril on 3-d dense and 2-d sparse input across offsets."""
        a = arange(24, chunk_size=2).reshape(2, 3, 4)
        t = tril(a)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(24).reshape(2, 3, 4))
        np.testing.assert_equal(res, expected)

        t = tril(a, k=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(24).reshape(2, 3, 4), k=1)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(24).reshape(2, 3, 4), k=2)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=-1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(24).reshape(2, 3, 4), k=-1)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=-2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(24).reshape(2, 3, 4), k=-2)
        np.testing.assert_equal(res, expected)

        a = arange(12, chunk_size=2).reshape(3, 4).tosparse()
        t = tril(a)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(12).reshape(3, 4))
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(12).reshape(3, 4), k=1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(12).reshape(3, 4), k=2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=-1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(12).reshape(3, 4), k=-1)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

        t = tril(a, k=-2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.tril(np.arange(12).reshape(3, 4), k=-2)
        self.assertIsInstance(res, SparseNDArray)
        np.testing.assert_equal(res, expected)

    def testIndexTrickExecution(self):
        """nd_grid (mgrid/ogrid equivalents), dense and sparse."""
        mgrid = nd_grid()
        t = mgrid[0:5, 0:5]
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.lib.index_tricks.nd_grid()[0:5, 0:5]
        np.testing.assert_equal(res, expected)

        # complex step means "number of points", like np.mgrid
        t = mgrid[-1:1:5j]
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.lib.index_tricks.nd_grid()[-1:1:5j]
        np.testing.assert_equal(res, expected)

        # sparse grid returns one open tensor per dimension
        ogrid = nd_grid(sparse=True)
        t = ogrid[0:5, 0:5]
        res = [self.executor.execute_tensor(o, concat=True)[0] for o in t]
        expected = np.lib.index_tricks.nd_grid(sparse=True)[0:5, 0:5]
        [np.testing.assert_equal(r, e) for r, e in zip(res, expected)]

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testReadTileDBExecution(self):
        """fromtiledb on dense, 2-d sparse, 1-d sparse and column-major
        TileDB arrays; each case uses a fresh temp dir cleaned in finally."""
        ctx = tiledb.Ctx()

        tempdir = tempfile.mkdtemp()
        try:
            # create TileDB dense array
            dom = tiledb.Domain(
                tiledb.Dim(ctx=ctx, domain=(1, 100), tile=30, dtype=np.int32),
                tiledb.Dim(ctx=ctx, domain=(0, 90), tile=22, dtype=np.int32),
                tiledb.Dim(ctx=ctx, domain=(0, 9), tile=8, dtype=np.int32),
                ctx=ctx,
            )
            schema = tiledb.ArraySchema(
                ctx=ctx, domain=dom, sparse=False,
                attrs=[tiledb.Attr(ctx=ctx, dtype=np.float64)])
            tiledb.DenseArray.create(tempdir, schema)

            expected = np.random.rand(100, 91, 10)
            with tiledb.DenseArray(uri=tempdir, ctx=ctx, mode='w') as arr:
                arr.write_direct(expected)

            a = fromtiledb(tempdir, ctx=ctx)
            result = self.executor.execute_tensor(a, concat=True)[0]
            np.testing.assert_allclose(expected, result)
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # create 2-d TileDB sparse array
            dom = tiledb.Domain(
                tiledb.Dim(ctx=ctx, domain=(0, 99), tile=30, dtype=np.int32),
                tiledb.Dim(ctx=ctx, domain=(2, 11), tile=8, dtype=np.int32),
                ctx=ctx,
            )
            schema = tiledb.ArraySchema(
                ctx=ctx, domain=dom, sparse=True,
                attrs=[tiledb.Attr(ctx=ctx, dtype=np.float64)])
            tiledb.SparseArray.create(tempdir, schema)

            expected = sps.rand(100, 10, density=0.01)
            with tiledb.SparseArray(uri=tempdir, ctx=ctx, mode='w') as arr:
                # second dim's domain starts at 2, so shift the columns
                I, J = expected.row, expected.col + 2
                arr[I, J] = {arr.attr(0).name: expected.data}

            a = fromtiledb(tempdir, ctx=ctx)
            result = self.executor.execute_tensor(a, concat=True)[0]
            np.testing.assert_allclose(expected.toarray(), result.toarray())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # create 1-d TileDB sparse array
            dom = tiledb.Domain(
                tiledb.Dim(ctx=ctx, domain=(1, 100), tile=30, dtype=np.int32),
                ctx=ctx,
            )
            schema = tiledb.ArraySchema(
                ctx=ctx, domain=dom, sparse=True,
                attrs=[tiledb.Attr(ctx=ctx, dtype=np.float64)])
            tiledb.SparseArray.create(tempdir, schema)

            expected = sps.rand(1, 100, density=0.05)
            with tiledb.SparseArray(uri=tempdir, ctx=ctx, mode='w') as arr:
                # domain starts at 1, so shift coordinates by 1
                I = expected.col + 1
                arr[I] = expected.data

            a = fromtiledb(tempdir, ctx=ctx)
            result = self.executor.execute_tensor(a, concat=True)[0]
            np.testing.assert_allclose(expected.toarray()[0], result.toarray())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # create TileDB dense array with column-major
            dom = tiledb.Domain(
                tiledb.Dim(ctx=ctx, domain=(1, 100), tile=30, dtype=np.int32),
                tiledb.Dim(ctx=ctx, domain=(0, 90), tile=22, dtype=np.int32),
                tiledb.Dim(ctx=ctx, domain=(0, 9), tile=8, dtype=np.int32),
                ctx=ctx,
            )
            schema = tiledb.ArraySchema(
                ctx=ctx, domain=dom, sparse=False, cell_order='F',
                attrs=[tiledb.Attr(ctx=ctx, dtype=np.float64)])
            tiledb.DenseArray.create(tempdir, schema)

            expected = np.asfortranarray(np.random.rand(100, 91, 10))
            with tiledb.DenseArray(uri=tempdir, ctx=ctx, mode='w') as arr:
                arr.write_direct(expected)

            a = fromtiledb(tempdir, ctx=ctx)
            result = self.executor.execute_tensor(a, concat=True)[0]
            np.testing.assert_allclose(expected, result)
            # cell_order='F' should produce a Fortran-contiguous result
            self.assertTrue(result.flags['F_CONTIGUOUS'])
            self.assertFalse(result.flags['C_CONTIGUOUS'])
        finally:
            shutil.rmtree(tempdir)

    def testFromDataFrameExecution(self):
        """from_dataframe / Series.to_tensor: values, dtype up-casting,
        chunking, and the Fortran-order of a DataFrame-backed tensor."""
        mdf = md.DataFrame({
            'angle': [0, 3, 4],
            'degree': [360, 180, 360]
        }, index=['circle', 'triangle', 'rectangle'])
        tensor_result = self.executor.execute_tensor(from_dataframe(mdf))
        tensor_expected = self.executor.execute_tensor(
            mt.tensor([[0, 360], [3, 180], [4, 360]]))
        np.testing.assert_equal(tensor_result, tensor_expected)

        # test up-casting
        mdf2 = md.DataFrame({'a': [0.1, 0.2, 0.3], 'b': [1, 2, 3]})
        tensor_result2 = self.executor.execute_tensor(from_dataframe(mdf2))
        # mixed float/int columns are up-cast to float64
        np.testing.assert_equal(tensor_result2[0].dtype, np.dtype('float64'))
        tensor_expected2 = self.executor.execute_tensor(
            mt.tensor([[0.1, 1.0], [0.2, 2.0], [0.3, 3.0]]))
        np.testing.assert_equal(tensor_result2, tensor_expected2)

        raw = [[0.1, 0.2, 0.4], [0.4, 0.7, 0.3]]
        mdf3 = md.DataFrame(raw, columns=list('abc'), chunk_size=2)
        tensor_result3 = self.executor.execute_tensor(from_dataframe(mdf3),
                                                      concat=True)[0]
        np.testing.assert_array_equal(tensor_result3, np.asarray(raw))
        # DataFrame-backed tensors come out Fortran-ordered
        self.assertTrue(tensor_result3.flags['F_CONTIGUOUS'])
        self.assertFalse(tensor_result3.flags['C_CONTIGUOUS'])

        # test from series
        series = md.Series([1, 2, 3])
        tensor_result = series.to_tensor().execute()
        np.testing.assert_array_equal(tensor_result, np.array([1, 2, 3]))

        series = md.Series(range(10), chunk_size=3)
        tensor_result = series.to_tensor().execute()
        np.testing.assert_array_equal(tensor_result, np.arange(10))

    @unittest.skipIf(h5py is None, 'h5py not installed')
    def testReadHDF5Execution(self):
        """fromhdf5 via filename, open File object, and Dataset object,
        plus the TypeError/ValueError paths for bad arguments."""
        test_array = np.random.RandomState(0).rand(20, 10)
        group_name = 'test_group'
        dataset_name = 'test_dataset'

        with self.assertRaises(TypeError):
            fromhdf5(object())

        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(
                d, 'test_read_{}.hdf5'.format(int(time.time())))
            with h5py.File(filename, 'w') as f:
                g = f.create_group(group_name)
                g.create_dataset(dataset_name, chunks=(7, 4), data=test_array)

            # test filename
            r = fromhdf5(filename, group=group_name, dataset=dataset_name)
            result = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            # the HDF5 chunk layout is recorded on the tensor
            self.assertEqual(r.extra_params['raw_chunk_size'], (7, 4))

            # dataset is mandatory when only a filename is given
            with self.assertRaises(ValueError):
                fromhdf5(filename)
            with self.assertRaises(ValueError):
                fromhdf5(filename, dataset='non_exist')

            with h5py.File(filename, 'r') as f:
                # test file
                r = fromhdf5(f, group=group_name, dataset=dataset_name)
                result = self.executor.execute_tensor(r, concat=True)[0]
                np.testing.assert_array_equal(result, test_array)

                with self.assertRaises(ValueError):
                    fromhdf5(f)
                with self.assertRaises(ValueError):
                    fromhdf5(f, dataset='non_exist')

                # test dataset
                ds = f['{}/{}'.format(group_name, dataset_name)]
                r = fromhdf5(ds)
                result = self.executor.execute_tensor(r, concat=True)[0]
                np.testing.assert_array_equal(result, test_array)

    @unittest.skipIf(zarr is None, 'zarr not installed')
    def testReadZarrExecution(self):
        """fromzarr via array object, opened array, path+group/dataset,
        and a full path; each must tile into more than one chunk."""
        test_array = np.random.RandomState(0).rand(20, 10)
        group_name = 'test_group'
        dataset_name = 'test_dataset'

        with self.assertRaises(TypeError):
            fromzarr(object())

        with tempfile.TemporaryDirectory() as d:
            path = os.path.join(d, 'test_read_{}.zarr'.format(int(time.time())))

            group = zarr.group(path)
            arr = group.array(group_name + '/' + dataset_name, test_array,
                              chunks=(7, 4))

            r = fromzarr(arr)
            result = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            self.assertGreater(len(get_tiled(r).chunks), 1)

            arr = zarr.open_array('{}/{}/{}'.format(path, group_name,
                                                    dataset_name))
            r = fromzarr(arr)
            result = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            self.assertGreater(len(get_tiled(r).chunks), 1)

            r = fromzarr(path, group=group_name, dataset=dataset_name)
            result = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            self.assertGreater(len(get_tiled(r).chunks), 1)

            r = fromzarr(path + '/' + group_name + '/' + dataset_name)
            result = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            self.assertGreater(len(get_tiled(r).chunks), 1)
class Test(unittest.TestCase):
    """Execution tests for tensor joining routines: concatenate, stack,
    hstack, vstack, dstack, column_stack and union1d."""

    def setUp(self):
        # Executor that runs Mars tensor graphs on the numpy backend.
        self.executor = ExecutorForTest('numpy')

    def testConcatenateExecution(self):
        """Concatenate dense and sparse tensors with mismatched chunk sizes."""
        a_data = np.random.rand(10, 20, 30)
        b_data = np.random.rand(10, 20, 40)
        c_data = np.random.rand(10, 20, 50)
        # deliberately different chunk sizes so chunk boundaries do not align
        a = tensor(a_data, chunk_size=5)
        b = tensor(b_data, chunk_size=6)
        c = tensor(c_data, chunk_size=7)
        d = concatenate([a, b, c], axis=-1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.concatenate([a_data, b_data, c_data], axis=-1)
        self.assertTrue(np.array_equal(res, expected))
        # same concatenation, but with scipy sparse inputs
        a_data = sps.random(10, 30)
        b_data = sps.rand(10, 40)
        c_data = sps.rand(10, 50)
        a = tensor(a_data, chunk_size=5)
        b = tensor(b_data, chunk_size=6)
        c = tensor(c_data, chunk_size=7)
        d = concatenate([a, b, c], axis=-1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.concatenate([a_data.A, b_data.A, c_data.A], axis=-1)
        self.assertTrue(np.array_equal(res.toarray(), expected))

    def testStackExecution(self):
        """Stack along each axis and verify memory-order handling."""
        raw = [np.random.randn(3, 4) for _ in range(10)]
        arrs = [tensor(a, chunk_size=3) for a in raw]
        arr2 = stack(arrs)
        res = self.executor.execute_tensor(arr2, concat=True)
        self.assertTrue(np.array_equal(res[0], np.stack(raw)))
        arr3 = stack(arrs, axis=1)
        res = self.executor.execute_tensor(arr3, concat=True)
        self.assertTrue(np.array_equal(res[0], np.stack(raw, axis=1)))
        arr4 = stack(arrs, axis=2)
        res = self.executor.execute_tensor(arr4, concat=True)
        self.assertTrue(np.array_equal(res[0], np.stack(raw, axis=2)))
        # Fortran-ordered inputs: result contiguity flags must match numpy's
        raw2 = [np.asfortranarray(np.random.randn(3, 4)) for _ in range(10)]
        arr5 = [tensor(a, chunk_size=3) for a in raw2]
        arr6 = stack(arr5)
        res = self.executor.execute_tensor(arr6, concat=True)[0]
        expected = np.stack(raw2).copy('A')
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])
        # stacking into a provided F-ordered output tensor
        arr7 = stack(arr5, out=empty((10, 3, 4), order='F'))
        res = self.executor.execute_tensor(arr7, concat=True)[0]
        expected = np.stack(raw2, out=np.empty((10, 3, 4), order='F')).copy('A')
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

    def testHStackExecution(self):
        """hstack for 1-d (concatenation) and 2-d (column-wise) inputs."""
        a_data = np.random.rand(10)
        b_data = np.random.rand(20)
        a = tensor(a_data, chunk_size=4)
        b = tensor(b_data, chunk_size=4)
        c = hstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.hstack([a_data, b_data])
        self.assertTrue(np.array_equal(res, expected))
        a_data = np.random.rand(10, 20)
        b_data = np.random.rand(10, 5)
        a = tensor(a_data, chunk_size=3)
        b = tensor(b_data, chunk_size=4)
        c = hstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.hstack([a_data, b_data])
        self.assertTrue(np.array_equal(res, expected))

    def testVStackExecution(self):
        """vstack for 1-d (row-stacking) and 2-d inputs."""
        a_data = np.random.rand(10)
        b_data = np.random.rand(10)
        a = tensor(a_data, chunk_size=4)
        b = tensor(b_data, chunk_size=4)
        c = vstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.vstack([a_data, b_data])
        self.assertTrue(np.array_equal(res, expected))
        a_data = np.random.rand(10, 20)
        b_data = np.random.rand(5, 20)
        a = tensor(a_data, chunk_size=3)
        b = tensor(b_data, chunk_size=4)
        c = vstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.vstack([a_data, b_data])
        self.assertTrue(np.array_equal(res, expected))

    def testDStackExecution(self):
        """dstack for 1-d and 2-d inputs (depth-wise stacking)."""
        a_data = np.random.rand(10)
        b_data = np.random.rand(10)
        a = tensor(a_data, chunk_size=4)
        b = tensor(b_data, chunk_size=4)
        c = dstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.dstack([a_data, b_data])
        self.assertTrue(np.array_equal(res, expected))
        a_data = np.random.rand(10, 20)
        b_data = np.random.rand(10, 20)
        a = tensor(a_data, chunk_size=3)
        b = tensor(b_data, chunk_size=4)
        c = dstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.dstack([a_data, b_data])
        self.assertTrue(np.array_equal(res, expected))

    def testColumnStackExecution(self):
        """column_stack for 1-d vectors and for higher-dimensional inputs."""
        a_data = np.array((1, 2, 3))
        b_data = np.array((2, 3, 4))
        a = tensor(a_data, chunk_size=1)
        b = tensor(b_data, chunk_size=2)
        c = column_stack((a, b))
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.column_stack((a_data, b_data))
        np.testing.assert_equal(res, expected)
        a_data = np.random.rand(4, 2, 3)
        b_data = np.random.rand(4, 2, 3)
        a = tensor(a_data, chunk_size=1)
        b = tensor(b_data, chunk_size=2)
        c = column_stack((a, b))
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.column_stack((a_data, b_data))
        np.testing.assert_equal(res, expected)

    def testUnion1dExecution(self):
        """union1d with an explicit aggregate_size and with the default."""
        rs = np.random.RandomState(0)
        raw1 = rs.random(10)
        raw2 = rs.random(9)
        t1 = tensor(raw1, chunk_size=3)
        t2 = tensor(raw2, chunk_size=4)
        # aggregate into a single chunk
        t = union1d(t1, t2, aggregate_size=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.union1d(raw1, raw2)
        np.testing.assert_array_equal(res, expected)
        # default aggregation
        t = union1d(t1, t2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.union1d(raw1, raw2)
        np.testing.assert_array_equal(res, expected)
class Test(TestBase):
    """Execution tests for tensor statistics routines: average, cov,
    corrcoef, ptp, digitize, histogram(_bin_edges), quantile, percentile
    and median."""

    def setUp(self):
        # Executor that runs Mars tensor graphs on the numpy backend.
        self.executor = ExecutorForTest('numpy')

    def testAverageExecution(self):
        """average with and without weights, plus a shape-mismatch error."""
        data = arange(1, 5, chunk_size=1)
        t = average(data)
        res = self.executor.execute_tensor(t)[0]
        expected = np.average(np.arange(1, 5))
        self.assertEqual(res, expected)
        t = average(arange(1, 11, chunk_size=2),
                    weights=arange(10, 0, -1, chunk_size=2))
        res = self.executor.execute_tensor(t)[0]
        expected = np.average(range(1, 11), weights=range(10, 0, -1))
        self.assertEqual(res, expected)
        data = arange(6, chunk_size=2).reshape((3, 2))
        t = average(data, axis=1,
                    weights=tensor([1. / 4, 3. / 4], chunk_size=2))
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.average(np.arange(6).reshape(3, 2), axis=1,
                              weights=(1. / 4, 3. / 4))
        np.testing.assert_equal(res, expected)
        # 1-d weights against 2-d data without an axis must raise
        with self.assertRaises(TypeError):
            average(data, weights=tensor([1. / 4, 3. / 4], chunk_size=2))

    def testCovExecution(self):
        """cov on a single tensor and on a pair (x, y)."""
        data = np.array([[0, 2], [1, 1], [2, 0]]).T
        x = tensor(data, chunk_size=1)
        t = cov(x)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.cov(data)
        np.testing.assert_equal(res, expected)
        data_x = [-2.1, -1, 4.3]
        data_y = [3, 1.1, 0.12]
        x = tensor(data_x, chunk_size=1)
        y = tensor(data_y, chunk_size=1)
        X = stack((x, y), axis=0)
        t = cov(x, y)
        # cov(x, y) must equal cov of the stacked 2-row matrix
        r = tall(t == cov(X))
        self.assertTrue(self.executor.execute_tensor(r)[0])

    def testCorrcoefExecution(self):
        """corrcoef of two 1-d tensors matches numpy."""
        data_x = [-2.1, -1, 4.3]
        data_y = [3, 1.1, 0.12]
        x = tensor(data_x, chunk_size=1)
        y = tensor(data_y, chunk_size=1)
        t = corrcoef(x, y)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.corrcoef(data_x, data_y)
        np.testing.assert_equal(res, expected)

    def testPtpExecution(self):
        """ptp along each axis and over the flattened tensor."""
        x = arange(4, chunk_size=1).reshape(2, 2)
        t = ptp(x, axis=0)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.ptp(np.arange(4).reshape(2, 2), axis=0)
        np.testing.assert_equal(res, expected)
        t = ptp(x, axis=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.ptp(np.arange(4).reshape(2, 2), axis=1)
        np.testing.assert_equal(res, expected)
        t = ptp(x)
        res = self.executor.execute_tensor(t)[0]
        expected = np.ptp(np.arange(4).reshape(2, 2))
        np.testing.assert_equal(res, expected)

    def testDigitizeExecution(self):
        """digitize with ndarray/tensor bins, both `right` flags, and sparse data."""
        data = np.array([0.2, 6.4, 3.0, 1.6])
        x = tensor(data, chunk_size=2)
        bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
        inds = digitize(x, bins)
        res = self.executor.execute_tensor(inds, concat=True)[0]
        expected = np.digitize(data, bins)
        np.testing.assert_equal(res, expected)
        # bins supplied as a Mars tensor
        b = tensor(bins, chunk_size=2)
        inds = digitize(x, b)
        res = self.executor.execute_tensor(inds, concat=True)[0]
        expected = np.digitize(data, bins)
        np.testing.assert_equal(res, expected)
        data = np.array([1.2, 10.0, 12.4, 15.5, 20.])
        x = tensor(data, chunk_size=2)
        bins = np.array([0, 5, 10, 15, 20])
        inds = digitize(x, bins, right=True)
        res = self.executor.execute_tensor(inds, concat=True)[0]
        expected = np.digitize(data, bins, right=True)
        np.testing.assert_equal(res, expected)
        inds = digitize(x, bins, right=False)
        res = self.executor.execute_tensor(inds, concat=True)[0]
        expected = np.digitize(data, bins, right=False)
        np.testing.assert_equal(res, expected)
        # sparse input keeps a sparse result
        data = sps.random(10, 1, density=.1) * 12
        x = tensor(data, chunk_size=2)
        bins = np.array([1.0, 2.0, 2.5, 4.0, 10.0])
        inds = digitize(x, bins)
        res = self.executor.execute_tensor(inds, concat=True)[0]
        expected = np.digitize(data.toarray(), bins, right=False)
        np.testing.assert_equal(res.toarray(), expected)

    @ignore_warning
    def testHistogramBinEdgesExecution(self):
        """histogram_bin_edges for explicit ranges, estimator names and
        explicit edge lists, including 1-element and empty inputs."""
        rs = np.random.RandomState(0)
        raw = rs.randint(10, size=(20, ))
        a = tensor(raw, chunk_size=3)
        # range provided
        for range_ in [(0, 10), (3, 11), (3, 7)]:
            bin_edges = histogram_bin_edges(a, range=range_)
            result = self.executor.execute_tensor(bin_edges)[0]
            expected = np.histogram_bin_edges(raw, range=range_)
            np.testing.assert_array_equal(result, expected)
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            raw2 = rs.randint(10, size=(1, ))
            b = tensor(raw2)
            raw3 = rs.randint(10, size=(0, ))
            c = tensor(raw3)
            for t, r in [(a, raw), (b, raw2), (c, raw3), (sort(a), raw)]:
                test_bins = [
                    10, 'stone', 'auto', 'doane', 'fd', 'rice', 'scott',
                    'sqrt', 'sturges'
                ]
                for bins in test_bins:
                    bin_edges = histogram_bin_edges(t, bins=bins)
                    if r.size > 0:
                        # estimator-based bins need data min/max first, so a
                        # plain single-pass execute cannot tile the graph yet
                        with self.assertRaises(TilesError):
                            executor.execute_tensor(bin_edges)
                    result = executor.execute_tensors([bin_edges])[0]
                    expected = np.histogram_bin_edges(r, bins=bins)
                    np.testing.assert_array_equal(result, expected)
                test_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                for bins in test_bins:
                    bin_edges = histogram_bin_edges(t, bins=bins)
                    result = executor.execute_tensors([bin_edges])[0]
                    expected = np.histogram_bin_edges(r, bins=[0, 4, 8])
                    np.testing.assert_array_equal(result, expected)
            raw = np.arange(5)
            a = tensor(raw, chunk_size=3)
            bin_edges = histogram_bin_edges(a)
            result = executor.execute_tensors([bin_edges])[0]
            expected = np.histogram_bin_edges(raw)
            self.assertEqual(bin_edges.shape, expected.shape)
            np.testing.assert_array_equal(result, expected)

    @ignore_warning
    def testHistogramExecution(self):
        """histogram for ranges, weights/density, estimator bins, and
        tensors whose shape is unknown until execution."""
        rs = np.random.RandomState(0)
        raw = rs.randint(10, size=(20, ))
        a = tensor(raw, chunk_size=3)
        raw_weights = rs.random(20)
        weights = tensor(raw_weights, chunk_size=4)
        # range provided
        for range_ in [(0, 10), (3, 11), (3, 7)]:
            bin_edges = histogram(a, range=range_)[0]
            result = self.executor.execute_tensor(bin_edges)[0]
            expected = np.histogram(raw, range=range_)[0]
            np.testing.assert_array_equal(result, expected)
        # weights given either as ndarray or as a Mars tensor
        for wt in (raw_weights, weights):
            for density in (True, False):
                bins = [1, 4, 6, 9]
                bin_edges = histogram(a, bins=bins, weights=wt,
                                      density=density)[0]
                result = self.executor.execute_tensor(bin_edges)[0]
                expected = np.histogram(raw, bins=bins, weights=raw_weights,
                                        density=density)[0]
                np.testing.assert_almost_equal(result, expected)
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            raw2 = rs.randint(10, size=(1, ))
            b = tensor(raw2)
            raw3 = rs.randint(10, size=(0, ))
            c = tensor(raw3)
            for t, r in [(a, raw), (b, raw2), (c, raw3), (sort(a), raw)]:
                for density in (True, False):
                    test_bins = [
                        10, 'stone', 'auto', 'doane', 'fd', 'rice', 'scott',
                        'sqrt', 'sturges'
                    ]
                    for bins in test_bins:
                        hist = histogram(t, bins=bins, density=density)[0]
                        if r.size > 0:
                            # see testHistogramBinEdgesExecution: estimator
                            # bins cannot be tiled in one pass
                            with self.assertRaises(TilesError):
                                executor.execute_tensor(hist)
                        result = executor.execute_tensors([hist])[0]
                        expected = np.histogram(r, bins=bins,
                                                density=density)[0]
                        np.testing.assert_array_equal(result, expected)
                    test_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                    for bins in test_bins:
                        hist = histogram(t, bins=bins, density=density)[0]
                        result = executor.execute_tensors([hist])[0]
                        expected = np.histogram(r, bins=[0, 4, 8],
                                                density=density)[0]
                        np.testing.assert_array_equal(result, expected)
            # test unknown shape
            raw4 = rs.rand(10)
            d = tensor(raw4, chunk_size=3)
            d = d[d < 0.9]
            hist = histogram(d)
            result = executor.execute_tensors(hist)[0]
            expected = np.histogram(raw4[raw4 < 0.9])[0]
            np.testing.assert_array_equal(result, expected)
            raw5 = np.arange(3, 10)
            e = arange(10, chunk_size=3)
            e = e[e >= 3]
            hist = histogram(e)
            result = executor.execute_tensors(hist)[0]
            expected = np.histogram(raw5)[0]
            np.testing.assert_array_equal(result, expected)

    def testQuantileExecution(self):
        """quantile over single/multi chunk, 1-d/2-d, NaN data, `out`
        tensors and tensor-valued q."""
        # test 1 chunk, 1-d
        raw = np.random.rand(20)
        a = tensor(raw, chunk_size=20)
        raw2 = raw.copy()
        raw2[np.random.RandomState(0).randint(raw.size, size=3)] = np.nan
        a2 = tensor(raw2, chunk_size=20)
        for q in [
            np.random.RandomState(0).rand(),
            np.random.RandomState(0).rand(5)
        ]:
            for interpolation in INTERPOLATION_TYPES:
                for keepdims in [True, False]:
                    r = quantile(a, q, interpolation=interpolation,
                                 keepdims=keepdims)
                    result = self.executor.execute_tensor(r, concat=True)[0]
                    expected = np.quantile(raw, q,
                                           interpolation=interpolation,
                                           keepdims=keepdims)
                    np.testing.assert_array_equal(result, expected)
                    # NaN-containing data exercises the nan propagation path
                    r2 = quantile(a2, q, interpolation=interpolation,
                                  keepdims=keepdims)
                    result = self.executor.execute_tensor(r2, concat=True)[0]
                    expected = np.quantile(raw2, q,
                                           interpolation=interpolation,
                                           keepdims=keepdims)
                    np.testing.assert_array_equal(result, expected)
        # test 1 chunk, 2-d
        raw = np.random.rand(20, 10)
        a = tensor(raw, chunk_size=20)
        raw2 = raw.copy()
        raw2.flat[np.random.RandomState(0).randint(raw.size, size=3)] = np.nan
        a2 = tensor(raw2, chunk_size=20)
        for q in [
            np.random.RandomState(0).rand(),
            np.random.RandomState(0).rand(5)
        ]:
            for interpolation in INTERPOLATION_TYPES:
                for keepdims in [True, False]:
                    for axis in [None, 0, 1]:
                        r = quantile(a, q, axis=axis,
                                     interpolation=interpolation,
                                     keepdims=keepdims)
                        result = self.executor.execute_tensor(
                            r, concat=True)[0]
                        expected = np.quantile(raw, q, axis=axis,
                                               interpolation=interpolation,
                                               keepdims=keepdims)
                        np.testing.assert_array_equal(result, expected)
                        r2 = quantile(a2, q, axis=axis,
                                      interpolation=interpolation,
                                      keepdims=keepdims)
                        result = self.executor.execute_tensor(
                            r2, concat=True)[0]
                        expected = np.quantile(raw2, q, axis=axis,
                                               interpolation=interpolation,
                                               keepdims=keepdims)
                        np.testing.assert_array_equal(result, expected)
        # test multi chunks, 1-d
        raw = np.random.rand(20)
        a = tensor(raw, chunk_size=3)
        raw2 = raw.copy()
        raw2[np.random.RandomState(0).randint(raw.size, size=3)] = np.nan
        a2 = tensor(raw2, chunk_size=20)
        for q in [
            np.random.RandomState(0).rand(),
            np.random.RandomState(0).rand(5)
        ]:
            for interpolation in INTERPOLATION_TYPES:
                for keepdims in [True, False]:
                    r = quantile(a, q, interpolation=interpolation,
                                 keepdims=keepdims)
                    result = self.executor.execute_tensor(r, concat=True)[0]
                    expected = np.quantile(raw, q,
                                           interpolation=interpolation,
                                           keepdims=keepdims)
                    # distributed quantile only matches to float precision
                    np.testing.assert_almost_equal(result, expected)
                    r2 = quantile(a2, q, interpolation=interpolation,
                                  keepdims=keepdims)
                    result = self.executor.execute_tensor(r2, concat=True)[0]
                    expected = np.quantile(raw2, q,
                                           interpolation=interpolation,
                                           keepdims=keepdims)
                    np.testing.assert_almost_equal(result, expected)
        # test multi chunk, 2-d
        raw = np.random.rand(20, 10)
        a = tensor(raw, chunk_size=(12, 6))
        raw2 = raw.copy()
        raw2.flat[np.random.RandomState(0).randint(raw.size, size=3)] = np.nan
        a2 = tensor(raw2, chunk_size=(12, 6))
        for q in [
            np.random.RandomState(0).rand(),
            np.random.RandomState(0).rand(5)
        ]:
            for interpolation in INTERPOLATION_TYPES:
                for keepdims in [True, False]:
                    for axis in [None, 0, 1]:
                        r = quantile(a, q, axis=axis,
                                     interpolation=interpolation,
                                     keepdims=keepdims)
                        result = self.executor.execute_tensor(
                            r, concat=True)[0]
                        expected = np.quantile(raw, q, axis=axis,
                                               interpolation=interpolation,
                                               keepdims=keepdims)
                        np.testing.assert_almost_equal(result, expected)
                        r2 = quantile(a2, q, axis=axis,
                                      interpolation=interpolation,
                                      keepdims=keepdims)
                        result = self.executor.execute_tensor(
                            r2, concat=True)[0]
                        expected = np.quantile(raw2, q, axis=axis,
                                               interpolation=interpolation,
                                               keepdims=keepdims)
                        np.testing.assert_almost_equal(result, expected)
        # test out, 1 chunk
        raw = np.random.rand(20)
        q = np.random.rand(11)
        a = tensor(raw, chunk_size=20)
        out = empty((5, 11))
        quantile(a, q, out=out)
        result = self.executor.execute_tensor(out, concat=True)[0]
        expected = np.quantile(raw, q, out=np.empty((5, 11)))
        np.testing.assert_array_equal(result, expected)
        # test out, multi chunks
        raw = np.random.rand(20)
        q = np.random.rand(11)
        a = tensor(raw, chunk_size=3)
        out = empty((5, 11))
        quantile(a, q, out=out)
        result = self.executor.execute_tensor(out, concat=True)[0]
        expected = np.quantile(raw, q, out=np.empty((5, 11)))
        np.testing.assert_almost_equal(result, expected)
        # test q which is a tensor
        q_raw = np.random.RandomState(0).rand(5)
        q = tensor(q_raw, chunk_size=3)
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            r = quantile(a, q, axis=None)
            result = executor.execute_tensors([r])[0]
            expected = np.quantile(raw, q_raw, axis=None)
            np.testing.assert_almost_equal(result, expected)
            # q outside [0, 1] must be rejected at execution time
            with self.assertRaises(ValueError):
                q[0] = 1.1
                r = quantile(a, q, axis=None)
                _ = executor.execute_tensors(r)[0]

    def testPercentileExecution(self):
        """percentile with ndarray q and with tensor-valued q."""
        raw = np.random.rand(20, 10)
        q = np.random.RandomState(0).randint(100, size=11)
        a = tensor(raw, chunk_size=7)
        r = percentile(a, q)
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.percentile(raw, q)
        np.testing.assert_almost_equal(result, expected)
        mq = tensor(q)
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            r = percentile(a, mq)
            result = executor.execute_tensors([r])[0]
            np.testing.assert_almost_equal(result, expected)

    def testMedianExecution(self):
        """median over the whole tensor and along an axis."""
        raw = np.random.rand(20, 10)
        a = tensor(raw, chunk_size=7)
        r = median(a)
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.median(raw)
        np.testing.assert_array_equal(result, expected)
        r = median(a, axis=1)
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.median(raw, axis=1)
        np.testing.assert_array_equal(result, expected)
class Test(unittest.TestCase):
    """Execution tests for Mars spatial distance routines (pdist, cdist,
    squareform) against scipy.spatial.distance references."""

    def setUp(self) -> None:
        # Executor that runs Mars tensor graphs on the numpy backend.
        self._executor = ExecutorForTest('numpy')

    @unittest.skipIf(distance.pdist is None, 'scipy not installed')
    def testPdistExecution(self):
        """pdist for single/multi chunk inputs, custom metrics, aggregate
        sizes, and metric keyword tensors (w, V, VI)."""
        from scipy.spatial.distance import pdist as sp_pdist
        raw = np.random.rand(100, 10)
        # test 1 chunk
        x = tensor(raw, chunk_size=100)
        dist = distance.pdist(x)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw)
        np.testing.assert_array_equal(result, expected)
        dist = distance.pdist(x, metric='hamming')
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw, metric='hamming')
        np.testing.assert_array_equal(result, expected)
        # user-defined callable metric
        f = lambda u, v: np.sqrt(((u - v)**2).sum())
        dist = distance.pdist(x, metric=f)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw, metric=f)
        np.testing.assert_array_equal(result, expected)
        # test more than 1 chunk
        x = tensor(raw, chunk_size=12)
        dist = distance.pdist(x)
        tdist = dist.tiles()
        # default aggregation collapses the result into one chunk
        self.assertEqual(len(tdist.chunks), 1)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw)
        np.testing.assert_array_equal(result, expected)
        dist = distance.pdist(x, aggregate_size=3)
        tdist = dist.tiles()
        self.assertEqual(len(tdist.chunks), 3)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw)
        np.testing.assert_array_equal(result, expected)
        dist = distance.pdist(x, metric='hamming', aggregate_size=2)
        tdist = dist.tiles()
        self.assertEqual(len(tdist.chunks), 2)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw, metric='hamming')
        np.testing.assert_array_equal(result, expected)
        f = lambda u, v: np.sqrt(((u - v)**2).sum())
        dist = distance.pdist(x, metric=f, aggregate_size=2)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_pdist(raw, metric=f)
        np.testing.assert_array_equal(result, expected)
        for x in [tensor(raw), tensor(raw, chunk_size=12)]:
            # test w
            weight = np.random.rand(10)
            w = tensor(weight, chunk_size=7)
            dist = distance.pdist(x, metric='wminkowski', p=3, w=w)
            result = self._executor.execute_tensor(dist, concat=True)[0]
            expected = sp_pdist(raw, metric='wminkowski', p=3, w=weight)
            np.testing.assert_array_equal(result, expected)
            # test V
            v = np.random.rand(10)
            V = tensor(v, chunk_size=7)
            dist = distance.pdist(x, metric='seuclidean', V=V)
            result = self._executor.execute_tensor(dist, concat=True)[0]
            expected = sp_pdist(raw, metric='seuclidean', V=v)
            np.testing.assert_array_equal(result, expected)
            # test VI
            vi = np.random.rand(10, 10)
            VI = tensor(vi, chunk_size=8)
            dist = distance.pdist(x, metric='mahalanobis', VI=VI)
            result = self._executor.execute_tensor(dist, concat=True)[0]
            expected = sp_pdist(raw, metric='mahalanobis', VI=vi)
            np.testing.assert_array_equal(result, expected)

    @unittest.skipIf(distance.cdist is None, 'scipy not installed')
    def testCdistExecution(self):
        """cdist for single/multi chunk inputs, custom metrics and metric
        keyword tensors (w, V, VI)."""
        from scipy.spatial.distance import cdist as sp_cdist
        raw_a = np.random.rand(100, 10)
        raw_b = np.random.rand(89, 10)
        # test 1 chunk
        xa = tensor(raw_a, chunk_size=100)
        xb = tensor(raw_b, chunk_size=100)
        dist = distance.cdist(xa, xb)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b)
        np.testing.assert_array_equal(result, expected)
        dist = distance.cdist(xa, xb, metric='hamming')
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b, metric='hamming')
        np.testing.assert_array_equal(result, expected)
        f = lambda u, v: np.sqrt(((u - v)**2).sum())
        dist = distance.cdist(xa, xb, metric=f)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b, metric=f)
        np.testing.assert_array_equal(result, expected)
        # test more than 1 chunk
        xa = tensor(raw_a, chunk_size=12)
        xb = tensor(raw_b, chunk_size=13)
        dist = distance.cdist(xa, xb)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b)
        np.testing.assert_array_equal(result, expected)
        dist = distance.cdist(xa, xb, metric='hamming')
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b, metric='hamming')
        np.testing.assert_array_equal(result, expected)
        f = lambda u, v: np.sqrt(((u - v)**2).sum())
        dist = distance.cdist(xa, xb, metric=f)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b, metric=f)
        np.testing.assert_array_equal(result, expected)
        for xa, xb in [(tensor(raw_a), tensor(raw_b)),
                       (tensor(raw_a, chunk_size=12),
                        tensor(raw_b, chunk_size=13))]:
            # test w
            weight = np.random.rand(10)
            w = tensor(weight, chunk_size=7)
            dist = distance.cdist(xa, xb, metric='wminkowski', p=3, w=w)
            result = self._executor.execute_tensor(dist, concat=True)[0]
            expected = sp_cdist(raw_a, raw_b, metric='wminkowski', p=3,
                                w=weight)
            np.testing.assert_array_equal(result, expected)
            # test V
            v = np.random.rand(10)
            V = tensor(v, chunk_size=7)
            dist = distance.cdist(xa, xb, metric='seuclidean', V=V)
            result = self._executor.execute_tensor(dist, concat=True)[0]
            expected = sp_cdist(raw_a, raw_b, metric='seuclidean', V=v)
            np.testing.assert_array_equal(result, expected)
            # test VI
            vi = np.random.rand(10, 10)
            VI = tensor(vi, chunk_size=8)
            dist = distance.cdist(xa, xb, metric='mahalanobis', VI=VI)
            result = self._executor.execute_tensor(dist, concat=True)[0]
            expected = sp_cdist(raw_a, raw_b, metric='mahalanobis', VI=vi)
            np.testing.assert_array_equal(result, expected)

    @unittest.skipIf(distance.cdist is None, 'scipy not installed')
    def testSqureFormExecution(self):
        """squareform in both directions (vector<->matrix) plus the
        symmetry `checks` behavior on a non-symmetric input."""
        # NOTE(review): method name misspells "Square"; kept because the
        # test id is part of the suite's public surface.
        from scipy.spatial.distance import pdist as sp_pdist, \
            squareform as sp_squareform
        raw_a = np.random.rand(80, 10)
        raw_pdsit = sp_pdist(raw_a)
        raw_square = sp_squareform(raw_pdsit)
        # tomatrix, test 1 chunk
        vec = tensor(raw_pdsit, chunk_size=raw_pdsit.shape[0])
        mat = distance.squareform(vec, chunk_size=100)
        result = self._executor.execute_tensor(mat, concat=True)[0]
        np.testing.assert_array_equal(result, raw_square)
        # tomatrix, test more than 1 chunk
        vec = tensor(raw_pdsit, chunk_size=33)
        self.assertGreater(len(vec.tiles().chunks), 1)
        mat = distance.squareform(vec, chunk_size=34)
        result = self._executor.execute_tensor(mat, concat=True)[0]
        np.testing.assert_array_equal(result, raw_square)
        # tovec, test 1 chunk
        mat = tensor(raw_square)
        vec = distance.squareform(mat, chunk_size=raw_pdsit.shape[0])
        self.assertEqual(len(mat.tiles().chunks), 1)
        self.assertEqual(len(vec.tiles().chunks), 1)
        result = self._executor.execute_tensor(vec, concat=True)[0]
        np.testing.assert_array_equal(result, raw_pdsit)
        # tovec, test more than 1 chunk
        mat = tensor(raw_square, chunk_size=31)
        vec = distance.squareform(mat, chunk_size=40)
        self.assertGreater(len(vec.tiles().chunks), 1)
        result = self._executor.execute_tensor(vec, concat=True)[0]
        np.testing.assert_array_equal(result, raw_pdsit)
        # test checks
        # generate non-symmetric matrix
        non_sym_arr = np.random.RandomState(0).rand(10, 10)
        # 1 chunk
        mat = tensor(non_sym_arr)
        vec = distance.squareform(mat, checks=True, chunk_size=100)
        with self.assertRaises(ValueError):
            _ = self._executor.execute_tensor(vec, concat=True)[0]
        # force checks=False
        vec = distance.squareform(mat, checks=False, chunk_size=100)
        _ = self._executor.execute_tensor(vec, concat=True)[0]
        # more than 1 chunk
        mat = tensor(non_sym_arr, chunk_size=6)
        vec = distance.squareform(mat, checks=True, chunk_size=8)
        self.assertGreater(len(vec.tiles().chunks), 1)
        with self.assertRaises(ValueError):
            _ = self._executor.execute_tensor(vec, concat=True)[0]
        # force checks=False
        vec = distance.squareform(mat, checks=False, chunk_size=100)
        _ = self._executor.execute_tensor(vec, concat=True)[0]
class TestUnary(TestBase):
    """Execution tests for unary arithmetic on Mars DataFrames/Series:
    abs, invert, negate, numpy ufunc dispatch, datetime comparisons and
    tensor-series mixing."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testAbs(self):
        """DataFrame.abs() and the builtin abs() agree with pandas."""
        data1 = pd.DataFrame(np.random.uniform(low=-1, high=1, size=(10, 10)))
        df1 = from_pandas(data1, chunk_size=5)
        result = self.executor.execute_dataframe(df1.abs(), concat=True)[0]
        expected = data1.abs()
        pd.testing.assert_frame_equal(expected, result)
        result = self.executor.execute_dataframe(abs(df1), concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

    def testNot(self):
        """Bitwise-not (~) on a boolean DataFrame agrees with pandas."""
        data1 = pd.DataFrame(np.random.uniform(low=-1, high=1,
                                               size=(10, 10)) > 0)
        df1 = from_pandas(data1, chunk_size=5)
        result = self.executor.execute_dataframe(~df1, concat=True)[0]
        expected = ~data1
        pd.testing.assert_frame_equal(expected, result)

    def testNegative(self):
        """Unary minus on an integer DataFrame agrees with pandas."""
        data1 = pd.DataFrame(np.random.randint(low=0, high=100,
                                               size=(10, 10)))
        df1 = from_pandas(data1, chunk_size=5)
        result = self.executor.execute_dataframe(-df1, concat=True)[0]
        expected = -data1
        pd.testing.assert_frame_equal(expected, result)

    def testUfunc(self):
        """Each mt.* ufunc and its numpy counterpart give the same result
        for both DataFrame and Series inputs (non-trivial index order)."""
        df_raw = pd.DataFrame(np.random.uniform(size=(10, 10)),
                              index=pd.RangeIndex(9, -1, -1))
        df = from_pandas(df_raw, chunk_size=5)
        series_raw = pd.Series(np.random.uniform(size=10),
                               index=pd.RangeIndex(9, -1, -1))
        series = from_pandas_series(series_raw, chunk_size=5)
        # pairs of (numpy reference, mars implementation)
        ufuncs = [
            [np.abs, mt.abs], [np.log, mt.log], [np.log2, mt.log2],
            [np.log10, mt.log10], [np.sin, mt.sin], [np.cos, mt.cos],
            [np.tan, mt.tan], [np.sinh, mt.sinh], [np.cosh, mt.cosh],
            [np.tanh, mt.tanh], [np.arcsin, mt.arcsin],
            [np.arccos, mt.arccos], [np.arctan, mt.arctan],
            [np.arcsinh, mt.arcsinh], [np.arccosh, mt.arccosh],
            [np.arctanh, mt.arctanh], [np.radians, mt.radians],
            [np.degrees, mt.degrees], [np.ceil, mt.ceil],
            [np.floor, mt.floor],
            [partial(np.around, decimals=2), partial(mt.around, decimals=2)],
            [np.exp, mt.exp], [np.exp2, mt.exp2], [np.expm1, mt.expm1],
            [np.sqrt, mt.sqrt], [np.isnan, mt.isnan],
            [np.isfinite, mt.isfinite], [np.isinf, mt.isinf],
            [np.negative, mt.negative],
        ]
        for raw, data in [(df_raw, df), (series_raw, series)]:
            for npf, mtf in ufuncs:
                r = mtf(data)
                result = self.executor.execute_tensor(r, concat=True)[0]
                expected = npf(raw)
                if isinstance(raw, pd.DataFrame):
                    pd.testing.assert_frame_equal(result, expected)
                else:
                    pd.testing.assert_series_equal(result, expected)
                # test numpy ufunc
                r = npf(data)
                result = self.executor.execute_tensor(r, concat=True)[0]
                if isinstance(raw, pd.DataFrame):
                    pd.testing.assert_frame_equal(result, expected)
                else:
                    pd.testing.assert_series_equal(result, expected)

    def testDateTimeBin(self):
        """Combining two datetime comparisons with & agrees with pandas."""
        rs = np.random.RandomState(0)
        df_raw = pd.DataFrame(
            {'a': rs.randint(1000, size=10),
             'b': rs.rand(10),
             'c': [pd.Timestamp(rs.randint(1604000000, 1604481373))
                   for _ in range(10)]},
            index=pd.RangeIndex(9, -1, -1))
        df = from_pandas(df_raw, chunk_size=5)
        r = (df['c'] > to_datetime('2000-01-01')) & \
            (df['c'] < to_datetime('2021-01-01'))
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = (df_raw['c'] > pd.to_datetime('2000-01-01')) & \
                   (df_raw['c'] < pd.to_datetime('2021-01-01'))
        pd.testing.assert_series_equal(result, expected)

    def testSeriesAndTensor(self):
        """A Mars tensor | a Mars series yields a pandas Series result."""
        rs = np.random.RandomState(0)
        s_raw = pd.Series(rs.rand(10)) < 0.5
        a_raw = rs.rand(10) < 0.5
        series = from_pandas_series(s_raw, chunk_size=5)
        t = mt.tensor(a_raw, chunk_size=5)
        r = t | series
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = a_raw | s_raw
        pd.testing.assert_series_equal(result, expected)
class Test(unittest.TestCase):
    """Tests for learn.metrics manhattan_distances: shape/validation checks
    and execution against scikit-learn's reference implementation."""

    def setUp(self) -> None:
        # Executor that runs Mars tensor graphs on the numpy backend.
        self.executor = ExecutorForTest('numpy')

    def testManhattanDistances(self):
        """Graph-construction checks: sparse + sum_over_features=False is
        rejected; output shapes are correct for dense input."""
        x = mt.random.randint(10, size=(10, 3), density=0.4)
        y = mt.random.randint(10, size=(11, 3), density=0.5)
        # sparse input cannot produce per-feature distances
        with self.assertRaises(TypeError):
            manhattan_distances(x, y, sum_over_features=False)
        x = x.todense()
        y = y.todense()
        d = manhattan_distances(x, y, sum_over_features=True)
        self.assertEqual(d.shape, (10, 11))
        # per-feature result flattens pairs into rows: 10 * 11 = 110
        d = manhattan_distances(x, y, sum_over_features=False)
        self.assertEqual(d.shape, (110, 3))

    def testManhattanDistancesExecution(self):
        """Execution parity with sklearn for dense/sparse inputs, single
        and multiple chunks, and both sum_over_features settings."""
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)
        # single-chunk tensors
        x1 = mt.tensor(raw_x, chunk_size=30)
        y1 = mt.tensor(raw_y, chunk_size=30)
        # multi-chunk tensors
        x2 = mt.tensor(raw_x, chunk_size=11)
        y2 = mt.tensor(raw_y, chunk_size=12)
        raw_sparse_x = sps.random(20, 5, density=0.4, format='csr',
                                  random_state=0)
        raw_sparse_y = sps.random(21, 5, density=0.3, format='csr',
                                  random_state=0)
        x3 = mt.tensor(raw_sparse_x, chunk_size=30)
        y3 = mt.tensor(raw_sparse_y, chunk_size=30)
        x4 = mt.tensor(raw_sparse_x, chunk_size=11)
        y4 = mt.tensor(raw_sparse_y, chunk_size=12)
        for x, y, is_sparse in [(x1, y1, False), (x2, y2, False),
                                (x3, y3, True), (x4, y4, True)]:
            if is_sparse:
                rx, ry = raw_sparse_x, raw_sparse_y
            else:
                rx, ry = raw_x, raw_y
            # sparse only supports sum_over_features=True (see testManhattanDistances)
            sv = [True, False] if not is_sparse else [True]
            for sum_over_features in sv:
                d = manhattan_distances(x, y, sum_over_features)
                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(rx, ry, sum_over_features)
                np.testing.assert_almost_equal(result, expected)
                # y omitted: distances of x against itself
                d = manhattan_distances(x, sum_over_features=sum_over_features)
                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(
                    rx, sum_over_features=sum_over_features)
                np.testing.assert_almost_equal(result, expected)
class Test(unittest.TestCase):
    """Tests for learn.preprocessing normalize: argument validation and
    execution parity with sklearn's normalize."""

    def setUp(self) -> None:
        # Executor that runs Mars tensor graphs on the numpy backend.
        self.executor = ExecutorForTest('numpy')

    def testNormalizeOp(self):
        """Invalid norm name, invalid axis, and >2-d input all raise."""
        # NOTE(review): mt.random.random(10, 3) passes 3 as the second
        # positional argument rather than building a (10, 3) tensor as
        # mt.random.rand(10, 3) would — the ValueError still fires on the
        # norm/axis arguments, but confirm the intended input shape.
        with self.assertRaises(ValueError):
            normalize(mt.random.random(10, 3), norm='unknown')
        with self.assertRaises(ValueError):
            normalize(mt.random.random(10, 3), axis=-1)
        with self.assertRaises(ValueError):
            normalize(mt.random.rand(10, 3, 3))

    def testNormalizeExecution(self):
        """normalize over dense/sparse inputs, all norms, both axes, with
        and without the sklearn fast path, plus copy/return_norm handling."""
        raw_dense = np.random.rand(10, 10)
        raw_sparse = sps.random(10, 10, density=0.4, format='csr')
        for chunk_size in [10, 6, (10, 6), (6, 10)]:
            for raw, x in [
                (raw_dense, mt.tensor(raw_dense, chunk_size=chunk_size)),
                (raw_sparse, mt.tensor(raw_sparse, chunk_size=chunk_size))
            ]:
                for norm in ['l1', 'l2', 'max']:
                    for axis in (0, 1):
                        for use_sklearn in [True, False]:
                            n = normalize(x, norm=norm, axis=axis,
                                          return_norm=False)
                            # toggle the sklearn delegation path directly
                            n.op._use_sklearn = use_sklearn
                            result = self.executor.execute_tensor(
                                n, concat=True)[0]
                            expected = sk_normalize(raw, norm=norm,
                                                    axis=axis,
                                                    return_norm=False)
                            if sps.issparse(expected):
                                expected = expected.A
                            np.testing.assert_almost_equal(
                                np.asarray(result), expected)
        raw_dense = np.random.rand(10, 10)
        raw_sparse = sps.random(10, 10, density=0.4, format='csr')
        # test copy and return_normalize
        for axis in (0, 1):
            for chunk_size in (10, 6, (6, 10)):
                for raw in (raw_dense, raw_sparse):
                    x = mt.tensor(raw, chunk_size=chunk_size)
                    n = normalize(x, axis=axis, copy=False, return_norm=True)
                    results = self.executor.execute_tensors(n)
                    raw_copy = raw.copy()
                    try:
                        expects = sk_normalize(raw_copy, axis=axis,
                                               copy=False, return_norm=True)
                    except NotImplementedError:
                        # sklearn rejects some sparse/axis combinations;
                        # skip those cases rather than fail
                        continue
                    if sps.issparse(expects[0]):
                        expected = expects[0].A
                    else:
                        expected = expects[0]
                    np.testing.assert_almost_equal(np.asarray(results[0]),
                                                   expected)
                    np.testing.assert_almost_equal(results[1], expects[1])
class Test(TestBase):
    """Execution tests for storing tensors into external formats
    (TileDB, HDF5, Zarr)."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testStoreTileDBExecution(self):
        """Round-trip tensors through TileDB dense and sparse arrays."""
        ctx = tiledb.Ctx()

        tempdir = tempfile.mkdtemp()
        try:
            # store TileDB dense array
            expected = np.random.rand(8, 4, 3)
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store tensor with 1 chunk to TileDB dense array
            a = arange(12)
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(np.arange(12), arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store 2-d TileDB sparse array
            expected = sps.random(8, 7, density=0.1)
            a = tensor(expected, chunk_size=(3, 5))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.SparseArray(uri=tempdir, ctx=ctx) as arr:
                data = arr[:, :]
                coords = data['coords']
                value = data[arr.attr(0).name]
                # rebuild a COO matrix from the stored coordinates/values
                # and compare against the original sparse matrix
                ij = tuple(coords[arr.domain.dim(k).name]
                           for k in range(arr.ndim))
                result = sps.coo_matrix((value, ij), shape=arr.shape)
                np.testing.assert_allclose(expected.toarray(),
                                           result.toarray())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store Fortran-ordered TileDB dense array; the schema must
            # record a col-major cell order
            expected = np.asfortranarray(np.random.rand(8, 4, 3))
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
                self.assertEqual(arr.schema.cell_order, 'col-major')
        finally:
            shutil.rmtree(tempdir)

    @unittest.skipIf(h5py is None, 'h5py not installed')
    def testStoreHDF5Execution(self):
        """Store tensors into HDF5 via filename, File object and Dataset."""
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t1 = tensor(raw, chunk_size=20)  # single chunk
        t2 = tensor(raw, chunk_size=9)   # multiple chunks

        # unsupported target type
        with self.assertRaises(TypeError):
            tohdf5(object(), t2)

        this = self

        # minimal session stand-in so LocalContext can reach the executor
        class MockSession:
            def __init__(self):
                self.executor = this.executor

        ctx = LocalContext(MockSession())
        executor = ExecutorForTest('numpy', storage=ctx)

        with ctx:
            with tempfile.TemporaryDirectory() as d:
                filename = os.path.join(
                    d, 'test_store_{}.hdf5'.format(int(time.time())))

                # test 1 chunk
                r = tohdf5(filename, t1, group=group_name,
                           dataset=dataset_name)
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # test filename
                r = tohdf5(filename, t2, group=group_name,
                           dataset=dataset_name)
                executor.execute_tensor(r)

                # the multi-chunk write is serialized through a
                # SuccessorsExclusive control op with no inputs
                rt = get_tiled(r)
                self.assertEqual(
                    type(rt.chunks[0].inputs[1].op).__name__,
                    'SuccessorsExclusive')
                self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # group/dataset are required when only a filename is given
                with self.assertRaises(ValueError):
                    tohdf5(filename, t2)

                with h5py.File(filename, 'r') as f:
                    # test file
                    r = tohdf5(f, t2, group=group_name, dataset=dataset_name)

                # NOTE(review): execution placed after the read handle is
                # closed — reconstructed from collapsed source; confirm
                # against upstream history
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # group/dataset are required for a File target too
                with self.assertRaises(ValueError):
                    with h5py.File(filename, 'r') as f:
                        tohdf5(f, t2)

                with h5py.File(filename, 'r') as f:
                    # test dataset
                    ds = f['{}/{}'.format(group_name, dataset_name)]
                    # test file
                    r = tohdf5(ds, t2)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

    @unittest.skipIf(zarr is None, 'zarr not installed')
    def testStoreZarrExecution(self):
        """Store tensors into Zarr via path string and open array object."""
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t = tensor(raw, chunk_size=6)

        # unsupported target type
        with self.assertRaises(TypeError):
            tozarr(object(), t)

        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(
                d, 'test_store_{}.zarr'.format(int(time.time())))
            path = '{}/{}/{}'.format(filename, group_name, dataset_name)

            # store with an explicit compressor; it must survive the round trip
            r = tozarr(filename, t,
                       group=group_name, dataset=dataset_name,
                       compressor=Zstd(level=3))
            self.executor.execute_tensor(r)

            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw)
            self.assertEqual(arr.compressor, Zstd(level=3))

            # store to the full path directly
            r = tozarr(path, t + 2)
            self.executor.execute_tensor(r)
            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw + 2)

            # store to an already-open zarr array with filters/compressor
            filters = [Delta(dtype='i4')]
            compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
            arr = zarr.open(path, compressor=compressor, filters=filters)
            r = tozarr(arr, t + 1)
            self.executor.execute_tensor(r)
            result = zarr.open_array(path)
            np.testing.assert_array_equal(result, raw + 1)
class Test(TestBase):
    """Execution tests for mars dataframe indexing: set_index, iloc, loc,
    __getitem__, attribute access, head/tail and at/iat."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testSetIndex(self):
        """``set_index`` matches pandas with drop=True and drop=False."""
        df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]],
                           index=['a1', 'a2', 'a3'],
                           columns=['x', 'y', 'z'])
        df2 = md.DataFrame(df1, chunk_size=2)

        expected = df1.set_index('y', drop=True)
        df3 = df2.set_index('y', drop=True)
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df3, concat=True)[0])

        expected = df1.set_index('y', drop=False)
        df4 = df2.set_index('y', drop=False)
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df4, concat=True)[0])

    def testILocGetItem(self):
        """``iloc`` reads match pandas for scalar, slice, fancy and
        boolean indexers, on DataFrame and Series."""
        df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]],
                           index=['a1', 'a2', 'a3'],
                           columns=['x', 'y', 'z'])
        df2 = md.DataFrame(df1, chunk_size=2)

        # plain index
        expected = df1.iloc[1]
        df3 = df2.iloc[1]
        pd.testing.assert_series_equal(
            expected, self.executor.execute_dataframe(
                df3, concat=True, check_series_name=False)[0])

        # plain index on axis 1
        expected = df1.iloc[:2, 1]
        df4 = df2.iloc[:2, 1]
        pd.testing.assert_series_equal(
            expected, self.executor.execute_dataframe(df4, concat=True)[0])

        # slice index
        expected = df1.iloc[:, 2:4]
        df5 = df2.iloc[:, 2:4]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df5, concat=True)[0])

        # plain fancy index
        expected = df1.iloc[[0], [0, 1, 2]]
        df6 = df2.iloc[[0], [0, 1, 2]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df6, concat=True)[0])

        # plain fancy index with shuffled order
        expected = df1.iloc[[0], [1, 2, 0]]
        df7 = df2.iloc[[0], [1, 2, 0]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df7, concat=True)[0])

        # fancy index
        expected = df1.iloc[[1, 2], [0, 1, 2]]
        df8 = df2.iloc[[1, 2], [0, 1, 2]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df8, concat=True)[0])

        # fancy index with shuffled order
        expected = df1.iloc[[2, 1], [1, 2, 0]]
        df9 = df2.iloc[[2, 1], [1, 2, 0]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df9, concat=True)[0])

        # one fancy index
        expected = df1.iloc[[2, 1]]
        df10 = df2.iloc[[2, 1]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df10, concat=True)[0])

        # plain index (scalar result)
        expected = df1.iloc[1, 2]
        df11 = df2.iloc[1, 2]
        self.assertEqual(
            expected, self.executor.execute_dataframe(df11, concat=True)[0])

        # bool index array
        expected = df1.iloc[[True, False, True], [2, 1]]
        df12 = df2.iloc[[True, False, True], [2, 1]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df12, concat=True)[0])

        # bool index array on axis 1
        expected = df1.iloc[[2, 1], [True, False, True]]
        df14 = df2.iloc[[2, 1], [True, False, True]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df14, concat=True)[0])

        # bool index (mars Series as indexer)
        expected = df1.iloc[[True, False, True], [2, 1]]
        df13 = df2.iloc[md.Series([True, False, True], chunk_size=1), [2, 1]]
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df13, concat=True)[0])

        # test Series
        data = pd.Series(np.arange(10))
        series = md.Series(data, chunk_size=3).iloc[:3]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[:3])

        series = md.Series(data, chunk_size=3).iloc[4]
        self.assertEqual(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[4])

        series = md.Series(data, chunk_size=3).iloc[[2, 3, 4, 9]]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[[2, 3, 4, 9]])

        series = md.Series(data, chunk_size=3).iloc[[4, 3, 9, 2]]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[[4, 3, 9, 2]])

        series = md.Series(data).iloc[5:]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[5:])

        # bool index array
        selection = np.random.RandomState(0).randint(2, size=10, dtype=bool)
        series = md.Series(data).iloc[selection]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[selection])

        # bool index (mars Series as indexer)
        series = md.Series(data).iloc[md.Series(selection, chunk_size=4)]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            data.iloc[selection])

    def testILocSetItem(self):
        """``iloc`` writes match pandas.

        Note: ``expected`` intentionally aliases ``df1`` — both the pandas
        frame and the mars frame are mutated in lock-step and compared after
        each assignment, so statement order matters here.
        """
        df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]],
                           index=['a1', 'a2', 'a3'],
                           columns=['x', 'y', 'z'])
        df2 = md.DataFrame(df1, chunk_size=2)

        # plain index
        expected = df1
        expected.iloc[1] = 100
        df2.iloc[1] = 100
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df2, concat=True)[0])

        # slice index
        expected.iloc[:, 2:4] = 1111
        df2.iloc[:, 2:4] = 1111
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df2, concat=True)[0])

        # plain fancy index
        expected.iloc[[0], [0, 1, 2]] = 2222
        df2.iloc[[0], [0, 1, 2]] = 2222
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df2, concat=True)[0])

        # fancy index
        expected.iloc[[1, 2], [0, 1, 2]] = 3333
        df2.iloc[[1, 2], [0, 1, 2]] = 3333
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df2, concat=True)[0])

        # plain index (scalar assignment)
        expected.iloc[1, 2] = 4444
        df2.iloc[1, 2] = 4444
        pd.testing.assert_frame_equal(
            expected, self.executor.execute_dataframe(df2, concat=True)[0])

        # test Series
        data = pd.Series(np.arange(10))
        series = md.Series(data, chunk_size=3)

        series.iloc[:3] = 1
        data.iloc[:3] = 1
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0], data)

        series.iloc[4] = 2
        data.iloc[4] = 2
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0], data)

        series.iloc[[2, 3, 4, 9]] = 3
        data.iloc[[2, 3, 4, 9]] = 3
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0], data)

        series.iloc[5:] = 4
        data.iloc[5:] = 4
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0], data)

    def testLocGetItem(self):
        """``loc`` reads match pandas for labelled, positional, non-unique
        and timestamp indexes."""
        rs = np.random.RandomState(0)
        # index and columns are labels
        raw1 = pd.DataFrame(rs.randint(10, size=(5, 4)),
                            index=['a1', 'a2', 'a3', 'a4', 'a5'],
                            columns=['a', 'b', 'c', 'd'])
        # columns are labels
        raw2 = raw1.copy()
        raw2.reset_index(inplace=True, drop=True)
        # columns are non unique and monotonic
        raw3 = raw1.copy()
        raw3.columns = ['a', 'b', 'b', 'd']
        # columns are non unique and non monotonic
        raw4 = raw1.copy()
        raw4.columns = ['b', 'a', 'b', 'd']
        # index that is timestamp
        raw5 = raw1.copy()
        raw5.index = pd.date_range('2020-1-1', periods=5)

        df1 = md.DataFrame(raw1, chunk_size=2)
        df2 = md.DataFrame(raw2, chunk_size=2)
        df3 = md.DataFrame(raw3, chunk_size=2)
        df4 = md.DataFrame(raw4, chunk_size=2)
        df5 = md.DataFrame(raw5, chunk_size=2)

        # scalar lookup on a positional index
        df = df2.loc[3, 'b']
        result = self.executor.execute_tensor(df, concat=True)[0]
        expected = raw2.loc[3, 'b']
        self.assertEqual(result, expected)

        # scalar lookup on a labelled index
        df = df1.loc['a3', 'b']
        result = self.executor.execute_tensor(df, concat=True,
                                              check_shape=False)[0]
        expected = raw1.loc['a3', 'b']
        self.assertEqual(result, expected)

        # label slices on both axes
        df = df2.loc[1:4, 'b':'d']
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw2.loc[1:4, 'b':'d']
        pd.testing.assert_frame_equal(result, expected)

        df = df2.loc[:4, 'b':]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw2.loc[:4, 'b':]
        pd.testing.assert_frame_equal(result, expected)

        # slice on axis index whose index_value does not have value
        df = df1.loc['a2':'a4', 'b':]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw1.loc['a2':'a4', 'b':]
        pd.testing.assert_frame_equal(result, expected)

        df = df2.loc[:, 'b']
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw2.loc[:, 'b']
        pd.testing.assert_series_equal(result, expected)

        # 'b' is non-unique
        df = df3.loc[:, 'b']
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw3.loc[:, 'b']
        pd.testing.assert_frame_equal(result, expected)

        # 'b' is non-unique, and non-monotonic
        df = df4.loc[:, 'b']
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw4.loc[:, 'b']
        pd.testing.assert_frame_equal(result, expected)

        # label on axis 0
        df = df1.loc['a2', :]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw1.loc['a2', :]
        pd.testing.assert_series_equal(result, expected)

        # label-based fancy index
        df = df2.loc[[3, 0, 1], ['c', 'a', 'd']]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw2.loc[[3, 0, 1], ['c', 'a', 'd']]
        pd.testing.assert_frame_equal(result, expected)

        # label-based fancy index, asc sorted
        df = df2.loc[[0, 1, 3], ['a', 'c', 'd']]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw2.loc[[0, 1, 3], ['a', 'c', 'd']]
        pd.testing.assert_frame_equal(result, expected)

        # label-based fancy index in which non-unique exists
        selection = rs.randint(2, size=(5, ), dtype=bool)
        df = df3.loc[selection, ['b', 'a', 'd']]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw3.loc[selection, ['b', 'a', 'd']]
        pd.testing.assert_frame_equal(result, expected)

        df = df3.loc[md.Series(selection), ['b', 'a', 'd']]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw3.loc[selection, ['b', 'a', 'd']]
        pd.testing.assert_frame_equal(result, expected)

        # label-based fancy index on index
        # whose index_value does not have value
        df = df1.loc[['a3', 'a1'], ['b', 'a', 'd']]
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw1.loc[['a3', 'a1'], ['b', 'a', 'd']]
        pd.testing.assert_frame_equal(result, expected)

        # get timestamp by str
        df = df5.loc['20200101']
        result = self.executor.execute_dataframe(
            df, concat=True, check_series_name=False)[0]
        expected = raw5.loc['20200101']
        pd.testing.assert_series_equal(result, expected)

        # get timestamp by str, return scalar
        df = df5.loc['2020-1-1', 'c']
        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = raw5.loc['2020-1-1', 'c']
        self.assertEqual(result, expected)

    def testDataFrameGetitem(self):
        """``DataFrame.__getitem__`` matches pandas for column names, column
        lists, row slices and datetime-string slices."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        df = md.DataFrame(data, chunk_size=2)

        data2 = data.copy()
        data2.index = pd.date_range('2020-1-1', periods=10)
        mdf = md.DataFrame(data2, chunk_size=3)

        series1 = df['c2']
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series1, concat=True)[0],
            data['c2'])

        series2 = df['c5']
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series2, concat=True)[0],
            data['c5'])

        df1 = df[['c1', 'c2', 'c3']]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df1, concat=True)[0],
            data[['c1', 'c2', 'c3']])

        df2 = df[['c3', 'c2', 'c1']]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df2, concat=True)[0],
            data[['c3', 'c2', 'c1']])

        df3 = df[['c1']]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df3, concat=True)[0],
            data[['c1']])

        # duplicated column selection
        df4 = df[['c3', 'c1', 'c2', 'c1']]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df4, concat=True)[0],
            data[['c3', 'c1', 'c2', 'c1']])

        # ndarray of column names
        df5 = df[np.array(['c1', 'c2', 'c3'])]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df5, concat=True)[0],
            data[['c1', 'c2', 'c3']])

        df6 = df[['c3', 'c2', 'c1']]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df6, concat=True)[0],
            data[['c3', 'c2', 'c1']])

        # positional row slice with a step
        df7 = df[1:7:2]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df7, concat=True)[0],
            data[1:7:2])

        series3 = df['c1'][0]
        self.assertEqual(
            self.executor.execute_dataframe(series3, concat=True)[0],
            data['c1'][0])

        # positional slice on a datetime index
        df8 = mdf[3:7]
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df8, concat=True)[0],
            data2[3:7])

        # datetime-string slice
        df9 = mdf['2020-1-2':'2020-1-5']
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df9, concat=True)[0],
            data2['2020-1-2':'2020-1-5'])

    def testDataFrameGetitemBool(self):
        """Boolean masking matches pandas, including index-alignment paths."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        df = md.DataFrame(data, chunk_size=2)

        mask_data = data.c1 > 0.5
        mask = md.Series(mask_data, chunk_size=2)

        # getitem by mars series
        self.assertEqual(
            self.executor.execute_dataframe(df[mask], concat=True)[0].shape,
            data[mask_data].shape)
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df[mask], concat=True)[0],
            data[mask_data])

        # getitem by pandas series
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df[mask_data], concat=True)[0],
            data[mask_data])

        # getitem by mars series with alignment but no shuffle
        mask_data = pd.Series(
            [True, True, True, False, False, True, True, False, False, True],
            index=range(9, -1, -1))
        mask = md.Series(mask_data, chunk_size=2)
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df[mask], concat=True)[0],
            data[mask_data])

        # getitem by mars series with shuffle alignment
        mask_data = pd.Series(
            [True, True, True, False, False, True, True, False, False, True],
            index=[0, 3, 6, 2, 9, 8, 5, 7, 1, 4])
        mask = md.Series(mask_data, chunk_size=2)
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(
                df[mask], concat=True)[0].sort_index(),
            data[mask_data])

        # getitem by mars series with shuffle alignment and extra element
        mask_data = pd.Series(
            [True, True, True, False, False, True, True, False, False, True,
             False],
            index=[0, 3, 6, 2, 9, 8, 5, 7, 1, 4, 10])
        mask = md.Series(mask_data, chunk_size=2)
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(
                df[mask], concat=True)[0].sort_index(),
            data[mask_data])

        # getitem by DataFrame with all bool columns
        r = df[df > 0.5]
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_frame_equal(result, data[data > 0.5])

    def testDataFrameGetitemUsingAttr(self):
        """Attribute access returns columns without shadowing real attributes."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'key', 'dtypes', 'size'])
        df = md.DataFrame(data, chunk_size=2)

        series1 = df.c2
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series1, concat=True)[0],
            data.c2)

        # accessing column using attribute shouldn't overwrite existing attributes
        self.assertEqual(df.key, getattr(getattr(df, '_data'), '_key'))
        self.assertEqual(df.size, data.size)
        pd.testing.assert_series_equal(df.dtypes, data.dtypes)

        # accessing non-existing attributes should trigger exception
        with self.assertRaises(AttributeError):
            _ = df.zzz  # noqa: F841

    def testSeriesGetitem(self):
        """``Series.__getitem__`` matches pandas for positional and
        string-labelled indexes."""
        # default (single-chunk) series
        data = pd.Series(np.random.rand(10))
        series = md.Series(data)
        self.assertEqual(
            self.executor.execute_dataframe(series[1], concat=True)[0],
            data[1])

        data = pd.Series(np.random.rand(10), name='a')
        series = md.Series(data, chunk_size=4)

        for i in range(10):
            series1 = series[i]
            self.assertEqual(
                self.executor.execute_dataframe(series1, concat=True)[0],
                data[i])

        series2 = series[[0, 1, 2, 3, 4]]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series2, concat=True)[0],
            data[[0, 1, 2, 3, 4]])

        series3 = series[[4, 3, 2, 1, 0]]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series3, concat=True)[0],
            data[[4, 3, 2, 1, 0]])

        series4 = series[[1, 2, 3, 2, 1, 0]]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series4, concat=True)[0],
            data[[1, 2, 3, 2, 1, 0]])

        # string-labelled index
        index = ['i' + str(i) for i in range(20)]
        data = pd.Series(np.random.rand(20), index=index, name='a')
        series = md.Series(data, chunk_size=3)

        for idx in index:
            series1 = series[idx]
            self.assertEqual(
                self.executor.execute_dataframe(series1, concat=True)[0],
                data[idx])

        selected = ['i1', 'i2', 'i3', 'i4', 'i5']
        series2 = series[selected]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series2, concat=True)[0],
            data[selected])

        selected = ['i4', 'i7', 'i0', 'i1', 'i5']
        series3 = series[selected]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series3, concat=True)[0],
            data[selected])

        selected = ['i0', 'i1', 'i5', 'i4', 'i0', 'i1']
        series4 = series[selected]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series4, concat=True)[0],
            data[selected])

        selected = ['i0']
        series5 = series[selected]
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series5, concat=True)[0],
            data[selected])

    def testHead(self):
        """``head`` matches pandas for defaults, negative and oversized n."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        df = md.DataFrame(data, chunk_size=2)

        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(), concat=True)[0],
            data.head())
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(3), concat=True)[0],
            data.head(3))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(-3), concat=True)[0],
            data.head(-3))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(8), concat=True)[0],
            data.head(8))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(-8), concat=True)[0],
            data.head(-8))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(13), concat=True)[0],
            data.head(13))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.head(-13), concat=True)[0],
            data.head(-13))

    def testTail(self):
        """``tail`` matches pandas for defaults, negative and oversized n."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        df = md.DataFrame(data, chunk_size=2)

        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(), concat=True)[0],
            data.tail())
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(3), concat=True)[0],
            data.tail(3))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(-3), concat=True)[0],
            data.tail(-3))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(8), concat=True)[0],
            data.tail(8))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(-8), concat=True)[0],
            data.tail(-8))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(13), concat=True)[0],
            data.tail(13))
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df.tail(-13), concat=True)[0],
            data.tail(-13))

    def testAt(self):
        """``at`` matches pandas and rejects non-scalar indexers."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c' + str(i) for i in range(5)],
                            index=['i' + str(i) for i in range(10)])
        df = md.DataFrame(data, chunk_size=3)

        # non-scalar key is rejected
        # NOTE(review): 'i3, i4' is a single string inside the list —
        # presumably intended as ['i3', 'i4']; either way a list key
        # raises ValueError, which is what is asserted
        with self.assertRaises(ValueError):
            _ = df.at[['i3, i4'], 'c1']

        result = self.executor.execute_dataframe(df.at['i3', 'c1'],
                                                 concat=True)[0]
        self.assertEqual(result, data.at['i3', 'c1'])

        result = self.executor.execute_dataframe(df['c1'].at['i2'],
                                                 concat=True)[0]
        self.assertEqual(result, data['c1'].at['i2'])

    def testIAt(self):
        """``iat`` matches pandas and rejects non-integer indexers."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c' + str(i) for i in range(5)],
                            index=['i' + str(i) for i in range(10)])
        df = md.DataFrame(data, chunk_size=3)

        # non-scalar key is rejected
        with self.assertRaises(ValueError):
            _ = df.iat[[1, 2], 3]

        result = self.executor.execute_dataframe(df.iat[3, 4],
                                                 concat=True)[0]
        self.assertEqual(result, data.iat[3, 4])

        result = self.executor.execute_dataframe(df.iloc[:, 2].iat[3],
                                                 concat=True)[0]
        self.assertEqual(result, data.iloc[:, 2].iat[3])
class Test(TestBase):
    """Tests for index-trick helpers: ``nd_grid`` (mgrid/ogrid) and the
    ``r_``/``c_`` concatenators."""

    def setUp(self):
        # Fix: run TestBase initialization first, as every sibling TestBase
        # subclass in this file does; it was previously skipped here.
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    def testIndexTricks(self):
        """``mgrid``/``ogrid`` results can be tiled (no loop in the graph)."""
        mgrid = nd_grid()
        g = mgrid[0:5, 0:5]
        g.tiles()  # tileable means no loop exists

        ogrid = nd_grid(sparse=True)
        o = ogrid[0:5, 0:5]
        [ob.tiles() for ob in o]  # tileable means no loop exists

    def testR_(self):
        """``mt.r_`` matches ``np.r_`` for arrays, scalars, slices and
        string directives."""
        # mixture of tensors and scalars
        r = mt.r_[mt.array([1, 2, 3]), 0, 0, mt.array([4, 5, 6])]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])]
        np.testing.assert_array_equal(result, expected)

        # complex step means "number of points" (linspace semantics)
        r = mt.r_[-1:1:6j, [0] * 3, 5, 6]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_[-1:1:6j, [0] * 3, 5, 6]
        np.testing.assert_array_equal(result, expected)

        r = mt.r_[-1:1:6j]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_[-1:1:6j]
        np.testing.assert_array_equal(result, expected)

        # string directive selecting the concatenation axis
        raw = [[0, 1, 2], [3, 4, 5]]
        a = mt.array(raw, chunk_size=2)
        r = mt.r_['-1', a, a]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_['-1', raw, raw]
        np.testing.assert_array_equal(result, expected)

        # 'axis,ndmin[,trans1d]' directives
        r = mt.r_['0,2', [1, 2, 3], [4, 5, 6]]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_['0,2', [1, 2, 3], [4, 5, 6]]
        np.testing.assert_array_equal(result, expected)

        r = mt.r_['0,2,0', [1, 2, 3], [4, 5, 6]]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_['0,2,0', [1, 2, 3], [4, 5, 6]]
        np.testing.assert_array_equal(result, expected)

        r = mt.r_['1,2,0', [1, 2, 3], [4, 5, 6]]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.r_['1,2,0', [1, 2, 3], [4, 5, 6]]
        np.testing.assert_array_equal(result, expected)

        # the concatenator object itself holds no state
        self.assertEqual(len(mt.r_), 0)

        # a string operand that is not a leading directive is rejected
        with self.assertRaises(ValueError):
            _ = mt.r_[:3, 'wrong']

    def testC_(self):
        """``mt.c_`` matches ``np.c_`` for arrays, scalars and slices."""
        r = mt.c_[mt.array([1, 2, 3]), mt.array([4, 5, 6])]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.c_[np.array([1, 2, 3]), np.array([4, 5, 6])]
        np.testing.assert_array_equal(result, expected)

        r = mt.c_[mt.array([[1, 2, 3]]), 0, 0, mt.array([[4, 5, 6]])]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])]
        np.testing.assert_array_equal(result, expected)

        r = mt.c_[:3, 1:4]
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = np.c_[:3, 1:4]
        np.testing.assert_array_equal(result, expected)
class Test(TestBase):
    """Execution tests for storing tensors into TileDB arrays."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testStoreTileDBExecution(self):
        tiledb_ctx = tiledb.Ctx()

        # case 1: multi-chunk dense tensor -> TileDB dense array
        work_dir = tempfile.mkdtemp()
        try:
            raw = np.random.rand(8, 4, 3)
            t = tensor(raw, chunk_size=(3, 3, 2))
            save_op = totiledb(work_dir, t, ctx=tiledb_ctx)
            self.executor.execute_tensor(save_op)

            with tiledb.DenseArray(uri=work_dir, ctx=tiledb_ctx) as tdb_arr:
                np.testing.assert_allclose(raw, tdb_arr.read_direct())
        finally:
            shutil.rmtree(work_dir)

        # case 2: single-chunk tensor -> TileDB dense array
        work_dir = tempfile.mkdtemp()
        try:
            t = arange(12)
            save_op = totiledb(work_dir, t, ctx=tiledb_ctx)
            self.executor.execute_tensor(save_op)

            with tiledb.DenseArray(uri=work_dir, ctx=tiledb_ctx) as tdb_arr:
                np.testing.assert_allclose(np.arange(12),
                                           tdb_arr.read_direct())
        finally:
            shutil.rmtree(work_dir)

        # case 3: 2-d sparse tensor -> TileDB sparse array
        work_dir = tempfile.mkdtemp()
        try:
            raw = sps.random(8, 7, density=0.1)
            t = tensor(raw, chunk_size=(3, 5))
            save_op = totiledb(work_dir, t, ctx=tiledb_ctx)
            self.executor.execute_tensor(save_op)

            with tiledb.SparseArray(uri=work_dir, ctx=tiledb_ctx) as tdb_arr:
                records = tdb_arr[:, :]
                coords = records['coords']
                values = records[tdb_arr.attr(0).name]
                # rebuild a COO matrix from stored coordinates and compare
                ij = tuple(coords[tdb_arr.domain.dim(k).name]
                           for k in range(tdb_arr.ndim))
                restored = sps.coo_matrix((values, ij), shape=tdb_arr.shape)
                np.testing.assert_allclose(raw.toarray(), restored.toarray())
        finally:
            shutil.rmtree(work_dir)

        # case 4: Fortran-ordered tensor -> schema records col-major order
        work_dir = tempfile.mkdtemp()
        try:
            raw = np.asfortranarray(np.random.rand(8, 4, 3))
            t = tensor(raw, chunk_size=(3, 3, 2))
            save_op = totiledb(work_dir, t, ctx=tiledb_ctx)
            self.executor.execute_tensor(save_op)

            with tiledb.DenseArray(uri=work_dir, ctx=tiledb_ctx) as tdb_arr:
                np.testing.assert_allclose(raw, tdb_arr.read_direct())
                self.assertEqual(tdb_arr.schema.cell_order, 'col-major')
        finally:
            shutil.rmtree(work_dir)
class Test(unittest.TestCase):
    """Tests for mars' ``euclidean_distances`` against scikit-learn."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testEuclideanDistancesOp(self):
        """Graph-construction behavior: norm handling and shape validation."""
        t_x = mt.random.rand(10, 3)
        t_norm = mt.random.rand(1, 10)
        t_y = mt.random.rand(11, 3)

        # a provided X_norm_squared is transposed and kept on the op
        d = euclidean_distances(t_x, X_norm_squared=t_norm)
        self.assertEqual(d.op.x_norm_squared.key, check_array(t_norm).T.key)

        # float32 norms are discarded: the op keeps None for both
        d = euclidean_distances(
            t_x, t_y,
            X_norm_squared=mt.random.rand(10, 1, dtype=mt.float32),
            Y_norm_squared=mt.random.rand(1, 11, dtype=mt.float32))
        self.assertIsNone(d.op.x_norm_squared)
        self.assertIsNone(d.op.y_norm_squared)

        # X_norm_squared with wrong dimensionality
        with self.assertRaises(ValueError):
            euclidean_distances(t_x, X_norm_squared=mt.random.rand(10))

        # X_norm_squared with incompatible length
        with self.assertRaises(ValueError):
            euclidean_distances(t_x, X_norm_squared=mt.random.rand(11, 1))

        # Y_norm_squared with wrong dimensionality
        with self.assertRaises(ValueError):
            euclidean_distances(t_x, t_y,
                                Y_norm_squared=mt.random.rand(10))

    def testEuclideanDistancesExecution(self):
        """Executed results must match sklearn for dense and sparse input."""
        dense_x = np.random.rand(30, 10)
        dense_y = np.random.rand(40, 10)
        sparse_x = SparseNDArray(
            sps.random(30, 10, density=0.5, format='csr'))
        sparse_y = SparseNDArray(
            sps.random(40, 10, density=0.5, format='csr'))

        for raw_x, raw_y in [(dense_x, dense_y), (sparse_x, sparse_y)]:
            x = mt.tensor(raw_x, chunk_size=9)
            y = mt.tensor(raw_y, chunk_size=7)

            # plain distance computation
            distance = euclidean_distances(x, y)
            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x, Y=raw_y)
            np.testing.assert_almost_equal(result, expected)

            # pass the same pre-computed "norms" (row sums here) to both
            # implementations so the results stay comparable
            x_norm = x.sum(axis=1)[..., np.newaxis]
            y_norm = y.sum(axis=1)[np.newaxis, ...]
            distance = euclidean_distances(x, y, X_norm_squared=x_norm,
                                           Y_norm_squared=y_norm)
            x_raw_norm = raw_x.sum(axis=1)[..., np.newaxis]
            y_raw_norm = raw_y.sum(axis=1)[np.newaxis, ...]
            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x, raw_y,
                                              X_norm_squared=x_raw_norm,
                                              Y_norm_squared=y_raw_norm)
            np.testing.assert_almost_equal(result, expected)

            # squared distances on float32 input
            x_sq = (x ** 2).astype(np.float32)
            y_sq = (y ** 2).astype(np.float32)
            distance = euclidean_distances(x_sq, y_sq, squared=True)
            x_raw_sq = (raw_x ** 2).astype(np.float32)
            y_raw_sq = (raw_y ** 2).astype(np.float32)
            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(x_raw_sq, y_raw_sq,
                                              squared=True)
            np.testing.assert_almost_equal(result, expected, decimal=6)

            # test x is y
            distance = euclidean_distances(x)
            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x)
            np.testing.assert_almost_equal(result, expected)
class Test(TestBase):
    """Tests for mars.dataframe data sources: construction from pandas
    objects and tensors, plus ``read_csv`` / ``read_sql_table``."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testFromPandasDataFrameExecution(self):
        # Round-trip a MultiIndex DataFrame through chunked conversion.
        pdf = pd.DataFrame(np.random.rand(20, 30),
                           index=[np.arange(20), np.arange(20, 0, -1)])
        df = from_pandas_df(pdf, chunk_size=(13, 21))
        result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_frame_equal(pdf, result)

    def testFromPandasSeriesExecution(self):
        # Round-trip a named MultiIndex Series through chunked conversion.
        ps = pd.Series(np.random.rand(20),
                       index=[np.arange(20), np.arange(20, 0, -1)], name='a')
        series = from_pandas_series(ps, chunk_size=13)
        result = self.executor.execute_dataframe(series, concat=True)[0]
        pd.testing.assert_series_equal(ps, result)

    def testInitializerExecution(self):
        # md.DataFrame / md.Series constructors should behave like the
        # explicit from_pandas converters.
        pdf = pd.DataFrame(np.random.rand(20, 30),
                           index=[np.arange(20), np.arange(20, 0, -1)])
        df = md.DataFrame(pdf, chunk_size=(15, 10))
        result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_frame_equal(pdf, result)

        ps = pd.Series(np.random.rand(20),
                       index=[np.arange(20), np.arange(20, 0, -1)], name='a')
        series = md.Series(ps, chunk_size=7)
        result = self.executor.execute_dataframe(series, concat=True)[0]
        pd.testing.assert_series_equal(ps, result)

    def testSeriesFromTensor(self):
        # Build md.Series from mars tensors: unchunked, chunked, from
        # mt.ones, and with a tensor-valued index.
        data = np.random.rand(10)
        series = md.Series(mt.tensor(data), name='a')
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(data, name='a'))

        series = md.Series(mt.tensor(data, chunk_size=3))
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(data))

        series = md.Series(mt.ones((10,), chunk_size=4))
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(np.ones(10,)))

        index_data = np.random.rand(10)
        series = md.Series(mt.tensor(data, chunk_size=3), name='a',
                           index=mt.tensor(index_data, chunk_size=4))
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(data, name='a', index=index_data))

    def testFromTensorExecution(self):
        # Default conversion: RangeIndex on both axes.
        tensor = mt.random.rand(10, 10, chunk_size=5)
        df = dataframe_from_tensor(tensor)
        tensor_res = self.executor.execute_tensor(tensor, concat=True)[0]
        pdf_expected = pd.DataFrame(tensor_res)
        df_result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_index_equal(df_result.index, pd.RangeIndex(0, 10))
        pd.testing.assert_index_equal(df_result.columns, pd.RangeIndex(0, 10))
        pd.testing.assert_frame_equal(df_result, pdf_expected)

        # test converted with specified index_value and columns
        tensor2 = mt.random.rand(2, 2, chunk_size=1)
        df2 = dataframe_from_tensor(tensor2, index=pd.Index(['a', 'b']),
                                    columns=pd.Index([3, 4]))
        df_result = self.executor.execute_dataframe(df2, concat=True)[0]
        pd.testing.assert_index_equal(df_result.index, pd.Index(['a', 'b']))
        pd.testing.assert_index_equal(df_result.columns, pd.Index([3, 4]))

        # test converted from 1-d tensor
        tensor3 = mt.array([1, 2, 3])
        df3 = dataframe_from_tensor(tensor3)
        result3 = self.executor.execute_dataframe(df3, concat=True)[0]
        pdf_expected = pd.DataFrame(np.array([1, 2, 3]))
        pd.testing.assert_frame_equal(pdf_expected, result3)

        # test converted from identical chunks
        tensor4 = mt.ones((10, 10), chunk_size=3)
        df4 = dataframe_from_tensor(tensor4)
        result4 = self.executor.execute_dataframe(df4, concat=True)[0]
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor4, concat=True)[0])
        pd.testing.assert_frame_equal(pdf_expected, result4)

        # from tensor with given index
        tensor5 = mt.ones((10, 10), chunk_size=3)
        df5 = dataframe_from_tensor(tensor5, index=np.arange(0, 20, 2))
        result5 = self.executor.execute_dataframe(df5, concat=True)[0]
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor5, concat=True)[0],
            index=np.arange(0, 20, 2))
        pd.testing.assert_frame_equal(pdf_expected, result5)

        # from tensor with given index that is a tensor
        raw7 = np.random.rand(10, 10)
        tensor7 = mt.tensor(raw7, chunk_size=3)
        index_raw7 = np.random.rand(10)
        index7 = mt.tensor(index_raw7, chunk_size=4)
        df7 = dataframe_from_tensor(tensor7, index=index7)
        result7 = self.executor.execute_dataframe(df7, concat=True)[0]
        pdf_expected = pd.DataFrame(raw7, index=index_raw7)
        pd.testing.assert_frame_equal(pdf_expected, result7)

        # from tensor with given columns
        tensor6 = mt.ones((10, 10), chunk_size=3)
        df6 = dataframe_from_tensor(tensor6, columns=list('abcdefghij'))
        result6 = self.executor.execute_dataframe(df6, concat=True)[0]
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor6, concat=True)[0],
            columns=list('abcdefghij'))
        pd.testing.assert_frame_equal(pdf_expected, result6)

        # from 1d tensors
        raws8 = [('a', np.random.rand(8)),
                 ('b', np.random.randint(10, size=8)),
                 ('c', [''.join(np.random.choice(list(printable), size=6))
                        for _ in range(8)])]
        tensors8 = [mt.tensor(r[1], chunk_size=3) for r in raws8]
        df8 = dataframe_from_1d_tensors(tensors8,
                                        columns=[r[0] for r in raws8])
        result = self.executor.execute_dataframe(df8, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8))
        pd.testing.assert_frame_equal(result, pdf_expected)

        # from 1d tensors and specify index with a tensor
        index_raw9 = np.random.rand(8)
        index9 = mt.tensor(index_raw9, chunk_size=4)
        df9 = dataframe_from_1d_tensors(tensors8,
                                        columns=[r[0] for r in raws8],
                                        index=index9)
        result = self.executor.execute_dataframe(df9, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8), index=index_raw9)
        pd.testing.assert_frame_equal(result, pdf_expected)

    def testFromRecordsExecution(self):
        # Structured (record) arrays become DataFrames with one column
        # per field, from both mars tensors and raw numpy arrays.
        dtype = np.dtype([('x', 'int'), ('y', 'double'), ('z', '<U16')])
        ndarr = np.ones((10,), dtype=dtype)
        pdf_expected = pd.DataFrame.from_records(ndarr,
                                                 index=pd.RangeIndex(10))

        # from structured array of mars
        tensor = mt.ones((10,), dtype=dtype, chunk_size=3)
        df1 = from_records(tensor)
        df1_result = self.executor.execute_dataframe(df1, concat=True)[0]
        pd.testing.assert_frame_equal(df1_result, pdf_expected)

        # from structured array of numpy
        df2 = from_records(ndarr)
        df2_result = self.executor.execute_dataframe(df2, concat=True)[0]
        pd.testing.assert_frame_equal(df2_result, pdf_expected)

    def testReadCSVExecution(self):
        # Each scenario is run twice: once in a single chunk and once with
        # a tiny chunk_bytes to force multi-chunk reads.
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0, chunk_bytes=10),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test sep
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path, sep=';')

            pdf = pd.read_csv(file_path, sep=';', index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, sep=';', index_col=0),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, sep=';', index_col=0, chunk_bytes=10),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test missing value
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame({
                'c1': [np.nan, 'a', 'b', 'c'],
                'c2': [1, 2, 3, np.nan],
                'c3': [np.nan, np.nan, 3.4, 2.2]
            })
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0, chunk_bytes=12),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # date-indexed frame with mixed column dtypes
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            index = pd.date_range(start='1/1/2018', periods=100)
            df = pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100,)),
                    'col3': np.arange(100)
                },
                index=index)
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0, chunk_bytes=100),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test compression
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.gzip')
        try:
            index = pd.date_range(start='1/1/2018', periods=100)
            df = pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100,)),
                    'col3': np.arange(100)
                },
                index=index)
            df.to_csv(file_path, compression='gzip')

            pdf = pd.read_csv(file_path, compression='gzip', index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, compression='gzip', index_col=0),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, compression='gzip', index_col=0,
                            chunk_bytes='1k'), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test multiply files
        tempdir = tempfile.mkdtemp()
        try:
            df = pd.DataFrame(np.random.rand(300, 3),
                              columns=['a', 'b', 'c'])

            file_paths = [
                os.path.join(tempdir, 'test{}.csv'.format(i))
                for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])

            mdf = self.executor.execute_dataframe(
                md.read_csv(file_paths, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_paths, index_col=0, chunk_bytes=50),
                concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test wildcards in path
        tempdir = tempfile.mkdtemp()
        try:
            df = pd.DataFrame(np.random.rand(300, 3),
                              columns=['a', 'b', 'c'])

            file_paths = [
                os.path.join(tempdir, 'test{}.csv'.format(i))
                for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])

            # As we can not guarantee the order in which these files are processed,
            # the result may not keep the original order.
            mdf = self.executor.execute_dataframe(
                md.read_csv('{}/*.csv'.format(tempdir), index_col=0),
                concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf.sort_index())

            mdf2 = self.executor.execute_dataframe(
                md.read_csv('{}/*.csv'.format(tempdir), index_col=0,
                            chunk_bytes=50), concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf2.sort_index())
        finally:
            shutil.rmtree(tempdir)

    @require_cudf
    def testReadCSVGPUExecution(self):
        # GPU (cudf) read path; results converted via to_pandas for compare.
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame({
                'col1': np.random.rand(100),
                'col2': np.random.choice(['a', 'b', 'c'], (100,)),
                'col3': np.arange(100)
            })
            df.to_csv(file_path, index=False)

            pdf = pd.read_csv(file_path)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, gpu=True), concat=True)[0]
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf.to_pandas().reset_index(drop=True))

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, gpu=True, chunk_bytes=200),
                concat=True)[0]
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf2.to_pandas().reset_index(drop=True))
        finally:
            shutil.rmtree(tempdir)

    def testReadCSVWithoutIndex(self):
        sess = new_session()

        # test csv file without storing index
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path, index=False)

            pdf = pd.read_csv(file_path)
            mdf = sess.run(md.read_csv(file_path, sort_range_index=True))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = sess.run(
                md.read_csv(file_path, sort_range_index=True,
                            chunk_bytes=10))
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

    def testReadSQLTableExecution(self):
        # Reads from a temporary on-disk SQLite database; exercises
        # chunked reads, index_col/columns selection, SQLAlchemy Table
        # objects, and primary-key-backed tables.
        import sqlalchemy as sa

        test_df = pd.DataFrame({
            'a': np.arange(10).astype(np.int64, copy=False),
            'b': ['s%d' % i for i in range(10)],
            'c': np.random.rand(10)
        })
        with tempfile.TemporaryDirectory() as d:
            table_name = 'test'
            table_name2 = 'test2'
            uri = 'sqlite:///' + os.path.join(d, 'test.db')
            test_df.to_sql(table_name, uri, index=False)

            r = md.read_sql_table('test', uri, chunk_size=4)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            pd.testing.assert_frame_equal(result, test_df)

            engine = sa.create_engine(uri)
            m = sa.MetaData()
            try:
                # test index_col and columns
                r = md.read_sql_table('test', engine.connect(), chunk_size=4,
                                      index_col='a', columns=['b'])
                result = self.executor.execute_dataframe(r, concat=True)[0]
                expected = test_df.copy(deep=True)
                expected.set_index('a', inplace=True)
                del expected['c']
                pd.testing.assert_frame_equal(result, expected)

                # do not specify chunk_size
                r = md.read_sql_table('test', engine.connect(),
                                      index_col='a', columns=['b'])
                result = self.executor.execute_dataframe(r, concat=True)[0]
                pd.testing.assert_frame_equal(result, expected)

                table = sa.Table(table_name, m, autoload=True,
                                 autoload_with=engine)
                r = md.read_sql_table(
                    table, engine, chunk_size=4,
                    index_col=[table.columns['a'], table.columns['b']],
                    columns=[table.columns['c']])
                result = self.executor.execute_dataframe(r, concat=True)[0]
                expected = test_df.copy(deep=True)
                expected.set_index(['a', 'b'], inplace=True)
                pd.testing.assert_frame_equal(result, expected)

                # test primary key
                sa.Table(table_name2, m,
                         sa.Column('id', sa.Integer, primary_key=True),
                         sa.Column('a', sa.Integer),
                         sa.Column('b', sa.String),
                         sa.Column('c', sa.Float))
                m.create_all(engine)
                test_df = test_df.copy(deep=True)
                test_df.index.name = 'id'
                test_df.to_sql(table_name2, uri, if_exists='append')

                r = md.read_sql_table(table_name2, engine, chunk_size=4,
                                      index_col='id')
                result = self.executor.execute_dataframe(r, concat=True)[0]
                pd.testing.assert_frame_equal(result, test_df)
            finally:
                engine.dispose()
class TestUnary(TestBase):
    """Tests for unary operators and numpy ufuncs applied to mars
    DataFrames and Series."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testAbs(self):
        # Both the .abs() method and the builtin abs() protocol.
        data1 = pd.DataFrame(np.random.uniform(low=-1, high=1, size=(10, 10)))
        df1 = from_pandas(data1, chunk_size=5)

        result = self.executor.execute_dataframe(df1.abs(), concat=True)[0]
        expected = data1.abs()
        pd.testing.assert_frame_equal(expected, result)

        result = self.executor.execute_dataframe(abs(df1), concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

    def testNot(self):
        # Bitwise-invert (~) on a boolean frame.
        data1 = pd.DataFrame(
            np.random.uniform(low=-1, high=1, size=(10, 10)) > 0)
        df1 = from_pandas(data1, chunk_size=5)

        result = self.executor.execute_dataframe(~df1, concat=True)[0]
        expected = ~data1
        pd.testing.assert_frame_equal(expected, result)

    def testUfunc(self):
        # Applies each (numpy ufunc, mars equivalent) pair to both a
        # DataFrame and a Series; also checks numpy ufuncs dispatch to
        # mars objects via __array_ufunc__.
        df_raw = pd.DataFrame(np.random.uniform(size=(10, 10)),
                              index=pd.RangeIndex(9, -1, -1))
        df = from_pandas(df_raw, chunk_size=5)

        series_raw = pd.Series(np.random.uniform(size=10),
                               index=pd.RangeIndex(9, -1, -1))
        series = from_pandas_series(series_raw, chunk_size=5)

        ufuncs = [[np.abs, mt.abs], [np.log, mt.log], [np.log2, mt.log2],
                  [np.log10, mt.log10], [np.sin, mt.sin], [np.cos, mt.cos],
                  [np.tan, mt.tan], [np.sinh, mt.sinh], [np.cosh, mt.cosh],
                  [np.tanh, mt.tanh], [np.arcsin, mt.arcsin],
                  [np.arccos, mt.arccos], [np.arctan, mt.arctan],
                  [np.arcsinh, mt.arcsinh], [np.arccosh, mt.arccosh],
                  [np.arctanh, mt.arctanh], [np.radians, mt.radians],
                  [np.degrees, mt.degrees], [np.ceil, mt.ceil],
                  [np.floor, mt.floor],
                  [partial(np.around, decimals=2),
                   partial(mt.around, decimals=2)],
                  [np.exp, mt.exp], [np.exp2, mt.exp2],
                  [np.expm1, mt.expm1], [np.sqrt, mt.sqrt]]

        for raw, data in [(df_raw, df), (series_raw, series)]:
            for npf, mtf in ufuncs:
                r = mtf(data)

                result = self.executor.execute_tensor(r, concat=True)[0]
                expected = npf(raw)

                if isinstance(raw, pd.DataFrame):
                    pd.testing.assert_frame_equal(result, expected)
                else:
                    pd.testing.assert_series_equal(result, expected)

                # test numpy ufunc
                r = npf(data)

                result = self.executor.execute_tensor(r, concat=True)[0]

                if isinstance(raw, pd.DataFrame):
                    pd.testing.assert_frame_equal(result, expected)
                else:
                    pd.testing.assert_series_equal(result, expected)
class Test(unittest.TestCase):
    """Tests for tensor reductions (sum/prod, max/min, all/any, mean, var,
    std, arg-reductions, nan-reductions, cumulative reductions, count_nonzero,
    allclose, array_equal) over dense, sparse and object-dtype inputs."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testSumProdExecution(self):
        arr = ones((10, 8), chunk_size=3)
        self.assertEqual([80], self.executor.execute_tensor(arr.sum()))
        self.assertEqual((10,) * 8,
                         tuple(np.concatenate(
                             self.executor.execute_tensor(arr.sum(axis=0)))))

        arr = ones((3, 3), chunk_size=2)
        self.assertEqual([512], self.executor.execute_tensor((arr * 2).prod()))
        self.assertEqual((8,) * 3,
                         tuple(np.concatenate(
                             self.executor.execute_tensor((arr * 2).prod(axis=0)))))

        raw = sps.random(10, 20, density=.1)
        arr = tensor(raw, chunk_size=3)
        res = self.executor.execute_tensor(arr.sum())[0]

        self.assertAlmostEqual(res, raw.sum())

        # test order
        raw = np.asfortranarray(np.random.rand(10, 20, 30))
        arr = tensor(raw, chunk_size=13)
        arr2 = arr.sum(axis=-1)

        res = self.executor.execute_tensor(arr2, concat=True)[0]
        expected = raw.sum(axis=-1)
        np.testing.assert_allclose(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

        # test string dtype
        a = tensor(list('abcdefghi'), dtype=object)
        self.assertEqual(
            self.executor.execute_tensor(a.sum(), concat=True)[0],
            'abcdefghi')
        a = tensor(list('abcdefghi'), dtype=object, chunk_size=2)
        self.assertEqual(
            self.executor.execute_tensor(a.sum(), concat=True)[0],
            'abcdefghi')

    def testMaxMinExecution(self):
        raw = np.random.randint(10000, size=(10, 10, 10))

        arr = tensor(raw, chunk_size=3)

        self.assertEqual([raw.max()], self.executor.execute_tensor(arr.max()))
        self.assertEqual([raw.min()], self.executor.execute_tensor(arr.min()))

        np.testing.assert_array_equal(
            raw.max(axis=0),
            self.executor.execute_tensor(arr.max(axis=0), concat=True)[0])
        self.assertFalse(arr.max(axis=0).issparse())
        np.testing.assert_array_equal(
            raw.min(axis=0),
            self.executor.execute_tensor(arr.min(axis=0), concat=True)[0])
        self.assertFalse(arr.min(axis=0).issparse())

        np.testing.assert_array_equal(
            raw.max(axis=(1, 2)),
            self.executor.execute_tensor(arr.max(axis=(1, 2)), concat=True)[0])
        np.testing.assert_array_equal(
            raw.min(axis=(1, 2)),
            self.executor.execute_tensor(arr.min(axis=(1, 2)), concat=True)[0])

        raw = sps.random(10, 10, density=.5)

        arr = tensor(raw, chunk_size=3)

        self.assertEqual([raw.max()], self.executor.execute_tensor(arr.max()))
        self.assertEqual([raw.min()], self.executor.execute_tensor(arr.min()))

        np.testing.assert_almost_equal(
            raw.max(axis=1).A.ravel(),
            self.executor.execute_tensor(arr.max(axis=1),
                                         concat=True)[0].toarray())
        self.assertTrue(arr.max(axis=1).issparse())
        np.testing.assert_almost_equal(
            raw.min(axis=1).A.ravel(),
            self.executor.execute_tensor(arr.min(axis=1),
                                         concat=True)[0].toarray())
        self.assertTrue(arr.min(axis=1).issparse())

        # test string dtype
        a = tensor(list('abcdefghi'), dtype=object)
        self.assertEqual(
            self.executor.execute_tensor(a.max(), concat=True)[0], 'i')
        a = tensor(list('abcdefghi'), dtype=object, chunk_size=2)
        self.assertEqual(
            self.executor.execute_tensor(a.max(), concat=True)[0], 'i')

    def testAllAnyExecution(self):
        raw1 = np.zeros((10, 15))
        raw2 = np.ones((10, 15))
        raw3 = np.array([[True, False, True, False],
                         [True, True, True, True],
                         [False, False, False, False],
                         [False, True, False, True]])

        arr1 = tensor(raw1, chunk_size=3)
        arr2 = tensor(raw2, chunk_size=3)
        arr3 = tensor(raw3, chunk_size=4)

        self.assertFalse(self.executor.execute_tensor(arr1.all())[0])
        self.assertTrue(self.executor.execute_tensor(arr2.all())[0])
        self.assertFalse(self.executor.execute_tensor(arr1.any())[0])
        # FIX: originally asserted `assertTrue(execute_tensor(arr1.any()))`,
        # which only checked the truthiness of the returned (non-empty) list
        # and re-tested arr1 — all zeros, whose any() is False. The intended
        # check mirrors the all() pair above: arr2 (all ones) has any() True.
        self.assertTrue(self.executor.execute_tensor(arr2.any())[0])

        np.testing.assert_array_equal(
            raw3.all(axis=1),
            self.executor.execute_tensor(arr3.all(axis=1))[0])
        np.testing.assert_array_equal(
            raw3.any(axis=0),
            self.executor.execute_tensor(arr3.any(axis=0))[0])

        raw = sps.random(10, 10, density=.5) > .5

        arr = tensor(raw, chunk_size=3)

        self.assertEqual(raw.A.all(),
                         self.executor.execute_tensor(arr.all())[0])
        self.assertEqual(raw.A.any(),
                         self.executor.execute_tensor(arr.any())[0])

        # test string dtype
        a = tensor(list('abcdefghi'), dtype=object)
        self.assertEqual(
            self.executor.execute_tensor(a.all(), concat=True)[0], 'i')
        a = tensor(list('abcdefghi'), dtype=object, chunk_size=2)
        self.assertEqual(
            self.executor.execute_tensor(a.any(), concat=True)[0], 'a')

    def testMeanExecution(self):
        raw1 = np.random.random((20, 25))
        raw2 = np.random.randint(10, size=(20, 25))

        arr1 = tensor(raw1, chunk_size=3)

        res1 = self.executor.execute_tensor(arr1.mean())
        expected1 = raw1.mean()
        self.assertTrue(np.allclose(res1[0], expected1))

        res2 = self.executor.execute_tensor(arr1.mean(axis=0))
        expected2 = raw1.mean(axis=0)
        self.assertTrue(np.allclose(np.concatenate(res2), expected2))

        res3 = self.executor.execute_tensor(arr1.mean(axis=1, keepdims=True))
        expected3 = raw1.mean(axis=1, keepdims=True)
        self.assertTrue(np.allclose(np.concatenate(res3), expected3))

        arr2 = tensor(raw2, chunk_size=3)

        res1 = self.executor.execute_tensor(arr2.mean())
        expected1 = raw2.mean()
        self.assertEqual(res1[0], expected1)

        res2 = self.executor.execute_tensor(arr2.mean(axis=0))
        expected2 = raw2.mean(axis=0)
        self.assertTrue(np.allclose(np.concatenate(res2), expected2))

        res3 = self.executor.execute_tensor(arr2.mean(axis=1, keepdims=True))
        expected3 = raw2.mean(axis=1, keepdims=True)
        self.assertTrue(np.allclose(np.concatenate(res3), expected3))

        raw1 = sps.random(20, 25, density=.1)

        arr1 = tensor(raw1, chunk_size=3)

        res1 = self.executor.execute_tensor(arr1.mean())
        expected1 = raw1.mean()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr2 = tensor(raw1, chunk_size=30)

        res1 = self.executor.execute_tensor(arr2.mean())
        expected1 = raw1.mean()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr = mean(1)
        self.assertEqual(self.executor.execute_tensor(arr)[0], 1)

        with self.assertRaises(TypeError):
            self.executor.execute_tensor(
                tensor(list('abcdefghi'), dtype=object).mean())

    def testVarExecution(self):
        raw1 = np.random.random((20, 25))
        raw2 = np.random.randint(10, size=(20, 25))

        arr0 = tensor(raw1, chunk_size=25)

        res1 = self.executor.execute_tensor(arr0.var())
        expected1 = raw1.var()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr1 = tensor(raw1, chunk_size=3)

        res1 = self.executor.execute_tensor(arr1.var())
        expected1 = raw1.var()
        self.assertTrue(np.allclose(res1[0], expected1))

        res2 = self.executor.execute_tensor(arr1.var(axis=0))
        expected2 = raw1.var(axis=0)
        self.assertTrue(np.allclose(np.concatenate(res2), expected2))

        res3 = self.executor.execute_tensor(arr1.var(axis=1, keepdims=True))
        expected3 = raw1.var(axis=1, keepdims=True)
        self.assertTrue(np.allclose(np.concatenate(res3), expected3))

        arr2 = tensor(raw2, chunk_size=3)

        res1 = self.executor.execute_tensor(arr2.var())
        expected1 = raw2.var()
        self.assertAlmostEqual(res1[0], expected1)

        res2 = self.executor.execute_tensor(arr2.var(axis=0))
        expected2 = raw2.var(axis=0)
        self.assertTrue(np.allclose(np.concatenate(res2), expected2))

        res3 = self.executor.execute_tensor(arr2.var(axis=1, keepdims=True))
        expected3 = raw2.var(axis=1, keepdims=True)
        self.assertTrue(np.allclose(np.concatenate(res3), expected3))

        res4 = self.executor.execute_tensor(arr2.var(ddof=1))
        expected4 = raw2.var(ddof=1)
        self.assertAlmostEqual(res4[0], expected4)

        raw1 = sps.random(20, 25, density=.1)

        arr1 = tensor(raw1, chunk_size=3)

        res1 = self.executor.execute_tensor(arr1.var())
        expected1 = raw1.toarray().var()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr2 = tensor(raw1, chunk_size=30)

        res1 = self.executor.execute_tensor(arr2.var())
        expected1 = raw1.toarray().var()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr = var(1)
        self.assertEqual(self.executor.execute_tensor(arr)[0], 0)

    def testStdExecution(self):
        raw1 = np.random.random((20, 25))
        raw2 = np.random.randint(10, size=(20, 25))

        arr1 = tensor(raw1, chunk_size=3)

        res1 = self.executor.execute_tensor(arr1.std())
        expected1 = raw1.std()
        self.assertTrue(np.allclose(res1[0], expected1))

        res2 = self.executor.execute_tensor(arr1.std(axis=0))
        expected2 = raw1.std(axis=0)
        self.assertTrue(np.allclose(np.concatenate(res2), expected2))

        res3 = self.executor.execute_tensor(arr1.std(axis=1, keepdims=True))
        expected3 = raw1.std(axis=1, keepdims=True)
        self.assertTrue(np.allclose(np.concatenate(res3), expected3))

        arr2 = tensor(raw2, chunk_size=3)

        res1 = self.executor.execute_tensor(arr2.std())
        expected1 = raw2.std()
        self.assertAlmostEqual(res1[0], expected1)

        res2 = self.executor.execute_tensor(arr2.std(axis=0))
        expected2 = raw2.std(axis=0)
        self.assertTrue(np.allclose(np.concatenate(res2), expected2))

        res3 = self.executor.execute_tensor(arr2.std(axis=1, keepdims=True))
        expected3 = raw2.std(axis=1, keepdims=True)
        self.assertTrue(np.allclose(np.concatenate(res3), expected3))

        res4 = self.executor.execute_tensor(arr2.std(ddof=1))
        expected4 = raw2.std(ddof=1)
        self.assertAlmostEqual(res4[0], expected4)

        raw1 = sps.random(20, 25, density=.1)

        arr1 = tensor(raw1, chunk_size=3)

        res1 = self.executor.execute_tensor(arr1.std())
        expected1 = raw1.toarray().std()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr2 = tensor(raw1, chunk_size=30)

        res1 = self.executor.execute_tensor(arr2.std())
        expected1 = raw1.toarray().std()
        self.assertTrue(np.allclose(res1[0], expected1))

        arr = std(1)
        self.assertEqual(self.executor.execute_tensor(arr)[0], 0)

    def testArgReduction(self):
        raw = np.random.random((20, 20, 20))

        arr = tensor(raw, chunk_size=3)

        self.assertEqual(raw.argmax(),
                         self.executor.execute_tensor(arr.argmax())[0])
        self.assertEqual(raw.argmin(),
                         self.executor.execute_tensor(arr.argmin())[0])

        np.testing.assert_array_equal(
            raw.argmax(axis=0),
            self.executor.execute_tensor(arr.argmax(axis=0), concat=True)[0])
        np.testing.assert_array_equal(
            raw.argmin(axis=0),
            self.executor.execute_tensor(arr.argmin(axis=0), concat=True)[0])

        # plant a known global min and max into a sparse matrix
        raw_format = sps.random(20, 20, density=.1, format='lil')

        random_min = np.random.randint(0, 200)
        random_max = np.random.randint(200, 400)
        raw_format[np.unravel_index(random_min, raw_format.shape)] = -1
        raw_format[np.unravel_index(random_max, raw_format.shape)] = 2

        raw = raw_format.tocoo()
        arr = tensor(raw, chunk_size=3)

        self.assertEqual(raw.argmax(),
                         self.executor.execute_tensor(arr.argmax())[0])
        self.assertEqual(raw.argmin(),
                         self.executor.execute_tensor(arr.argmin())[0])

        # test order
        raw = np.asfortranarray(np.random.rand(10, 20, 30))
        arr = tensor(raw, chunk_size=13)
        arr2 = arr.argmax(axis=-1)

        res = self.executor.execute_tensor(arr2, concat=True)[0]
        expected = raw.argmax(axis=-1)
        np.testing.assert_allclose(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

        with self.assertRaises(TypeError):
            self.executor.execute_tensor(
                tensor(list('abcdefghi'), dtype=object).argmax())

    @ignore_warning
    def testNanReduction(self):
        raw = np.random.choice(a=[0, 1, np.nan], size=(10, 10),
                               p=[0.3, 0.4, 0.3])

        arr = tensor(raw, chunk_size=3)

        self.assertEqual(np.nansum(raw),
                         self.executor.execute_tensor(nansum(arr))[0])
        self.assertEqual(np.nanprod(raw),
                         self.executor.execute_tensor(nanprod(arr))[0])
        self.assertEqual(np.nanmax(raw),
                         self.executor.execute_tensor(nanmax(arr))[0])
        self.assertEqual(np.nanmin(raw),
                         self.executor.execute_tensor(nanmin(arr))[0])
        self.assertEqual(np.nanmean(raw),
                         self.executor.execute_tensor(nanmean(arr))[0])
        self.assertAlmostEqual(np.nanvar(raw),
                               self.executor.execute_tensor(nanvar(arr))[0])
        self.assertAlmostEqual(
            np.nanvar(raw, ddof=1),
            self.executor.execute_tensor(nanvar(arr, ddof=1))[0])
        self.assertAlmostEqual(np.nanstd(raw),
                               self.executor.execute_tensor(nanstd(arr))[0])
        self.assertAlmostEqual(
            np.nanstd(raw, ddof=1),
            self.executor.execute_tensor(nanstd(arr, ddof=1))[0])

        arr = tensor(raw, chunk_size=10)

        self.assertEqual(np.nansum(raw),
                         self.executor.execute_tensor(nansum(arr))[0])
        self.assertEqual(np.nanprod(raw),
                         self.executor.execute_tensor(nanprod(arr))[0])
        self.assertEqual(np.nanmax(raw),
                         self.executor.execute_tensor(nanmax(arr))[0])
        self.assertEqual(np.nanmin(raw),
                         self.executor.execute_tensor(nanmin(arr))[0])
        self.assertEqual(np.nanmean(raw),
                         self.executor.execute_tensor(nanmean(arr))[0])
        self.assertAlmostEqual(np.nanvar(raw),
                               self.executor.execute_tensor(nanvar(arr))[0])
        self.assertAlmostEqual(
            np.nanvar(raw, ddof=1),
            self.executor.execute_tensor(nanvar(arr, ddof=1))[0])
        self.assertAlmostEqual(np.nanstd(raw),
                               self.executor.execute_tensor(nanstd(arr))[0])
        self.assertAlmostEqual(
            np.nanstd(raw, ddof=1),
            self.executor.execute_tensor(nanstd(arr, ddof=1))[0])

        raw = np.random.random((10, 10))
        raw[:3, :3] = np.nan
        arr = tensor(raw, chunk_size=3)
        self.assertEqual(np.nanargmin(raw),
                         self.executor.execute_tensor(nanargmin(arr))[0])
        self.assertEqual(np.nanargmax(raw),
                         self.executor.execute_tensor(nanargmax(arr))[0])

        # all-NaN input: sum/prod get identity, max/min/mean get NaN,
        # nanargmin/nanargmax must raise
        raw = np.full((10, 10), np.nan)
        arr = tensor(raw, chunk_size=3)

        self.assertEqual(0, self.executor.execute_tensor(nansum(arr))[0])
        self.assertEqual(1, self.executor.execute_tensor(nanprod(arr))[0])
        self.assertTrue(np.isnan(self.executor.execute_tensor(nanmax(arr))[0]))
        self.assertTrue(np.isnan(self.executor.execute_tensor(nanmin(arr))[0]))
        self.assertTrue(np.isnan(self.executor.execute_tensor(nanmean(arr))[0]))
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tensor(nanargmin(arr))[0]  # noqa: F841
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tensor(nanargmax(arr))[0]  # noqa: F841

        raw = sps.random(10, 10, density=.1, format='csr')
        raw[:3, :3] = np.nan
        arr = tensor(raw, chunk_size=3)

        self.assertAlmostEqual(np.nansum(raw.A),
                               self.executor.execute_tensor(nansum(arr))[0])
        self.assertAlmostEqual(np.nanprod(raw.A),
                               self.executor.execute_tensor(nanprod(arr))[0])
        self.assertAlmostEqual(np.nanmax(raw.A),
                               self.executor.execute_tensor(nanmax(arr))[0])
        self.assertAlmostEqual(np.nanmin(raw.A),
                               self.executor.execute_tensor(nanmin(arr))[0])
        self.assertAlmostEqual(np.nanmean(raw.A),
                               self.executor.execute_tensor(nanmean(arr))[0])
        self.assertAlmostEqual(np.nanvar(raw.A),
                               self.executor.execute_tensor(nanvar(arr))[0])
        self.assertAlmostEqual(
            np.nanvar(raw.A, ddof=1),
            self.executor.execute_tensor(nanvar(arr, ddof=1))[0])
        self.assertAlmostEqual(np.nanstd(raw.A),
                               self.executor.execute_tensor(nanstd(arr))[0])
        self.assertAlmostEqual(
            np.nanstd(raw.A, ddof=1),
            self.executor.execute_tensor(nanstd(arr, ddof=1))[0])

        arr = nansum(1)
        self.assertEqual(self.executor.execute_tensor(arr)[0], 1)

    def testCumReduction(self):
        raw = np.random.randint(5, size=(8, 8, 8))

        arr = tensor(raw, chunk_size=3)

        res1 = self.executor.execute_tensor(arr.cumsum(axis=1), concat=True)
        res2 = self.executor.execute_tensor(arr.cumprod(axis=1), concat=True)
        expected1 = raw.cumsum(axis=1)
        expected2 = raw.cumprod(axis=1)
        np.testing.assert_array_equal(res1[0], expected1)
        np.testing.assert_array_equal(res2[0], expected2)

        raw = sps.random(8, 8, density=.1)

        arr = tensor(raw, chunk_size=3)

        res1 = self.executor.execute_tensor(arr.cumsum(axis=1), concat=True)
        res2 = self.executor.execute_tensor(arr.cumprod(axis=1), concat=True)
        expected1 = raw.A.cumsum(axis=1)
        expected2 = raw.A.cumprod(axis=1)
        self.assertTrue(np.allclose(res1[0], expected1))
        self.assertTrue(np.allclose(res2[0], expected2))

        # test order
        raw = np.asfortranarray(np.random.rand(10, 20, 30))
        arr = tensor(raw, chunk_size=13)
        arr2 = arr.cumsum(axis=-1)

        res = self.executor.execute_tensor(arr2, concat=True)[0]
        expected = raw.cumsum(axis=-1)
        np.testing.assert_allclose(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

        # test string dtype
        a = tensor(list('abcdefghi'), dtype=object)
        np.testing.assert_array_equal(
            self.executor.execute_tensor(a.cumsum(), concat=True)[0],
            np.cumsum(np.array(list('abcdefghi'), dtype=object)))
        a = tensor(list('abcdefghi'), dtype=object, chunk_size=2)
        np.testing.assert_array_equal(
            self.executor.execute_tensor(a.cumsum(), concat=True)[0],
            np.cumsum(np.array(list('abcdefghi'), dtype=object)))

    def testNanCumReduction(self):
        raw = np.random.randint(5, size=(8, 8, 8))
        raw[:2, 2:4, 4:6] = np.nan

        arr = tensor(raw, chunk_size=3)

        res1 = self.executor.execute_tensor(nancumsum(arr, axis=1),
                                            concat=True)
        res2 = self.executor.execute_tensor(nancumprod(arr, axis=1),
                                            concat=True)
        expected1 = np.nancumsum(raw, axis=1)
        expected2 = np.nancumprod(raw, axis=1)
        np.testing.assert_array_equal(res1[0], expected1)
        np.testing.assert_array_equal(res2[0], expected2)

        raw = sps.random(8, 8, density=.1, format='lil')
        raw[:2, 2:4] = np.nan

        arr = tensor(raw, chunk_size=3)

        res1 = self.executor.execute_tensor(nancumsum(arr, axis=1),
                                            concat=True)[0]
        res2 = self.executor.execute_tensor(nancumprod(arr, axis=1),
                                            concat=True)[0]
        expected1 = np.nancumsum(raw.A, axis=1)
        expected2 = np.nancumprod(raw.A, axis=1)
        self.assertTrue(np.allclose(res1, expected1))
        self.assertTrue(np.allclose(res2, expected2))

    def testOutReductionExecution(self):
        raw = np.random.randint(5, size=(8, 8, 8))

        arr = tensor(raw, chunk_size=3)
        arr2 = ones((8, 8), dtype='i8', chunk_size=3)
        arr.sum(axis=1, out=arr2)

        res = self.executor.execute_tensor(arr2, concat=True)[0]
        expected = raw.sum(axis=1)

        np.testing.assert_array_equal(res, expected)

    def testOutCumReductionExecution(self):
        raw = np.random.randint(5, size=(8, 8, 8))

        arr = tensor(raw, chunk_size=3)
        arr.cumsum(axis=0, out=arr)

        res = self.executor.execute_tensor(arr, concat=True)[0]
        expected = raw.cumsum(axis=0)

        np.testing.assert_array_equal(res, expected)

    def testCountNonzeroExecution(self):
        raw = [[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]

        arr = tensor(raw, chunk_size=5)
        t = count_nonzero(arr)

        res = self.executor.execute_tensor(t)[0]
        expected = np.count_nonzero(raw)
        np.testing.assert_equal(res, expected)

        arr = tensor(raw, chunk_size=2)
        t = count_nonzero(arr)

        res = self.executor.execute_tensor(t)[0]
        expected = np.count_nonzero(raw)
        np.testing.assert_equal(res, expected)

        t = count_nonzero(arr, axis=0)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.count_nonzero(raw, axis=0)
        np.testing.assert_equal(res, expected)

        t = count_nonzero(arr, axis=1)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.count_nonzero(raw, axis=1)
        np.testing.assert_equal(res, expected)

        raw = sps.csr_matrix(raw)

        arr = tensor(raw, chunk_size=2)
        t = count_nonzero(arr)

        res = self.executor.execute_tensor(t)[0]
        expected = np.count_nonzero(raw.A)
        np.testing.assert_equal(res, expected)

        t = count_nonzero(arr, axis=0)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.count_nonzero(raw.A, axis=0)
        np.testing.assert_equal(res, expected)

        t = count_nonzero(arr, axis=1)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.count_nonzero(raw.A, axis=1)
        np.testing.assert_equal(res, expected)

        # test string dtype
        a = tensor(list('abcdefghi'), dtype=object)
        self.assertEqual(
            self.executor.execute_tensor(count_nonzero(a), concat=True)[0], 9)
        a = tensor(list('abcdefghi'), dtype=object, chunk_size=2)
        self.assertEqual(
            self.executor.execute_tensor(count_nonzero(a), concat=True)[0], 9)

    def testAllcloseExecution(self):
        a = tensor([1e10, 1e-7], chunk_size=1)
        b = tensor([1.00001e10, 1e-8], chunk_size=1)

        t = allclose(a, b)

        res = self.executor.execute_tensor(t)[0]
        self.assertFalse(res)

        a = tensor([1e10, 1e-8], chunk_size=1)
        b = tensor([1.00001e10, 1e-9], chunk_size=1)

        t = allclose(a, b)

        res = self.executor.execute_tensor(t)[0]
        self.assertTrue(res)

        a = tensor([1.0, np.nan], chunk_size=1)
        b = tensor([1.0, np.nan], chunk_size=1)

        t = allclose(a, b, equal_nan=True)

        res = self.executor.execute_tensor(t)[0]
        self.assertTrue(res)

        a = tensor(sps.csr_matrix([[1e10, 1e-7], [0, 0]]), chunk_size=1)
        b = tensor(sps.csr_matrix([[1.00001e10, 1e-8], [0, 0]]), chunk_size=1)

        t = allclose(a, b)

        res = self.executor.execute_tensor(t)[0]
        self.assertFalse(res)

        # test string dtype
        with self.assertRaises(TypeError):
            a = tensor(list('abcdefghi'), dtype=object)
            self.executor.execute_tensor(allclose(a, a))

    def testArrayEqual(self):
        a = ones((10, 5), chunk_size=1)
        b = ones((10, 5), chunk_size=2)

        c = array_equal(a, b)

        res = bool(self.executor.execute_tensor(c)[0])
        self.assertTrue(res)
class Test(unittest.TestCase):
    """Execution tests for learn-side validation helpers
    (``check_non_negative_then_return_value`` and ``assert_all_finite``)."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testCheckNonNegativeThenReturnValueExecution(self):
        """Non-negative inputs pass through; a negative entry raises ValueError
        at execution time — checked for dense, sparse and DataFrame inputs."""
        raw = np.random.randint(10, size=(10, 5))
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        result = self.executor.execute_tileable(r, concat=True)[0]
        np.testing.assert_array_equal(result, raw)

        # inject a single negative value -> execution must fail
        raw = raw.copy()
        raw[1, 3] = -1
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tileable(r, concat=True)[0]

        # sparse tensor path
        raw = sps.random(10, 5, density=.3, format='csr')
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        result = self.executor.execute_tileable(r, concat=True)[0]
        np.testing.assert_array_equal(result.toarray(), raw.A)

        raw = raw.copy()
        raw[1, 3] = -1
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tileable(r, concat=True)[0]

        # DataFrame path
        raw = pd.DataFrame(np.random.rand(10, 4))
        c = md.DataFrame(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        result = self.executor.execute_tileable(r, concat=True)[0]
        pd.testing.assert_frame_equal(result, raw)

        raw = raw.copy()
        raw.iloc[1, 3] = -1
        c = md.DataFrame(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tileable(r, concat=True)[0]

    def testAssertAllFinite(self):
        """assert_all_finite raises for inf/NaN, passes finite data through
        (check_only=False), and is short-circuited by ``assume_finite``."""
        raw = np.array([2.3, np.inf], dtype=np.float64)
        x = mt.tensor(raw)

        with self.assertRaises(ValueError):
            r = assert_all_finite(x)
            _ = self.executor.execute_tensor(r)

        raw = np.array([2.3, np.nan], dtype=np.float64)
        x = mt.tensor(raw)

        with self.assertRaises(ValueError):
            r = assert_all_finite(x, allow_nan=False)
            _ = self.executor.execute_tensor(r)

        # a float32 sum that overflows to inf must still be accepted
        # (finiteness is checked element-wise, not on the sum)
        max_float32 = np.finfo(np.float32).max
        raw = [max_float32] * 2
        self.assertFalse(np.isfinite(np.sum(raw)))
        x = mt.tensor(raw)

        r = assert_all_finite(x)
        result = self.executor.execute_tensor(r, concat=True)[0]
        self.assertTrue(result.item())

        raw = np.array([np.nan, 'a'], dtype=object)
        x = mt.tensor(raw)

        with self.assertRaises(ValueError):
            r = assert_all_finite(x)
            _ = self.executor.execute_tensor(r)

        raw = np.random.rand(10)
        x = mt.tensor(raw, chunk_size=2)

        # check_only=False returns the (validated) data itself
        r = assert_all_finite(x, check_only=False)
        result = self.executor.execute_tensor(r, concat=True)[0]
        np.testing.assert_array_equal(result, raw)

        r = assert_all_finite(x)
        result = self.executor.execute_tensor(r, concat=True)[0]
        self.assertTrue(result.item())

        # assume_finite option disables the check entirely
        with option_context() as options:
            options.learn.assume_finite = True

            self.assertIsNone(assert_all_finite(x))
            self.assertIs(assert_all_finite(x, check_only=False), x)

        # test sparse
        s = sps.random(10, 3, density=0.1, format='csr',
                       random_state=np.random.RandomState(0))
        s[0, 2] = np.nan

        with self.assertRaises(ValueError):
            r = assert_all_finite(s)
            _ = self.executor.execute_tensor(r)
class Test(unittest.TestCase):
    """Execution tests for ``mars.tensor.random``.

    Pattern used throughout: build a chunked random tensor, check the executed
    shape; then re-tile, force every chunk's seed to 0 (``chunk.op._seed = 0``)
    and compare each chunk's data against ``np.random.RandomState(0)`` drawing
    the same per-chunk size.
    """

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testRandExecution(self):
        arr = tensor.random.rand(10, 20, chunk_size=3, dtype='f4')
        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(res.shape, (10, 20))
        self.assertTrue(np.all(res < 1))
        self.assertTrue(np.all(res > 0))
        self.assertEqual(res.dtype, np.float32)

    def testRandnExecution(self):
        arr = tensor.random.randn(10, 20, chunk_size=3)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (10, 20))

        arr = tensor.random.randn(10, 20, chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).randn(5, 5)))

    def testRandintExecution(self):
        # mock executor only estimates result sizes, no real execution
        size_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        arr = tensor.random.randint(0, 2, size=(10, 30), chunk_size=3)
        size_res = size_executor.execute_tensor(arr, mock=True)
        self.assertEqual(arr.nbytes, sum(tp[0] for tp in size_res))

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(res.shape, (10, 30))
        self.assertTrue(np.all(res >= 0))
        self.assertTrue(np.all(res < 2))

    @ignore_warning
    def testRandomIntegersExecution(self):
        arr = tensor.random.random_integers(0, 10, size=(10, 20), chunk_size=3)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (10, 20))

        arr = tensor.random.random_integers(0, 10, size=(10, 20), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            np.testing.assert_equal(res, np.random.RandomState(0).random_integers(0, 10, size=(5, 5)))

    def testRandomSampleExecution(self):
        arr = tensor.random.random_sample(size=(10, 20), chunk_size=3)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (10, 20))

        arr = tensor.random.random_sample(size=(10, 20), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).random_sample(size=(5, 5))))

    def testRandomExecution(self):
        arr = tensor.random.random(size=(10, 20), chunk_size=3)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (10, 20))

        arr = tensor.random.random(size=(10, 20), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).random_sample(size=(5, 5))))

    def testRandfExecution(self):
        arr = tensor.random.ranf(size=(10, 20), chunk_size=3)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (10, 20))

        arr = tensor.random.ranf(size=(10, 20), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).random_sample(size=(5, 5))))

    def testSampleExecution(self):
        arr = tensor.random.sample(size=(10, 20), chunk_size=3)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (10, 20))

        arr = tensor.random.sample(size=(10, 20), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).random_sample(size=(5, 5))))

    def testChoiceExecution(self):
        arr = tensor.random.choice(5, size=3, chunk_size=1)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (3,))

        arr = tensor.random.choice(5, size=(15,), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).choice(5, size=(5,))))

        arr = tensor.random.choice([1, 4, 9], size=3, chunk_size=1)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (3,))

        arr = tensor.random.choice([1, 4, 9], size=(15,), chunk_size=5).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).choice([1, 4, 9], size=(5,))))

        # without replacement the sample size cannot exceed the population
        with self.assertRaises(ValueError):
            tensor.random.choice([1, 3, 4], size=5, replace=False, chunk_size=2)

        arr = tensor.random.choice([1, 4, 9], size=3, replace=False, chunk_size=1)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (3,))

        arr = tensor.random.choice([1, 4, 9], size=(3,), replace=False, chunk_size=1).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(
                np.array_equal(res, np.random.RandomState(0).choice([1, 4, 9], size=(1,), replace=False)))

        arr = tensor.random.choice([1, 4, 9], size=3, p=[.2, .5, .3], chunk_size=1)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (3,))

        arr = tensor.random.choice([1, 4, 9], size=(15,), chunk_size=5, p=[.2, .5, .3]).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(
                np.array_equal(res, np.random.RandomState(0).choice([1, 4, 9], size=(5,), p=[.2, .5, .3])))

    def testSparseRandintExecution(self):
        size_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        arr = tensor.random.randint(1, 2, size=(30, 50), density=.1, chunk_size=10, dtype='f4')
        size_res = size_executor.execute_tensor(arr, mock=True)
        # estimated size scales with the requested density
        self.assertAlmostEqual(arr.nbytes * 0.1, sum(tp[0] for tp in size_res))

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertTrue(issparse(res))
        self.assertEqual(res.shape, (30, 50))
        self.assertTrue(np.all(res.data >= 1))
        self.assertTrue(np.all(res.data < 2))
        self.assertAlmostEqual((res >= 1).toarray().sum(), 30 * 50 * .1, delta=20)

    def testBetaExecute(self):
        arr = tensor.random.beta(1, 2, chunk_size=2).tiles()
        arr.chunks[0].op._seed = 0

        self.assertEqual(self.executor.execute_tensor(arr)[0], np.random.RandomState(0).beta(1, 2))

        arr = tensor.random.beta([1, 2], [3, 4], chunk_size=2).tiles()
        arr.chunks[0].op._seed = 0

        self.assertTrue(np.array_equal(self.executor.execute_tensor(arr)[0],
                                       np.random.RandomState(0).beta([1, 2], [3, 4])))

        # broadcasting of array-valued a/b parameters across chunks
        arr = tensor.random.beta([[2, 3]],
                                 from_ndarray([[4, 6], [5, 2]], chunk_size=2),
                                 chunk_size=1, size=(3, 2, 2)).tiles()
        for c in arr.chunks:
            c.op._seed = 0

        res = self.executor.execute_tensor(arr, concat=True)[0]

        self.assertEqual(res[0, 0, 0], np.random.RandomState(0).beta(2, 4))
        self.assertEqual(res[0, 0, 1], np.random.RandomState(0).beta(3, 6))
        self.assertEqual(res[0, 1, 0], np.random.RandomState(0).beta(2, 5))
        self.assertEqual(res[0, 1, 1], np.random.RandomState(0).beta(3, 2))

        # explicit RandomState(0) and global seed(0) must agree
        arr = tensor.random.RandomState(0).beta([[3, 4]], [[1], [2]], chunk_size=1)
        tensor.random.seed(0)
        arr2 = tensor.random.beta([[3, 4]], [[1], [2]], chunk_size=1)

        self.assertTrue(np.array_equal(self.executor.execute_tensor(arr, concat=True)[0],
                                       self.executor.execute_tensor(arr2, concat=True)[0]))

    def testBinomialExecute(self):
        arr = tensor.random.binomial(10, .5, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.binomial(10, .5, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).binomial(10, .5, 10)))

    def testChisquareExecute(self):
        arr = tensor.random.chisquare(2, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.chisquare(2, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).chisquare(2, 10)))

    def testDirichletExecute(self):
        arr = tensor.random.dirichlet((10, 5, 3), 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100, 3))

        arr = tensor.random.dirichlet((10, 5, 3), 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).dirichlet((10, 5, 3), 10)))

    def testExponentialExecute(self):
        arr = tensor.random.exponential(1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.exponential(1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).exponential(1.0, 10)))

    def testFExecute(self):
        arr = tensor.random.f(1.0, 2.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.f(1.0, 2.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).f(1.0, 2.0, 10)))

    def testGammaExecute(self):
        arr = tensor.random.gamma(1.0, 2.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.gamma(1.0, 2.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).gamma(1.0, 2.0, 10)))

    def testGeometricExecution(self):
        arr = tensor.random.geometric(1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.geometric(1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).geometric(1.0, 10)))

    def testGumbelExecution(self):
        arr = tensor.random.gumbel(.5, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.gumbel(.5, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).gumbel(.5, 1.0, 10)))

    def testHypergeometricExecution(self):
        arr = tensor.random.hypergeometric(10, 20, 15, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.hypergeometric(10, 20, 15, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).hypergeometric(10, 20, 15, 10)))

    def testLaplaceExecution(self):
        arr = tensor.random.laplace(.5, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.laplace(.5, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).laplace(.5, 1.0, 10)))

    def testLogisticExecution(self):
        arr = tensor.random.logistic(.5, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.logistic(.5, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            np.testing.assert_equal(res, np.random.RandomState(0).logistic(.5, 1.0, 10))

    def testLognormalExecution(self):
        arr = tensor.random.lognormal(.5, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.lognormal(.5, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).lognormal(.5, 1.0, 10)))

    def testLogseriesExecution(self):
        arr = tensor.random.logseries(.5, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.logseries(.5, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).logseries(.5, 10)))

    def testMultinomialExecution(self):
        arr = tensor.random.multinomial(10, [.2, .5, .3], 100, chunk_size=10)
        self.assertEqual(arr.shape, (100, 3))
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100, 3))

        arr = tensor.random.multinomial(10, [.2, .5, .3], 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).multinomial(10, [.2, .5, .3], 10)))

    def testMultivariateNormalExecution(self):
        arr = tensor.random.multivariate_normal([1, 2], [[1, 0], [0, 1]], 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100, 2))

        arr = tensor.random.multivariate_normal([1, 2], [[1, 0], [0, 1]], 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).multivariate_normal(
                [1, 2], [[1, 0], [0, 1]], 10)))

    def testNegativeBinomialExecution(self):
        arr = tensor.random.negative_binomial(5, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.negative_binomial(5, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).negative_binomial(5, 1.0, 10)))

    def testNoncentralChisquareExecution(self):
        arr = tensor.random.noncentral_chisquare(.5, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.noncentral_chisquare(.5, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).noncentral_chisquare(.5, 1.0, 10)))

    def testNoncentralFExecution(self):
        arr = tensor.random.noncentral_f(1.5, 1.0, 1.1, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.noncentral_f(1.5, 1.0, 1.1, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).noncentral_f(1.5, 1.0, 1.1, 10)))

    def testNormalExecute(self):
        arr = tensor.random.normal(10, 1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.normal(10, 1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).normal(10, 1.0, 10)))

    def testParetoExecute(self):
        arr = tensor.random.pareto(1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.pareto(1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).pareto(1.0, 10)))

    def testPoissonExecute(self):
        arr = tensor.random.poisson(1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.poisson(1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).poisson(1.0, 10)))

    def testPowerExecute(self):
        arr = tensor.random.power(1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.power(1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).power(1.0, 10)))

    def testRayleighExecute(self):
        arr = tensor.random.rayleigh(1.0, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.rayleigh(1.0, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).rayleigh(1.0, 10)))

    def testStandardCauchyExecute(self):
        arr = tensor.random.standard_cauchy(100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.standard_cauchy(100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).standard_cauchy(10)))

    def testStandardExponentialExecute(self):
        arr = tensor.random.standard_exponential(100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.standard_exponential(100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).standard_exponential(10)))

    def testStandardGammaExecute(self):
        arr = tensor.random.standard_gamma(.1, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.standard_gamma(.1, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).standard_gamma(.1, 10)))

    def testStandardNormalExecute(self):
        arr = tensor.random.standard_normal(100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.standard_normal(100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).standard_normal(10)))

    def testStandardTExecute(self):
        arr = tensor.random.standard_t(.1, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.standard_t(.1, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).standard_t(.1, 10)))

    def testTriangularExecute(self):
        arr = tensor.random.triangular(.1, .2, .3, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.triangular(.1, .2, .3, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).triangular(.1, .2, .3, 10)))

    def testUniformExecute(self):
        arr = tensor.random.uniform(.1, .2, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.uniform(.1, .2, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).uniform(.1, .2, 10)))

    def testVonmisesExecute(self):
        arr = tensor.random.vonmises(.1, .2, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.vonmises(.1, .2, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).vonmises(.1, .2, 10)))

    def testWaldExecute(self):
        arr = tensor.random.wald(.1, .2, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.wald(.1, .2, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).wald(.1, .2, 10)))

    def testWeibullExecute(self):
        arr = tensor.random.weibull(.1, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.weibull(.1, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).weibull(.1, 10)))

    def testZipfExecute(self):
        arr = tensor.random.zipf(1.1, 100, chunk_size=10)
        self.assertEqual(self.executor.execute_tensor(arr, concat=True)[0].shape, (100,))

        arr = tensor.random.zipf(1.1, 100, chunk_size=10).tiles()
        for chunk in arr.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(arr):
            self.assertTrue(np.array_equal(res, np.random.RandomState(0).zipf(1.1, 10)))

    def testPermutationExecute(self):
        """permutation preserves the multiset of values while (almost surely) reordering them."""
        x = tensor.random.permutation(10)
        res = self.executor.execute_tensor(x, concat=True)[0]
        self.assertFalse(np.all(res[:-1] < res[1:]))
        np.testing.assert_array_equal(np.sort(res), np.arange(10))

        arr = from_ndarray([1, 4, 9, 12, 15], chunk_size=2)
        x = tensor.random.permutation(arr)
        res = self.executor.execute_tensor(x, concat=True)[0]
        self.assertFalse(np.all(res[:-1] < res[1:]))
        np.testing.assert_array_equal(np.sort(res), np.asarray([1, 4, 9, 12, 15]))

        arr = from_ndarray(np.arange(48).reshape(12, 4), chunk_size=2)
        # axis = 0
        x = tensor.random.permutation(arr)
        res = self.executor.execute_tensor(x, concat=True)[0]
        self.assertFalse(np.all(res[:-1] < res[1:]))
        np.testing.assert_array_equal(np.sort(res, axis=0), np.arange(48).reshape(12, 4))
        # axis != 0
        x2 = tensor.random.permutation(arr, axis=1)
        res = self.executor.execute_tensor(x2, concat=True)[0]
        self.assertFalse(np.all(res[:, :-1] < res[:, 1:]))
        np.testing.assert_array_equal(np.sort(res, axis=1), np.arange(48).reshape(12, 4))
class Test(TestBase): def setUp(self): super().setUp() self.executor = ExecutorForTest() @require_cudf def testToGPUExecution(self): pdf = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1)) df = from_pandas_df(pdf, chunk_size=(13, 21)) cdf = to_gpu(df) res = self.executor.execute_dataframe(cdf, concat=True)[0] self.assertIsInstance(res, cudf.DataFrame) pd.testing.assert_frame_equal(res.to_pandas(), pdf) pseries = pdf.iloc[:, 0] series = from_pandas_series(pseries) cseries = series.to_gpu() res = self.executor.execute_dataframe(cseries, concat=True)[0] self.assertIsInstance(res, cudf.Series) pd.testing.assert_series_equal(res.to_pandas(), pseries) @require_cudf def testToCPUExecution(self): pdf = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1)) df = from_pandas_df(pdf, chunk_size=(13, 21)) cdf = to_gpu(df) df2 = to_cpu(cdf) res = self.executor.execute_dataframe(df2, concat=True)[0] self.assertIsInstance(res, pd.DataFrame) pd.testing.assert_frame_equal(res, pdf) pseries = pdf.iloc[:, 0] series = from_pandas_series(pseries, chunk_size=(13, 21)) cseries = to_gpu(series) series2 = to_cpu(cseries) res = self.executor.execute_dataframe(series2, concat=True)[0] self.assertIsInstance(res, pd.Series) pd.testing.assert_series_equal(res, pseries) def testRechunkExecution(self): data = pd.DataFrame(np.random.rand(8, 10)) df = from_pandas_df(pd.DataFrame(data), chunk_size=3) df2 = df.rechunk((3, 4)) res = self.executor.execute_dataframe(df2, concat=True)[0] pd.testing.assert_frame_equal(data, res) data = pd.DataFrame(np.random.rand(10, 10), index=np.random.randint(-100, 100, size=(10,)), columns=[np.random.bytes(10) for _ in range(10)]) df = from_pandas_df(data) df2 = df.rechunk(5) res = self.executor.execute_dataframe(df2, concat=True)[0] pd.testing.assert_frame_equal(data, res) # test Series rechunk execution. 
data = pd.Series(np.random.rand(10,)) series = from_pandas_series(data) series2 = series.rechunk(3) res = self.executor.execute_dataframe(series2, concat=True)[0] pd.testing.assert_series_equal(data, res) series2 = series.rechunk(1) res = self.executor.execute_dataframe(series2, concat=True)[0] pd.testing.assert_series_equal(data, res) # test index rechunk execution data = pd.Index(np.random.rand(10,)) index = from_pandas_index(data) index2 = index.rechunk(3) res = self.executor.execute_dataframe(index2, concat=True)[0] pd.testing.assert_index_equal(data, res) index2 = index.rechunk(1) res = self.executor.execute_dataframe(index2, concat=True)[0] pd.testing.assert_index_equal(data, res) def testResetIndexExecution(self): data = pd.DataFrame([('bird', 389.0), ('bird', 24.0), ('mammal', 80.5), ('mammal', np.nan)], index=['falcon', 'parrot', 'lion', 'monkey'], columns=('class', 'max_speed')) df = from_pandas_df(data) df2 = df_reset_index(df) result = self.executor.execute_dataframe(df2, concat=True)[0] expected = data.reset_index() pd.testing.assert_frame_equal(result, expected) df = from_pandas_df(data, chunk_size=2) df2 = df_reset_index(df) result = self.executor.execute_dataframe(df2, concat=True)[0] expected = data.reset_index() pd.testing.assert_frame_equal(result, expected) df = from_pandas_df(data, chunk_size=1) df2 = df_reset_index(df, drop=True) result = self.executor.execute_dataframe(df2, concat=True)[0] expected = data.reset_index(drop=True) pd.testing.assert_frame_equal(result, expected) index = pd.MultiIndex.from_tuples([('bird', 'falcon'), ('bird', 'parrot'), ('mammal', 'lion'), ('mammal', 'monkey')], names=['class', 'name']) data = pd.DataFrame([('bird', 389.0), ('bird', 24.0), ('mammal', 80.5), ('mammal', np.nan)], index=index, columns=('type', 'max_speed')) df = from_pandas_df(data, chunk_size=1) df2 = df_reset_index(df, level='class') result = self.executor.execute_dataframe(df2, concat=True)[0] expected = data.reset_index(level='class') 
        pd.testing.assert_frame_equal(result, expected)

        # MultiIndex columns: place the restored level via col_level/col_fill
        columns = pd.MultiIndex.from_tuples([('speed', 'max'), ('species', 'type')])
        data.columns = columns
        df = from_pandas_df(data, chunk_size=2)
        df2 = df_reset_index(df, level='class', col_level=1, col_fill='species')
        result = self.executor.execute_dataframe(df2, concat=True)[0]
        expected = data.reset_index(level='class', col_level=1, col_fill='species')
        pd.testing.assert_frame_equal(result, expected)

        # Test Series
        s = pd.Series([1, 2, 3, 4], name='foo', index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))
        series = from_pandas_series(s)
        s2 = series_reset_index(series, name='bar')
        result = self.executor.execute_dataframe(s2, concat=True)[0]
        expected = s.reset_index(name='bar')
        pd.testing.assert_frame_equal(result, expected)

        series = from_pandas_series(s, chunk_size=2)
        s2 = series_reset_index(series, drop=True)
        result = self.executor.execute_dataframe(s2, concat=True)[0]
        expected = s.reset_index(drop=True)
        pd.testing.assert_series_equal(result, expected)

        # Test Unknown shape
        sess = new_session()
        data1 = pd.DataFrame(np.random.rand(10, 3), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9])
        df1 = from_pandas_df(data1, chunk_size=5)
        data2 = pd.DataFrame(np.random.rand(10, 3), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
        df2 = from_pandas_df(data2, chunk_size=6)
        df = (df1 + df2).reset_index()
        result = sess.run(df)
        pd.testing.assert_index_equal(result.index, pd.RangeIndex(12))
        # Inconsistent with Pandas when input dataframe's shape is unknown.
        # Row order may differ, so compare after sorting by the restored index column.
        result = result.sort_values(by=result.columns[0])
        expected = (data1 + data2).reset_index()
        np.testing.assert_array_equal(result.to_numpy(), expected.to_numpy())

        data1 = pd.Series(np.random.rand(10,), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9])
        series1 = from_pandas_series(data1, chunk_size=3)
        data2 = pd.Series(np.random.rand(10,), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
        series2 = from_pandas_series(data2, chunk_size=3)
        df = (series1 + series2).reset_index()
        result = sess.run(df)
        pd.testing.assert_index_equal(result.index, pd.RangeIndex(12))
        # Inconsistent with Pandas when input dataframe's shape is unknown.
        result = result.sort_values(by=result.columns[0])
        expected = (data1 + data2).reset_index()
        np.testing.assert_array_equal(result.to_numpy(), expected.to_numpy())

    def testSeriesMapExecution(self):
        """Series.map with dict/function/Series arguments, explicit and inferred dtypes."""
        raw = pd.Series(np.arange(10))
        s = from_pandas_series(raw, chunk_size=7)

        with self.assertRaises(ValueError):
            # cannot infer dtype, the inferred is int,
            # but actually it is float
            # just due to nan
            s.map({5: 10})

        r = s.map({5: 10}, dtype=float)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = raw.map({5: 10})
        pd.testing.assert_series_equal(result, expected)

        r = s.map({i: 10 + i for i in range(7)}, dtype=float)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = raw.map({i: 10 + i for i in range(7)})
        pd.testing.assert_series_equal(result, expected)

        r = s.map({5: 10}, dtype=float, na_action='ignore')
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = raw.map({5: 10}, na_action='ignore')
        pd.testing.assert_series_equal(result, expected)

        # dtype can be inferred
        r = s.map({5: 10.})
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = raw.map({5: 10.})
        pd.testing.assert_series_equal(result, expected)

        r = s.map(lambda x: x + 1, dtype=int)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = raw.map(lambda x: x + 1)
        pd.testing.assert_series_equal(result, expected)

        def f(x: int) -> float:
return x + 1. # dtype can be inferred for function r = s.map(f) result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw.map(lambda x: x + 1.) pd.testing.assert_series_equal(result, expected) # test arg is a md.Series raw2 = pd.Series([10], index=[5]) s2 = from_pandas_series(raw2) r = s.map(s2, dtype=float) result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw.map(raw2) pd.testing.assert_series_equal(result, expected) # test arg is a md.Series, and dtype can be inferred raw2 = pd.Series([10.], index=[5]) s2 = from_pandas_series(raw2) r = s.map(s2) result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw.map(raw2) pd.testing.assert_series_equal(result, expected) # test str raw = pd.Series(['a', 'b', 'c', 'd']) s = from_pandas_series(raw, chunk_size=2) r = s.map({'c': 'e'}) result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw.map({'c': 'e'}) pd.testing.assert_series_equal(result, expected) def testDescribeExecution(self): s_raw = pd.Series(np.random.rand(10)) # test one chunk series = from_pandas_series(s_raw, chunk_size=10) r = series.describe() result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.describe() pd.testing.assert_series_equal(result, expected) r = series.describe(percentiles=[]) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.describe(percentiles=[]) pd.testing.assert_series_equal(result, expected) # test multi chunks series = from_pandas_series(s_raw, chunk_size=3) r = series.describe() result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.describe() pd.testing.assert_series_equal(result, expected) r = series.describe(percentiles=[]) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.describe(percentiles=[]) pd.testing.assert_series_equal(result, expected) df_raw = pd.DataFrame(np.random.rand(10, 4), columns=list('abcd')) df_raw['e'] = np.random.randint(100, size=10) # test 
one chunk df = from_pandas_df(df_raw, chunk_size=10) r = df.describe() result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.describe() pd.testing.assert_frame_equal(result, expected) r = series.describe(percentiles=[], include=np.float64) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.describe(percentiles=[], include=np.float64) pd.testing.assert_series_equal(result, expected) # test multi chunks df = from_pandas_df(df_raw, chunk_size=3) r = df.describe() result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.describe() pd.testing.assert_frame_equal(result, expected) r = df.describe(percentiles=[], include=np.float64) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.describe(percentiles=[], include=np.float64) pd.testing.assert_frame_equal(result, expected) with self.assertRaises(ValueError): df.describe(percentiles=[1.1]) def testDataFrameFillNAExecution(self): df_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ')) for _ in range(20): df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99) value_df_raw = pd.DataFrame(np.random.randint(0, 100, (10, 7)).astype(np.float32), columns=list('ABCDEFG')) # test DataFrame single chunk with numeric fill df = from_pandas_df(df_raw) r = df.fillna(1) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(1) pd.testing.assert_frame_equal(result, expected) # test DataFrame single chunk with value as single chunk df = from_pandas_df(df_raw) value_df = from_pandas_df(value_df_raw) r = df.fillna(value_df) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(value_df_raw) pd.testing.assert_frame_equal(result, expected) # test chunked with numeric fill df = from_pandas_df(df_raw, chunk_size=3) r = df.fillna(1) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(1) 
pd.testing.assert_frame_equal(result, expected) # test inplace tile df = from_pandas_df(df_raw, chunk_size=3) df.fillna(1, inplace=True) result = self.executor.execute_dataframe(df, concat=True)[0] expected = df_raw.fillna(1) pd.testing.assert_frame_equal(result, expected) # test forward fill in axis=0 without limit df = from_pandas_df(df_raw, chunk_size=3) r = df.fillna(method='pad') result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(method='pad') pd.testing.assert_frame_equal(result, expected) # test backward fill in axis=0 without limit df = from_pandas_df(df_raw, chunk_size=3) r = df.fillna(method='backfill') result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(method='backfill') pd.testing.assert_frame_equal(result, expected) # test forward fill in axis=1 without limit df = from_pandas_df(df_raw, chunk_size=3) r = df.ffill(axis=1) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.ffill(axis=1) pd.testing.assert_frame_equal(result, expected) # test backward fill in axis=1 without limit df = from_pandas_df(df_raw, chunk_size=3) r = df.bfill(axis=1) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.bfill(axis=1) pd.testing.assert_frame_equal(result, expected) # test fill with dataframe df = from_pandas_df(df_raw, chunk_size=3) value_df = from_pandas_df(value_df_raw, chunk_size=4) r = df.fillna(value_df) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(value_df_raw) pd.testing.assert_frame_equal(result, expected) # test fill with series value_series_raw = pd.Series(np.random.randint(0, 100, (10,)).astype(np.float32), index=list('ABCDEFGHIJ')) df = from_pandas_df(df_raw, chunk_size=3) value_series = from_pandas_series(value_series_raw, chunk_size=4) r = df.fillna(value_series) result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.fillna(value_series_raw) 
        pd.testing.assert_frame_equal(result, expected)

    def testSeriesFillNAExecution(self):
        """Series.fillna with scalars, methods and a Series fill value."""
        series_raw = pd.Series(np.nan, index=range(20))
        for _ in range(3):
            series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)

        value_series_raw = pd.Series(np.random.randint(0, 100, (10,)).astype(np.float32))

        series = from_pandas_series(series_raw)
        r = series.fillna(1)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = series_raw.fillna(1)
        pd.testing.assert_series_equal(result, expected)

        # test DataFrame single chunk with value as single chunk
        series = from_pandas_series(series_raw)
        value_series = from_pandas_series(value_series_raw)
        r = series.fillna(value_series)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = series_raw.fillna(value_series_raw)
        pd.testing.assert_series_equal(result, expected)

        # test chunked with numeric fill
        series = from_pandas_series(series_raw, chunk_size=3)
        r = series.fillna(1)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = series_raw.fillna(1)
        pd.testing.assert_series_equal(result, expected)

        # test inplace tile
        series = from_pandas_series(series_raw, chunk_size=3)
        series.fillna(1, inplace=True)
        result = self.executor.execute_dataframe(series, concat=True)[0]
        expected = series_raw.fillna(1)
        pd.testing.assert_series_equal(result, expected)

        # test forward fill in axis=0 without limit
        series = from_pandas_series(series_raw, chunk_size=3)
        r = series.fillna(method='pad')
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = series_raw.fillna(method='pad')
        pd.testing.assert_series_equal(result, expected)

        # test backward fill in axis=0 without limit
        series = from_pandas_series(series_raw, chunk_size=3)
        r = series.fillna(method='backfill')
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = series_raw.fillna(method='backfill')
        pd.testing.assert_series_equal(result, expected)

        # test fill with series
        series = from_pandas_series(series_raw, chunk_size=3)
        value_df = from_pandas_series(value_series_raw, chunk_size=4)
        r = series.fillna(value_df)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = series_raw.fillna(value_series_raw)
        pd.testing.assert_series_equal(result, expected)

    def testDataFrameApplyExecute(self):
        """DataFrame.apply with string, list, ufunc and lambda funcs on both axes."""
        cols = [chr(ord('A') + i) for i in range(10)]
        df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols))

        old_chunk_store_limit = options.chunk_store_limit
        try:
            # shrink the chunk store limit to force apply to split work across chunks
            options.chunk_store_limit = 20

            df = from_pandas_df(df_raw, chunk_size=5)

            r = df.apply('ffill')
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply('ffill')
            pd.testing.assert_frame_equal(result, expected)

            r = df.apply(['sum', 'max'])
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(['sum', 'max'])
            pd.testing.assert_frame_equal(result, expected)

            r = df.apply(np.sqrt)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(np.sqrt)
            pd.testing.assert_frame_equal(result, expected)

            r = df.apply(lambda x: pd.Series([1, 2]))
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(lambda x: pd.Series([1, 2]))
            pd.testing.assert_frame_equal(result, expected)

            r = df.apply(np.sum, axis='index')
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(np.sum, axis='index')
            pd.testing.assert_series_equal(result, expected)

            r = df.apply(np.sum, axis='columns')
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(np.sum, axis='columns')
            pd.testing.assert_series_equal(result, expected)

            r = df.apply(lambda x: [1, 2], axis=1)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(lambda x: [1, 2], axis=1)
            pd.testing.assert_series_equal(result, expected)

            r = df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.apply(lambda x: pd.Series([1, 2],
index=['foo', 'bar']), axis=1) pd.testing.assert_frame_equal(result, expected) r = df.apply(lambda x: [1, 2], axis=1, result_type='expand') result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.apply(lambda x: [1, 2], axis=1, result_type='expand') pd.testing.assert_frame_equal(result, expected) r = df.apply(lambda x: list(range(10)), axis=1, result_type='reduce') result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.apply(lambda x: list(range(10)), axis=1, result_type='reduce') pd.testing.assert_series_equal(result, expected) r = df.apply(lambda x: list(range(10)), axis=1, result_type='broadcast') result = self.executor.execute_dataframe(r, concat=True)[0] expected = df_raw.apply(lambda x: list(range(10)), axis=1, result_type='broadcast') pd.testing.assert_frame_equal(result, expected) finally: options.chunk_store_limit = old_chunk_store_limit def testSeriesApplyExecute(self): idxes = [chr(ord('A') + i) for i in range(20)] s_raw = pd.Series([i ** 2 for i in range(20)], index=idxes) series = from_pandas_series(s_raw, chunk_size=5) r = series.apply('add', args=(1,)) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.apply('add', args=(1,)) pd.testing.assert_series_equal(result, expected) r = series.apply(['sum', 'max']) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.apply(['sum', 'max']) pd.testing.assert_series_equal(result, expected) r = series.apply(np.sqrt) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.apply(np.sqrt) pd.testing.assert_series_equal(result, expected) r = series.apply('sqrt') result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.apply('sqrt') pd.testing.assert_series_equal(result, expected) r = series.apply(lambda x: [x, x + 1], convert_dtype=False) result = self.executor.execute_dataframe(r, concat=True)[0] expected = s_raw.apply(lambda x: [x, x + 1], convert_dtype=False) 
        pd.testing.assert_series_equal(result, expected)

    def testTransformExecute(self):
        """DataFrame/Series transform with callables, lists and dicts; also the
        internal agg path selected via ``_call_agg=True`` (Mars-specific kwarg —
        presumably routes transform through aggregate; verify against the op impl)."""
        cols = [chr(ord('A') + i) for i in range(10)]
        df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols))

        idx_vals = [chr(ord('A') + i) for i in range(20)]
        s_raw = pd.Series([i ** 2 for i in range(20)], index=idx_vals)

        def rename_fn(f, new_name):
            # give a lambda a distinct __name__ so pandas can label its output column
            f.__name__ = new_name
            return f

        old_chunk_store_limit = options.chunk_store_limit
        try:
            options.chunk_store_limit = 20

            # DATAFRAME CASES
            df = from_pandas_df(df_raw, chunk_size=5)

            # test transform scenarios on data frames
            r = df.transform(lambda x: list(range(len(x))))
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.transform(lambda x: list(range(len(x))))
            pd.testing.assert_frame_equal(result, expected)

            r = df.transform(lambda x: list(range(len(x))), axis=1)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.transform(lambda x: list(range(len(x))), axis=1)
            pd.testing.assert_frame_equal(result, expected)

            r = df.transform(['cumsum', 'cummax', lambda x: x + 1])
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.transform(['cumsum', 'cummax', lambda x: x + 1])
            pd.testing.assert_frame_equal(result, expected)

            fn_dict = OrderedDict([
                ('A', 'cumsum'),
                ('D', ['cumsum', 'cummax']),
                ('F', lambda x: x + 1),
            ])
            r = df.transform(fn_dict)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.transform(fn_dict)
            pd.testing.assert_frame_equal(result, expected)

            # test agg scenarios on series
            r = df.transform(lambda x: x.iloc[:-1], _call_agg=True)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.agg(lambda x: x.iloc[:-1])
            pd.testing.assert_frame_equal(result, expected)

            r = df.transform(lambda x: x.iloc[:-1], axis=1, _call_agg=True)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.agg(lambda x: x.iloc[:-1], axis=1)
            pd.testing.assert_frame_equal(result, expected)

            fn_list = [rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1'),
                       lambda x: x.iloc[:-1].reset_index(drop=True)]
            r = df.transform(fn_list, _call_agg=True)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.agg(fn_list)
            pd.testing.assert_frame_equal(result, expected)

            r = df.transform(lambda x: x.sum(), _call_agg=True)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.agg(lambda x: x.sum())
            pd.testing.assert_series_equal(result, expected)

            fn_dict = OrderedDict([
                ('A', rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1')),
                ('D', [rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1'),
                       lambda x: x.iloc[:-1].reset_index(drop=True)]),
                ('F', lambda x: x.iloc[:-1].reset_index(drop=True)),
            ])
            r = df.transform(fn_dict, _call_agg=True)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = df_raw.agg(fn_dict)
            pd.testing.assert_frame_equal(result, expected)

            # SERIES CASES
            series = from_pandas_series(s_raw, chunk_size=5)

            # test transform scenarios on series
            r = series.transform(lambda x: x + 1)
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = s_raw.transform(lambda x: x + 1)
            pd.testing.assert_series_equal(result, expected)

            r = series.transform(['cumsum', lambda x: x + 1])
            result = self.executor.execute_dataframe(r, concat=True)[0]
            expected = s_raw.transform(['cumsum', lambda x: x + 1])
            pd.testing.assert_frame_equal(result, expected)
        finally:
            options.chunk_store_limit = old_chunk_store_limit

    def testStringMethodExecution(self):
        """Series.str accessor methods against pandas on chunked series (with NaN)."""
        s = pd.Series(['s1,s2', 'ef,', 'dd', np.nan])
        s2 = pd.concat([s, s, s])

        series = from_pandas_series(s, chunk_size=2)
        series2 = from_pandas_series(s2, chunk_size=2)

        # test getitem
        r = series.str[:3]
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str[:3]
        pd.testing.assert_series_equal(result, expected)

        # test split, expand=False
        r = series.str.split(',', n=2)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = 
s.str.split(',', n=2)
        pd.testing.assert_series_equal(result, expected)

        # test split, expand=True
        r = series.str.split(',', expand=True, n=1)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.split(',', expand=True, n=1)
        pd.testing.assert_frame_equal(result, expected)

        # test rsplit
        r = series.str.rsplit(',', expand=True, n=1)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.rsplit(',', expand=True, n=1)
        pd.testing.assert_frame_equal(result, expected)

        # test cat all data
        r = series2.str.cat(sep='/', na_rep='e')
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s2.str.cat(sep='/', na_rep='e')
        self.assertEqual(result, expected)

        # test cat list
        r = series.str.cat(['a', 'b', np.nan, 'c'])
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.cat(['a', 'b', np.nan, 'c'])
        pd.testing.assert_series_equal(result, expected)

        # test cat series
        r = series.str.cat(series.str.capitalize(), join='outer')
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.cat(s.str.capitalize(), join='outer')
        pd.testing.assert_series_equal(result, expected)

        # test extractall
        r = series.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
        pd.testing.assert_frame_equal(result, expected)

        # test extract, expand=False
        r = series.str.extract(r'[ab](\d)', expand=False)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.extract(r'[ab](\d)', expand=False)
        pd.testing.assert_series_equal(result, expected)

        # test extract, expand=True
        r = series.str.extract(r'[ab](\d)', expand=True)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.str.extract(r'[ab](\d)', expand=True)
        pd.testing.assert_frame_equal(result, expected)

    def testDatetimeMethodExecution(self):
        """Series.dt accessor on datetime and timedelta series (with NaN/NaT)."""
        # test datetime
        s = pd.Series([pd.Timestamp('2020-1-1'),
                       pd.Timestamp('2020-2-1'), np.nan])
        series = from_pandas_series(s, chunk_size=2)

        r = series.dt.year
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.dt.year
        pd.testing.assert_series_equal(result, expected)

        r = series.dt.strftime('%m-%d-%Y')
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.dt.strftime('%m-%d-%Y')
        pd.testing.assert_series_equal(result, expected)

        # test timedelta
        s = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('3 days'), np.nan])
        series = from_pandas_series(s, chunk_size=2)

        r = series.dt.days
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s.dt.days
        pd.testing.assert_series_equal(result, expected)

    def testSeriesIsin(self):
        """Series.isin against another mars Series, raw pandas Series, tensor and set,
        across combinations of chunk sizes."""
        # one chunk in multiple chunks
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = pd.Series([2, 1, 9, 3])

        sa = from_pandas_series(a, chunk_size=10)
        sb = from_pandas_series(b, chunk_size=2)

        result = self.executor.execute_dataframe(sa.isin(sb), concat=True)[0]
        expected = a.isin(b)
        pd.testing.assert_series_equal(result, expected)

        # multiple chunk in one chunks
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = pd.Series([2, 1, 9, 3])

        sa = from_pandas_series(a, chunk_size=2)
        sb = from_pandas_series(b, chunk_size=4)

        result = self.executor.execute_dataframe(sa.isin(sb), concat=True)[0]
        expected = a.isin(b)
        pd.testing.assert_series_equal(result, expected)

        # multiple chunk in multiple chunks
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = pd.Series([2, 1, 9, 3])

        sa = from_pandas_series(a, chunk_size=2)
        sb = from_pandas_series(b, chunk_size=2)

        result = self.executor.execute_dataframe(sa.isin(sb), concat=True)[0]
        expected = a.isin(b)
        pd.testing.assert_series_equal(result, expected)

        # values given as a raw pandas Series
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = pd.Series([2, 1, 9, 3])

        sa = from_pandas_series(a, chunk_size=2)

        result = self.executor.execute_dataframe(sa.isin(b), concat=True)[0]
        expected = a.isin(b)
        pd.testing.assert_series_equal(result, expected)

        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7,
                       8, 9])
        b = np.array([2, 1, 9, 3])

        # values given as a mars tensor
        sa = from_pandas_series(a, chunk_size=2)
        sb = tensor(b, chunk_size=3)

        result = self.executor.execute_dataframe(sa.isin(sb), concat=True)[0]
        expected = a.isin(b)
        pd.testing.assert_series_equal(result, expected)

        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = {2, 1, 9, 3}  # set

        sa = from_pandas_series(a, chunk_size=2)

        result = self.executor.execute_dataframe(sa.isin(b), concat=True)[0]
        expected = a.isin(b)
        pd.testing.assert_series_equal(result, expected)

    def testCheckNA(self):
        """isna/notna on chunked DataFrame and Series with scattered non-NaN cells."""
        df_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
        for _ in range(20):
            df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)

        df = from_pandas_df(df_raw, chunk_size=4)

        pd.testing.assert_frame_equal(self.executor.execute_dataframe(df.isna(), concat=True)[0],
                                      df_raw.isna())
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(df.notna(), concat=True)[0],
                                      df_raw.notna())

        series_raw = pd.Series(np.nan, index=range(20))
        for _ in range(3):
            series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)

        series = from_pandas_series(series_raw, chunk_size=4)

        pd.testing.assert_series_equal(self.executor.execute_dataframe(series.isna(), concat=True)[0],
                                       series_raw.isna())
        pd.testing.assert_series_equal(self.executor.execute_dataframe(series.notna(), concat=True)[0],
                                       series_raw.notna())

    def testDropNA(self):
        """dropna on DataFrame (how/subset/inplace variants) and Series."""
        # dataframe cases
        df_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
        for _ in range(30):
            df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)
        # make a few rows fully non-NaN so dropna() keeps something
        for rowid in range(random.randint(1, 5)):
            row = random.randint(0, 19)
            for idx in range(0, 10):
                df_raw.iloc[row, idx] = random.randint(0, 99)

        # only one chunk in columns, can run dropna directly
        r = from_pandas_df(df_raw, chunk_size=(4, 10)).dropna()
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                      df_raw.dropna())

        # multiple chunks in columns, count() will be called first
        r = from_pandas_df(df_raw, chunk_size=4).dropna()
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                      df_raw.dropna())

        r = from_pandas_df(df_raw, chunk_size=4).dropna(how='all')
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                      df_raw.dropna(how='all'))

        r = from_pandas_df(df_raw, chunk_size=4).dropna(subset=list('ABFI'))
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                      df_raw.dropna(subset=list('ABFI')))

        r = from_pandas_df(df_raw, chunk_size=4).dropna(how='all', subset=list('BDHJ'))
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                      df_raw.dropna(how='all', subset=list('BDHJ')))

        r = from_pandas_df(df_raw, chunk_size=4)
        r.dropna(how='all', inplace=True)
        pd.testing.assert_frame_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                      df_raw.dropna(how='all'))

        # series cases
        series_raw = pd.Series(np.nan, index=range(20))
        for _ in range(10):
            series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)

        r = from_pandas_series(series_raw, chunk_size=4).dropna()
        pd.testing.assert_series_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                       series_raw.dropna())

        r = from_pandas_series(series_raw, chunk_size=4)
        r.dropna(inplace=True)
        pd.testing.assert_series_equal(self.executor.execute_dataframe(r, concat=True)[0],
                                       series_raw.dropna())

    def testCutExecution(self):
        """md.cut on Series/tensor/ndarray with list, tensor, IntervalIndex and int bins."""
        rs = np.random.RandomState(0)
        raw = rs.random(15) * 1000
        s = pd.Series(raw, index=['i{}'.format(i) for i in range(15)])
        bins = [10, 100, 500]
        ii = pd.interval_range(10, 500, 3)
        labels = ['a', 'b']

        t = tensor(raw, chunk_size=4)
        series = from_pandas_series(s, chunk_size=4)
        iii = from_pandas_index(ii, chunk_size=2)

        # cut on Series
        r = cut(series, bins)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(result, pd.cut(s, bins))

        r, b = cut(series, bins, retbins=True)
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = 
self.executor.execute_tensor(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, bins, retbins=True)
        pd.testing.assert_series_equal(r_result, r_expected)
        np.testing.assert_array_equal(b_result, b_expected)

        # cut on tensor
        r = cut(t, bins)
        # result and expected is array whose dtype is CategoricalDtype
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins)
        self.assertEqual(len(result), len(expected))
        for r, e in zip(result, expected):
            np.testing.assert_equal(r, e)

        # one chunk
        r = cut(s, tensor(bins, chunk_size=2), right=False, include_lowest=True)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(result, pd.cut(s, bins, right=False, include_lowest=True))

        # test labels
        r = cut(t, bins, labels=labels)
        # result and expected is array whose dtype is CategoricalDtype
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=labels)
        self.assertEqual(len(result), len(expected))
        for r, e in zip(result, expected):
            np.testing.assert_equal(r, e)

        r = cut(t, bins, labels=False)
        # result and expected is array whose dtype is CategoricalDtype
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=False)
        np.testing.assert_array_equal(result, expected)

        # test labels which is tensor
        labels_t = tensor(['a', 'b'], chunk_size=1)
        r = cut(raw, bins, labels=labels_t, include_lowest=True)
        # result and expected is array whose dtype is CategoricalDtype
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=labels, include_lowest=True)
        self.assertEqual(len(result), len(expected))
        for r, e in zip(result, expected):
            np.testing.assert_equal(r, e)

        # test labels=False
        r, b = cut(raw, ii, labels=False, retbins=True)
        # result and expected is array whose dtype is CategoricalDtype
        r_result = self.executor.execute_tileable(r, concat=True)[0]
        b_result = self.executor.execute_tileable(b, concat=True)[0]
        r_expected, b_expected = pd.cut(raw, ii, labels=False, retbins=True)
        for r, e in zip(r_result, r_expected):
            np.testing.assert_equal(r, e)
        pd.testing.assert_index_equal(b_result, b_expected)

        # test bins which is md.IntervalIndex
        r, b = cut(series, iii, labels=tensor(labels, chunk_size=1), retbins=True)
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_dataframe(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, ii, labels=labels, retbins=True)
        pd.testing.assert_series_equal(r_result, r_expected)
        pd.testing.assert_index_equal(b_result, b_expected)

        # test duplicates
        bins2 = [0, 2, 4, 6, 10, 10]
        r, b = cut(s, bins2, labels=False, retbins=True,
                   right=False, duplicates='drop')
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_tensor(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, bins2, labels=False, retbins=True,
                                        right=False, duplicates='drop')
        pd.testing.assert_series_equal(r_result, r_expected)
        np.testing.assert_array_equal(b_result, b_expected)

        # NOTE(review): _create_test_context appears to build an execution context
        # around the test executor — confirm against TestBase.
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            # test integer bins
            r = cut(series, 3)
            result = executor.execute_dataframes([r])[0]
            pd.testing.assert_series_equal(result, pd.cut(s, 3))

            r, b = cut(series, 3, right=False, retbins=True)
            r_result, b_result = executor.execute_dataframes([r, b])
            r_expected, b_expected = pd.cut(s, 3, right=False, retbins=True)
            pd.testing.assert_series_equal(r_result, r_expected)
            np.testing.assert_array_equal(b_result, b_expected)

            # test min max same
            s2 = pd.Series([1.1] * 15)
            r = cut(s2, 3)
            result = executor.execute_dataframes([r])[0]
            pd.testing.assert_series_equal(result, pd.cut(s2, 3))

            # test inf exist
            s3 = s2.copy()
            s3[-1] = np.inf
            with self.assertRaises(ValueError):
                executor.execute_dataframes([cut(s3, 3)])

    def testShiftExecution(self):
        # test dataframe
        rs = np.random.RandomState(0)
        raw = pd.DataFrame(rs.randint(1000, size=(10, 8)),
                           columns=['col' + str(i + 1) for i in range(8)])
        df = 
from_pandas_df(raw, chunk_size=5) for periods in (2, -2, 6, -6): for axis in (0, 1): for fill_value in (None, 0, 1.): r = df.shift(periods=periods, axis=axis, fill_value=fill_value) try: result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw.shift(periods=periods, axis=axis, fill_value=fill_value) pd.testing.assert_frame_equal(result, expected) except AssertionError as e: # pragma: no cover raise AssertionError( 'Failed when periods: {}, axis: {}, fill_value: {}'.format( periods, axis, fill_value )) from e raw2 = raw.copy() raw2.index = pd.date_range('2020-1-1', periods=10) raw2.columns = pd.date_range('2020-3-1', periods=8) df2 = from_pandas_df(raw2, chunk_size=5) # test freq not None for periods in (2, -2): for axis in (0, 1): for fill_value in (None, 0, 1.): r = df2.shift(periods=periods, freq='D', axis=axis, fill_value=fill_value) try: result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw2.shift(periods=periods, freq='D', axis=axis, fill_value=fill_value) pd.testing.assert_frame_equal(result, expected) except AssertionError as e: # pragma: no cover raise AssertionError( 'Failed when periods: {}, axis: {}, fill_value: {}'.format( periods, axis, fill_value )) from e # test tshift r = df2.tshift(periods=1) result = self.executor.execute_dataframe(r, concat=True)[0] expected = raw2.tshift(periods=1) pd.testing.assert_frame_equal(result, expected) with self.assertRaises(ValueError): _ = df.tshift(periods=1) # test series s = raw.iloc[:, 0] series = from_pandas_series(s, chunk_size=5) for periods in (0, 2, -2, 6, -6): for fill_value in (None, 0, 1.): r = series.shift(periods=periods, fill_value=fill_value) try: result = self.executor.execute_dataframe(r, concat=True)[0] expected = s.shift(periods=periods, fill_value=fill_value) pd.testing.assert_series_equal(result, expected) except AssertionError as e: # pragma: no cover raise AssertionError( 'Failed when periods: {}, fill_value: {}'.format( periods, fill_value )) from e s2 = 
raw2.iloc[:, 0] # test freq not None series2 = from_pandas_series(s2, chunk_size=5) for periods in (2, -2): for fill_value in (None, 0, 1.): r = series2.shift(periods=periods, freq='D', fill_value=fill_value) try: result = self.executor.execute_dataframe(r, concat=True)[0] expected = s2.shift(periods=periods, freq='D', fill_value=fill_value) pd.testing.assert_series_equal(result, expected) except AssertionError as e: # pragma: no cover raise AssertionError( 'Failed when periods: {}, fill_value: {}'.format( periods, fill_value )) from e
class Test(unittest.TestCase):
    """Execution tests for the tensor wrappers of scipy special functions."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def _check_unary(self, mars_func, scipy_func):
        """Run one unary special function on dense and sparse inputs and
        compare the chunked execution result against scipy's output."""
        # Dense case: random 3-d input split into chunks of size 3.
        dense = np.random.rand(10, 8, 6)
        t = tensor(dense, chunk_size=3)
        out = self.executor.execute_tensor(mars_func(t), concat=True)[0]
        np.testing.assert_array_equal(out, scipy_func(dense))

        # Sparse case: apply scipy to the csr data vector and rebuild the
        # matrix so the comparison covers the sparse code path.
        sparse = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        t = tensor(sparse, chunk_size=3)
        out = self.executor.execute_tensor(mars_func(t), concat=True)[0]
        ref = sps.csr_matrix((scipy_func(sparse.data), sparse.indices, sparse.indptr),
                             sparse.shape)
        np.testing.assert_array_equal(out.toarray(), ref.toarray())

    def testGammalnExecution(self):
        self._check_unary(gammaln, scipy_gammaln)

    def testErfExecution(self):
        self._check_unary(erf, scipy_erf)

    def testEntrExecution(self):
        self._check_unary(entr, scipy_entr)

    def testRelEntrExecution(self):
        """rel_entr is binary, so it gets its own dense and sparse checks."""
        # Dense pair of operands.
        lhs = np.random.rand(4, 3, 2)
        rhs = np.random.rand(4, 3, 2)
        res = self.executor.execute_tensor(
            rel_entr(tensor(lhs, chunk_size=3), tensor(rhs, chunk_size=3)),
            concat=True)[0]
        np.testing.assert_array_equal(res, scipy_rel_entr(lhs, rhs))

        # Sparse left operand against a dense right operand.
        lhs = sps.csr_matrix(
            np.array([0, 1.0, 1.01, np.nan] * 3).reshape(4, 3))
        rhs = np.random.rand(4, 3)
        res = self.executor.execute_tensor(
            rel_entr(tensor(lhs, chunk_size=3), tensor(rhs, chunk_size=3)),
            concat=True)[0]
        np.testing.assert_array_equal(res.toarray(),
                                      scipy_rel_entr(lhs.toarray(), rhs))
class Test(TestBase):
    """Execution tests for storing tensors into external stores
    (TileDB, HDF5, Zarr, vineyard)."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testStoreTileDBExecution(self):
        ctx = tiledb.Ctx()

        tempdir = tempfile.mkdtemp()
        try:
            # store TileDB dense array
            expected = np.random.rand(8, 4, 3)
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store tensor with 1 chunk to TileDB dense array
            a = arange(12)
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(np.arange(12), arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store 2-d TileDB sparse array
            expected = sps.random(8, 7, density=0.1)
            a = tensor(expected, chunk_size=(3, 5))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.SparseArray(uri=tempdir, ctx=ctx) as arr:
                data = arr[:, :]
                coords = data['coords']
                value = data[arr.attr(0).name]
                # rebuild a coo matrix from TileDB coordinates to compare
                ij = tuple(coords[arr.domain.dim(k).name]
                           for k in range(arr.ndim))
                result = sps.coo_matrix((value, ij), shape=arr.shape)
                np.testing.assert_allclose(expected.toarray(), result.toarray())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store fortran-ordered TileDB dense array; cell order must follow
            expected = np.asfortranarray(np.random.rand(8, 4, 3))
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
                self.assertEqual(arr.schema.cell_order, 'col-major')
        finally:
            shutil.rmtree(tempdir)

    @unittest.skipIf(h5py is None, 'h5py not installed')
    def testStoreHDF5Execution(self):
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t1 = tensor(raw, chunk_size=20)
        t2 = tensor(raw, chunk_size=9)

        with self.assertRaises(TypeError):
            tohdf5(object(), t2)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            with tempfile.TemporaryDirectory() as d:
                filename = os.path.join(
                    d, f'test_store_{int(time.time())}.hdf5')

                # test 1 chunk
                r = tohdf5(filename, t1, group=group_name, dataset=dataset_name)
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

                # test filename
                r = tohdf5(filename, t2, group=group_name, dataset=dataset_name)
                executor.execute_tensor(r)

                # multi-chunk writes must be serialized through a
                # SuccessorsExclusive control chunk
                rt = get_tiled(r)
                self.assertEqual(
                    type(rt.chunks[0].inputs[1].op).__name__,
                    'SuccessorsExclusive')
                self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

                # group/dataset omitted for a filename target is an error
                with self.assertRaises(ValueError):
                    tohdf5(filename, t2)

                with h5py.File(filename, 'r') as f:
                    # test file
                    r = tohdf5(f, t2, group=group_name, dataset=dataset_name)
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

                # group/dataset omitted for a File target is an error
                with self.assertRaises(ValueError):
                    with h5py.File(filename, 'r') as f:
                        tohdf5(f, t2)

                with h5py.File(filename, 'r') as f:
                    # test dataset target: group/dataset implied by the object
                    ds = f[f'{group_name}/{dataset_name}']
                    r = tohdf5(ds, t2)
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

    @unittest.skipIf(zarr is None, 'zarr not installed')
    def testStoreZarrExecution(self):
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t = tensor(raw, chunk_size=6)

        with self.assertRaises(TypeError):
            tozarr(object(), t)

        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(
                d, f'test_store_{int(time.time())}.zarr')
            # BUG FIX: the path must point inside the store just written;
            # it previously used a literal '(unknown)' prefix, so every
            # zarr.open(path) below targeted a non-existent location.
            path = f'{filename}/{group_name}/{dataset_name}'

            r = tozarr(filename, t, group=group_name, dataset=dataset_name,
                       compressor=Zstd(level=3))
            self.executor.execute_tensor(r)

            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw)
            self.assertEqual(arr.compressor, Zstd(level=3))

            # test storing to a path string
            r = tozarr(path, t + 2)
            self.executor.execute_tensor(r)
            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw + 2)

            # test storing to an opened zarr array with filters/compressor
            filters = [Delta(dtype='i4')]
            compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
            arr = zarr.open(path, compressor=compressor, filters=filters)
            r = tozarr(arr, t + 1)
            self.executor.execute_tensor(r)
            result = zarr.open_array(path)
            np.testing.assert_array_equal(result, raw + 1)

    @unittest.skipIf(vineyard is None, 'vineyard not installed')
    @flaky(max_runs=3)
    def testToVineyard(self):
        def run_with_given_session(session, **kw):
            ipc_socket = os.environ.get('VINEYARD_IPC_SOCKET',
                                        '/tmp/vineyard/vineyard.sock')
            with option_context({'vineyard.socket': ipc_socket}):
                tensor1 = tensor(np.arange(12).reshape(3, 4), chunk_size=2)
                # round-trip: store into vineyard, read back, compare
                object_id = tovineyard(tensor1).execute(
                    session=session, **kw).fetch(session=session)
                tensor2 = from_vineyard(object_id)

                tensor1_value = tensor1.execute(
                    session=session, **kw).fetch(session=session)
                tensor2_value = tensor2.execute(
                    session=session, **kw).fetch(session=session)
                np.testing.assert_array_equal(tensor1_value, tensor2_value)

        # local session
        with new_session().as_default() as session:
            run_with_given_session(session)

        # distributed cluster session
        with new_cluster(scheduler_n_process=2, worker_n_process=2,
                         shared_memory='20M', web=False) as cluster:
            with new_session(cluster.endpoint).as_default() as session:
                run_with_given_session(session, timeout=_exec_timeout)
class Test(unittest.TestCase):
    """Tests for building and querying faiss nearest-neighbor indexes
    through Mars tensors, comparing against NearestNeighbors results."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testManualBuildFaissIndex(self):
        # Small fixture: n points of dimension d, n_test queries.
        d = 8
        n = 50
        n_test = 10
        x = np.random.RandomState(0).rand(n, d).astype(np.float32)
        y = np.random.RandomState(0).rand(n_test, d).astype(np.float32)

        # Reference result from exact kd-tree search.
        nn = NearestNeighbors(algorithm='kd_tree')
        nn.fit(x)
        _, expected_indices = nn.kneighbors(y, 5)

        for index_type in ['object', 'filename', 'bytes']:
            # test brute-force search
            X = mt.tensor(x, chunk_size=10)
            index = build_faiss_index(X, 'Flat', None, random_state=0,
                                      same_distribution=True,
                                      return_index_type=index_type)
            faiss_index = self.executor.execute_tileable(index)

            # One index shard per chunk; merge them into a single
            # searchable IndexShards.
            index_shards = faiss.IndexShards(d)
            for ind in faiss_index:
                shard = _load_index(None, index.op, ind, -1)
                index_shards.add_shard(shard)
            faiss_index = index_shards

            faiss_index.nprob = 10
            _, indices = faiss_index.search(y, k=5)

            np.testing.assert_array_equal(indices, expected_indices.fetch())

        # test one chunk, brute force
        X = mt.tensor(x, chunk_size=50)
        index = build_faiss_index(X, 'Flat', None, random_state=0,
                                  same_distribution=True,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)[0]

        faiss_index.nprob = 10
        _, indices = faiss_index.search(y, k=5)

        np.testing.assert_array_equal(indices, expected_indices.fetch())

        # test train, same distribution
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                                  same_distribution=True,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)[0]

        # same_distribution=True merges everything into one trained index.
        self.assertIsInstance(faiss_index, faiss.IndexIVFFlat)
        self.assertEqual(faiss_index.ntotal, n)
        self.assertEqual(len(get_tiled(index).chunks), 1)

        # test train, distributions are variant
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(X, 'IVF10,Flat', None, random_state=0,
                                  same_distribution=False,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)

        # same_distribution=False keeps one trained index per chunk.
        self.assertEqual(len(faiss_index), 5)
        for ind in faiss_index:
            self.assertIsInstance(ind, faiss.IndexIVFFlat)
            self.assertEqual(ind.ntotal, 10)

        # test one chunk, train
        X = mt.tensor(x, chunk_size=50)
        index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                                  same_distribution=True,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(faiss_index, faiss.IndexIVFFlat)
        self.assertEqual(faiss_index.ntotal, n)

        # test wrong index
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'unknown_index', None)

        # test unknown metric
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'Flat', None, metric='unknown_metric')

    def testFaissQuery(self):
        d = 8
        n = 50
        n_test = 10
        x = np.random.RandomState(0).rand(n, d).astype(np.float32)
        y = np.random.RandomState(1).rand(n_test, d).astype(np.float32)

        test_tensors = [
            # multi chunks
            (mt.tensor(x, chunk_size=(20, 5)), mt.tensor(y, chunk_size=5)),
            # one chunk
            (mt.tensor(x, chunk_size=50), mt.tensor(y, chunk_size=10))
        ]

        for X, Y in test_tensors:
            for metric in ['l2', 'cosine']:
                faiss_index = build_faiss_index(X, 'Flat', None, metric=metric,
                                                random_state=0,
                                                return_index_type='object')
                d, i = faiss_query(faiss_index, Y, 5, nprobe=10)
                distance, indices = self.executor.execute_tensors([d, i])

                # Exact nearest-neighbor reference with the same metric.
                nn = NearestNeighbors(metric=metric)
                nn.fit(x)
                expected_distance, expected_indices = nn.kneighbors(y, 5)

                np.testing.assert_array_equal(indices,
                                              expected_indices.fetch())
                np.testing.assert_almost_equal(distance,
                                               expected_distance.fetch())

    def testGenIndexStringAndSampleCount(self):
        # Checks the (index factory string, train sample count) chosen for
        # different data sizes and memory requirements.
        d = 32

        # accuracy=True, could be Flat only
        ret = _gen_index_string_and_sample_count(
            (10 ** 9, d), None, True, 'minimum')
        self.assertEqual(ret, ('Flat', None))

        # no memory concern
        ret = _gen_index_string_and_sample_count(
            (10 ** 5, d), None, False, 'maximum')
        self.assertEqual(ret, ('HNSW32', None))
        index = faiss.index_factory(d, ret[0])
        self.assertTrue(index.is_trained)

        # memory concern not much
        ret = _gen_index_string_and_sample_count(
            (10 ** 5, d), None, False, 'high')
        self.assertEqual(ret, ('IVF1580,Flat', 47400))
        index = faiss.index_factory(d, ret[0])
        self.assertFalse(index.is_trained)

        # memory quite important
        ret = _gen_index_string_and_sample_count(
            (5 * 10 ** 6, d), None, False, 'low')
        self.assertEqual(ret, ('PCAR16,IVF65536_HNSW32,SQ8', 32 * 65536))
        index = faiss.index_factory(d, ret[0])
        self.assertFalse(index.is_trained)

        # memory very important
        ret = _gen_index_string_and_sample_count(
            (10 ** 8, d), None, False, 'minimum')
        self.assertEqual(ret, ('OPQ16_32,IVF1048576_HNSW32,PQ16', 64 * 65536))
        index = faiss.index_factory(d, ret[0])
        self.assertFalse(index.is_trained)

        ret = _gen_index_string_and_sample_count(
            (10 ** 10, d), None, False, 'low')
        self.assertEqual(ret, ('PCAR16,IVF1048576_HNSW32,SQ8', 64 * 65536))
        index = faiss.index_factory(d, ret[0])
        self.assertFalse(index.is_trained)

        with self.assertRaises(ValueError):
            # M > 64 raise error
            _gen_index_string_and_sample_count(
                (10 ** 5, d), None, False, 'maximum', M=128)

        with self.assertRaises(ValueError):
            # M > 64
            _gen_index_string_and_sample_count(
                (10 ** 5, d), None, False, 'minimum', M=128)

        with self.assertRaises(ValueError):
            # dim should be multiple of M
            _gen_index_string_and_sample_count(
                (10 ** 5, d), None, False, 'minimum', M=16, dim=17)

        with self.assertRaises(ValueError):
            _gen_index_string_and_sample_count(
                (10 ** 5, d), None, False, 'low', k=5)

    def testAutoIndex(self):
        d = 8
        n = 50
        n_test = 10
        x = np.random.RandomState(0).rand(n, d).astype(np.float32)
        y = np.random.RandomState(1).rand(n_test, d).astype(np.float32)

        for chunk_size in (50, 20):
            X = mt.tensor(x, chunk_size=chunk_size)

            # No index string given: let build_faiss_index pick one.
            faiss_index = build_faiss_index(X, random_state=0,
                                            return_index_type='object')
            d, i = faiss_query(faiss_index, y, 5, nprobe=10)
            indices = self.executor.execute_tensor(i, concat=True)[0]

            nn = NearestNeighbors()
            nn.fit(x)
            expected_indices = nn.kneighbors(y, 5, return_distance=False)

            np.testing.assert_array_equal(indices, expected_indices)
class Test(TestBase): def setUp(self): self.executor = ExecutorForTest('numpy') self.old_chunk = options.chunk_size options.chunk_size = 10 def tearDown(self): options.chunk_size = self.old_chunk def testBoolIndexingExecution(self): raw = np.random.random((11, 8, 12, 14)) arr = tensor(raw, chunk_size=3) index = arr < .5 arr2 = arr[index] size_res = self.executor.execute_tensor(arr2, mock=True) res = self.executor.execute_tensor(arr2) self.assertEqual(sum(s[0] for s in size_res), arr.nbytes) np.testing.assert_array_equal(np.sort(np.concatenate(res)), np.sort(raw[raw < .5])) index2 = tensor(raw[:, :, 0, 0], chunk_size=3) < .5 arr3 = arr[index2] res = self.executor.execute_tensor(arr3, concat=True)[0] expected = raw[raw[:, :, 0, 0] < .5] self.assertEqual(sum(it.size for it in res), expected.size) self.assertEqual(res.shape, expected.shape) raw = np.asfortranarray(np.random.random((11, 8, 12, 14))) arr = tensor(raw, chunk_size=3) index = tensor(raw[:, :, 0, 0], chunk_size=3) < .5 arr2 = arr[index] res = self.executor.execute_tensor(arr2, concat=True)[0] expected = raw[raw[:, :, 0, 0] < .5].copy('A') self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testFancyIndexingNumpyExecution(self): # test fancy index of type numpy ndarray raw = np.random.random((11, 8, 12, 14)) arr = tensor(raw, chunk_size=(2, 3, 2, 3)) index = [9, 10, 3, 1, 8, 10] arr2 = arr[index] res = self.executor.execute_tensor(arr2, concat=True)[0] np.testing.assert_array_equal(res, raw[index]) index = np.random.permutation(8) arr3 = arr[:2, ..., index] res = self.executor.execute_tensor(arr3, concat=True)[0] np.testing.assert_array_equal(res, raw[:2, ..., index]) index = [1, 3, 9, 10] arr4 = arr[..., index, :5] res = self.executor.execute_tensor(arr4, concat=True)[0] np.testing.assert_array_equal(res, raw[..., index, :5]) index1 = [8, 10, 3, 1, 9, 10] index2 = [1, 3, 9, 10, 2, 7] arr5 = arr[index1, :, 
index2] res = self.executor.execute_tensor(arr5, concat=True)[0] np.testing.assert_array_equal(res, raw[index1, :, index2]) index1 = [1, 3, 5, 7, 9, 10] index2 = [1, 9, 9, 10, 2, 7] arr6 = arr[index1, :, index2] res = self.executor.execute_tensor(arr6, concat=True)[0] np.testing.assert_array_equal(res, raw[index1, :, index2]) # fancy index is ordered, no concat required self.assertGreater(len(get_tiled(arr6).nsplits[0]), 1) index1 = [[8, 10, 3], [1, 9, 10]] index2 = [[1, 3, 9], [10, 2, 7]] arr7 = arr[index1, :, index2] res = self.executor.execute_tensor(arr7, concat=True)[0] np.testing.assert_array_equal(res, raw[index1, :, index2]) index1 = [[1, 3], [3, 7], [7, 7]] index2 = [1, 9] arr8 = arr[0, index1, :, index2] res = self.executor.execute_tensor(arr8, concat=True)[0] np.testing.assert_array_equal(res, raw[0, index1, :, index2]) def testFancyIndexingTensorExecution(self): # test fancy index of type tensor raw = np.random.random((11, 8, 12, 14)) arr = tensor(raw, chunk_size=(2, 3, 2, 3)) raw_index = [8, 10, 3, 1, 9, 10] index = tensor(raw_index, chunk_size=4) arr2 = arr[index] res = self.executor.execute_tensor(arr2, concat=True)[0] np.testing.assert_array_equal(res, raw[raw_index]) raw_index = np.random.permutation(8) index = tensor(raw_index, chunk_size=3) arr3 = arr[:2, ..., index] res = self.executor.execute_tensor(arr3, concat=True)[0] np.testing.assert_array_equal(res, raw[:2, ..., raw_index]) raw_index = [1, 3, 9, 10] index = tensor(raw_index) arr4 = arr[..., index, :5] res = self.executor.execute_tensor(arr4, concat=True)[0] np.testing.assert_array_equal(res, raw[..., raw_index, :5]) raw_index1 = [8, 10, 3, 1, 9, 10] raw_index2 = [1, 3, 9, 10, 2, 7] index1 = tensor(raw_index1, chunk_size=4) index2 = tensor(raw_index2, chunk_size=3) arr5 = arr[index1, :, index2] res = self.executor.execute_tensor(arr5, concat=True)[0] np.testing.assert_array_equal(res, raw[raw_index1, :, raw_index2]) raw_index1 = [1, 3, 5, 7, 9, 10] raw_index2 = [1, 9, 9, 10, 2, 7] index1 = 
tensor(raw_index1, chunk_size=3) index2 = tensor(raw_index2, chunk_size=4) arr6 = arr[index1, :, index2] res = self.executor.execute_tensor(arr6, concat=True)[0] np.testing.assert_array_equal(res, raw[raw_index1, :, raw_index2]) raw_index1 = [[8, 10, 3], [1, 9, 10]] raw_index2 = [[1, 3, 9], [10, 2, 7]] index1 = tensor(raw_index1) index2 = tensor(raw_index2, chunk_size=2) arr7 = arr[index1, :, index2] res = self.executor.execute_tensor(arr7, concat=True)[0] np.testing.assert_array_equal(res, raw[raw_index1, :, raw_index2]) raw_index1 = [[1, 3], [3, 7], [7, 7]] raw_index2 = [1, 9] index1 = tensor(raw_index1, chunk_size=(2, 1)) index2 = tensor(raw_index2) arr8 = arr[0, index1, :, index2] res = self.executor.execute_tensor(arr8, concat=True)[0] np.testing.assert_array_equal(res, raw[0, raw_index1, :, raw_index2]) raw_a = np.random.rand(30, 30) a = tensor(raw_a, chunk_size=(13, 17)) b = a.argmax(axis=0) c = a[b, arange(30)] res = self.executor.execute_tensor(c, concat=True)[0] np.testing.assert_array_equal( res, raw_a[raw_a.argmax(axis=0), np.arange(30)]) # test one chunk arr = tensor(raw, chunk_size=20) raw_index = [8, 10, 3, 1, 9, 10] index = tensor(raw_index, chunk_size=20) arr9 = arr[index] res = self.executor.execute_tensor(arr9, concat=True)[0] np.testing.assert_array_equal(res, raw[raw_index]) raw_index1 = [[1, 3], [3, 7], [7, 7]] raw_index2 = [1, 9] index1 = tensor(raw_index1) index2 = tensor(raw_index2) arr10 = arr[0, index1, :, index2] res = self.executor.execute_tensor(arr10, concat=True)[0] np.testing.assert_array_equal(res, raw[0, raw_index1, :, raw_index2]) # test order raw = np.asfortranarray(np.random.random((11, 8, 12, 14))) arr = tensor(raw, chunk_size=(2, 3, 2, 3)) raw_index = [8, 10, 3, 1, 9, 10] index = tensor(raw_index, chunk_size=4) arr11 = arr[index] res = self.executor.execute_tensor(arr11, concat=True)[0] expected = raw[raw_index].copy('A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], 
expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testSliceExecution(self): raw = np.random.random((11, 8, 12, 14)) arr = tensor(raw, chunk_size=3) arr2 = arr[2:9:2, 3:7, -1:-9:-2, 12:-11:-4] res = self.executor.execute_tensor(arr2, concat=True)[0] np.testing.assert_array_equal(res, raw[2:9:2, 3:7, -1:-9:-2, 12:-11:-4]) arr3 = arr[-4, 2:] res = self.executor.execute_tensor(arr3, concat=True)[0] np.testing.assert_equal(res, raw[-4, 2:]) raw = sps.random(12, 14, density=.1) arr = tensor(raw, chunk_size=3) arr2 = arr[-1:-9:-2, 12:-11:-4] res = self.executor.execute_tensor(arr2, concat=True)[0] np.testing.assert_equal(res.toarray(), raw.toarray()[-1:-9:-2, 12:-11:-4]) # test order raw = np.asfortranarray(np.random.random((11, 8, 12, 14))) arr = tensor(raw, chunk_size=3) arr2 = arr[2:9:2, 3:7, -1:-9:-2, 12:-11:-4] res = self.executor.execute_tensor(arr2, concat=True)[0] expected = raw[2:9:2, 3:7, -1:-9:-2, 12:-11:-4].copy('A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) arr3 = arr[0:13, :, None] res = self.executor.execute_tensor(arr3, concat=True)[0] expected = raw[0:13, :, None].copy('A') np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testMixedIndexingExecution(self): rs = np.random.RandomState(0) raw = rs.random((11, 8, 12, 13)) arr = tensor(raw, chunk_size=3) raw_cond = raw[0, :, 0, 0] < .5 cond = tensor(raw[0, :, 0, 0], chunk_size=3) < .5 arr2 = arr[10::-2, cond, None, ..., :5] size_res = self.executor.execute_tensor(arr2, mock=True) res = self.executor.execute_tensor(arr2, concat=True)[0] new_shape = list(arr2.shape) new_shape[1] = cond.shape[0] self.assertEqual(sum(s[0] for s in size_res), 
int(np.prod(new_shape) * arr2.dtype.itemsize)) np.testing.assert_array_equal(res, raw[10::-2, raw_cond, None, ..., :5]) b_raw = np.random.random(8) raw_cond = b_raw < .5 conds = [raw_cond, tensor(b_raw, chunk_size=2) < .5] for cond in conds: arr3 = arr[-2::-3, cond, ...] res = self.executor.execute_tensor(arr3, concat=True)[0] np.testing.assert_array_equal(res, raw[-2::-3, raw_cond, ...]) # test multiple bool index and fancy index cond1 = np.zeros(11, dtype=bool) cond1[rs.permutation(11)[:5]] = True cond2 = np.zeros(12, dtype=bool) cond2[rs.permutation(12)[:5]] = True f3 = np.random.randint(13, size=5) expected = raw[cond1, ..., cond2, f3] t = arr[cond1, ..., cond2, f3] res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_array_equal(res, expected) ctx, executor = self._create_test_context(self.executor) with ctx: t = arr[tensor(cond1), ..., tensor(cond2), tensor(f3)] res = executor.execute_tensors([t])[0] np.testing.assert_array_equal(res, expected) def testSetItemExecution(self): rs = np.random.RandomState(0) raw = data = rs.randint(0, 10, size=(11, 8, 12, 13)) arr = tensor(raw.copy(), chunk_size=3) raw = raw.copy() idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2 arr[idx] = 20 res = self.executor.execute_tensor(arr, concat=True)[0] raw[idx] = 20 np.testing.assert_array_equal(res, raw) self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS']) raw = data shape = raw[idx].shape arr2 = tensor(raw.copy(), chunk_size=3) raw = raw.copy() replace = rs.randint(10, 20, size=shape[:-1] + (1, )).astype('f4') arr2[idx] = tensor(replace, chunk_size=4) res = self.executor.execute_tensor(arr2, concat=True)[0] raw[idx] = replace np.testing.assert_array_equal(res, raw) raw = np.asfortranarray(np.random.randint(0, 10, size=(11, 8, 12, 13))) arr = tensor(raw.copy('A'), chunk_size=3) raw = raw.copy('A') idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2 arr[idx] = 20 
res = self.executor.execute_tensor(arr, concat=True)[0] raw[idx] = 20 np.testing.assert_array_equal(res, raw) self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS']) # test bool indexing set raw = data arr = tensor(raw.copy(), chunk_size=3) raw1 = rs.rand(11) arr[tensor(raw1, chunk_size=4) < 0.6, 2:7] = 3 res = self.executor.execute_tileable(arr, concat=True)[0] raw[raw1 < 0.6, 2:7] = 3 np.testing.assert_array_equal(res, raw) raw = np.random.randint(3, size=10).astype(np.int64) raw2 = np.arange(3) arr = zeros((10, 3)) arr[tensor(raw) == 1, tensor(raw2) == 1] = 1 res = self.executor.execute_tileable(arr, concat=True)[0] expected = np.zeros((10, 3)) expected[raw == 1, raw2 == 1] = 1 np.testing.assert_array_equal(res, expected) ctx, executor = self._create_test_context(self.executor) with ctx: raw = data arr = tensor(raw.copy(), chunk_size=3) raw1 = rs.rand(11) set_data = rs.rand((raw1 < 0.8).sum(), 8, 12, 13) arr[tensor(raw1, chunk_size=4) < 0.8] = tensor(set_data) res = self.executor.execute_tileables([arr])[0] raw[raw1 < 0.8] = set_data np.testing.assert_array_equal(res, raw) # test error with self.assertRaises(ValueError): t = tensor(raw, chunk_size=3) t[0, 0, 0, 0] = zeros(2, chunk_size=10) _ = self.executor.execute_tensor(t) def testSetItemStructuredExecution(self): rec_type = np.dtype([('a', np.int32), ('b', np.double), ('c', np.dtype([('a', np.int16), ('b', np.int64)]))]) raw = np.zeros((4, 5), dtype=rec_type) arr = tensor(raw.copy(), chunk_size=3) arr[1:4, 1] = (3, 4., (5, 6)) arr[1:4, 2] = 8 arr[1:3] = np.arange(5) arr[2:4] = np.arange(10).reshape(2, 5) arr[0] = np.arange(5) raw[1:4, 1] = (3, 4., (5, 6)) raw[1:4, 2] = 8 raw[1:3] = np.arange(5) raw[2:4] = np.arange(10).reshape(2, 5) raw[0] = np.arange(5) res = self.executor.execute_tensor(arr, concat=True)[0] self.assertEqual(arr.dtype, raw.dtype) self.assertEqual(arr.shape, raw.shape) np.testing.assert_array_equal(res, raw) 
def testTakeExecution(self): data = np.random.rand(10, 20, 30) t = tensor(data, chunk_size=10) a = t.take([4, 1, 2, 6, 200]) res = self.executor.execute_tensor(a, concat=True)[0] expected = np.take(data, [4, 1, 2, 6, 200]) np.testing.assert_array_equal(res, expected) a = take(t, [5, 19, 2, 13], axis=1) res = self.executor.execute_tensor(a, concat=True)[0] expected = np.take(data, [5, 19, 2, 13], axis=1) np.testing.assert_array_equal(res, expected) with self.assertRaises(ValueError): take(t, [1, 3, 4], out=tensor(np.random.rand(4))) out = tensor([1, 2, 3, 4]) a = take(t, [4, 19, 2, 8], out=out) res = self.executor.execute_tensor(out, concat=True)[0] expected = np.take(data, [4, 19, 2, 8]) np.testing.assert_array_equal(res, expected) def testCompressExecution(self): data = np.array([[1, 2], [3, 4], [5, 6]]) a = tensor(data, chunk_size=1) t = compress([0, 1], a, axis=0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.compress([0, 1], data, axis=0) np.testing.assert_array_equal(res, expected) t = compress([0, 1], a, axis=1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.compress([0, 1], data, axis=1) np.testing.assert_array_equal(res, expected) t = a.compress([0, 1, 1]) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.compress([0, 1, 1], data) np.testing.assert_array_equal(res, expected) t = compress([False, True, True], a, axis=0) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.compress([False, True, True], data, axis=0) np.testing.assert_array_equal(res, expected) t = compress([False, True], a, axis=1) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.compress([False, True], data, axis=1) np.testing.assert_array_equal(res, expected) with self.assertRaises(np.AxisError): compress([0, 1, 1], a, axis=1) # test order data = np.asfortranarray([[1, 2], [3, 4], [5, 6]]) a = tensor(data, chunk_size=1) t = compress([0, 1, 1], a, axis=0) res = self.executor.execute_tensor(t, 
concat=True)[0] expected = np.compress([0, 1, 1], data, axis=0) np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) t = compress([0, 1, 1], a, axis=0, out=tensor(np.empty((2, 2), order='F', dtype=int))) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.compress([0, 1, 1], data, axis=0, out=np.empty((2, 2), order='F', dtype=int)) np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testExtractExecution(self): data = np.arange(12).reshape((3, 4)) a = tensor(data, chunk_size=2) condition = mod(a, 3) == 0 t = extract(condition, a) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.extract(np.mod(data, 3) == 0, data) np.testing.assert_array_equal(res, expected) def testChooseExecution(self): options.chunk_size = 2 choices = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33]] a = choose([2, 3, 1, 0], choices) res = self.executor.execute_tensor(a, concat=True)[0] expected = np.choose([2, 3, 1, 0], choices) np.testing.assert_array_equal(res, expected) a = choose([2, 4, 1, 0], choices, mode='clip') # 4 goes to 3 (4-1) expected = np.choose([2, 4, 1, 0], choices, mode='clip') res = self.executor.execute_tensor(a, concat=True)[0] np.testing.assert_array_equal(res, expected) a = choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4) expected = np.choose([2, 4, 1, 0], choices, mode='wrap') # 4 goes to (4 mod 4) res = self.executor.execute_tensor(a, concat=True)[0] np.testing.assert_array_equal(res, expected) a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]] choices = [-10, 10] b = choose(a, choices) expected = np.choose(a, choices) res = self.executor.execute_tensor(b, concat=True)[0] np.testing.assert_array_equal(res, expected) a = 
np.array([0, 1]).reshape((2, 1, 1)) c1 = np.array([1, 2, 3]).reshape((1, 3, 1)) c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5)) b = choose(a, (c1, c2)) expected = np.choose(a, (c1, c2)) res = self.executor.execute_tensor(b, concat=True)[0] np.testing.assert_array_equal(res, expected) # test order a = np.array([0, 1]).reshape((2, 1, 1), order='F') c1 = np.array([1, 2, 3]).reshape((1, 3, 1), order='F') c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5), order='F') b = choose(a, (c1, c2)) expected = np.choose(a, (c1, c2)) res = self.executor.execute_tensor(b, concat=True)[0] np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) b = choose(a, (c1, c2), out=tensor(np.empty(res.shape, order='F'))) expected = np.choose(a, (c1, c2), out=np.empty(res.shape, order='F')) res = self.executor.execute_tensor(b, concat=True)[0] np.testing.assert_array_equal(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) def testUnravelExecution(self): a = tensor([22, 41, 37], chunk_size=1) t = stack(unravel_index(a, (7, 6))) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.stack(np.unravel_index([22, 41, 37], (7, 6))) np.testing.assert_array_equal(res, expected) def testNonzeroExecution(self): data = np.array([[1, 0, 0], [0, 2, 0], [1, 1, 0]]) x = tensor(data, chunk_size=2) t = hstack(nonzero(x)) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.hstack(np.nonzero(data)) np.testing.assert_array_equal(res, expected) t = hstack((x > 1).nonzero()) res = self.executor.execute_tensor(t, concat=True)[0] expected = np.hstack(np.nonzero(data > 1)) np.testing.assert_array_equal(res, expected) def testFlatnonzeroExecution(self): x = arange(-2, 3, chunk_size=2) t = flatnonzero(x) res = 
self.executor.execute_tensor(t, concat=True)[0] expected = np.flatnonzero(np.arange(-2, 3)) np.testing.assert_equal(res, expected) def testFillDiagonalExecution(self): # 2-d raws = [ np.random.rand(30, 11), np.random.rand(15, 15), np.random.rand(11, 30), sps.random(30, 11, density=0.1, format='csr') ] def copy(x): if hasattr(x, 'nnz'): # sparse return x.A else: return x.copy() for raw in raws: # test 1 chunk, wrap=False t = tensor(raw, chunk_size=30) fill_diagonal(t, 1) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, 1) np.testing.assert_array_equal(np.asarray(res), expected) # test 1 chunk, wrap=True t = tensor(raw, chunk_size=30) fill_diagonal(t, 1, wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, 1, wrap=True) np.testing.assert_array_equal(np.asarray(res), expected) # test multiple chunks, wrap=False t = tensor(raw, chunk_size=(12, 4)) fill_diagonal(t, 1) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, 1) np.testing.assert_array_equal(np.asarray(res), expected) t = tensor(raw, chunk_size=(4, 12)) fill_diagonal(t, 1) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, 1) np.testing.assert_array_equal(np.asarray(res), expected) # test multiple chunk, val with list type t = tensor(raw, chunk_size=(12, 4)) fill_diagonal(t, [1, 2, 3]) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, [1, 2, 3]) np.testing.assert_array_equal(np.asarray(res), expected) # test multiple chunk, val with tensor type t = tensor(raw, chunk_size=(12, 4)) fill_diagonal(t, tensor([1, 2, 3])) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, [1, 2, 3]) np.testing.assert_array_equal(np.asarray(res), expected) # test multiple chunks, wrap=True t = tensor(raw, chunk_size=(12, 
4)) fill_diagonal(t, 1, wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, 1, wrap=True) np.testing.assert_array_equal(np.asarray(res), expected) t = tensor(raw, chunk_size=(4, 12)) fill_diagonal(t, 1, wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, 1, wrap=True) np.testing.assert_array_equal(np.asarray(res), expected) # test multiple chunk, val with list type t = tensor(raw, chunk_size=(12, 4)) fill_diagonal(t, [1, 2, 3], wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, [1, 2, 3], wrap=True) np.testing.assert_array_equal(np.asarray(res), expected) # test multiple chunk, val with tensor type t = tensor(raw, chunk_size=(12, 4)) fill_diagonal(t, tensor([[1, 2], [3, 4]]), wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] expected = copy(raw) np.fill_diagonal(expected, [1, 2, 3, 4], wrap=True) np.testing.assert_array_equal(np.asarray(res), expected) # 3-d raw = np.random.rand(11, 11, 11) expected = raw.copy() np.fill_diagonal(expected, 1) expected2 = raw.copy() np.fill_diagonal(expected2, 1, wrap=True) np.testing.assert_array_equal(expected, expected2) # test 1 chunk t = tensor(raw, chunk_size=30) fill_diagonal(t, 1) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_array_equal(res, expected) t = tensor(raw, chunk_size=30) # wrap = True does not take effect when ndim > 2 fill_diagonal(t, 1, wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_array_equal(res, expected) # test multiple chunk t = tensor(raw, chunk_size=(3, 4, 5)) fill_diagonal(t, 1) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_array_equal(res, expected) t = tensor(raw, chunk_size=(3, 4, 5)) # wrap = True does not take effect when ndim > 2 fill_diagonal(t, 1, wrap=True) res = self.executor.execute_tensor(t, concat=True)[0] 
np.testing.assert_array_equal(res, expected) # test val with list type t = tensor(raw, chunk_size=(3, 4, 5)) fill_diagonal(t, [[1, 2], [3, 4]]) res = self.executor.execute_tensor(t, concat=True)[0] expected = raw.copy() np.fill_diagonal(expected, [1, 2, 3, 4]) np.testing.assert_array_equal(res, expected) # test val with tensor type t = tensor(raw, chunk_size=(3, 4, 5)) fill_diagonal(t, tensor([1, 2, 3])) res = self.executor.execute_tensor(t, concat=True)[0] expected = raw.copy() np.fill_diagonal(expected, [1, 2, 3]) np.testing.assert_array_equal(res, expected) # test val with tensor type which ndim == 0 t = tensor(raw, chunk_size=(3, 4, 5)) fill_diagonal(t, tensor([1, 2, 3]).sum()) res = self.executor.execute_tensor(t, concat=True)[0] expected = raw.copy() np.fill_diagonal(expected, 6) np.testing.assert_array_equal(res, expected) # test val with ndarray type which size is too long t = tensor(raw, chunk_size=(3, 4, 5)) fill_diagonal(t, np.arange(20)) res = self.executor.execute_tensor(t, concat=True)[0] expected = raw.copy() np.fill_diagonal(expected, np.arange(20)) np.testing.assert_array_equal(res, expected)
class Test(TestBase):
    """Execution tests for dataframe creation: from pandas objects, from
    tensors, from records, and from CSV files (incl. GPU via cudf)."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testFromPandasDataFrameExecution(self):
        """A chunked dataframe built from a pandas DataFrame (with a
        MultiIndex-style index) round-trips unchanged."""
        pdf = pd.DataFrame(np.random.rand(20, 30),
                           index=[np.arange(20), np.arange(20, 0, -1)])
        df = from_pandas_df(pdf, chunk_size=(13, 21))
        result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_frame_equal(pdf, result)

    def testFromPandasSeriesExecution(self):
        """A chunked series built from a pandas Series round-trips unchanged."""
        ps = pd.Series(np.random.rand(20),
                       index=[np.arange(20), np.arange(20, 0, -1)], name='a')
        series = from_pandas_series(ps, chunk_size=13)
        result = self.executor.execute_dataframe(series, concat=True)[0]
        pd.testing.assert_series_equal(ps, result)

    def testInitializerExecution(self):
        """md.DataFrame / md.Series constructors accept pandas objects
        directly and preserve their contents."""
        pdf = pd.DataFrame(np.random.rand(20, 30),
                           index=[np.arange(20), np.arange(20, 0, -1)])
        df = md.DataFrame(pdf, chunk_size=(15, 10))
        result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_frame_equal(pdf, result)

        ps = pd.Series(np.random.rand(20),
                       index=[np.arange(20), np.arange(20, 0, -1)], name='a')
        series = md.Series(ps, chunk_size=7)
        result = self.executor.execute_dataframe(series, concat=True)[0]
        pd.testing.assert_series_equal(ps, result)

    def testSeriesFromTensor(self):
        """md.Series built from a mars tensor (with/without name, chunking,
        and a tensor index) matches the equivalent pandas Series."""
        data = np.random.rand(10)
        series = md.Series(mt.tensor(data), name='a')
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(data, name='a'))

        series = md.Series(mt.tensor(data, chunk_size=3))
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(data))

        series = md.Series(mt.ones((10, ), chunk_size=4))
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(np.ones(10, )))

        # index itself supplied as a (differently-chunked) tensor
        index_data = np.random.rand(10)
        series = md.Series(mt.tensor(data, chunk_size=3), name='a',
                           index=mt.tensor(index_data, chunk_size=4))
        pd.testing.assert_series_equal(
            self.executor.execute_dataframe(series, concat=True)[0],
            pd.Series(data, name='a', index=index_data))

    def testFromTensorExecution(self):
        """dataframe_from_tensor / dataframe_from_1d_tensors cover default
        RangeIndex, explicit index/columns, 1-d input, identical chunks,
        and tensor-valued indexes."""
        tensor = mt.random.rand(10, 10, chunk_size=5)
        df = dataframe_from_tensor(tensor)
        tensor_res = self.executor.execute_tensor(tensor, concat=True)[0]
        pdf_expected = pd.DataFrame(tensor_res)
        df_result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_index_equal(df_result.index, pd.RangeIndex(0, 10))
        pd.testing.assert_index_equal(df_result.columns, pd.RangeIndex(0, 10))
        pd.testing.assert_frame_equal(df_result, pdf_expected)

        # test converted with specified index_value and columns
        tensor2 = mt.random.rand(2, 2, chunk_size=1)
        df2 = dataframe_from_tensor(tensor2, index=pd.Index(['a', 'b']),
                                    columns=pd.Index([3, 4]))
        df_result = self.executor.execute_dataframe(df2, concat=True)[0]
        pd.testing.assert_index_equal(df_result.index, pd.Index(['a', 'b']))
        pd.testing.assert_index_equal(df_result.columns, pd.Index([3, 4]))

        # test converted from 1-d tensor
        tensor3 = mt.array([1, 2, 3])
        df3 = dataframe_from_tensor(tensor3)
        result3 = self.executor.execute_dataframe(df3, concat=True)[0]
        pdf_expected = pd.DataFrame(np.array([1, 2, 3]))
        pd.testing.assert_frame_equal(pdf_expected, result3)

        # test converted from identical chunks
        tensor4 = mt.ones((10, 10), chunk_size=3)
        df4 = dataframe_from_tensor(tensor4)
        result4 = self.executor.execute_dataframe(df4, concat=True)[0]
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor4, concat=True)[0])
        pd.testing.assert_frame_equal(pdf_expected, result4)

        # from tensor with given index
        tensor5 = mt.ones((10, 10), chunk_size=3)
        df5 = dataframe_from_tensor(tensor5, index=np.arange(0, 20, 2))
        result5 = self.executor.execute_dataframe(df5, concat=True)[0]
        pdf_expected = pd.DataFrame(self.executor.execute_tensor(
            tensor5, concat=True)[0], index=np.arange(0, 20, 2))
        pd.testing.assert_frame_equal(pdf_expected, result5)

        # from tensor with given index that is a tensor
        raw7 = np.random.rand(10, 10)
        tensor7 = mt.tensor(raw7, chunk_size=3)
        index_raw7 = np.random.rand(10)
        index7 = mt.tensor(index_raw7, chunk_size=4)
        df7 = dataframe_from_tensor(tensor7, index=index7)
        result7 = self.executor.execute_dataframe(df7, concat=True)[0]
        pdf_expected = pd.DataFrame(raw7, index=index_raw7)
        pd.testing.assert_frame_equal(pdf_expected, result7)

        # from tensor with given columns
        tensor6 = mt.ones((10, 10), chunk_size=3)
        df6 = dataframe_from_tensor(tensor6, columns=list('abcdefghij'))
        result6 = self.executor.execute_dataframe(df6, concat=True)[0]
        pdf_expected = pd.DataFrame(self.executor.execute_tensor(
            tensor6, concat=True)[0], columns=list('abcdefghij'))
        pd.testing.assert_frame_equal(pdf_expected, result6)

        # from 1d tensors
        raws8 = [('a', np.random.rand(8)),
                 ('b', np.random.randint(10, size=8)),
                 ('c', [
                     ''.join(np.random.choice(list(printable), size=6))
                     for _ in range(8)
                 ])]
        tensors8 = [mt.tensor(r[1], chunk_size=3) for r in raws8]
        df8 = dataframe_from_1d_tensors(tensors8,
                                        columns=[r[0] for r in raws8])
        result = self.executor.execute_dataframe(df8, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8))
        pd.testing.assert_frame_equal(result, pdf_expected)

        # from 1d tensors and specify index with a tensor
        index_raw9 = np.random.rand(8)
        index9 = mt.tensor(index_raw9, chunk_size=4)
        df9 = dataframe_from_1d_tensors(tensors8,
                                        columns=[r[0] for r in raws8],
                                        index=index9)
        result = self.executor.execute_dataframe(df9, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8), index=index_raw9)
        pd.testing.assert_frame_equal(result, pdf_expected)

    def testFromRecordsExecution(self):
        """from_records accepts both a structured mars tensor and a raw
        structured ndarray, matching DataFrame.from_records."""
        dtype = np.dtype([('x', 'int'), ('y', 'double'), ('z', '<U16')])

        ndarr = np.ones((10, ), dtype=dtype)
        pdf_expected = pd.DataFrame.from_records(ndarr,
                                                 index=pd.RangeIndex(10))

        # from structured array of mars
        tensor = mt.ones((10, ), dtype=dtype, chunk_size=3)
        df1 = from_records(tensor)
        df1_result = self.executor.execute_dataframe(df1, concat=True)[0]
        pd.testing.assert_frame_equal(df1_result, pdf_expected)

        # from structured array of numpy
        df2 = from_records(ndarr)
        df2_result = self.executor.execute_dataframe(df2, concat=True)[0]
        pd.testing.assert_frame_equal(df2_result, pdf_expected)

    def testReadCSVExecution(self):
        """md.read_csv matches pd.read_csv across separators, NaNs,
        datetime indexes, gzip compression, file lists and glob patterns,
        for both single-chunk and chunk_bytes-split reads."""
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            # tiny chunk_bytes forces a multi-chunk read of the same file
            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0, chunk_bytes=10),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test sep
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path, sep=';')

            pdf = pd.read_csv(file_path, sep=';', index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, sep=';', index_col=0),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, sep=';', index_col=0, chunk_bytes=10),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test missing value
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame({
                'c1': [np.nan, 'a', 'b', 'c'],
                'c2': [1, 2, 3, np.nan],
                'c3': [np.nan, np.nan, 3.4, 2.2]
            })
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0, chunk_bytes=12),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # datetime index
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            index = pd.date_range(start='1/1/2018', periods=100)
            df = pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                    'col3': np.arange(100)
                },
                index=index)
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(md.read_csv(
                file_path, index_col=0, chunk_bytes=100), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test compression
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.gzip')
        try:
            index = pd.date_range(start='1/1/2018', periods=100)
            df = pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                    'col3': np.arange(100)
                },
                index=index)
            df.to_csv(file_path, compression='gzip')

            pdf = pd.read_csv(file_path, compression='gzip', index_col=0)
            mdf = self.executor.execute_dataframe(md.read_csv(
                file_path, compression='gzip', index_col=0),
                concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self.executor.execute_dataframe(md.read_csv(
                file_path, compression='gzip', index_col=0,
                chunk_bytes='1k'), concat=True)[0]
            pd.testing.assert_frame_equal(pdf, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test multiply files
        tempdir = tempfile.mkdtemp()
        try:
            df = pd.DataFrame(np.random.rand(300, 3),
                              columns=['a', 'b', 'c'])

            file_paths = [
                os.path.join(tempdir, 'test{}.csv'.format(i))
                for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])

            mdf = self.executor.execute_dataframe(
                md.read_csv(file_paths, index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf)

            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_paths, index_col=0, chunk_bytes=50),
                concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf2)
        finally:
            shutil.rmtree(tempdir)

        # test wildcards in path
        tempdir = tempfile.mkdtemp()
        try:
            df = pd.DataFrame(np.random.rand(300, 3),
                              columns=['a', 'b', 'c'])

            file_paths = [
                os.path.join(tempdir, 'test{}.csv'.format(i))
                for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])

            # As we can not guarantee the order in which these files are processed,
            # the result may not keep the original order.
            mdf = self.executor.execute_dataframe(md.read_csv(
                '{}/*.csv'.format(tempdir), index_col=0), concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf.sort_index())

            mdf2 = self.executor.execute_dataframe(md.read_csv(
                '{}/*.csv'.format(tempdir), index_col=0, chunk_bytes=50),
                concat=True)[0]
            pd.testing.assert_frame_equal(df, mdf2.sort_index())
        finally:
            shutil.rmtree(tempdir)

    @require_cudf
    def testReadCSVGPUExecution(self):
        """GPU read_csv (cudf-backed) matches the pandas reference after
        index reset; requires a cudf installation."""
        tempdir = tempfile.mkdtemp()
        file_path = os.path.join(tempdir, 'test.csv')
        try:
            df = pd.DataFrame({
                'col1': np.random.rand(100),
                'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                'col3': np.arange(100)
            })
            df.to_csv(file_path, index=False)

            pdf = pd.read_csv(file_path)
            mdf = self.executor.execute_dataframe(md.read_csv(file_path,
                                                              gpu=True),
                                                  concat=True)[0]
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf.to_pandas().reset_index(drop=True))

            mdf2 = self.executor.execute_dataframe(md.read_csv(
                file_path, gpu=True, chunk_bytes=200), concat=True)[0]
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf2.to_pandas().reset_index(drop=True))
        finally:
            shutil.rmtree(tempdir)
class Test(unittest.TestCase):
    """Execution tests for linear-algebra operators: QR, SVD, randomized
    SVD, Cholesky, LU, and triangular solve.

    NOTE(review): this class continues beyond the visible chunk; the last
    method below is truncated here and completed later in the file.
    """

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testQRExecution(self):
        """q.dot(r) reconstructs the input for several chunk layouts,
        including the Short-and-Fat ('sfqr') method."""
        data = np.random.randn(18, 6)

        a = tensor(data, chunk_size=(3, 6))
        q, r = qr(a)
        t = q.dot(r)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        a = tensor(data, chunk_size=(9, 6))
        q, r = qr(a)
        t = q.dot(r)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        a = tensor(data, chunk_size=3)
        q, r = qr(a)
        t = q.dot(r)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        # test for Short-and-Fat QR
        data = np.random.randn(6, 18)

        a = tensor(data, chunk_size=(6, 9))
        q, r = qr(a, method='sfqr')
        t = q.dot(r)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        a = tensor(data, chunk_size=(3, 3))
        q, r = qr(a, method='sfqr')
        t = q.dot(r)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        a = tensor(data, chunk_size=(6, 3))
        q, r = qr(a, method='sfqr')
        t = q.dot(r)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

    def testSVDExecution(self):
        """U.diag(s).V reconstructs the (complex) input for several chunk
        layouts and both tall and wide matrices."""
        data = np.random.randn(18, 6) + 1j * np.random.randn(18, 6)

        a = tensor(data, chunk_size=(9, 6))
        U, s, V = svd(a)
        t = U.dot(diag(s).dot(V))

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        a = tensor(data, chunk_size=(18, 6))
        U, s, V = svd(a)
        t = U.dot(diag(s).dot(V))

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        a = tensor(data, chunk_size=(2, 6))
        U, s, V = svd(a)
        t = U.dot(diag(s).dot(V))

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        data = np.random.randn(6, 18) + 1j * np.random.randn(6, 18)

        a = tensor(data)
        U, s, V = svd(a)
        t = U.dot(diag(s).dot(V))

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, data))

        # test for matrix of ones
        data = np.ones((20, 10))

        a = tensor(data, chunk_size=10)
        s = svd(a)[1]
        res = self.executor.execute_tensor(s, concat=True)[0]
        # NOTE(review): `a` (a mars tensor) is passed to np.linalg.svd here;
        # presumably it coerces via __array__ — confirm, or pass `data`.
        expected = np.linalg.svd(a)[1]
        np.testing.assert_array_almost_equal(res, expected)

    def testRandomizedSVDExecution(self):
        """randomized_svd approximates the exact singular values/vectors of
        a low-rank matrix, with dtype-preservation rules checked."""
        n_samples = 100
        n_features = 500
        rank = 5
        k = 10
        for dtype in (np.int32, np.int64, np.float32, np.float64):
            # generate a matrix X of approximate effective rank `rank` and no noise
            # component (very structured signal):
            X = make_low_rank_matrix(n_samples=n_samples,
                                     n_features=n_features,
                                     effective_rank=rank,
                                     tail_strength=0.0,
                                     random_state=0).astype(dtype, copy=False)
            self.assertEqual(X.shape, (n_samples, n_features))
            dtype = np.dtype(dtype)
            decimal = 5 if dtype == np.float32 else 7

            # compute the singular values of X using the slow exact method
            X_res = self.executor.execute_tensor(X, concat=True)[0]
            U, s, V = np.linalg.svd(X_res, full_matrices=False)

            # Convert the singular values to the specific dtype
            U = U.astype(dtype, copy=False)
            s = s.astype(dtype, copy=False)
            V = V.astype(dtype, copy=False)

            for normalizer in ['auto', 'LU', 'QR']:  # 'none' would not be stable
                # compute the singular values of X using the fast approximate method
                Ua, sa, Va = randomized_svd(
                    X, k, power_iteration_normalizer=normalizer,
                    random_state=0)

                # If the input dtype is float, then the output dtype is float of the
                # same bit size (f32 is not upcast to f64)
                # But if the input dtype is int, the output dtype is float64
                if dtype.kind == 'f':
                    self.assertEqual(Ua.dtype, dtype)
                    self.assertEqual(sa.dtype, dtype)
                    self.assertEqual(Va.dtype, dtype)
                else:
                    self.assertEqual(Ua.dtype, np.float64)
                    self.assertEqual(sa.dtype, np.float64)
                    self.assertEqual(Va.dtype, np.float64)

                self.assertEqual(Ua.shape, (n_samples, k))
                self.assertEqual(sa.shape, (k, ))
                self.assertEqual(Va.shape, (k, n_features))

                # ensure that the singular values of both methods are equal up to the
                # real rank of the matrix
                sa_res = self.executor.execute_tensor(sa, concat=True)[0]
                np.testing.assert_almost_equal(s[:k], sa_res,
                                               decimal=decimal)

                # check the singular vectors too (while not checking the sign)
                dot_res = self.executor.execute_tensor(dot(Ua, Va),
                                                       concat=True)[0]
                np.testing.assert_almost_equal(np.dot(U[:, :k], V[:k, :]),
                                               dot_res, decimal=decimal)

    def testCholeskyExecution(self):
        """cholesky returns proper triangular factors whose product restores
        the symmetric input, across chunk layouts."""
        data = np.random.randint(1, 10, (10, 10))
        symmetric_data = data.dot(data.T)

        a = tensor(symmetric_data, chunk_size=5)

        U = cholesky(a)
        t = U.T.dot(U)

        res_u = self.executor.execute_tensor(U, concat=True)[0]
        # factor must be upper-triangular
        np.testing.assert_allclose(np.triu(res_u), res_u)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, symmetric_data))

        L = cholesky(a, lower=True)
        U = cholesky(a)
        t = L.dot(U)

        res = self.executor.execute_tensor(t, concat=True)[0]
        self.assertTrue(np.allclose(res, symmetric_data))

        a = tensor(symmetric_data, chunk_size=2)

        L = cholesky(a, lower=True)
        U = cholesky(a)
        t = L.dot(U)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, symmetric_data)

        a = tensor(symmetric_data, chunk_size=(1, 2))

        L = cholesky(a, lower=True)
        U = cholesky(a)
        t = L.dot(U)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, symmetric_data)

        a = tensor(symmetric_data, chunk_size=4)

        L = cholesky(a, lower=True)
        U = cholesky(a)
        t = L.dot(U)

        res_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.triu(res_u), res_u)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, symmetric_data)

        a = tensor(symmetric_data, chunk_size=3)

        L = cholesky(a, lower=True)
        U = cholesky(a)
        t = L.dot(U)

        res_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.triu(res_u), res_u)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, symmetric_data)

    def testLUExecution(self):
        """lu() yields P, L, U with proper triangular structure and
        P.L.U == input, for square/tall/wide dense and sparse matrices."""
        np.random.seed(1)

        # square matrix
        data = np.random.randint(1, 10, (6, 6))

        a = tensor(data)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=2)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=(2, 3))
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=4)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        # shape[0] > shape[1]
        data = np.random.randint(1, 10, (10, 6))

        a = tensor(data)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=2)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=(2, 3))
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=4)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l, result_u = self.executor.execute_tensors([L, U])
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        # shape[0] < shape[1]
        data = np.random.randint(1, 10, (6, 10))

        a = tensor(data)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=2)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=(2, 3))
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        a = tensor(data, chunk_size=4)
        P, L, U = lu(a)

        # check lower and upper triangular matrix
        result_l, result_u = self.executor.execute_tensors([L, U])
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data)

        # test for sparse
        data = sps.csr_matrix([[2, 0, 0, 0, 5, 2], [0, 6, 1, 0, 0, 6],
                               [8, 0, 9, 0, 0, 2], [0, 6, 0, 8, 7, 3],
                               [7, 0, 6, 1, 7, 0], [0, 0, 0, 7, 0, 8]])

        a = tensor(data)
        P, L, U = lu(a)
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]

        # check lower and upper triangular matrix
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)
        # sparse input must produce sparse factors
        self.assertIsInstance(result_l, SparseNDArray)
        self.assertIsInstance(result_u, SparseNDArray)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_almost_equal(data.A, res)

        a = tensor(data, chunk_size=2)
        P, L, U = lu(a)
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]

        # check lower and upper triangular matrix
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)
        self.assertIsInstance(result_l, SparseNDArray)
        self.assertIsInstance(result_u, SparseNDArray)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_almost_equal(data.A, res)

        a = tensor(data, chunk_size=(2, 3))
        P, L, U = lu(a)
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]

        # check lower and upper triangular matrix
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)
        self.assertIsInstance(result_l, SparseNDArray)
        self.assertIsInstance(result_u, SparseNDArray)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_almost_equal(data.A, res)

        a = tensor(data, chunk_size=4)
        P, L, U = lu(a)
        result_l = self.executor.execute_tensor(L, concat=True)[0]
        result_u = self.executor.execute_tensor(U, concat=True)[0]

        # check lower and upper triangular matrix
        np.testing.assert_allclose(np.tril(result_l), result_l)
        np.testing.assert_allclose(np.triu(result_u), result_u)
        self.assertIsInstance(result_l, SparseNDArray)
        self.assertIsInstance(result_u, SparseNDArray)

        t = P.dot(L).dot(U)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_almost_equal(data.A, res)

    def testSolveTriangular(self):
        """solve_triangular against the triu/tril reconstruction of the
        system. (Truncated here; continues later in the file.)"""
        from mars.tensor import tril, triu
        np.random.seed(1)

        data1 = np.random.randint(1, 10, (20, 20))
        data2 = np.random.randint(1, 10, (20, ))

        A = tensor(data1, chunk_size=20)
        b = tensor(data2, chunk_size=20)

        x = solve_triangular(A, b)
        t = triu(A).dot(x)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data2)

        x = solve_triangular(A, b, lower=True)
        t = tril(A).dot(x)

        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(res, data2)

        A = tensor(data1, chunk_size=10)
b = tensor(data2, chunk_size=10) x = solve_triangular(A, b) t = triu(A).dot(x) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_allclose(res, data2) x = solve_triangular(A, b, lower=True) t = tril(A).dot(x) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_allclose(res, data2) data1 = np.random.randint(1, 10, (10, 10)) data2 = np.random.randint(1, 10, (10, 5)) A = tensor(data1, chunk_size=10) b = tensor(data2, chunk_size=10) x = solve_triangular(A, b) t = triu(A).dot(x) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_allclose(res, data2) x = solve_triangular(A, b, lower=True) t = tril(A).dot(x) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_allclose(res, data2) A = tensor(data1, chunk_size=3) b = tensor(data2, chunk_size=3) x = solve_triangular(A, b) t = triu(A).dot(x) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_allclose(res, data2) x = solve_triangular(A, b, lower=True) t = tril(A).dot(x) res = self.executor.execute_tensor(t, concat=True)[0] np.testing.assert_allclose(res, data2) # test sparse data1 = sps.csr_matrix(np.triu(np.random.randint(1, 10, (10, 10)))) data2 = np.random.random((10, )) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve_triangular(A, b) result_x = self.executor.execute_tensor(x, concat=True)[0] result_b = data1.dot(result_x) self.assertIsInstance(result_x, SparseNDArray) np.testing.assert_allclose(result_b, data2) data1 = sps.csr_matrix(np.triu(np.random.randint(1, 10, (10, 10)))) data2 = np.random.random((10, 2)) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve_triangular(A, b) result_x = self.executor.execute_tensor(x, concat=True)[0] result_b = data1.dot(result_x) self.assertIsInstance(result_x, SparseNDArray) np.testing.assert_allclose(result_b, data2) def testSolve(self): import scipy.linalg np.random.seed(1) data1 = np.random.randint(1, 10, (20, 20)) data2 = 
np.random.randint(1, 10, (20, )) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b) res = self.executor.execute_tensor(x, concat=True)[0] np.testing.assert_allclose(res, scipy.linalg.solve(data1, data2)) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) data2 = np.random.randint(1, 10, (20, 5)) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b) res = self.executor.execute_tensor(x, concat=True)[0] np.testing.assert_allclose(res, scipy.linalg.solve(data1, data2)) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) data2 = np.random.randint(1, 10, (20, 20)) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b) res = self.executor.execute_tensor(x, concat=True)[0] np.testing.assert_allclose(res, scipy.linalg.solve(data1, data2)) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) # test for not all chunks are square in matrix A data2 = np.random.randint(1, 10, (20, )) A = tensor(data1, chunk_size=6) b = tensor(data2, chunk_size=6) x = solve(A, b) res = self.executor.execute_tensor(x, concat=True)[0] np.testing.assert_allclose(res, scipy.linalg.solve(data1, data2)) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) A = tensor(data1, chunk_size=(7, 6)) b = tensor(data2, chunk_size=6) x = solve(A, b) res = self.executor.execute_tensor(x, concat=True)[0] np.testing.assert_allclose(res, scipy.linalg.solve(data1, data2)) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) # test sparse data1 = sps.csr_matrix(np.random.randint(1, 10, (20, 20))) data2 = np.random.randint(1, 10, (20, )) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b) res = self.executor.execute_tensor(x, concat=True)[0] self.assertIsInstance(res, 
SparseNDArray) np.testing.assert_allclose(data1.dot(res), data2) data2 = np.random.randint(1, 10, (20, 5)) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] self.assertIsInstance(res, SparseNDArray) np.testing.assert_allclose(res, data2) data2 = np.random.randint(1, 10, (20, 20)) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] self.assertIsInstance(res, SparseNDArray) np.testing.assert_allclose(res, data2) # test for not all chunks are square in matrix A data2 = np.random.randint(1, 10, (20, )) A = tensor(data1, chunk_size=6) b = tensor(data2, chunk_size=6) x = solve(A, b) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) def testSolveSymPos(self): import scipy.linalg np.random.seed(1) data = np.random.randint(1, 10, (20, 20)) data_l = np.tril(data) data1 = data_l.dot(data_l.T) data2 = np.random.randint(1, 10, (20, )) A = tensor(data1, chunk_size=5) b = tensor(data2, chunk_size=5) x = solve(A, b, sym_pos=True) res = self.executor.execute_tensor(x, concat=True)[0] np.testing.assert_allclose(res, scipy.linalg.solve(data1, data2)) res = self.executor.execute_tensor(A.dot(x), concat=True)[0] np.testing.assert_allclose(res, data2) def testInv(self): import scipy.linalg np.random.seed(1) data = np.random.randint(1, 10, (20, 20)) A = tensor(data) inv_A = inv(A) res = self.executor.execute_tensor(inv_A, concat=True)[0] self.assertTrue(np.allclose(res, scipy.linalg.inv(data))) res = self.executor.execute_tensor(A.dot(inv_A), concat=True)[0] self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) A = tensor(data, chunk_size=5) inv_A = inv(A) res = self.executor.execute_tensor(inv_A, concat=True)[0] self.assertTrue(np.allclose(res, scipy.linalg.inv(data))) res = self.executor.execute_tensor(A.dot(inv_A), concat=True)[0] 
self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) # test 1 chunk A = tensor(data, chunk_size=20) inv_A = inv(A) res = self.executor.execute_tensor(inv_A, concat=True)[0] self.assertTrue(np.allclose(res, scipy.linalg.inv(data))) res = self.executor.execute_tensor(A.dot(inv_A), concat=True)[0] self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) B = A.T.dot(A) inv_B = inv(B) res = self.executor.execute_tensor(inv_B, concat=True)[0] self.assertTrue(np.allclose(res, scipy.linalg.inv(data.T.dot(data)))) res = self.executor.execute_tensor(B.dot(inv_B), concat=True)[0] self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) # test for not all chunks are square in matrix A A = tensor(data, chunk_size=8) inv_A = inv(A) res = self.executor.execute_tensor(inv_A, concat=True)[0] self.assertTrue(np.allclose(res, scipy.linalg.inv(data))) res = self.executor.execute_tensor(A.dot(inv_A), concat=True)[0] self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) # test sparse data = np.random.randint(1, 10, (20, 20)) sp_data = sps.csr_matrix(data) A = tensor(sp_data, chunk_size=5) inv_A = inv(A) res = self.executor.execute_tensor(inv_A, concat=True)[0] self.assertIsInstance(res, SparseNDArray) self.assertTrue(np.allclose(res, scipy.linalg.inv(data))) res = self.executor.execute_tensor(A.dot(inv_A), concat=True)[0] self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) # test for not all chunks are square in matrix A A = tensor(sp_data, chunk_size=8) inv_A = inv(A) res = self.executor.execute_tensor(inv_A, concat=True)[0] self.assertIsInstance(res, SparseNDArray) self.assertTrue(np.allclose(res, scipy.linalg.inv(data))) res = self.executor.execute_tensor(A.dot(inv_A), concat=True)[0] self.assertTrue(np.allclose(res, np.eye(data.shape[0], dtype=float))) @ignore_warning def testNormExecution(self): d = np.arange(9) - 4 d2 = d.reshape(3, 3) ma = [ tensor(d, chunk_size=2), tensor(d, chunk_size=9), tensor(d2, 
chunk_size=(2, 3)), tensor(d2, chunk_size=3) ] for i, a in enumerate(ma): data = d if i < 2 else d2 for ord in (None, 'nuc', np.inf, -np.inf, 0, 1, -1, 2, -2): for axis in (0, 1, (0, 1)): for keepdims in (True, False): try: expected = np.linalg.norm(data, ord=ord, axis=axis, keepdims=keepdims) t = norm(a, ord=ord, axis=axis, keepdims=keepdims) concat = t.ndim > 0 res = self.executor.execute_tensor( t, concat=concat)[0] np.testing.assert_allclose(res, expected, atol=.0001) except ValueError: continue m = norm(tensor(d)) expected = self.executor.execute_tensor(m)[0] res = np.linalg.norm(d) self.assertEqual(expected, res) d = uniform(-0.5, 0.5, size=(500, 2), chunk_size=50) inside = (norm(d, axis=1) < 0.5).sum().astype(float) t = inside / 500 * 4 res = self.executor.execute_tensor(t)[0] self.assertAlmostEqual(res, 3.14, delta=1) raw = np.random.RandomState(0).rand(10, 10) d = norm(tensor(raw, chunk_size=5)) expected = self.executor.execute_tensor(d, concat=True)[0] result = np.linalg.norm(raw) np.testing.assert_allclose(expected, result) def testTensordotExecution(self): size_executor = ExecutorForTest( sync_provider_type=ExecutorForTest.SyncProviderType.MOCK) a_data = np.arange(60).reshape(3, 4, 5) a = tensor(a_data, chunk_size=2) b_data = np.arange(24).reshape(4, 3, 2) b = tensor(b_data, chunk_size=2) axes = ([1, 0], [0, 1]) c = tensordot(a, b, axes=axes) size_res = size_executor.execute_tensor(c, mock=True) self.assertEqual(sum(s[0] for s in size_res), c.nbytes) self.assertEqual(sum(s[1] for s in size_res), c.nbytes) res = self.executor.execute_tensor(c) expected = np.tensordot(a_data, b_data, axes=axes) self.assertTrue(np.array_equal(res[0], expected[:2, :])) self.assertTrue(np.array_equal(res[1], expected[2:4, :])) self.assertTrue(np.array_equal(res[2], expected[4:, :])) a = ones((1000, 2000), chunk_size=500) b = ones((2000, 100), chunk_size=500) c = dot(a, b) res = self.executor.execute_tensor(c) expected = np.dot(np.ones((1000, 2000)), np.ones((2000, 100))) 
self.assertEqual(len(res), 2) self.assertTrue(np.array_equal(res[0], expected[:500, :])) self.assertTrue(np.array_equal(res[1], expected[500:, :])) a = ones((10, 8), chunk_size=2) b = ones((8, 10), chunk_size=2) c = a.dot(b) res = self.executor.execute_tensor(c) self.assertEqual(len(res), 25) for r in res: self.assertTrue(np.array_equal(r, np.tile([8], [2, 2]))) a = ones((500, 500), chunk_size=500) b = ones((500, 100), chunk_size=500) c = a.dot(b) res = self.executor.execute_tensor(c) self.assertTrue(np.array_equal(res[0], np.tile([500], [500, 100]))) raw_a = np.random.random((100, 200, 50)) raw_b = np.random.random((200, 10, 100)) a = tensor(raw_a, chunk_size=50) b = tensor(raw_b, chunk_size=33) c = tensordot(a, b, axes=((0, 1), (2, 0))) res = self.executor.execute_tensor(c, concat=True) expected = np.tensordot(raw_a, raw_b, axes=(c.op.a_axes, c.op.b_axes)) self.assertTrue(np.allclose(res[0], expected)) a = ones((1000, 2000), chunk_size=500) b = ones((100, 2000), chunk_size=500) c = inner(a, b) res = self.executor.execute_tensor(c) expected = np.inner(np.ones((1000, 2000)), np.ones((100, 2000))) self.assertEqual(len(res), 2) self.assertTrue(np.array_equal(res[0], expected[:500, :])) self.assertTrue(np.array_equal(res[1], expected[500:, :])) a = ones((100, 100), chunk_size=30) b = ones((100, 100), chunk_size=30) c = a.dot(b) res = self.executor.execute_tensor(c, concat=True)[0] np.testing.assert_array_equal(res, np.ones((100, 100)) * 100) def testSparseDotSizeExecution(self): from mars.tensor.linalg.tensordot import TensorTensorDot from mars.executor import register, register_default chunk_sizes = dict() chunk_nbytes = dict() chunk_input_sizes = dict() chunk_input_nbytes = dict() def execute_size(t): def _tensordot_size_recorder(ctx, op): TensorTensorDot.estimate_size(ctx, op) chunk_key = op.outputs[0].key chunk_sizes[chunk_key] = ctx[chunk_key] chunk_nbytes[chunk_key] = op.outputs[0].nbytes input_sizes = dict( (inp.op.key, ctx[inp.key][0]) for inp in op.inputs) 
chunk_input_sizes[chunk_key] = sum(input_sizes.values()) input_nbytes = dict( (inp.op.key, inp.nbytes) for inp in op.inputs) chunk_input_nbytes[chunk_key] = sum(input_nbytes.values()) size_executor = ExecutorForTest( sync_provider_type=ExecutorForTest.SyncProviderType.MOCK) try: chunk_sizes.clear() chunk_nbytes.clear() chunk_input_sizes.clear() chunk_input_nbytes.clear() register(TensorTensorDot, size_estimator=_tensordot_size_recorder) size_executor.execute_tensor(t, mock=True) finally: register_default(TensorTensorDot) a_data = sps.random(5, 9, density=.1) b_data = sps.random(9, 10, density=.2) a = tensor(a_data, chunk_size=2) b = tensor(b_data, chunk_size=3) c = dot(a, b) execute_size(c) for key in chunk_input_sizes.keys(): self.assertGreaterEqual(chunk_sizes[key][1], chunk_input_sizes[key]) c2 = dot(a, b, sparse=False) execute_size(c2) for key in chunk_input_sizes.keys(): self.assertEqual(chunk_sizes[key][0], chunk_nbytes[key]) self.assertEqual(chunk_sizes[key][1], chunk_input_nbytes[key] + chunk_nbytes[key]) def testSparseDotExecution(self): a_data = sps.random(5, 9, density=.1) b_data = sps.random(9, 10, density=.2) a = tensor(a_data, chunk_size=2) b = tensor(b_data, chunk_size=3) c = dot(a, b) res = self.executor.execute_tensor(c, concat=True)[0] self.assertTrue(issparse(res)) np.testing.assert_allclose(res.toarray(), a_data.dot(b_data).toarray()) c2 = dot(a, b, sparse=False) res = self.executor.execute_tensor(c2, concat=True)[0] self.assertFalse(issparse(res)) np.testing.assert_allclose(res, a_data.dot(b_data).toarray()) c3 = tensordot(a, b.T, (-1, -1), sparse=False) res = self.executor.execute_tensor(c3, concat=True)[0] self.assertFalse(issparse(res)) np.testing.assert_allclose(res, a_data.dot(b_data).toarray()) c = inner(a, b.T) res = self.executor.execute_tensor(c, concat=True)[0] self.assertTrue(issparse(res)) np.testing.assert_allclose(res.toarray(), a_data.dot(b_data).toarray()) c = inner(a, b.T, sparse=False) res = self.executor.execute_tensor(c, 
concat=True)[0] self.assertFalse(issparse(res)) np.testing.assert_allclose(res, a_data.dot(b_data).toarray()) # test vector inner a_data = np.random.rand(5) b_data = np.random.rand(5) a = tensor(a_data, chunk_size=2).tosparse() b = tensor(b_data, chunk_size=2).tosparse() c = inner(a, b) res = self.executor.execute_tensor(c, concat=True)[0] self.assertTrue(np.isscalar(res)) np.testing.assert_allclose(res, np.inner(a_data, b_data)) def testVdotExecution(self): a_data = np.array([1 + 2j, 3 + 4j]) b_data = np.array([5 + 6j, 7 + 8j]) a = tensor(a_data, chunk_size=1) b = tensor(b_data, chunk_size=1) t = vdot(a, b) res = self.executor.execute_tensor(t)[0] expected = np.vdot(a_data, b_data) np.testing.assert_equal(res, expected) a_data = np.array([[1, 4], [5, 6]]) b_data = np.array([[4, 1], [2, 2]]) a = tensor(a_data, chunk_size=1) b = tensor(b_data, chunk_size=1) t = vdot(a, b) res = self.executor.execute_tensor(t)[0] expected = np.vdot(a_data, b_data) np.testing.assert_equal(res, expected) def testMatmulExecution(self): data_a = np.random.randn(10, 20) data_b = np.random.randn(20) a = tensor(data_a, chunk_size=2) b = tensor(data_b, chunk_size=3) c = matmul(a, b) res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_a, data_b) np.testing.assert_allclose(res, expected) data_a = np.random.randn(10, 20) data_b = np.random.randn(10) a = tensor(data_a, chunk_size=2) b = tensor(data_b, chunk_size=3) c = matmul(b, a) res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_b, data_a) np.testing.assert_allclose(res, expected) data_a = np.random.randn(15, 1, 20, 30) data_b = np.random.randn(1, 11, 30, 20) a = tensor(data_a, chunk_size=12) b = tensor(data_b, chunk_size=13) c = matmul(a, b) res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_a, data_b) np.testing.assert_allclose(res, expected, atol=.0001) a = arange(2 * 2 * 4, chunk_size=1).reshape((2, 2, 4)) b = arange(2 * 2 * 4, chunk_size=1).reshape((2, 
4, 2)) c = matmul(a, b) res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul( np.arange(2 * 2 * 4).reshape(2, 2, 4), np.arange(2 * 2 * 4).reshape(2, 4, 2)) np.testing.assert_allclose(res, expected, atol=.0001) data_a = sps.random(10, 20) data_b = sps.random(20, 5) a = tensor(data_a, chunk_size=2) b = tensor(data_b, chunk_size=3) c = matmul(a, b) res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_a.toarray(), data_b.toarray()) np.testing.assert_allclose(res.toarray(), expected) # test order data_a = np.asfortranarray(np.random.randn(10, 20)) data_b = np.asfortranarray(np.random.randn(20, 30)) a = tensor(data_a, chunk_size=12) b = tensor(data_b, chunk_size=13) c = matmul(a, b) res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_a, data_b) np.testing.assert_allclose(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) c = matmul(a, b, order='A') res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_a, data_b, order='A') np.testing.assert_allclose(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS']) c = matmul(a, b, order='C') res = self.executor.execute_tensor(c, concat=True)[0] expected = np.matmul(data_a, data_b, order='C') np.testing.assert_allclose(res, expected) self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS']) self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])
class Test(unittest.TestCase):
    """Execution tests for reshape, including the shuffle-based path."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testReshapeExecution(self):
        """reshape() against np.reshape, checking values and memory order."""
        x = ones((1, 2, 3), chunk_size=[4, 3, 5])
        y = x.reshape(3, 2)
        res = self.executor.execute_tensor(y)[0]
        self.assertEqual(y.shape, (3, 2))
        np.testing.assert_equal(res, np.ones((3, 2)))

        data = np.random.rand(6, 4)
        x2 = tensor(data, chunk_size=2)
        y2 = x2.reshape(3, 8, order='F')
        res = self.executor.execute_tensor(y2, concat=True)[0]
        expected = data.reshape((3, 8), order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

        # Fortran-ordered input reshaped with the default (C) order
        data2 = np.asfortranarray(np.random.rand(6, 4))
        x3 = tensor(data2, chunk_size=2)
        y3 = x3.reshape(3, 8)
        res = self.executor.execute_tensor(y3, concat=True)[0]
        expected = data2.reshape((3, 8))
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])

        # Fortran-ordered input reshaped with order='F'
        data2 = np.asfortranarray(np.random.rand(6, 4))
        x3 = tensor(data2, chunk_size=2)
        y3 = x3.reshape(3, 8, order='F')
        res = self.executor.execute_tensor(y3, concat=True)[0]
        expected = data2.reshape((3, 8), order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

    def testShuffleReshapeExecution(self):
        """reshape() forced through the shuffle-based implementation."""
        a = ones((31, 27), chunk_size=10)
        b = a.reshape(27, 31)
        b.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(b, concat=True)[0]
        np.testing.assert_array_equal(res, np.ones((27, 31)))

        b2 = a.reshape(27, 31, order='F')
        # BUG FIX: the flag was previously set on ``b.op`` (the C-order
        # reshape above), so the order='F' case never exercised the
        # shuffle path; set it on ``b2`` as clearly intended.
        b2.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(b2)[0]
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

        data = np.random.rand(6, 4)
        x2 = tensor(data, chunk_size=2)
        y2 = x2.reshape(4, 6, order='F')
        y2.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(y2, concat=True)[0]
        expected = data.reshape((4, 6), order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

        data2 = np.asfortranarray(np.random.rand(6, 4))
        x3 = tensor(data2, chunk_size=2)
        y3 = x3.reshape(4, 6)
        y3.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(y3, concat=True)[0]
        expected = data2.reshape((4, 6))
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])
# NOTE(review): method of a test class whose ``class`` line was lost in the
# whitespace-mangled paste; the `with`-block nesting below is reconstructed
# from the flattened text — confirm against upstream history.
def testStoreHDF5Execution(self):
    """tohdf5() accepting a filename, an h5py File and an h5py Dataset."""
    raw = np.random.RandomState(0).rand(10, 20)

    group_name = 'test_group'
    dataset_name = 'test_dataset'

    t1 = tensor(raw, chunk_size=20)
    t2 = tensor(raw, chunk_size=9)

    # unsupported target type must be rejected
    with self.assertRaises(TypeError):
        tohdf5(object(), t2)

    this = self

    class MockSession:
        def __init__(self):
            self.executor = this.executor

    ctx = LocalContext(MockSession())
    executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(
                d, 'test_store_{}.hdf5'.format(int(time.time())))

            # test 1 chunk
            r = tohdf5(filename, t1, group=group_name, dataset=dataset_name)
            executor.execute_tensor(r)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name,
                                                     dataset_name)])
                np.testing.assert_array_equal(result, raw)

            # test filename
            r = tohdf5(filename, t2, group=group_name, dataset=dataset_name)
            executor.execute_tensor(r)

            # multi-chunk writes must be serialized through an exclusive node
            rt = get_tiled(r)
            self.assertEqual(type(rt.chunks[0].inputs[1].op).__name__,
                             'SuccessorsExclusive')
            self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name,
                                                     dataset_name)])
                np.testing.assert_array_equal(result, raw)

            # group/dataset are required when only a filename is given
            with self.assertRaises(ValueError):
                tohdf5(filename, t2)

            with h5py.File(filename, 'r') as f:
                # test file
                r = tohdf5(f, t2, group=group_name, dataset=dataset_name)
            executor.execute_tensor(r)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name,
                                                     dataset_name)])
                np.testing.assert_array_equal(result, raw)

            # group/dataset are required when only a File object is given
            with self.assertRaises(ValueError):
                with h5py.File(filename, 'r') as f:
                    tohdf5(f, t2)

            with h5py.File(filename, 'r') as f:
                # test dataset
                ds = f['{}/{}'.format(group_name, dataset_name)]
                r = tohdf5(ds, t2)
            executor.execute_tensor(r)

            with h5py.File(filename, 'r') as f:
                result = np.asarray(f['{}/{}'.format(group_name,
                                                     dataset_name)])
                np.testing.assert_array_equal(result, raw)
class Test(TestBase):
    """Execution tests for einsum against np.einsum."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testEinsumExecution(self):
        """einsum() over contraction, reduction, broadcast and order cases."""
        data1 = np.random.rand(3, 4, 5)
        data2 = np.random.rand(4, 3, 2)

        t1 = tensor(data1, chunk_size=2)
        t2 = tensor(data2, chunk_size=3)
        t = einsum('ijk, jil -> kl', t1, t2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('ijk, jil -> kl', data1, data2)
        np.testing.assert_almost_equal(res, expected)

        # dot
        t = einsum('ijk, jil', t1, t2, optimize=True)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('ijk, jil', data1, data2, optimize=True)
        np.testing.assert_almost_equal(res, expected)

        # multiply(data1, data2)
        data1 = np.random.rand(6, 6)
        data2 = np.random.rand(6, 6)
        t1 = tensor(data1, chunk_size=3)
        t2 = tensor(data2, chunk_size=3)
        t = einsum('..., ...', t1, t2, order='C')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('..., ...', data1, data2, order='C')
        np.testing.assert_almost_equal(res, expected)

        # sum(data, axis=-1)
        data = np.random.rand(10)
        t1 = tensor(data, chunk_size=3)
        t = einsum('i->', t1, order='F')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('i->', data, order='F')
        np.testing.assert_almost_equal(res, expected)

        # sum(data, axis=0)
        t1 = tensor(data)
        t = einsum('...i->...', t1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('...i->...', data)
        np.testing.assert_almost_equal(res, expected)

        # test broadcast
        data1 = np.random.rand(1, 10, 9)
        data2 = np.random.rand(9, 6)
        data3 = np.random.rand(10, 6)
        data4 = np.random.rand(8, )

        t1 = tensor(data1, chunk_size=(1, (5, 5), (3, 3, 3)))
        t2 = tensor(data2, chunk_size=((3, 3, 3), (3, 3)))
        t3 = tensor(data3, chunk_size=((6, 4), (4, 2)))
        t4 = tensor(data4, chunk_size=4)

        t = einsum('ajk,kl,jl,a->a', t1, t2, t3, t4, optimize='optimal')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('ajk,kl,jl,a->a', data1, data2, data3, data4,
                             optimize='optimal')
        np.testing.assert_almost_equal(res, expected)

        t = einsum('ajk,kl,jl,a->a', t1, t2, t3, t4, optimize='greedy')
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.einsum('ajk,kl,jl,a->a', data1, data2, data3, data4,
                             optimize='greedy')
        np.testing.assert_almost_equal(res, expected)