class Test(unittest.TestCase):
    """Execution tests for arithmetic tensor operands.

    Every test builds tensors, runs them through ``ExecutorForTest('numpy')``
    and compares the outcome with the equivalent NumPy computation (sparse
    inputs are densified before the comparison).
    """

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _exec_concat(self, t):
        """Execute ``t`` with ``concat=True`` and return element 0 of the result."""
        return self.executor.execute_tensor(t, concat=True)[0]

    def _assert_f_order(self, arr):
        """Assert that ``arr`` is flagged F-contiguous and not C-contiguous."""
        self.assertTrue(arr.flags['F_CONTIGUOUS'])
        self.assertFalse(arr.flags['C_CONTIGUOUS'])

    def _assert_c_order(self, arr):
        """Assert that ``arr`` is flagged C-contiguous and not F-contiguous."""
        self.assertTrue(arr.flags['C_CONTIGUOUS'])
        self.assertFalse(arr.flags['F_CONTIGUOUS'])

    def _nan_equal(self, a, b):
        """Return True when ``np.testing.assert_equal(a, b)`` passes, else False."""
        try:
            np.testing.assert_equal(a, b)
        except AssertionError:
            return False
        else:
            return True

    def _check_ufuncs(self, unary_funcs, binary_funcs, operands, resolve, verify):
        """Run unary and binary ufuncs and hand (result, expected) to ``verify``.

        ``operands`` is ``(arr1, arr2, data1, data2, scalar)``: two tensors,
        the raw data behind them and a scalar.  Binary functions are
        exercised as tensor/tensor, tensor/scalar and scalar/tensor.
        ``resolve`` maps the operand's ``_func_name`` to the reference
        callable.
        """
        arr1, arr2, data1, data2, scalar = operands

        for func in unary_funcs:
            res_tensor = func(arr1)
            res = self._exec_concat(res_tensor)
            expected = resolve(res_tensor.op._func_name)(data1)
            verify(res, expected)

        for func in binary_funcs:
            tensor_args = [(arr1, arr2), (arr1, scalar), (scalar, arr1)]
            raw_args = [(data1, data2), (data1, scalar), (scalar, data1)]
            # build all three tensors first, then execute all of them
            res_tensors = [func(*args) for args in tensor_args]
            results = [self._exec_concat(t) for t in res_tensors]
            # the reference function is always looked up from the first tensor
            func_name = res_tensors[0].op._func_name
            for res, args in zip(results, raw_args):
                verify(res, resolve(func_name)(*args))

    def _check_complex_part(self, part):
        """Check reading and assigning ``part`` ('real' or 'imag') of a tensor.

        Covers a dense complex vector (chunk_size=2) and a sparse complex
        matrix whose results are densified with ``toarray()``.
        """
        dense_data = np.array([1 + 2j, 3 + 4j, 5 + 6j])
        sparse_data = np.array([[1 + 2j, 3 + 4j, 0], [0, 0, 0]])
        cases = [
            (dense_data,
             lambda raw: tensor(raw, chunk_size=2),
             lambda r: r),
            # test sparse
            (sparse_data,
             lambda raw: tensor(sps.csr_matrix(raw)),
             lambda r: r.toarray()),
        ]

        for a_data, build, densify in cases:
            a = build(a_data)

            res = densify(self._exec_concat(getattr(a, part)))
            np.testing.assert_equal(res, getattr(a_data, part))

            for value in (9, np.array([9, 8, 7])):
                setattr(a, part, value)
                res = densify(self._exec_concat(a))
                expected = a_data.copy()
                setattr(expected, part, value)
                np.testing.assert_equal(res, expected)

    @staticmethod
    def _get_func(op):
        """Return ``getattr(np, op)`` for a string ``op``; otherwise ``op`` itself."""
        return getattr(np, op) if isinstance(op, str) else op

    @staticmethod
    def _get_sparse_func(op):
        """Wrap ``op`` so that sparse arguments are densified before the call."""
        from mars.lib.sparse.core import issparse

        if isinstance(op, str):
            op = getattr(np, op)

        def func(*args):
            dense_args = [arg.toarray() if issparse(arg) else arg
                          for arg in args]
            return op(*dense_args)

        return func

    @staticmethod
    def toarray(x):
        """Return ``x.toarray()`` when ``x`` has that method, else ``x`` unchanged."""
        return x.toarray() if hasattr(x, 'toarray') else x

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def testBaseExecution(self):
        # without concat, only the first chunk result is inspected
        arr2 = ones((10, 8), chunk_size=2) + 1
        res = self.executor.execute_tensor(arr2)
        first_chunk = res[0]
        self.assertTrue((first_chunk == np.ones((2, 2)) + 1).all())

        data = np.random.random((10, 8, 3))
        arr2 = tensor(data, chunk_size=2) + 1
        res = self.executor.execute_tensor(arr2)
        first_chunk = res[0]
        self.assertTrue((first_chunk == data[:2, :2, :2] + 1).all())

    def testBaseOrderExecution(self):
        raw = np.asfortranarray(np.random.rand(5, 6))
        arr = tensor(raw, chunk_size=3)

        res = self._exec_concat(arr + 1)
        np.testing.assert_array_equal(res, raw + 1)
        self._assert_f_order(res)

        res2 = self._exec_concat(add(arr, 1, order='C'))
        np.testing.assert_array_equal(res2, np.add(raw, 1, order='C'))
        self._assert_c_order(res2)

    def testUfuncExecution(self):
        from mars.tensor.arithmetic import (
            UNARY_UFUNC, BIN_UFUNC, arccosh, invert, mod, fmod,
            bitand, bitor, bitxor, lshift, rshift, ldexp,
        )

        _sp_unary_ufunc = {arccosh, invert}
        _sp_bin_ufunc = {
            mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp
        }

        def verify(res, expected):
            self.assertTrue(np.allclose(res, expected))

        # float inputs for every ufunc outside the special sets
        data1 = np.random.random((5, 9, 4))
        data2 = np.random.random((5, 9, 4))
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)
        self._check_ufuncs(UNARY_UFUNC - _sp_unary_ufunc,
                           BIN_UFUNC - _sp_bin_ufunc,
                           (arr1, arr2, data1, data2, rand),
                           self._get_func, verify)

        # integer inputs for the special sets
        data1 = np.random.randint(2, 10, size=(10, 10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=6)
        arr2 = tensor(data2, chunk_size=6)
        self._check_ufuncs(_sp_unary_ufunc, _sp_bin_ufunc,
                           (arr1, arr2, data1, data2, rand),
                           self._get_func, verify)

    @ignore_warning
    def testSparseUfuncExecution(self):
        from mars.tensor.arithmetic import (
            UNARY_UFUNC, BIN_UFUNC, arccosh, invert, mod, fmod,
            bitand, bitor, bitxor, lshift, rshift, ldexp,
        )

        _sp_unary_ufunc = {arccosh, invert}
        _sp_bin_ufunc = {
            mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp
        }

        def verify(res, expected):
            # NOTE(review): the boolean returned by _nan_equal is not
            # asserted, so this comparison cannot fail the test.  Confirm
            # whether it should be wrapped in assertTrue.
            self._nan_equal(self.toarray(res), expected)

        # scipy sparse inputs for every ufunc outside the special sets
        data1 = sps.random(5, 9, density=.1)
        data2 = sps.random(5, 9, density=.2)
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)
        self._check_ufuncs(UNARY_UFUNC - _sp_unary_ufunc,
                           BIN_UFUNC - _sp_bin_ufunc,
                           (arr1, arr2, data1, data2, rand),
                           self._get_sparse_func, verify)

        # integer inputs converted with tosparse() for the special sets
        data1 = np.random.randint(2, 10, size=(10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=3).tosparse()
        arr2 = tensor(data2, chunk_size=3).tosparse()
        self._check_ufuncs(_sp_unary_ufunc, _sp_bin_ufunc,
                           (arr1, arr2, data1, data2, rand),
                           self._get_sparse_func, verify)

    def testAddWithOutExecution(self):
        data1 = np.random.random((5, 9, 4))
        data2 = np.random.random((9, 4))
        mask = data1 > .5

        # out= is one of the inputs
        arr1 = tensor(data1.copy(), chunk_size=3)
        arr2 = tensor(data2.copy(), chunk_size=3)
        add(arr1, arr2, out=arr1)
        res = self._exec_concat(arr1)
        self.assertTrue(np.array_equal(res, data1 + data2))

        # out= has another dtype, combined with unsafe casting
        arr1 = tensor(data1.copy(), chunk_size=3)
        arr2 = tensor(data2.copy(), chunk_size=3)
        arr3 = add(arr1, arr2, out=arr1.astype('i4'), casting='unsafe')
        res = self._exec_concat(arr3)
        np.testing.assert_array_equal(res, (data1 + data2).astype('i4'))

        # out= together with where=
        arr1 = tensor(data1.copy(), chunk_size=3)
        arr2 = tensor(data2.copy(), chunk_size=3)
        arr3 = truediv(arr1, arr2, out=arr1, where=arr2 > .5)
        res = self._exec_concat(arr3)
        expected = np.true_divide(data1, data2, out=data1.copy(),
                                  where=data2 > .5)
        self.assertTrue(np.array_equal(res, expected))

        # where= without out=: only the selected positions are compared
        arr1 = tensor(data1.copy(), chunk_size=4)
        arr2 = tensor(data2.copy(), chunk_size=4)
        arr3 = add(arr1, arr2, where=arr1 > .5)
        res = self._exec_concat(arr3)
        expected = np.add(data1, data2, where=data1 > .5)
        self.assertTrue(np.array_equal(res[mask], expected[mask]))

        arr1 = tensor(data1.copy(), chunk_size=4)
        arr3 = add(arr1, 1, where=arr1 > .5)
        res = self._exec_concat(arr3)
        expected = np.add(data1, 1, where=data1 > .5)
        self.assertTrue(np.array_equal(res[mask], expected[mask]))

        # input and out= are different slices of the same tensor
        arr1 = tensor(data2.copy(), chunk_size=3)
        arr3 = add(arr1[:5, :], 1, out=arr1[-5:, :])
        res = self._exec_concat(arr3)
        expected = np.add(data2[:5, :], 1)
        self.assertTrue(np.array_equal(res, expected))

    def testArctan2Execution(self):
        x = tensor(1)  # scalar
        raw1 = np.array([[0, 1, 2]])
        raw2 = sps.csr_matrix([[0, 1, 0]])
        dense2 = raw2.toarray()

        # (arctan2 arguments, NumPy reference arguments, expect sparse output)
        cases = [
            ((x, x), (1, 1), False),
            ((0, x), (0, 1), False),
            ((raw1, raw2), (raw1, dense2), False),
            ((raw2, raw2), (dense2, dense2), True),
            ((0, raw2), (0, dense2), True),
        ]
        for args, ref_args, sparse in cases:
            y = arctan2(*args)
            if sparse:
                self.assertTrue(y.issparse())
            else:
                self.assertFalse(y.issparse())
            result = self._exec_concat(y)
            np.testing.assert_equal(result, np.arctan2(*ref_args))

    def testFrexpExecution(self):
        data1 = np.random.random((5, 9, 4))

        # both outputs returned
        arr1 = tensor(data1.copy(), chunk_size=3)
        o1, o2 = frexp(arr1)
        res = self._exec_concat(o1 + o2)
        expected = sum(np.frexp(data1))
        self.assertTrue(np.allclose(res, expected))

        # both outputs passed in as positional arguments
        arr1 = tensor(data1.copy(), chunk_size=3)
        o1 = zeros(data1.shape, chunk_size=3)
        o2 = zeros(data1.shape, dtype='i8', chunk_size=3)
        frexp(arr1, o1, o2)
        res = self._exec_concat(o1 + o2)
        expected = sum(np.frexp(data1))
        self.assertTrue(np.allclose(res, expected))

        # sparse input
        data1 = sps.random(5, 9, density=.1)
        arr1 = tensor(data1.copy(), chunk_size=3)
        o1, o2 = frexp(arr1)
        res = self._exec_concat(o1 + o2)
        expected = sum(np.frexp(data1.toarray()))
        np.testing.assert_equal(res.toarray(), expected)

    def testFrexpOrderExecution(self):
        data1 = np.random.random((5, 9))
        t = tensor(data1, chunk_size=3)

        outputs = frexp(t, order='F')
        results = self.executor.execute_tileables(list(outputs))
        expecteds = np.frexp(data1, order='F')
        for res, expected in zip(results, expecteds):
            np.testing.assert_allclose(res, expected)
            self._assert_f_order(res)

    def testModfExecution(self):
        data1 = np.random.random((5, 9))

        # both outputs returned
        arr1 = tensor(data1.copy(), chunk_size=3)
        o1, o2 = modf(arr1)
        res = self._exec_concat(o1 + o2)
        expected = sum(np.modf(data1))
        self.assertTrue(np.allclose(res, expected))

        # plain list input
        o1, o2 = modf([0, 3.5])
        res = self._exec_concat(o1 + o2)
        expected = sum(np.modf([0, 3.5]))
        self.assertTrue(np.allclose(res, expected))

        # both outputs passed in as positional arguments
        arr1 = tensor(data1.copy(), chunk_size=3)
        o1 = zeros(data1.shape, chunk_size=3)
        o2 = zeros(data1.shape, chunk_size=3)
        modf(arr1, o1, o2)
        res = self._exec_concat(o1 + o2)
        expected = sum(np.modf(data1))
        self.assertTrue(np.allclose(res, expected))

        # sparse input
        data1 = sps.random(5, 9, density=.1)
        arr1 = tensor(data1.copy(), chunk_size=3)
        o1, o2 = modf(arr1)
        res = self._exec_concat(o1 + o2)
        expected = sum(np.modf(data1.toarray()))
        np.testing.assert_equal(res.toarray(), expected)

    def testModfOrderExecution(self):
        data1 = np.random.random((5, 9))
        t = tensor(data1, chunk_size=3)

        outputs = modf(t, order='F')
        results = self.executor.execute_tileables(list(outputs))
        expecteds = np.modf(data1, order='F')
        for res, expected in zip(results, expecteds):
            np.testing.assert_allclose(res, expected)
            self._assert_f_order(res)

    def testClipExecution(self):
        a_data = np.arange(10)

        # scalar bounds
        a = tensor(a_data.copy(), chunk_size=3)
        b = clip(a, 1, 8)
        res = self._exec_concat(b)
        self.assertTrue(np.array_equal(res, np.clip(a_data, 1, 8)))

        # scalar bounds, written back into the input through out=
        a = tensor(a_data.copy(), chunk_size=3)
        clip(a, 3, 6, out=a)
        res = self._exec_concat(a)
        self.assertTrue(np.array_equal(res, np.clip(a_data, 3, 6)))

        # tensor bounds
        a = tensor(a_data.copy(), chunk_size=3)
        a_min_data = np.random.randint(1, 10, size=(10, ))
        a_max_data = np.random.randint(1, 10, size=(10, ))
        clip(a, tensor(a_min_data), tensor(a_max_data), out=a)
        res = self._exec_concat(a)
        expected = np.clip(a_data, a_min_data, a_max_data)
        self.assertTrue(np.array_equal(res, expected))

        # list lower bound, with options.chunk_size set to 3
        with option_context() as options:
            options.chunk_size = 3

            lower = [3, 4, 1, 1, 1, 4, 4, 4, 4, 4]
            a = tensor(a_data.copy(), chunk_size=3)
            b = clip(a, lower, 8)
            res = self._exec_concat(b)
            expected = np.clip(a_data, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
            self.assertTrue(np.array_equal(res, expected))

        # test sparse clip
        a_data = sps.csr_matrix([[0, 2, 8], [0, 0, -1]])
        a = tensor(a_data, chunk_size=3)
        b_data = sps.csr_matrix([[0, 3, 0], [1, 0, -2]])
        c = clip(a, b_data, 4)
        res = self._exec_concat(c)
        expected = np.clip(a_data.toarray(), b_data.toarray(), 4)
        self.assertTrue(np.array_equal(res.toarray(), expected))

    def testClipOrderExecution(self):
        a_data = np.asfortranarray(np.random.rand(4, 8))
        a = tensor(a_data, chunk_size=3)

        res = self._exec_concat(clip(a, 0.2, 0.8))
        np.testing.assert_allclose(res, np.clip(a_data, 0.2, 0.8))
        self._assert_f_order(res)

    def testAroundExecution(self):
        data = np.random.randn(10, 20)
        x = tensor(data, chunk_size=3)
        res = self._exec_concat(x.round(2))
        np.testing.assert_allclose(res, np.around(data, decimals=2))

        # sparse input
        data = sps.random(10, 20, density=.2)
        x = tensor(data, chunk_size=3)
        res = self._exec_concat(x.round(2))
        expected = np.around(data.toarray(), decimals=2)
        np.testing.assert_allclose(res.toarray(), expected)

    def testAroundOrderExecution(self):
        data = np.asfortranarray(np.random.rand(10, 20))
        x = tensor(data, chunk_size=3)

        res = self._exec_concat(x.round(2))
        np.testing.assert_allclose(res, np.around(data, decimals=2))
        self._assert_f_order(res)

    def testCosOrderExecution(self):
        data = np.asfortranarray(np.random.rand(3, 5))
        x = tensor(data, chunk_size=2)

        res = self._exec_concat(cos(x))
        np.testing.assert_allclose(res, np.cos(data))
        self._assert_f_order(res)

        res2 = self._exec_concat(cos(x, order='C'))
        np.testing.assert_allclose(res2, np.cos(data, order='C'))
        self._assert_c_order(res2)

    def testIsCloseExecution(self):
        def check(lhs, rhs, ref_lhs, ref_rhs, **kw):
            z = isclose(lhs, rhs, atol=.01, **kw)
            res = self._exec_concat(z)
            expected = np.isclose(ref_lhs, ref_rhs, atol=.01, **kw)
            np.testing.assert_equal(res, expected)

        data = np.array([1.05, 1.0, 1.01, np.nan])
        data2 = np.array([1.04, 1.0, 1.03, np.nan])
        x = tensor(data, chunk_size=2)
        y = tensor(data2, chunk_size=3)

        check(x, y, data, data2)
        check(x, y, data, data2, equal_nan=True)

        # test tensor with scalar
        check(x, 1.0, data, 1.0)
        check(1.0, y, 1.0, data2)
        check(1.0, 2.0, 1.0, 2.0)

        # test sparse
        data = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        data2 = sps.csr_matrix(np.array([0, 1.0, 1.03, np.nan]))
        x = tensor(data, chunk_size=2)
        y = tensor(data2, chunk_size=3)

        check(x, y, data.toarray(), data2.toarray())
        check(x, y, data.toarray(), data2.toarray(), equal_nan=True)

    @ignore_warning
    def testDtypeExecution(self):
        a = ones((10, 20), dtype='f4', chunk_size=5)

        res = self._exec_concat(truediv(a, 2, dtype='f8'))
        self.assertEqual(res.dtype, np.float64)

        res = self._exec_concat(truediv(a, 0, dtype='f8'))
        self.assertTrue(np.isinf(res[0, 0]))

        # with divide='raise' the same division has to raise
        with self.assertRaises(FloatingPointError), \
                np.errstate(divide='raise'):
            c = truediv(a, 0, dtype='f8')
            _ = self._exec_concat(c)  # noqa: F841

    def testSetGetRealExecution(self):
        self._check_complex_part('real')

    def testSetGetImagExecution(self):
        self._check_complex_part('imag')

    def testTreeArithmeticExecution(self):
        raws = [np.random.rand(10, 10) for _ in range(10)]
        tensors = [tensor(a, chunk_size=3) for a in raws]

        res = self._exec_concat(tree_add(*tensors, 1.0))
        np.testing.assert_array_almost_equal(
            res, 1.0 + functools.reduce(operator.add, raws))

        res = self._exec_concat(tree_multiply(*tensors, 2.0))
        np.testing.assert_array_almost_equal(
            res, 2.0 * functools.reduce(operator.mul, raws))

        # sparse inputs
        raws = [sps.random(5, 9, density=.1) for _ in range(10)]
        tensors = [tensor(a, chunk_size=3) for a in raws]

        res = self._exec_concat(tree_add(*tensors))
        np.testing.assert_array_almost_equal(
            res.toarray(), functools.reduce(operator.add, raws).toarray())

    @require_cupy
    def testCupyExecution(self):
        a_data = np.random.rand(10, 10)
        b_data = np.random.rand(10, 10)
        a = tensor(a_data, gpu=True, chunk_size=3)
        b = tensor(b_data, gpu=True, chunk_size=3)

        # results are fetched with .get() before the comparison
        res_binary = self._exec_concat(a + b)
        np.testing.assert_array_equal(res_binary.get(), (a_data + b_data))

        res_unary = self._exec_concat(cos(a))
        np.testing.assert_array_almost_equal(res_unary.get(), np.cos(a_data))
def testOptimizedHeadTail(self):
    # Checks head()/tail() on DataFrames backed by read_csv and read_sql.
    # For head(3) the execution runs inside
    # self._inject_execute_data_source(3, <data source op>).
    import sqlalchemy as sa

    with tempfile.TemporaryDirectory() as tempdir:
        executor = ExecutorForTest(storage=self.executor.storage)

        filename = os.path.join(tempdir, 'test_head.csv')
        rs = np.random.RandomState(0)
        pd_df = pd.DataFrame({
            'a': rs.randint(1000, size=(100, )).astype(np.int64),
            'b': rs.randint(1000, size=(100, )).astype(np.int64),
            'c': ['sss' for _ in range(100)],
            'd': ['eeee' for _ in range(100)]
        })
        pd_df.to_csv(filename, index=False)

        # a third of the file size per chunk, so the CSV is read in
        # more than one chunk
        size = os.path.getsize(filename)
        chunk_bytes = size / 3

        df = md.read_csv(filename, chunk_bytes=chunk_bytes)

        # test DataFrame.head
        r = df.head(3)

        with self._inject_execute_data_source(3, DataFrameReadCSV):
            result = executor.execute_tileables([r])[0]
            expected = pd_df.head(3)
            pd.testing.assert_frame_equal(result, expected)

        # test DataFrame.tail
        r = df.tail(3)

        result = executor.execute_tileables([r])[0]
        expected = pd_df.tail(3)
        pd.testing.assert_frame_equal(result.reset_index(drop=True),
                                      expected.reset_index(drop=True))

        # test head more than 1 chunk
        r = df.head(99)

        result = executor.execute_tileables([r])[0]
        result.reset_index(drop=True, inplace=True)
        expected = pd_df.head(99)
        pd.testing.assert_frame_equal(result, expected)

        # test DataFrame.tail more than 1 chunk
        r = df.tail(99)

        result = executor.execute_tileables([r])[0]
        expected = pd_df.tail(99)
        pd.testing.assert_frame_equal(result.reset_index(drop=True),
                                      expected.reset_index(drop=True))

        filename = os.path.join(tempdir, 'test_sql.db')
        conn = sa.create_engine('sqlite:///' + filename)
        try:
            pd_df.to_sql('test_sql', conn)

            df = md.read_sql('test_sql', conn,
                             index_col='index', chunk_size=20)

            # test DataFrame.head
            r = df.head(3)

            with self._inject_execute_data_source(3, DataFrameReadSQL):
                result = executor.execute_tileables([r])[0]
                # drop the index name before comparing with pd_df.head(3)
                result.index.name = None
                expected = pd_df.head(3)
                pd.testing.assert_frame_equal(result, expected)
        finally:
            # Release the engine's pooled connections while the database
            # file still exists; the temporary directory is removed right
            # after this block.
            conn.dispose()
class Test(TestBase):
    """Execution tests for remote functions created with ``spawn``.

    ``setUp`` enters the context returned by ``_create_test_context`` and
    ``tearDown`` leaves it again, so every test body runs inside it.
    """

    def setUp(self) -> None:
        super().setUp()
        self.executor = ExecutorForTest('numpy')
        # the executor handed back by _create_test_context replaces the
        # one that was passed in
        self.ctx, self.executor = self._create_test_context(self.executor)
        self.ctx.__enter__()

    def tearDown(self) -> None:
        # Mirror setUp: leave the context first, then always chain to the
        # base class teardown, even if leaving the context raises.
        try:
            self.ctx.__exit__(None, None, None)
        finally:
            super().tearDown()

    def testRemoteFunction(self):
        def f1(x):
            return x + 1

        def f2(x, y, z=None):
            return x * y * (z[0] + z[1])

        rs = np.random.RandomState(0)
        raw1 = rs.rand(10, 10)
        raw2 = rs.rand(10, 10)

        # spawned results are used as positional args and nested in kwargs
        r1 = spawn(f1, raw1)
        r2 = spawn(f1, raw2)
        r3 = spawn(f2, (r1, r2), {'z': [r1, r2]})

        result = self.executor.execute_tileables([r3])[0]
        expected = (raw1 + 1) * (raw2 + 1) * (raw1 + 1 + raw2 + 1)
        np.testing.assert_almost_equal(result, expected)

        # passing a tuple as kwargs has to be rejected
        with self.assertRaises(TypeError):
            spawn(f2, (r1, r2), kwargs=())

        session = new_session()

        def f():
            # the default session seen by the function must be the session
            # it is executed with
            assert Session.default.session_id == session.session_id
            return mt.ones((2, 3)).sum().to_numpy()

        self.assertEqual(
            spawn(f).execute(session=session).fetch(session=session), 6)

    def testMultiOutput(self):
        sentences = ['word1 word2', 'word2 word3', 'word3 word2 word1']

        def mapper(s):
            # count the words of one sentence, then split the counts into
            # two buckets by the parity of each word's mmh3 hash
            word_to_count = defaultdict(lambda: 0)
            for word in s.split():
                word_to_count[word] += 1

            downsides = [defaultdict(lambda: 0),
                         defaultdict(lambda: 0)]
            for word, count in word_to_count.items():
                downsides[mmh3_hash(word) % 2][word] += count

            return downsides

        def reducer(word_to_count_list):
            # merge the per-sentence counts of one bucket into a plain dict
            d = defaultdict(lambda: 0)
            for word_to_count in word_to_count_list:
                for word, count in word_to_count.items():
                    d[word] += count

            return dict(d)

        outs = [], []
        for sentence in sentences:
            out1, out2 = spawn(mapper, sentence, n_output=2)
            outs[0].append(out1)
            outs[1].append(out2)

        rs = []
        for out in outs:
            r = spawn(reducer, out)
            rs.append(r)

        result = dict()
        for wc in ExecutableTuple(rs).to_object():
            result.update(wc)

        self.assertEqual(result, {'word1': 2, 'word2': 3, 'word3': 2})

    def testChainedRemote(self):
        def f(x):
            return x + 1

        def g(x):
            return x * 2

        # (2 + 1) * 2
        s = spawn(g, spawn(f, 2))

        result = self.executor.execute_tileables([s])[0]
        self.assertEqual(result, 6)

    def testInputTileable(self):
        def f(t, x):
            return (t * x).sum().to_numpy()

        rs = np.random.RandomState(0)
        raw = rs.rand(5, 4)

        t1 = mt.tensor(raw, chunk_size=3)
        t2 = t1.sum(axis=0)
        s = spawn(f, args=(t2, 3))

        sess = new_session()
        # replace the session's executor with a test executor whose storage
        # is the session context
        sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

        result = s.execute(session=sess).fetch(session=sess)
        expected = (raw.sum(axis=0) * 3).sum()
        self.assertAlmostEqual(result, expected)

        df1 = md.DataFrame(raw, chunk_size=3)
        df1.execute(session=sess)
        df2 = shuffle(df1)
        df2.execute(session=sess)

        def f2(input_df):
            bonus = input_df.iloc[:, 0].fetch().sum()
            return input_df.sum().to_pandas() + bonus

        # both the executed DataFrame and its shuffled version are used as
        # input, checked against the same expected Series
        for df in [df1, df2]:
            s = spawn(f2, args=(df, ))

            result = s.execute(session=sess).fetch(session=sess)
            expected = pd.DataFrame(raw).sum() + raw[:, 0].sum()
            pd.testing.assert_series_equal(result, expected)
class Test(TestBase):
    """Execution tests for tensor indexing and item assignment.

    ``options.chunk_size`` is set to 10 for the duration of each test and
    restored afterwards.
    """

    def setUp(self):
        self.executor = ExecutorForTest('numpy')
        self.old_chunk = options.chunk_size
        options.chunk_size = 10

    def tearDown(self):
        options.chunk_size = self.old_chunk

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _exec_concat(self, t):
        """Execute ``t`` with ``concat=True`` and return element 0 of the result."""
        return self.executor.execute_tensor(t, concat=True)[0]

    def _assert_same_layout(self, res, expected):
        """Assert that ``res`` and ``expected`` agree on both contiguity flags."""
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(res.flags[flag], expected.flags[flag])

    def _check_getitem(self, arr, key, raw, raw_key):
        """Index ``arr`` with ``key``, execute, compare with ``raw[raw_key]``.

        Returns the indexed (not yet executed) tensor.
        """
        indexed = arr[key]
        res = self._exec_concat(indexed)
        np.testing.assert_array_equal(res, raw[raw_key])
        return indexed

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def testBoolIndexingExecution(self):
        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=3)

        # boolean mask with the same shape as the tensor
        arr2 = arr[arr < .5]
        size_res = self.executor.execute_tensor(arr2, mock=True)
        res = self.executor.execute_tensor(arr2)
        self.assertEqual(sum(s[0] for s in size_res), arr.nbytes)
        # chunk results are not concatenated here, so compare sorted values
        np.testing.assert_array_equal(np.sort(np.concatenate(res)),
                                      np.sort(raw[raw < .5]))

        # boolean mask covering the first two dimensions only
        mask = tensor(raw[:, :, 0, 0], chunk_size=3) < .5
        res = self._exec_concat(arr[mask])
        expected = raw[raw[:, :, 0, 0] < .5]
        self.assertEqual(sum(it.size for it in res), expected.size)
        self.assertEqual(res.shape, expected.shape)

        # Fortran-ordered input: the result layout has to match NumPy's
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=3)
        mask = tensor(raw[:, :, 0, 0], chunk_size=3) < .5
        res = self._exec_concat(arr[mask])
        expected = raw[raw[:, :, 0, 0] < .5].copy('A')
        self._assert_same_layout(res, expected)

    def testFancyIndexingNumpyExecution(self):
        # test fancy index of type numpy ndarray
        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        def check(key):
            return self._check_getitem(arr, key, raw, key)

        check([9, 10, 3, 1, 8, 10])
        check(np.s_[:2, ..., np.random.permutation(8)])
        check(np.s_[..., [1, 3, 9, 10], :5])
        check(np.s_[[8, 10, 3, 1, 9, 10], :, [1, 3, 9, 10, 2, 7]])

        arr6 = check(np.s_[[1, 3, 5, 7, 9, 10], :, [1, 9, 9, 10, 2, 7]])
        # fancy index is ordered, no concat required
        self.assertGreater(len(get_tiled(arr6).nsplits[0]), 1)

        check(np.s_[[[8, 10, 3], [1, 9, 10]], :, [[1, 3, 9], [10, 2, 7]]])
        check(np.s_[0, [[1, 3], [3, 7], [7, 7]], :, [1, 9]])

    def testFancyIndexingTensorExecution(self):
        # test fancy index of type tensor
        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        raw_index = [8, 10, 3, 1, 9, 10]
        self._check_getitem(arr, tensor(raw_index, chunk_size=4),
                            raw, raw_index)

        raw_index = np.random.permutation(8)
        self._check_getitem(
            arr, np.s_[:2, ..., tensor(raw_index, chunk_size=3)],
            raw, np.s_[:2, ..., raw_index])

        raw_index = [1, 3, 9, 10]
        self._check_getitem(arr, np.s_[..., tensor(raw_index), :5],
                            raw, np.s_[..., raw_index, :5])

        raw_index1 = [8, 10, 3, 1, 9, 10]
        raw_index2 = [1, 3, 9, 10, 2, 7]
        index1 = tensor(raw_index1, chunk_size=4)
        index2 = tensor(raw_index2, chunk_size=3)
        self._check_getitem(arr, np.s_[index1, :, index2],
                            raw, np.s_[raw_index1, :, raw_index2])

        raw_index1 = [1, 3, 5, 7, 9, 10]
        raw_index2 = [1, 9, 9, 10, 2, 7]
        index1 = tensor(raw_index1, chunk_size=3)
        index2 = tensor(raw_index2, chunk_size=4)
        self._check_getitem(arr, np.s_[index1, :, index2],
                            raw, np.s_[raw_index1, :, raw_index2])

        raw_index1 = [[8, 10, 3], [1, 9, 10]]
        raw_index2 = [[1, 3, 9], [10, 2, 7]]
        index1 = tensor(raw_index1)
        index2 = tensor(raw_index2, chunk_size=2)
        self._check_getitem(arr, np.s_[index1, :, index2],
                            raw, np.s_[raw_index1, :, raw_index2])

        raw_index1 = [[1, 3], [3, 7], [7, 7]]
        raw_index2 = [1, 9]
        index1 = tensor(raw_index1, chunk_size=(2, 1))
        index2 = tensor(raw_index2)
        self._check_getitem(arr, np.s_[0, index1, :, index2],
                            raw, np.s_[0, raw_index1, :, raw_index2])

        # index computed from the tensor itself
        raw_a = np.random.rand(30, 30)
        a = tensor(raw_a, chunk_size=(13, 17))
        b = a.argmax(axis=0)
        c = a[b, arange(30)]
        res = self._exec_concat(c)
        np.testing.assert_array_equal(
            res, raw_a[raw_a.argmax(axis=0), np.arange(30)])

        # test one chunk
        arr = tensor(raw, chunk_size=20)

        raw_index = [8, 10, 3, 1, 9, 10]
        self._check_getitem(arr, tensor(raw_index, chunk_size=20),
                            raw, raw_index)

        raw_index1 = [[1, 3], [3, 7], [7, 7]]
        raw_index2 = [1, 9]
        index1 = tensor(raw_index1)
        index2 = tensor(raw_index2)
        self._check_getitem(arr, np.s_[0, index1, :, index2],
                            raw, np.s_[0, raw_index1, :, raw_index2])

        # test order
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        raw_index = [8, 10, 3, 1, 9, 10]
        arr11 = arr[tensor(raw_index, chunk_size=4)]
        res = self._exec_concat(arr11)
        expected = raw[raw_index].copy('A')
        np.testing.assert_array_equal(res, expected)
        self._assert_same_layout(res, expected)

    def testSliceExecution(self):
        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=3)

        key = np.s_[2:9:2, 3:7, -1:-9:-2, 12:-11:-4]
        res = self._exec_concat(arr[key])
        np.testing.assert_array_equal(res, raw[key])

        key = np.s_[-4, 2:]
        res = self._exec_concat(arr[key])
        np.testing.assert_equal(res, raw[key])

        # sparse input
        raw = sps.random(12, 14, density=.1)
        arr = tensor(raw, chunk_size=3)

        key = np.s_[-1:-9:-2, 12:-11:-4]
        res = self._exec_concat(arr[key])
        np.testing.assert_equal(res.toarray(), raw.toarray()[key])

        # test order
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=3)

        for key in (np.s_[2:9:2, 3:7, -1:-9:-2, 12:-11:-4],
                    np.s_[0:13, :, None]):
            res = self._exec_concat(arr[key])
            expected = raw[key].copy('A')
            np.testing.assert_array_equal(res, expected)
            self._assert_same_layout(res, expected)

    def testMixedIndexingExecution(self):
        rs = np.random.RandomState(0)
        raw = rs.random((11, 8, 12, 13))
        arr = tensor(raw, chunk_size=3)

        # slices, a boolean tensor, newaxis and an ellipsis in one key
        raw_cond = raw[0, :, 0, 0] < .5
        cond = tensor(raw[0, :, 0, 0], chunk_size=3) < .5
        arr2 = arr[10::-2, cond, None, ..., :5]
        size_res = self.executor.execute_tensor(arr2, mock=True)
        res = self._exec_concat(arr2)

        # the mock size is compared with the size obtained when dimension 1
        # has the full length of cond
        new_shape = list(arr2.shape)
        new_shape[1] = cond.shape[0]
        expected_size = int(np.prod(new_shape) * arr2.dtype.itemsize)
        self.assertEqual(sum(s[0] for s in size_res), expected_size)
        np.testing.assert_array_equal(
            res, raw[10::-2, raw_cond, None, ..., :5])

        # the same mask given as ndarray and as tensor
        b_raw = np.random.random(8)
        raw_cond = b_raw < .5
        for cond in (raw_cond, tensor(b_raw, chunk_size=2) < .5):
            res = self._exec_concat(arr[-2::-3, cond, ...])
            np.testing.assert_array_equal(res, raw[-2::-3, raw_cond, ...])

        # test multiple bool index and fancy index
        cond1 = np.zeros(11, dtype=bool)
        cond1[rs.permutation(11)[:5]] = True
        cond2 = np.zeros(12, dtype=bool)
        cond2[rs.permutation(12)[:5]] = True
        f3 = np.random.randint(13, size=5)

        expected = raw[cond1, ..., cond2, f3]

        res = self._exec_concat(arr[cond1, ..., cond2, f3])
        np.testing.assert_array_equal(res, expected)

        # same key built from tensors, executed with the executor returned
        # by _create_test_context while its context is active
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            t = arr[tensor(cond1), ..., tensor(cond2), tensor(f3)]
            res = executor.execute_tensors([t])[0]
            np.testing.assert_array_equal(res, expected)

    def testSetItemExecution(self):
        rs = np.random.RandomState(0)

        data = rs.randint(0, 10, size=(11, 8, 12, 13))
        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2

        # scalar assignment
        arr = tensor(data.copy(), chunk_size=3)
        raw = data.copy()
        arr[idx] = 20
        res = self._exec_concat(arr)
        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self._assert_same_layout(res, raw)

        # tensor assignment
        shape = data[idx].shape
        arr2 = tensor(data.copy(), chunk_size=3)
        raw = data.copy()
        replace = rs.randint(10, 20, size=shape[:-1] + (1, )).astype('f4')
        arr2[idx] = tensor(replace, chunk_size=4)
        res = self._exec_concat(arr2)
        raw[idx] = replace
        np.testing.assert_array_equal(res, raw)

        # scalar assignment into Fortran-ordered data
        raw = np.asfortranarray(np.random.randint(0, 10, size=(11, 8, 12, 13)))
        arr = tensor(raw.copy('A'), chunk_size=3)
        raw = raw.copy('A')
        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self._exec_concat(arr)
        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self._assert_same_layout(res, raw)

        # test bool indexing set
        # (raw is data itself here, so data is modified in place)
        raw = data
        arr = tensor(raw.copy(), chunk_size=3)
        raw1 = rs.rand(11)
        arr[tensor(raw1, chunk_size=4) < 0.6, 2:7] = 3
        res = self.executor.execute_tileable(arr, concat=True)[0]
        raw[raw1 < 0.6, 2:7] = 3
        np.testing.assert_array_equal(res, raw)

        # two boolean tensors in one key
        raw = np.random.randint(3, size=10).astype(np.int64)
        raw2 = np.arange(3)
        arr = zeros((10, 3))
        arr[tensor(raw) == 1, tensor(raw2) == 1] = 1
        res = self.executor.execute_tileable(arr, concat=True)[0]
        expected = np.zeros((10, 3))
        expected[raw == 1, raw2 == 1] = 1
        np.testing.assert_array_equal(res, expected)

        # NOTE(review): the executor returned by _create_test_context is
        # not used below; self.executor runs the graph inside the context.
        # testMixedIndexingExecution uses the returned one -- confirm
        # which is intended.
        ctx, _ = self._create_test_context(self.executor)
        with ctx:
            raw = data
            arr = tensor(raw.copy(), chunk_size=3)
            raw1 = rs.rand(11)
            set_data = rs.rand((raw1 < 0.8).sum(), 8, 12, 13)
            arr[tensor(raw1, chunk_size=4) < 0.8] = tensor(set_data)

            res = self.executor.execute_tileables([arr])[0]

            raw[raw1 < 0.8] = set_data
            np.testing.assert_array_equal(res, raw)

        # test error
        with self.assertRaises(ValueError):
            t = tensor(raw, chunk_size=3)
            t[0, 0, 0, 0] = zeros(2, chunk_size=10)
            _ = self.executor.execute_tensor(t)

    def testSetItemStructuredExecution(self):
        rec_type = np.dtype([('a', np.int32), ('b', np.double),
                             ('c', np.dtype([('a', np.int16),
                                             ('b', np.int64)]))])

        raw = np.zeros((4, 5), dtype=rec_type)
        arr = tensor(raw.copy(), chunk_size=3)

        # the same assignments, in the same order, go to the tensor first
        # and to the NumPy array afterwards
        assignments = [
            (np.s_[1:4, 1], (3, 4., (5, 6))),
            (np.s_[1:4, 2], 8),
            (np.s_[1:3], np.arange(5)),
            (np.s_[2:4], np.arange(10).reshape(2, 5)),
            (0, np.arange(5)),
        ]
        for target in (arr, raw):
            for key, value in assignments:
                target[key] = value

        res = self._exec_concat(arr)
        self.assertEqual(arr.dtype, raw.dtype)
        self.assertEqual(arr.shape, raw.shape)
        np.testing.assert_array_equal(res, raw)
def testTakeExecution(self):
    """Compare ``take`` with ``np.take`` for flat, per-axis and ``out=`` usage."""
    data = np.random.rand(10, 20, 30)
    t = tensor(data, chunk_size=10)

    # No axis given: same indices are handed to np.take on the raw data.
    a = t.take([4, 1, 2, 6, 200])
    res = self.executor.execute_tensor(a, concat=True)[0]
    expected = np.take(data, [4, 1, 2, 6, 200])
    np.testing.assert_array_equal(res, expected)

    a = take(t, [5, 19, 2, 13], axis=1)
    res = self.executor.execute_tensor(a, concat=True)[0]
    expected = np.take(data, [5, 19, 2, 13], axis=1)
    np.testing.assert_array_equal(res, expected)

    # `out` holds 4 elements for 3 indices; the call is expected to raise.
    with self.assertRaises(ValueError):
        take(t, [1, 3, 4], out=tensor(np.random.rand(4)))

    # With `out=`, the result is read back by executing `out` itself, so the
    # return value of take() is not needed.
    out = tensor([1, 2, 3, 4])
    take(t, [4, 19, 2, 8], out=out)
    res = self.executor.execute_tensor(out, concat=True)[0]
    expected = np.take(data, [4, 19, 2, 8])
    np.testing.assert_array_equal(res, expected)


def testCompressExecution(self):
    """Compare ``compress`` with ``np.compress`` (int/bool conditions, order, ``out=``)."""
    data = np.array([[1, 2], [3, 4], [5, 6]])
    a = tensor(data, chunk_size=1)

    t = compress([0, 1], a, axis=0)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([0, 1], data, axis=0)
    np.testing.assert_array_equal(res, expected)

    t = compress([0, 1], a, axis=1)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([0, 1], data, axis=1)
    np.testing.assert_array_equal(res, expected)

    # Method form without an axis.
    t = a.compress([0, 1, 1])
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([0, 1, 1], data)
    np.testing.assert_array_equal(res, expected)

    t = compress([False, True, True], a, axis=0)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([False, True, True], data, axis=0)
    np.testing.assert_array_equal(res, expected)

    t = compress([False, True], a, axis=1)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([False, True], data, axis=1)
    np.testing.assert_array_equal(res, expected)

    # Three-element condition against axis 1, which only has two entries.
    with self.assertRaises(np.AxisError):
        compress([0, 1, 1], a, axis=1)

    # test order
    data = np.asfortranarray([[1, 2], [3, 4], [5, 6]])
    a = tensor(data, chunk_size=1)

    t = compress([0, 1, 1], a, axis=0)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([0, 1, 1], data, axis=0)
    np.testing.assert_array_equal(res, expected)
    self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
    self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

    t = compress([0, 1, 1], a, axis=0,
                 out=tensor(np.empty((2, 2), order='F', dtype=int)))
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.compress([0, 1, 1], data, axis=0,
                           out=np.empty((2, 2), order='F', dtype=int))
    np.testing.assert_array_equal(res, expected)
    self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
    self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])


def testExtractExecution(self):
    """Compare ``extract`` with ``np.extract`` using a ``mod(a, 3) == 0`` condition."""
    data = np.arange(12).reshape((3, 4))
    a = tensor(data, chunk_size=2)
    condition = mod(a, 3) == 0

    t = extract(condition, a)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.extract(np.mod(data, 3) == 0, data)
    np.testing.assert_array_equal(res, expected)


def testChooseExecution(self):
    """Compare ``choose`` with ``np.choose`` across modes, broadcasting, order and ``out=``."""
    # The test changes a process-wide option; register the restore first so
    # the setting cannot leak into other tests even when an assertion fails.
    self.addCleanup(setattr, options, 'chunk_size', options.chunk_size)
    options.chunk_size = 2

    choices = [[0, 1, 2, 3], [10, 11, 12, 13],
               [20, 21, 22, 23], [30, 31, 32, 33]]
    a = choose([2, 3, 1, 0], choices)
    res = self.executor.execute_tensor(a, concat=True)[0]
    expected = np.choose([2, 3, 1, 0], choices)
    np.testing.assert_array_equal(res, expected)

    a = choose([2, 4, 1, 0], choices, mode='clip')  # 4 goes to 3 (4-1)
    expected = np.choose([2, 4, 1, 0], choices, mode='clip')
    res = self.executor.execute_tensor(a, concat=True)[0]
    np.testing.assert_array_equal(res, expected)

    a = choose([2, 4, 1, 0], choices, mode='wrap')  # 4 goes to (4 mod 4)
    expected = np.choose([2, 4, 1, 0], choices, mode='wrap')  # 4 goes to (4 mod 4)
    res = self.executor.execute_tensor(a, concat=True)[0]
    np.testing.assert_array_equal(res, expected)

    # 2-d index array with scalar choices.
    a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
    choices = [-10, 10]
    b = choose(a, choices)
    expected = np.choose(a, choices)
    res = self.executor.execute_tensor(b, concat=True)[0]
    np.testing.assert_array_equal(res, expected)

    # Index and choices with different (2,1,1) / (1,3,1) / (1,1,5) shapes.
    a = np.array([0, 1]).reshape((2, 1, 1))
    c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
    c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))
    b = choose(a, (c1, c2))
    expected = np.choose(a, (c1, c2))
    res = self.executor.execute_tensor(b, concat=True)[0]
    np.testing.assert_array_equal(res, expected)

    # test order
    a = np.array([0, 1]).reshape((2, 1, 1), order='F')
    c1 = np.array([1, 2, 3]).reshape((1, 3, 1), order='F')
    c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5), order='F')
    b = choose(a, (c1, c2))
    expected = np.choose(a, (c1, c2))
    res = self.executor.execute_tensor(b, concat=True)[0]
    np.testing.assert_array_equal(res, expected)
    self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
    self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])

    # `res.shape` from the previous run sizes the Fortran-ordered `out` buffers.
    b = choose(a, (c1, c2), out=tensor(np.empty(res.shape, order='F')))
    expected = np.choose(a, (c1, c2), out=np.empty(res.shape, order='F'))
    res = self.executor.execute_tensor(b, concat=True)[0]
    np.testing.assert_array_equal(res, expected)
    self.assertEqual(res.flags['C_CONTIGUOUS'], expected.flags['C_CONTIGUOUS'])
    self.assertEqual(res.flags['F_CONTIGUOUS'], expected.flags['F_CONTIGUOUS'])


def testUnravelExecution(self):
    """Compare stacked ``unravel_index`` output with ``np.unravel_index``."""
    a = tensor([22, 41, 37], chunk_size=1)
    t = stack(unravel_index(a, (7, 6)))

    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.stack(np.unravel_index([22, 41, 37], (7, 6)))
    np.testing.assert_array_equal(res, expected)


def testNonzeroExecution(self):
    """Compare ``nonzero`` (function and method form) with ``np.nonzero``."""
    data = np.array([[1, 0, 0], [0, 2, 0], [1, 1, 0]])
    x = tensor(data, chunk_size=2)

    # The per-axis index tensors are hstack-ed so one tensor can be executed.
    t = hstack(nonzero(x))
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.hstack(np.nonzero(data))
    np.testing.assert_array_equal(res, expected)

    t = hstack((x > 1).nonzero())
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.hstack(np.nonzero(data > 1))
    np.testing.assert_array_equal(res, expected)


def testFlatnonzeroExecution(self):
    """Compare ``flatnonzero`` with ``np.flatnonzero`` on ``arange(-2, 3)``."""
    x = arange(-2, 3, chunk_size=2)

    t = flatnonzero(x)
    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.flatnonzero(np.arange(-2, 3))
    np.testing.assert_equal(res, expected)


def testFillDiagonalExecution(self):
    """Compare ``fill_diagonal`` with ``np.fill_diagonal`` on 2-d and 3-d inputs.

    Covers one chunk vs. several chunks, ``wrap`` on and off, and fill values
    given as scalar, list, ndarray and tensor.
    """

    def np_filled(raw, val, **kw):
        # Dense copy of `raw` with its diagonal filled by NumPy (the reference).
        if hasattr(raw, 'nnz'):  # sparse
            # toarray() instead of the deprecated `.A` shortcut
            dense = raw.toarray()
        else:
            dense = raw.copy()
        np.fill_diagonal(dense, val, **kw)
        return dense

    def check(raw, chunk_size, val, expected, **kw):
        # Fill the diagonal of a freshly built tensor and compare to `expected`.
        t = tensor(raw, chunk_size=chunk_size)
        fill_diagonal(t, val, **kw)
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_equal(np.asarray(res), expected)

    # 2-d
    raws = [
        np.random.rand(30, 11),
        np.random.rand(15, 15),
        np.random.rand(11, 30),
        sps.random(30, 11, density=0.1, format='csr'),
    ]

    for raw in raws:
        # test 1 chunk, wrap=False
        check(raw, 30, 1, np_filled(raw, 1))
        # test 1 chunk, wrap=True
        check(raw, 30, 1, np_filled(raw, 1, wrap=True), wrap=True)

        # test multiple chunks, wrap=False
        check(raw, (12, 4), 1, np_filled(raw, 1))
        check(raw, (4, 12), 1, np_filled(raw, 1))
        # test multiple chunk, val with list type
        check(raw, (12, 4), [1, 2, 3], np_filled(raw, [1, 2, 3]))
        # test multiple chunk, val with tensor type
        check(raw, (12, 4), tensor([1, 2, 3]), np_filled(raw, [1, 2, 3]))

        # test multiple chunks, wrap=True
        check(raw, (12, 4), 1, np_filled(raw, 1, wrap=True), wrap=True)
        check(raw, (4, 12), 1, np_filled(raw, 1, wrap=True), wrap=True)
        # test multiple chunk, val with list type
        check(raw, (12, 4), [1, 2, 3],
              np_filled(raw, [1, 2, 3], wrap=True), wrap=True)
        # test multiple chunk, val with tensor type
        check(raw, (12, 4), tensor([[1, 2], [3, 4]]),
              np_filled(raw, [1, 2, 3, 4], wrap=True), wrap=True)

    # 3-d
    raw = np.random.rand(11, 11, 11)

    expected = np_filled(raw, 1)
    # wrap = True does not take effect when ndim > 2, so both references agree
    np.testing.assert_array_equal(expected, np_filled(raw, 1, wrap=True))

    # test 1 chunk
    check(raw, 30, 1, expected)
    check(raw, 30, 1, expected, wrap=True)

    # test multiple chunk
    check(raw, (3, 4, 5), 1, expected)
    check(raw, (3, 4, 5), 1, expected, wrap=True)

    # test val with list type
    check(raw, (3, 4, 5), [[1, 2], [3, 4]], np_filled(raw, [1, 2, 3, 4]))
    # test val with tensor type
    check(raw, (3, 4, 5), tensor([1, 2, 3]), np_filled(raw, [1, 2, 3]))
    # test val with tensor type which ndim == 0
    check(raw, (3, 4, 5), tensor([1, 2, 3]).sum(), np_filled(raw, 6))
    # test val with ndarray type which size is too long
    check(raw, (3, 4, 5), np.arange(20), np_filled(raw, np.arange(20)))
class Test(TestBase):
    """Execution tests for remote functions created with ``spawn``."""

    def setUp(self) -> None:
        """Create a numpy executor and enter the test context built around it."""
        super().setUp()
        self.executor = ExecutorForTest('numpy')
        self.ctx, self.executor = self._create_test_context(self.executor)
        self.ctx.__enter__()

    def tearDown(self) -> None:
        """Leave the context entered in ``setUp`` and run the base-class teardown."""
        try:
            self.ctx.__exit__(None, None, None)
        finally:
            # Mirror the super().setUp() call so base-class cleanup always runs.
            super().tearDown()

    def testRemoteFunction(self):
        """Spawned functions accept other spawned results as args and kwargs."""

        def f1(x):
            return x + 1

        def f2(x, y, z=None):
            return x * y * (z[0] + z[1])

        rs = np.random.RandomState(0)
        raw1 = rs.rand(10, 10)
        raw2 = rs.rand(10, 10)

        r1 = spawn(f1, raw1)
        r2 = spawn(f1, raw2)
        # r1/r2 are passed both positionally and nested inside a kwargs list.
        r3 = spawn(f2, (r1, r2), {'z': [r1, r2]})

        result = self.executor.execute_tileables([r3])[0]
        expected = (raw1 + 1) * (raw2 + 1) * (raw1 + 1 + raw2 + 1)
        np.testing.assert_almost_equal(result, expected)

        # kwargs given as a tuple is expected to be rejected.
        with self.assertRaises(TypeError):
            spawn(f2, (r1, r2), kwargs=())

        session = new_session()

        def f():
            # Runs inside the remote function, so a plain assert is used here.
            assert Session.default.session_id == session.session_id
            return mt.ones((2, 3)).sum().to_numpy()

        self.assertEqual(
            spawn(f).execute(session=session).fetch(session=session), 6)

    def testMultiOutput(self):
        """Word count built from two-output mappers feeding one reducer per output."""
        sentences = ['word1 word2', 'word2 word3', 'word3 word2 word1']

        def mapper(s):
            word_to_count = defaultdict(int)
            for word in s.split():
                word_to_count[word] += 1

            # Split the counts into two buckets chosen by the word's hash.
            downsides = [defaultdict(int), defaultdict(int)]
            for word, count in word_to_count.items():
                downsides[mmh3_hash(word) % 2][word] += count

            return downsides

        def reducer(word_to_count_list):
            d = defaultdict(int)
            for word_to_count in word_to_count_list:
                for word, count in word_to_count.items():
                    d[word] += count

            return dict(d)

        outs = [], []
        for sentence in sentences:
            out1, out2 = spawn(mapper, sentence, n_output=2)
            outs[0].append(out1)
            outs[1].append(out2)

        rs = [spawn(reducer, out) for out in outs]

        result = {}
        for wc in ExecutableTuple(rs).execute().fetch():
            result.update(wc)

        self.assertEqual(result, {'word1': 2, 'word2': 3, 'word3': 2})

    def testChainedRemote(self):
        """A spawned result can be the direct argument of another spawn."""

        def f(x):
            return x + 1

        def g(x):
            return x * 2

        s = spawn(g, spawn(f, 2))

        result = self.executor.execute_tileables([s])[0]
        self.assertEqual(result, 6)
class Test(unittest.TestCase):
    """Execution tests for ``_check_targets`` and ``accuracy_score``."""

    def setUp(self) -> None:
        """Enter a LocalContext whose mock session exposes this test's executor."""
        this = self

        class MockSession:
            # Resolved lazily: self.executor is only assigned after the
            # context has been constructed below.
            @property
            def executor(self):
                return this.executor

        self.ctx = ctx = LocalContext(MockSession())
        self.executor = ExecutorForTest('numpy', storage=ctx)
        ctx.__enter__()

    def tearDown(self) -> None:
        """Leave the context entered in ``setUp``."""
        self.ctx.__exit__(None, None, None)

    def test__check_targets(self):
        # Check that _check_targets correctly merges target types, squeezes
        # output and fails if input lengths differ.
        IND = 'multilabel-indicator'
        MC = 'multiclass'
        BIN = 'binary'
        CNT = 'continuous'
        MMC = 'multiclass-multioutput'
        MCN = 'continuous-multioutput'
        # all of length 3
        EXAMPLES = [
            (IND, np.array([[0, 1, 1], [1, 0, 0], [0, 0, 1]])),
            # must not be considered binary
            (IND, np.array([[0, 1], [1, 0], [1, 1]])),
            (MC, [2, 3, 1]),
            (BIN, [0, 1, 1]),
            (CNT, [0., 1.5, 1.]),
            (MC, np.array([[2], [3], [1]])),
            (BIN, np.array([[0], [1], [1]])),
            (CNT, np.array([[0.], [1.5], [1.]])),
            (MMC, np.array([[0, 2], [1, 3], [2, 3]])),
            (MCN, np.array([[0.5, 2.], [1.1, 3.], [2., 3.]])),
        ]
        # expected type given input types, or None for error
        # (types will be tried in either order)
        EXPECTED = {
            (IND, IND): IND,
            (MC, MC): MC,
            (BIN, BIN): BIN,

            (MC, IND): None,
            (BIN, IND): None,
            (BIN, MC): MC,

            # Disallowed types
            (CNT, CNT): None,
            (MMC, MMC): None,
            (MCN, MCN): None,
            (IND, CNT): None,
            (MC, CNT): None,
            (BIN, CNT): None,
            (MMC, CNT): None,
            (MCN, CNT): None,
            (IND, MMC): None,
            (MC, MMC): None,
            (BIN, MMC): None,
            (MCN, MMC): None,
            (IND, MCN): None,
            (MC, MCN): None,
            (BIN, MCN): None,
        }

        for (type1, y1), (type2, y2) in product(EXAMPLES, repeat=2):
            try:
                expected = EXPECTED[type1, type2]
            except KeyError:
                expected = EXPECTED[type2, type1]

            if expected is None:
                # One check is enough: no error message is matched, so
                # repeating the identical call per sub-case adds nothing.
                with self.assertRaises(ValueError):
                    self.executor.execute_tileables(_check_targets(y1, y2))
                continue

            merged_type, y1out, y2out = \
                self.executor.execute_tileables(_check_targets(y1, y2))
            self.assertEqual(merged_type, expected)
            if merged_type.item().startswith('multilabel'):
                self.assertIsInstance(y1out, SparseNDArray)
                self.assertIsInstance(y2out, SparseNDArray)
            else:
                np.testing.assert_array_equal(y1out, np.squeeze(y1))
                np.testing.assert_array_equal(y2out, np.squeeze(y2))

            # Inputs of different length must be rejected.
            with self.assertRaises(ValueError):
                self.executor.execute_tileables(_check_targets(y1[:-1], y2))

    @unittest.skipIf(sklearn is None, 'scikit-learn not installed')
    def testAccuracyScore(self):
        """Compare ``accuracy_score`` with scikit-learn's implementation."""
        y_pred = [0, 2, 1, 3]
        y_true = [0, 1, 2, 3]

        score = accuracy_score(y_true, y_pred)
        result = self.executor.execute_tileables([score])[0]
        expected = sklearn_accuracy_score(y_true, y_pred)
        self.assertAlmostEqual(result, expected)

        score = accuracy_score(y_true, y_pred, normalize=False)
        result = self.executor.execute_tileables([score])[0]
        expected = sklearn_accuracy_score(y_true, y_pred, normalize=False)
        self.assertAlmostEqual(result, expected)

        # 2-d targets.
        y_pred = np.array([[0, 1], [1, 1]])
        y_true = np.ones((2, 2))

        score = accuracy_score(y_true, y_pred)
        result = self.executor.execute_tileables([score])[0]
        expected = sklearn_accuracy_score(y_true, y_pred)
        self.assertAlmostEqual(result, expected)

        sample_weight = [0.7, 0.3]

        score = accuracy_score(y_true, y_pred, sample_weight=sample_weight)
        result = self.executor.execute_tileables([score])[0]
        expected = sklearn_accuracy_score(y_true, y_pred,
                                          sample_weight=sample_weight)
        self.assertAlmostEqual(result, expected)

        # Same inputs passed as tensors instead of raw arrays/lists.
        score = accuracy_score(mt.tensor(y_true), mt.tensor(y_pred),
                               sample_weight=mt.tensor(sample_weight),
                               normalize=False)
        result = self.executor.execute_tileables([score])[0]
        expected = sklearn_accuracy_score(y_true, y_pred,
                                          sample_weight=sample_weight,
                                          normalize=False)
        self.assertAlmostEqual(result, expected)
class Test(TestBase):
    """Expression and execution tests for ``shuffle``."""

    def setUp(self):
        """Run the base-class setup, then create a numpy executor."""
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    def testShuffleExpr(self):
        """Check op sharing, shapes and tiled chunk metadata of shuffled outputs."""
        a = mt.random.rand(10, 3, chunk_size=2)
        b = md.DataFrame(mt.random.rand(10, 5), chunk_size=2)

        new_a, new_b = shuffle(a, b, random_state=0)

        # Both outputs come from one and the same shuffle op.
        self.assertIs(new_a.op, new_b.op)
        self.assertIsInstance(new_a.op, LearnShuffle)
        self.assertEqual(new_a.shape, a.shape)
        self.assertEqual(new_b.shape, b.shape)
        self.assertNotEqual(b.index_value.key, new_b.index_value.key)

        new_a = new_a.tiles()
        new_b = get_tiled(new_b)

        # After tiling, the first-axis extent of a chunk is asserted to be NaN.
        self.assertEqual(len(new_a.chunks), 10)
        self.assertTrue(np.isnan(new_a.chunks[0].shape[0]))
        self.assertEqual(len(new_b.chunks), 15)
        self.assertTrue(np.isnan(new_b.chunks[0].shape[0]))
        self.assertNotEqual(new_b.chunks[0].index_value.key,
                            new_b.chunks[1].index_value.key)
        self.assertEqual(new_a.chunks[0].op.seeds, new_b.chunks[0].op.seeds)

        c = mt.random.rand(10, 5, 3, chunk_size=2)
        d = md.DataFrame(mt.random.rand(10, 5), chunk_size=(2, 5))

        new_c, new_d = shuffle(c, d, axes=(0, 1), random_state=0)

        self.assertIs(new_c.op, new_d.op)
        self.assertIsInstance(new_c.op, LearnShuffle)
        self.assertEqual(new_c.shape, c.shape)
        self.assertEqual(new_d.shape, d.shape)
        self.assertNotEqual(d.index_value.key, new_d.index_value.key)
        # Column labels are no longer strictly increasing, yet the same set
        # of dtypes is present once sorted back.
        self.assertFalse(np.all(new_d.dtypes.index[:-1] < new_d.dtypes.index[1:]))
        pd.testing.assert_series_equal(d.dtypes, new_d.dtypes.sort_index())

        new_c = new_c.tiles()
        new_d = get_tiled(new_d)

        self.assertEqual(len(new_c.chunks), 5 * 1 * 2)
        self.assertTrue(np.isnan(new_c.chunks[0].shape[0]))
        self.assertEqual(len(new_d.chunks), 5)
        self.assertTrue(np.isnan(new_d.chunks[0].shape[0]))
        self.assertEqual(new_d.chunks[0].shape[1], 5)
        self.assertNotEqual(new_d.chunks[0].index_value.key,
                            new_d.chunks[1].index_value.key)
        pd.testing.assert_series_equal(new_d.chunks[0].dtypes.sort_index(), d.dtypes)
        self.assertEqual(new_c.chunks[0].op.seeds, new_d.chunks[0].op.seeds)
        self.assertEqual(len(new_c.chunks[0].op.seeds), 1)
        self.assertEqual(new_c.chunks[0].op.reduce_sizes, (5,))

        # Build the inputs outside the assertRaises block so that only the
        # shuffle() call itself can satisfy the expected exception.
        a = mt.random.rand(10, 5)
        b = mt.random.rand(10, 4, 3)
        with self.assertRaises(ValueError):
            shuffle(a, b, axes=1)

        with self.assertRaises(TypeError):
            shuffle(a, b, unknown_param=True)

        # A single input yields a single tensor.
        self.assertIsInstance(shuffle(mt.random.rand(10, 5)), mt.Tensor)

    @staticmethod
    def _sort(data, axes):
        """Return ``data`` sorted along every axis in ``axes`` that it actually has."""
        cur = data
        for ax in axes:
            if ax < data.ndim:
                cur = np.sort(cur, axis=ax)
        return cur

    def testShuffleExecution(self):
        """Execute shuffles over tensors, a DataFrame and a Series and verify results."""
        # test consistency
        s1 = np.arange(9).reshape(3, 3)
        s2 = np.arange(1, 10).reshape(3, 3)
        ts1 = mt.array(s1, chunk_size=2)
        ts2 = mt.array(s2, chunk_size=2)

        ret = shuffle(ts1, ts2, axes=[0, 1], random_state=0)
        res1, res2 = self.executor.execute_tileables(ret)

        # calc row index: find the result column holding the same values as
        # s1's first column; its element order gives the row permutation.
        s1_col_0 = s1[:, 0].tolist()
        rs1_col_0 = [res1[:, i] for i in range(3)
                     if set(s1_col_0) == set(res1[:, i])][0]
        row_index = [s1_col_0.index(j) for j in rs1_col_0]
        # calc col index: same idea using s1's first row.
        s1_row_0 = s1[0].tolist()
        rs1_row_0 = [res1[i] for i in range(3)
                     if set(s1_row_0) == set(res1[i])][0]
        col_index = [s1_row_0.index(j) for j in rs1_row_0]
        # The second input must have been permuted the same way as the first.
        np.testing.assert_array_equal(res2, s2[row_index][:, col_index])

        # tensor + tensor
        raw1 = np.random.rand(10, 15, 20)
        t1 = mt.array(raw1, chunk_size=8)
        raw2 = np.random.rand(10, 15, 20)
        t2 = mt.array(raw2, chunk_size=5)

        for axes in [(0,), (0, 1), (0, 2), (1, 2), (0, 1, 2)]:
            ret = shuffle(t1, t2, axes=axes, random_state=0)
            res1, res2 = self.executor.execute_tileables(ret)

            self.assertEqual(res1.shape, raw1.shape)
            self.assertEqual(res2.shape, raw2.shape)
            # Sorting along the shuffled axes must give back identical data.
            np.testing.assert_array_equal(self._sort(raw1, axes), self._sort(res1, axes))
            np.testing.assert_array_equal(self._sort(raw2, axes), self._sort(res2, axes))

        # tensor + tensor(more dimension)
        raw3 = np.random.rand(10, 15)
        t3 = mt.array(raw3, chunk_size=(8, 15))
        raw4 = np.random.rand(10, 15, 20)
        t4 = mt.array(raw4, chunk_size=(5, 15, 10))

        for axes in [(1,), (0, 1), (1, 2)]:
            ret = shuffle(t3, t4, axes=axes, random_state=0)
            res3, res4 = self.executor.execute_tileables(ret)

            self.assertEqual(res3.shape, raw3.shape)
            self.assertEqual(res4.shape, raw4.shape)
            np.testing.assert_array_equal(self._sort(raw3, axes), self._sort(res3, axes))
            np.testing.assert_array_equal(self._sort(raw4, axes), self._sort(res4, axes))

        # tensor + dataframe + series
        raw5 = np.random.rand(10, 15, 20)
        t5 = mt.array(raw5, chunk_size=8)
        raw6 = pd.DataFrame(np.random.rand(10, 15))
        df = md.DataFrame(raw6, chunk_size=(8, 15))
        raw7 = pd.Series(np.random.rand(10))
        series = md.Series(raw7, chunk_size=8)

        for axes in [(0,), (1,), (0, 1), (1, 2), [0, 1, 2]]:
            ret = shuffle(t5, df, series, axes=axes, random_state=0)
            # skip check nsplits because it's updated
            res5, res_df, res_series = self.executor.execute_tileables(
                ret, check_nsplits=False)

            self.assertEqual(res5.shape, raw5.shape)
            self.assertEqual(res_df.shape, df.shape)
            self.assertEqual(res_series.shape, series.shape)