Example #1
0
class Test(unittest.TestCase):
    def setUp(self):
        """Create the numpy-backed executor used by each test."""
        backend = 'numpy'
        self.executor = ExecutorForTest(backend)

    def _nan_equal(self, a, b):
        try:
            np.testing.assert_equal(a, b)
        except AssertionError:
            return False
        return True

    def testBaseExecution(self):
        """Add a scalar to chunked tensors and inspect the first chunk."""
        # Tensor of ones: the first returned chunk is compared with a
        # 2x2 block of twos.
        summed = ones((10, 8), chunk_size=2) + 1
        chunks = self.executor.execute_tensor(summed)
        self.assertTrue((chunks[0] == np.ones((2, 2)) + 1).all())

        # Tensor built from random 3-d data: same check against the
        # matching leading slice of the raw array.
        raw = np.random.random((10, 8, 3))
        summed = tensor(raw, chunk_size=2) + 1
        chunks = self.executor.execute_tensor(summed)
        self.assertTrue((chunks[0] == raw[:2, :2, :2] + 1).all())

    def testBaseOrderExecution(self):
        """Check the memory layout of ``tensor + 1`` for Fortran input."""
        def run(expr):
            return self.executor.execute_tensor(expr, concat=True)[0]

        fortran_raw = np.asfortranarray(np.random.rand(5, 6))
        t = tensor(fortran_raw, chunk_size=3)

        # Without an explicit order the result is expected to be
        # Fortran-contiguous only.
        default_res = run(t + 1)
        np.testing.assert_array_equal(default_res, fortran_raw + 1)
        self.assertFalse(default_res.flags['C_CONTIGUOUS'])
        self.assertTrue(default_res.flags['F_CONTIGUOUS'])

        # With order='C' the result is expected to be C-contiguous only.
        c_res = run(add(t, 1, order='C'))
        np.testing.assert_array_equal(c_res,
                                      np.add(fortran_raw, 1, order='C'))
        self.assertTrue(c_res.flags['C_CONTIGUOUS'])
        self.assertFalse(c_res.flags['F_CONTIGUOUS'])

    @staticmethod
    def _get_func(op):
        if isinstance(op, str):
            return getattr(np, op)
        return op

    def testUfuncExecution(self):
        """Compare each Mars unary/binary ufunc with its NumPy reference.

        ``arccosh``, ``invert`` and the modulo/bitwise/shift/``ldexp`` group
        are run on random integers; every other ufunc is run on random
        floats.  Binary ufuncs are checked with tensor-tensor, tensor-scalar
        and scalar-tensor operands.
        """
        from mars.tensor.arithmetic import UNARY_UFUNC, BIN_UFUNC, arccosh, \
            invert, mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp

        int_unary = {arccosh, invert}
        int_binary = {
            mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp
        }

        def run(t):
            return self.executor.execute_tensor(t, concat=True)

        def check_unary(funcs, t, raw):
            for ufunc in funcs:
                out = ufunc(t)
                actual = run(out)
                wanted = self._get_func(out.op._func_name)(raw)
                self.assertTrue(np.allclose(actual[0], wanted))

        def check_binary(funcs, lhs, rhs, raw_lhs, raw_rhs, scalar):
            for ufunc in funcs:
                outs = [ufunc(lhs, rhs),
                        ufunc(lhs, scalar),
                        ufunc(scalar, lhs)]
                actuals = [run(out) for out in outs]

                # The NumPy reference is looked up from the first tensor's
                # op name and reused for all three operand combinations.
                np_func = self._get_func(outs[0].op._func_name)
                wanteds = [np_func(raw_lhs, raw_rhs),
                           np_func(raw_lhs, scalar),
                           np_func(scalar, raw_lhs)]

                for actual, wanted in zip(actuals, wanteds):
                    self.assertTrue(np.allclose(actual[0], wanted))

        # float inputs for the general-purpose ufuncs
        float1 = np.random.random((5, 9, 4))
        float2 = np.random.random((5, 9, 4))
        float_scalar = np.random.random()
        t1 = tensor(float1, chunk_size=3)
        t2 = tensor(float2, chunk_size=3)

        check_unary(UNARY_UFUNC - int_unary, t1, float1)
        check_binary(BIN_UFUNC - int_binary, t1, t2, float1, float2,
                     float_scalar)

        # integer inputs for the remaining ufuncs
        int1 = np.random.randint(2, 10, size=(10, 10, 10))
        int2 = np.random.randint(2, 10, size=(10, 10, 10))
        int_scalar = np.random.randint(1, 10)
        t1 = tensor(int1, chunk_size=6)
        t2 = tensor(int2, chunk_size=6)

        check_unary(int_unary, t1, int1)
        check_binary(int_binary, t1, t2, int1, int2, int_scalar)

    @staticmethod
    def _get_sparse_func(op):
        """Wrap ``op`` so that sparse arguments are densified before the call.

        ``op`` may be a callable or the name of an ``np`` attribute.  The
        returned function converts every argument for which ``issparse`` is
        true with ``toarray()`` and forwards the rest untouched.
        """
        from mars.lib.sparse.core import issparse

        np_op = getattr(np, op) if isinstance(op, str) else op

        def func(*args):
            dense_args = [
                arg.toarray() if issparse(arg) else arg for arg in args
            ]
            return np_op(*dense_args)

        return func

    @staticmethod
    def toarray(x):
        if hasattr(x, 'toarray'):
            return x.toarray()
        return x

    @ignore_warning
    def testSparseUfuncExecution(self):
        """Run the unary and binary ufuncs on sparse tensors.

        Mirrors the dense ufunc test: ``arccosh``, ``invert`` and the
        modulo/bitwise/shift/``ldexp`` group run on integer data converted
        with ``tosparse()``, every other ufunc on ``sps.random`` matrices.

        NOTE(review): every comparison below calls ``self._nan_equal`` and
        discards the boolean it returns, so a mismatch never fails this test.
        As written the test only proves that execution does not raise.
        Confirm whether these calls should be wrapped in ``assertTrue``.
        """
        from mars.tensor.arithmetic import UNARY_UFUNC, BIN_UFUNC, arccosh, \
            invert, mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp

        # ufuncs that are exercised on integer input further below
        _sp_unary_ufunc = {arccosh, invert}
        _sp_bin_ufunc = {
            mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp
        }

        data1 = sps.random(5, 9, density=.1)
        data2 = sps.random(5, 9, density=.2)
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)

        _new_unary_ufunc = UNARY_UFUNC - _sp_unary_ufunc
        for func in _new_unary_ufunc:
            res_tensor = func(arr1)
            res = self.executor.execute_tensor(res_tensor, concat=True)
            expected = self._get_sparse_func(res_tensor.op._func_name)(data1)
            # result of the comparison is not checked (see NOTE above)
            self._nan_equal(self.toarray(res[0]), expected)

        _new_bin_ufunc = BIN_UFUNC - _sp_bin_ufunc
        for func in _new_bin_ufunc:
            # tensor-tensor, tensor-scalar and scalar-tensor operands
            res_tensor1 = func(arr1, arr2)
            res_tensor2 = func(arr1, rand)
            res_tensor3 = func(rand, arr1)

            res1 = self.executor.execute_tensor(res_tensor1, concat=True)
            res2 = self.executor.execute_tensor(res_tensor2, concat=True)
            res3 = self.executor.execute_tensor(res_tensor3, concat=True)

            # the op name of the first tensor selects the NumPy reference
            # for all three operand combinations
            expected1 = self._get_sparse_func(res_tensor1.op._func_name)(data1,
                                                                         data2)
            expected2 = self._get_sparse_func(res_tensor1.op._func_name)(data1,
                                                                         rand)
            expected3 = self._get_sparse_func(res_tensor1.op._func_name)(rand,
                                                                         data1)

            # results of the comparisons are not checked (see NOTE above)
            self._nan_equal(self.toarray(res1[0]), expected1)
            self._nan_equal(self.toarray(res2[0]), expected2)
            self._nan_equal(self.toarray(res3[0]), expected3)

        # integer data, converted to sparse tensors
        data1 = np.random.randint(2, 10, size=(10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=3).tosparse()
        arr2 = tensor(data2, chunk_size=3).tosparse()

        for func in _sp_unary_ufunc:
            res_tensor = func(arr1)
            res = self.executor.execute_tensor(res_tensor, concat=True)
            expected = self._get_sparse_func(res_tensor.op._func_name)(data1)
            # result of the comparison is not checked (see NOTE above)
            self._nan_equal(self.toarray(res[0]), expected)

        for func in _sp_bin_ufunc:
            res_tensor1 = func(arr1, arr2)
            res_tensor2 = func(arr1, rand)
            res_tensor3 = func(rand, arr1)

            res1 = self.executor.execute_tensor(res_tensor1, concat=True)
            res2 = self.executor.execute_tensor(res_tensor2, concat=True)
            res3 = self.executor.execute_tensor(res_tensor3, concat=True)

            expected1 = self._get_sparse_func(res_tensor1.op._func_name)(data1,
                                                                         data2)
            expected2 = self._get_sparse_func(res_tensor1.op._func_name)(data1,
                                                                         rand)
            expected3 = self._get_sparse_func(res_tensor1.op._func_name)(rand,
                                                                         data1)

            # results of the comparisons are not checked (see NOTE above)
            self._nan_equal(self.toarray(res1[0]), expected1)
            self._nan_equal(self.toarray(res2[0]), expected2)
            self._nan_equal(self.toarray(res3[0]), expected3)

    def testAddWithOutExecution(self):
        """Exercise the ``out``, ``casting`` and ``where`` ufunc arguments."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        raw_a = np.random.random((5, 9, 4))
        raw_b = np.random.random((9, 4))

        # add in place: the result is read back through the ``out`` tensor
        t_a = tensor(raw_a.copy(), chunk_size=3)
        t_b = tensor(raw_b.copy(), chunk_size=3)
        add(t_a, t_b, out=t_a)
        in_place = run(t_a)
        self.assertTrue(np.array_equal(in_place, raw_a + raw_b))

        # ``out`` with another dtype, allowed through unsafe casting
        t_a = tensor(raw_a.copy(), chunk_size=3)
        t_b = tensor(raw_b.copy(), chunk_size=3)
        cast_sum = add(t_a, t_b, out=t_a.astype('i4'), casting='unsafe')
        cast_res = run(cast_sum)
        np.testing.assert_array_equal(cast_res,
                                      (raw_a + raw_b).astype('i4'))

        # ``out`` combined with ``where``
        t_a = tensor(raw_a.copy(), chunk_size=3)
        t_b = tensor(raw_b.copy(), chunk_size=3)
        quotient = truediv(t_a, t_b, out=t_a, where=t_b > .5)
        quotient_res = run(quotient)
        reference = np.true_divide(raw_a,
                                   raw_b,
                                   out=raw_a.copy(),
                                   where=raw_b > .5)
        self.assertTrue(np.array_equal(quotient_res, reference))

        # ``where`` without ``out``: only the selected entries are compared
        t_a = tensor(raw_a.copy(), chunk_size=4)
        t_b = tensor(raw_b.copy(), chunk_size=4)
        masked_sum = add(t_a, t_b, where=t_a > .5)
        masked_res = run(masked_sum)
        reference = np.add(raw_a, raw_b, where=raw_a > .5)
        self.assertTrue(
            np.array_equal(masked_res[raw_a > .5], reference[raw_a > .5]))

        # ``where`` with a scalar operand
        t_a = tensor(raw_a.copy(), chunk_size=4)
        masked_inc = add(t_a, 1, where=t_a > .5)
        masked_res = run(masked_inc)
        reference = np.add(raw_a, 1, where=raw_a > .5)
        self.assertTrue(
            np.array_equal(masked_res[raw_a > .5], reference[raw_a > .5]))

        # ``out`` is a slice of the tensor the input slice comes from
        t_b = tensor(raw_b.copy(), chunk_size=3)
        sliced = add(t_b[:5, :], 1, out=t_b[-5:, :])
        sliced_res = run(sliced)
        reference = np.add(raw_b[:5, :], 1)
        self.assertTrue(np.array_equal(sliced_res, reference))

    def testArctan2Execution(self):
        """Check ``arctan2`` values and sparsity for mixed operand kinds."""
        def check(t, expect_sparse, expected):
            # sparsity of the lazy tensor first, then the executed value
            sparse_assert = (self.assertTrue
                             if expect_sparse else self.assertFalse)
            sparse_assert(t.issparse())
            actual = self.executor.execute_tensor(t, concat=True)[0]
            np.testing.assert_equal(actual, expected)

        scalar = tensor(1)  # scalar

        check(arctan2(scalar, scalar), False, np.arctan2(1, 1))
        check(arctan2(0, scalar), False, np.arctan2(0, 1))

        dense = np.array([[0, 1, 2]])
        sparse = sps.csr_matrix([[0, 1, 0]])

        # dense with sparse gives a dense result
        check(arctan2(dense, sparse), False, np.arctan2(dense, sparse.A))

        # sparse with sparse, and zero with sparse, stay sparse
        check(arctan2(sparse, sparse), True,
              np.arctan2(sparse.A, sparse.A))
        check(arctan2(0, sparse), True, np.arctan2(0, sparse.A))

    def testFrexpExecution(self):
        """Check ``frexp`` via the sum of its two outputs."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        dense = np.random.random((5, 9, 4))

        # outputs returned by the call
        source = tensor(dense.copy(), chunk_size=3)
        mantissa, exponent = frexp(source)
        total = run(mantissa + exponent)
        reference = sum(np.frexp(dense))
        self.assertTrue(np.allclose(total, reference))

        # outputs written into pre-allocated tensors
        source = tensor(dense.copy(), chunk_size=3)
        mantissa = zeros(dense.shape, chunk_size=3)
        exponent = zeros(dense.shape, dtype='i8', chunk_size=3)
        frexp(source, mantissa, exponent)
        total = run(mantissa + exponent)
        reference = sum(np.frexp(dense))
        self.assertTrue(np.allclose(total, reference))

        # sparse input
        sparse = sps.random(5, 9, density=.1)
        source = tensor(sparse.copy(), chunk_size=3)
        mantissa, exponent = frexp(source)
        total = run(mantissa + exponent)
        reference = sum(np.frexp(sparse.toarray()))
        np.testing.assert_equal(total.toarray(), reference)

    def testFrexpOrderExecution(self):
        """Both ``frexp`` outputs must honour ``order='F'``."""
        raw = np.random.random((5, 9))
        source = tensor(raw, chunk_size=3)

        out_m, out_e = frexp(source, order='F')
        res_m, res_e = self.executor.execute_tileables([out_m, out_e])
        exp_m, exp_e = np.frexp(raw, order='F')

        for actual, wanted in ((res_m, exp_m), (res_e, exp_e)):
            np.testing.assert_allclose(actual, wanted)
            self.assertTrue(actual.flags['F_CONTIGUOUS'])
            self.assertFalse(actual.flags['C_CONTIGUOUS'])

    def testModfExecution(self):
        """Check ``modf`` via the sum of its two outputs."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        dense = np.random.random((5, 9))

        # tensor input
        source = tensor(dense.copy(), chunk_size=3)
        first, second = modf(source)
        total = run(first + second)
        reference = sum(np.modf(dense))
        self.assertTrue(np.allclose(total, reference))

        # plain list input
        first, second = modf([0, 3.5])
        total = run(first + second)
        reference = sum(np.modf([0, 3.5]))
        self.assertTrue(np.allclose(total, reference))

        # outputs written into pre-allocated tensors
        source = tensor(dense.copy(), chunk_size=3)
        first = zeros(dense.shape, chunk_size=3)
        second = zeros(dense.shape, chunk_size=3)
        modf(source, first, second)
        total = run(first + second)
        reference = sum(np.modf(dense))
        self.assertTrue(np.allclose(total, reference))

        # sparse input
        sparse = sps.random(5, 9, density=.1)
        source = tensor(sparse.copy(), chunk_size=3)
        first, second = modf(source)
        total = run(first + second)
        reference = sum(np.modf(sparse.toarray()))
        np.testing.assert_equal(total.toarray(), reference)

    def testModfOrderExecution(self):
        """Both ``modf`` outputs must honour ``order='F'``."""
        raw = np.random.random((5, 9))
        source = tensor(raw, chunk_size=3)

        out_a, out_b = modf(source, order='F')
        res_a, res_b = self.executor.execute_tileables([out_a, out_b])
        exp_a, exp_b = np.modf(raw, order='F')

        for actual, wanted in ((res_a, exp_a), (res_b, exp_b)):
            np.testing.assert_allclose(actual, wanted)
            self.assertTrue(actual.flags['F_CONTIGUOUS'])
            self.assertFalse(actual.flags['C_CONTIGUOUS'])

    def testClipExecution(self):
        """Check ``clip`` with scalar, tensor, list and sparse bounds."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        raw = np.arange(10)

        # scalar bounds
        source = tensor(raw.copy(), chunk_size=3)
        clipped = clip(source, 1, 8)
        actual = run(clipped)
        reference = np.clip(raw, 1, 8)
        self.assertTrue(np.array_equal(actual, reference))

        # scalar bounds, written back into the input tensor
        source = tensor(raw.copy(), chunk_size=3)
        clip(source, 3, 6, out=source)
        actual = run(source)
        reference = np.clip(raw, 3, 6)
        self.assertTrue(np.array_equal(actual, reference))

        # tensor bounds, written back into the input tensor
        source = tensor(raw.copy(), chunk_size=3)
        lows = np.random.randint(1, 10, size=(10, ))
        highs = np.random.randint(1, 10, size=(10, ))
        low_t = tensor(lows)
        high_t = tensor(highs)
        clip(source, low_t, high_t, out=source)
        actual = run(source)
        reference = np.clip(raw, lows, highs)
        self.assertTrue(np.array_equal(actual, reference))

        with option_context() as opts:
            opts.chunk_size = 3

            # list lower bound
            source = tensor(raw.copy(), chunk_size=3)
            clipped = clip(source, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
            actual = run(clipped)
            reference = np.clip(raw, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
            self.assertTrue(np.array_equal(actual, reference))

            # test sparse clip
            sparse_raw = sps.csr_matrix([[0, 2, 8], [0, 0, -1]])
            source = tensor(sparse_raw, chunk_size=3)
            sparse_low = sps.csr_matrix([[0, 3, 0], [1, 0, -2]])

            clipped = clip(source, sparse_low, 4)
            actual = run(clipped)
            reference = np.clip(sparse_raw.toarray(), sparse_low.toarray(),
                                4)
            self.assertTrue(np.array_equal(actual.toarray(), reference))

    def testClipOrderExecution(self):
        """``clip`` on Fortran-ordered input must stay Fortran-contiguous."""
        fortran_raw = np.asfortranarray(np.random.rand(4, 8))
        clipped = clip(tensor(fortran_raw, chunk_size=3), 0.2, 0.8)

        actual = self.executor.execute_tensor(clipped, concat=True)[0]
        reference = np.clip(fortran_raw, 0.2, 0.8)

        np.testing.assert_allclose(actual, reference)
        flags = actual.flags
        self.assertTrue(flags['F_CONTIGUOUS'])
        self.assertFalse(flags['C_CONTIGUOUS'])

    def testAroundExecution(self):
        """Check ``round(2)`` on a dense and on a sparse tensor."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        # dense input
        dense = np.random.randn(10, 20)
        rounded = tensor(dense, chunk_size=3).round(2)
        actual = run(rounded)
        reference = np.around(dense, decimals=2)
        np.testing.assert_allclose(actual, reference)

        # sparse input
        sparse = sps.random(10, 20, density=.2)
        rounded = tensor(sparse, chunk_size=3).round(2)
        actual = run(rounded)
        reference = np.around(sparse.toarray(), decimals=2)
        np.testing.assert_allclose(actual.toarray(), reference)

    def testAroundOrderExecution(self):
        """``round`` on Fortran-ordered input must stay Fortran-contiguous."""
        fortran_raw = np.asfortranarray(np.random.rand(10, 20))
        rounded = tensor(fortran_raw, chunk_size=3).round(2)

        actual = self.executor.execute_tensor(rounded, concat=True)[0]
        reference = np.around(fortran_raw, decimals=2)

        np.testing.assert_allclose(actual, reference)
        flags = actual.flags
        self.assertTrue(flags['F_CONTIGUOUS'])
        self.assertFalse(flags['C_CONTIGUOUS'])

    def testCosOrderExecution(self):
        """Check the memory layout of ``cos`` with and without ``order``."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        fortran_raw = np.asfortranarray(np.random.rand(3, 5))
        source = tensor(fortran_raw, chunk_size=2)

        # default order: Fortran-contiguous only
        default_res = run(cos(source))
        np.testing.assert_allclose(default_res, np.cos(fortran_raw))
        self.assertFalse(default_res.flags['C_CONTIGUOUS'])
        self.assertTrue(default_res.flags['F_CONTIGUOUS'])

        # order='C': C-contiguous only
        c_res = run(cos(source, order='C'))
        np.testing.assert_allclose(c_res, np.cos(fortran_raw, order='C'))
        self.assertTrue(c_res.flags['C_CONTIGUOUS'])
        self.assertFalse(c_res.flags['F_CONTIGUOUS'])

    def testIsCloseExecution(self):
        """Check ``isclose`` for dense, scalar and sparse operands."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        left = np.array([1.05, 1.0, 1.01, np.nan])
        right = np.array([1.04, 1.0, 1.03, np.nan])

        # the two operands deliberately use different chunk sizes
        x = tensor(left, chunk_size=2)
        y = tensor(right, chunk_size=3)

        actual = run(isclose(x, y, atol=.01))
        reference = np.isclose(left, right, atol=.01)
        np.testing.assert_equal(actual, reference)

        actual = run(isclose(x, y, atol=.01, equal_nan=True))
        reference = np.isclose(left, right, atol=.01, equal_nan=True)
        np.testing.assert_equal(actual, reference)

        # test tensor with scalar
        actual = run(isclose(x, 1.0, atol=.01))
        reference = np.isclose(left, 1.0, atol=.01)
        np.testing.assert_equal(actual, reference)

        actual = run(isclose(1.0, y, atol=.01))
        reference = np.isclose(1.0, right, atol=.01)
        np.testing.assert_equal(actual, reference)

        actual = run(isclose(1.0, 2.0, atol=.01))
        reference = np.isclose(1.0, 2.0, atol=.01)
        np.testing.assert_equal(actual, reference)

        # test sparse
        left = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        right = sps.csr_matrix(np.array([0, 1.0, 1.03, np.nan]))

        x = tensor(left, chunk_size=2)
        y = tensor(right, chunk_size=3)

        actual = run(isclose(x, y, atol=.01))
        reference = np.isclose(left.toarray(), right.toarray(), atol=.01)
        np.testing.assert_equal(actual, reference)

        actual = run(isclose(x, y, atol=.01, equal_nan=True))
        reference = np.isclose(left.toarray(),
                               right.toarray(),
                               atol=.01,
                               equal_nan=True)
        np.testing.assert_equal(actual, reference)

    @ignore_warning
    def testDtypeExecution(self):
        """Check the ``dtype`` argument and division-by-zero handling."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        source = ones((10, 20), dtype='f4', chunk_size=5)

        # the requested dtype is applied to the result
        halved = run(truediv(source, 2, dtype='f8'))
        self.assertEqual(halved.dtype, np.float64)

        # division by zero yields inf under the default error state
        by_zero = run(truediv(source, 0, dtype='f8'))
        self.assertTrue(np.isinf(by_zero[0, 0]))

        # ... and raises once numpy is told to raise on divide errors
        with self.assertRaises(FloatingPointError), \
                np.errstate(divide='raise'):
            failing = truediv(source, 0, dtype='f8')
            run(failing)

    def testSetGetRealExecution(self):
        """Read and assign ``.real`` on dense and sparse complex tensors."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        def with_real(base, value):
            # NumPy reference: a copy of ``base`` with its real part replaced
            patched = base.copy()
            patched.real = value
            return patched

        raw = np.array([1 + 2j, 3 + 4j, 5 + 6j])
        t = tensor(raw, chunk_size=2)

        np.testing.assert_equal(run(t.real), raw.real)

        t.real = 9
        np.testing.assert_equal(run(t), with_real(raw, 9))

        t.real = np.array([9, 8, 7])
        np.testing.assert_equal(run(t),
                                with_real(raw, np.array([9, 8, 7])))

        # test sparse
        raw = np.array([[1 + 2j, 3 + 4j, 0], [0, 0, 0]])
        t = tensor(sps.csr_matrix(raw))

        np.testing.assert_equal(run(t.real).toarray(), raw.real)

        t.real = 9
        np.testing.assert_equal(run(t).toarray(), with_real(raw, 9))

        t.real = np.array([9, 8, 7])
        np.testing.assert_equal(run(t).toarray(),
                                with_real(raw, np.array([9, 8, 7])))

    def testSetGetImagExecution(self):
        """Read and assign ``.imag`` on dense and sparse complex tensors."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        def with_imag(base, value):
            # NumPy reference: a copy of ``base`` with its imaginary part
            # replaced
            patched = base.copy()
            patched.imag = value
            return patched

        raw = np.array([1 + 2j, 3 + 4j, 5 + 6j])
        t = tensor(raw, chunk_size=2)

        np.testing.assert_equal(run(t.imag), raw.imag)

        t.imag = 9
        np.testing.assert_equal(run(t), with_imag(raw, 9))

        t.imag = np.array([9, 8, 7])
        np.testing.assert_equal(run(t),
                                with_imag(raw, np.array([9, 8, 7])))

        # test sparse
        raw = np.array([[1 + 2j, 3 + 4j, 0], [0, 0, 0]])
        t = tensor(sps.csr_matrix(raw))

        np.testing.assert_equal(run(t.imag).toarray(), raw.imag)

        t.imag = 9
        np.testing.assert_equal(run(t).toarray(), with_imag(raw, 9))

        t.imag = np.array([9, 8, 7])
        np.testing.assert_equal(run(t).toarray(),
                                with_imag(raw, np.array([9, 8, 7])))

    def testTreeArithmeticExecution(self):
        """Check ``tree_add``/``tree_multiply`` against ``functools.reduce``."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        # dense operands plus a trailing scalar
        dense_raws = [np.random.rand(10, 10) for _ in range(10)]
        dense_tensors = [tensor(raw, chunk_size=3) for raw in dense_raws]

        added = run(tree_add(*dense_tensors, 1.0))
        np.testing.assert_array_almost_equal(
            added, 1.0 + functools.reduce(operator.add, dense_raws))

        multiplied = run(tree_multiply(*dense_tensors, 2.0))
        np.testing.assert_array_almost_equal(
            multiplied, 2.0 * functools.reduce(operator.mul, dense_raws))

        # sparse operands
        sparse_raws = [sps.random(5, 9, density=.1) for _ in range(10)]
        sparse_tensors = [tensor(raw, chunk_size=3) for raw in sparse_raws]

        added = run(tree_add(*sparse_tensors))
        np.testing.assert_array_almost_equal(
            added.toarray(),
            functools.reduce(operator.add, sparse_raws).toarray())

    @require_cupy
    def testCupyExecution(self):
        """Run one binary and one unary op on tensors created with gpu=True."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        raw_a = np.random.rand(10, 10)
        raw_b = np.random.rand(10, 10)

        gpu_a = tensor(raw_a, gpu=True, chunk_size=3)
        gpu_b = tensor(raw_b, gpu=True, chunk_size=3)

        # ``.get()`` is called on each result before comparing with NumPy
        binary_res = run(gpu_a + gpu_b)
        np.testing.assert_array_equal(binary_res.get(), (raw_a + raw_b))

        unary_res = run(cos(gpu_a))
        np.testing.assert_array_almost_equal(unary_res.get(), np.cos(raw_a))
    def testOptimizedHeadTail(self):
        """Check ``head``/``tail`` on DataFrames read from CSV and from SQL.

        A 100-row frame is written to a CSV file and to a SQLite database
        inside a temporary directory.  ``head``/``tail`` results of the Mars
        DataFrames are compared with the pandas originals.

        NOTE(review): ``self._inject_execute_data_source`` is not defined in
        the visible part of this class; it is presumably provided by the
        class this test originally belonged to -- confirm before running.
        """
        import sqlalchemy as sa

        with tempfile.TemporaryDirectory() as tempdir:
            executor = ExecutorForTest(storage=self.executor.storage)

            filename = os.path.join(tempdir, 'test_head.csv')
            rs = np.random.RandomState(0)
            pd_df = pd.DataFrame({
                'a':
                rs.randint(1000, size=(100, )).astype(np.int64),
                'b':
                rs.randint(1000, size=(100, )).astype(np.int64),
                'c': ['sss' for _ in range(100)],
                'd': ['eeee' for _ in range(100)]
            })
            pd_df.to_csv(filename, index=False)

            # chunk_bytes is a third of the file size
            size = os.path.getsize(filename)
            chunk_bytes = size / 3

            df = md.read_csv(filename, chunk_bytes=chunk_bytes)

            # test DataFrame.head
            r = df.head(3)

            with self._inject_execute_data_source(3, DataFrameReadCSV):
                result = executor.execute_tileables([r])[0]
                expected = pd_df.head(3)
                pd.testing.assert_frame_equal(result, expected)

            # test DataFrame.tail
            r = df.tail(3)

            result = executor.execute_tileables([r])[0]
            expected = pd_df.tail(3)
            pd.testing.assert_frame_equal(result.reset_index(drop=True),
                                          expected.reset_index(drop=True))

            # test head more than 1 chunk
            r = df.head(99)

            result = executor.execute_tileables([r])[0]
            result.reset_index(drop=True, inplace=True)
            expected = pd_df.head(99)
            pd.testing.assert_frame_equal(result, expected)

            # test DataFrame.tail more than 1 chunk
            r = df.tail(99)

            result = executor.execute_tileables([r])[0]
            expected = pd_df.tail(99)
            pd.testing.assert_frame_equal(result.reset_index(drop=True),
                                          expected.reset_index(drop=True))

            filename = os.path.join(tempdir, 'test_sql.db')
            conn = sa.create_engine('sqlite:///' + filename)
            try:
                pd_df.to_sql('test_sql', conn)

                df = md.read_sql('test_sql',
                                 conn,
                                 index_col='index',
                                 chunk_size=20)

                # test DataFrame.head
                r = df.head(3)

                with self._inject_execute_data_source(3, DataFrameReadSQL):
                    result = executor.execute_tileables([r])[0]
                    # the index read back from SQL carries the column name
                    result.index.name = None
                    expected = pd_df.head(3)
                    pd.testing.assert_frame_equal(result, expected)
            finally:
                # Release the engine's pooled connections so the SQLite file
                # is closed before the temporary directory is cleaned up.
                conn.dispose()
Example #3
0
class Test(TestBase):
    def setUp(self) -> None:
        """Create the executor, wrap it in a test context and enter it."""
        super().setUp()
        self.executor = ExecutorForTest('numpy')
        # _create_test_context returns the context and the executor to use
        context, executor = self._create_test_context(self.executor)
        self.ctx = context
        self.executor = executor
        self.ctx.__enter__()

    def tearDown(self) -> None:
        """Leave the test context, then run the base-class teardown.

        ``setUp`` chains to ``super().setUp()``, so the teardown chains to
        ``super().tearDown()`` as well.  The ``finally`` clause makes sure
        the base-class cleanup still runs when leaving the context raises.
        """
        try:
            self.ctx.__exit__(None, None, None)
        finally:
            super().tearDown()

    def testRemoteFunction(self):
        """Spawn plain functions, chain their results and check validation."""
        def f1(x):
            return x + 1

        def f2(x, y, z=None):
            return x * y * (z[0] + z[1])

        rng = np.random.RandomState(0)
        raw1 = rng.rand(10, 10)
        raw2 = rng.rand(10, 10)

        # spawned results are passed on both as positional args and kwargs
        inc1 = spawn(f1, raw1)
        inc2 = spawn(f1, raw2)
        combined = spawn(f2, (inc1, inc2), {'z': [inc1, inc2]})

        actual = self.executor.execute_tileables([combined])[0]
        reference = (raw1 + 1) * (raw2 + 1) * (raw1 + 1 + raw2 + 1)
        np.testing.assert_almost_equal(actual, reference)

        # kwargs given as a tuple must be rejected
        with self.assertRaises(TypeError):
            spawn(f2, (inc1, inc2), kwargs=())

        session = new_session()

        def f():
            assert Session.default.session_id == session.session_id
            return mt.ones((2, 3)).sum().to_numpy()

        fetched = spawn(f).execute(session=session).fetch(session=session)
        self.assertEqual(fetched, 6)

    def testMultiOutput(self):
        """Word count built from two-output mappers and per-bucket reducers."""
        sentences = ['word1 word2', 'word2 word3', 'word3 word2 word1']

        def mapper(s):
            word_to_count = defaultdict(lambda: 0)
            for word in s.split():
                word_to_count[word] += 1

            # split the counts into two buckets by word hash
            downsides = [defaultdict(lambda: 0), defaultdict(lambda: 0)]
            for word, count in word_to_count.items():
                downsides[mmh3_hash(word) % 2][word] += count

            return downsides

        def reducer(word_to_count_list):
            d = defaultdict(lambda: 0)
            for word_to_count in word_to_count_list:
                for word, count in word_to_count.items():
                    d[word] += count

            return dict(d)

        # one list of mapper outputs per bucket
        bucket0, bucket1 = [], []
        for sentence in sentences:
            part0, part1 = spawn(mapper, sentence, n_output=2)
            bucket0.append(part0)
            bucket1.append(part1)

        reduced = [spawn(reducer, bucket) for bucket in (bucket0, bucket1)]

        merged = dict()
        for partial in ExecutableTuple(reduced).to_object():
            merged.update(partial)

        self.assertEqual(merged, {'word1': 2, 'word2': 3, 'word3': 2})

    def testChainedRemote(self):
        """A spawned result can feed another spawn: (2 + 1) * 2 == 6."""
        def f(x):
            return x + 1

        def g(x):
            return x * 2

        inner = spawn(f, 2)
        outer = spawn(g, inner)

        value = self.executor.execute_tileables([outer])[0]
        self.assertEqual(value, 6)

    def testInputTileable(self):
        """Spawned functions accept tensors and DataFrames as arguments."""
        def f(t, x):
            return (t * x).sum().to_numpy()

        rng = np.random.RandomState(0)
        raw = rng.rand(5, 4)

        # tensor argument
        column_sums = mt.tensor(raw, chunk_size=3).sum(axis=0)
        remote = spawn(f, args=(column_sums, 3))

        sess = new_session()
        sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

        actual = remote.execute(session=sess).fetch(session=sess)
        reference = (raw.sum(axis=0) * 3).sum()
        self.assertAlmostEqual(actual, reference)

        # DataFrame arguments: one executed as is, one shuffled first
        plain_df = md.DataFrame(raw, chunk_size=3)
        plain_df.execute(session=sess)
        shuffled_df = shuffle(plain_df)
        shuffled_df.execute(session=sess)

        def f2(input_df):
            bonus = input_df.iloc[:, 0].fetch().sum()
            return input_df.sum().to_pandas() + bonus

        for frame in [plain_df, shuffled_df]:
            remote = spawn(f2, args=(frame, ))

            actual = remote.execute(session=sess).fetch(session=sess)
            reference = pd.DataFrame(raw).sum() + raw[:, 0].sum()
            pd.testing.assert_series_equal(actual, reference)
Example #4
0
class Test(TestBase):
    def setUp(self):
        """Create a numpy test executor and set the global chunk size to 10.

        The previous ``options.chunk_size`` is kept on ``self.old_chunk``.
        """
        self.executor = ExecutorForTest('numpy')
        # the right-hand side is evaluated first, so the old value is
        # captured before the option is overwritten
        self.old_chunk, options.chunk_size = options.chunk_size, 10

    def tearDown(self):
        """Put back the ``options.chunk_size`` value stored in ``self.old_chunk``."""
        options.chunk_size = self.old_chunk

    def testBoolIndexingExecution(self):
        """Check boolean-mask indexing of a chunked tensor against numpy."""
        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=3)

        # mask derived from the tensor itself
        mask = arr < .5
        masked = arr[mask]
        mock_sizes = self.executor.execute_tensor(masked, mock=True)
        chunk_results = self.executor.execute_tensor(masked)

        mock_total = sum(size[0] for size in mock_sizes)
        self.assertEqual(mock_total, arr.nbytes)
        # chunk results are concatenated and both sides sorted before
        # the comparison
        actual_sorted = np.sort(np.concatenate(chunk_results))
        np.testing.assert_array_equal(actual_sorted, np.sort(raw[raw < .5]))

        # 2-d mask built from a slice of the raw data
        mask2 = tensor(raw[:, :, 0, 0], chunk_size=3) < .5
        masked2 = arr[mask2]
        res = self.executor.execute_tensor(masked2, concat=True)[0]

        expected = raw[raw[:, :, 0, 0] < .5]
        self.assertEqual(sum(part.size for part in res), expected.size)
        self.assertEqual(res.shape, expected.shape)

        # same kind of mask on Fortran-ordered data; only the contiguity
        # flags are compared here
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=3)

        mask = tensor(raw[:, :, 0, 0], chunk_size=3) < .5
        masked = arr[mask]
        res = self.executor.execute_tensor(masked, concat=True)[0]
        expected = raw[raw[:, :, 0, 0] < .5].copy('A')

        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(res.flags[flag], expected.flags[flag])

    def testFancyIndexingNumpyExecution(self):
        """Fancy indexing where the indices are plain lists or ndarrays."""
        def assert_matches(indexed, expected):
            # execute the Mars tensor and compare element-wise with numpy
            actual = self.executor.execute_tensor(indexed, concat=True)[0]
            np.testing.assert_array_equal(actual, expected)

        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        # single list index on the first axis
        rows = [9, 10, 3, 1, 8, 10]
        arr2 = arr[rows]
        assert_matches(arr2, raw[rows])

        # ndarray index on the last axis, after a slice and an ellipsis
        perm = np.random.permutation(8)
        arr3 = arr[:2, ..., perm]
        assert_matches(arr3, raw[:2, ..., perm])

        # list index followed by a slice
        picks = [1, 3, 9, 10]
        arr4 = arr[..., picks, :5]
        assert_matches(arr4, raw[..., picks, :5])

        # two list indices separated by a slice
        first = [8, 10, 3, 1, 9, 10]
        second = [1, 3, 9, 10, 2, 7]
        arr5 = arr[first, :, second]
        assert_matches(arr5, raw[first, :, second])

        first = [1, 3, 5, 7, 9, 10]
        second = [1, 9, 9, 10, 2, 7]
        arr6 = arr[first, :, second]
        assert_matches(arr6, raw[first, :, second])
        # fancy index is ordered, no concat required
        self.assertGreater(len(get_tiled(arr6).nsplits[0]), 1)

        # 2-d list indices
        first = [[8, 10, 3], [1, 9, 10]]
        second = [[1, 3, 9], [10, 2, 7]]
        arr7 = arr[first, :, second]
        assert_matches(arr7, raw[first, :, second])

        # integer index combined with list indices of different shapes
        first = [[1, 3], [3, 7], [7, 7]]
        second = [1, 9]
        arr8 = arr[0, first, :, second]
        assert_matches(arr8, raw[0, first, :, second])

    def testFancyIndexingTensorExecution(self):
        """Fancy indexing where the indices are themselves Mars tensors.

        Each stanza builds an index tensor from a raw index, indexes the
        chunked tensor with it, and compares the executed result with the
        same indexing done by numpy on the raw data.
        """
        def run(indexed):
            return self.executor.execute_tensor(indexed, concat=True)[0]

        def assert_matches(indexed, expected):
            np.testing.assert_array_equal(run(indexed), expected)

        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        np_idx = [8, 10, 3, 1, 9, 10]
        idx = tensor(np_idx, chunk_size=4)
        arr2 = arr[idx]
        assert_matches(arr2, raw[np_idx])

        np_idx = np.random.permutation(8)
        idx = tensor(np_idx, chunk_size=3)
        arr3 = arr[:2, ..., idx]
        assert_matches(arr3, raw[:2, ..., np_idx])

        np_idx = [1, 3, 9, 10]
        idx = tensor(np_idx)
        arr4 = arr[..., idx, :5]
        assert_matches(arr4, raw[..., np_idx, :5])

        # two index tensors with different chunk sizes
        np_idx1 = [8, 10, 3, 1, 9, 10]
        np_idx2 = [1, 3, 9, 10, 2, 7]
        idx1 = tensor(np_idx1, chunk_size=4)
        idx2 = tensor(np_idx2, chunk_size=3)
        arr5 = arr[idx1, :, idx2]
        assert_matches(arr5, raw[np_idx1, :, np_idx2])

        np_idx1 = [1, 3, 5, 7, 9, 10]
        np_idx2 = [1, 9, 9, 10, 2, 7]
        idx1 = tensor(np_idx1, chunk_size=3)
        idx2 = tensor(np_idx2, chunk_size=4)
        arr6 = arr[idx1, :, idx2]
        assert_matches(arr6, raw[np_idx1, :, np_idx2])

        # 2-d index tensors
        np_idx1 = [[8, 10, 3], [1, 9, 10]]
        np_idx2 = [[1, 3, 9], [10, 2, 7]]
        idx1 = tensor(np_idx1)
        idx2 = tensor(np_idx2, chunk_size=2)
        arr7 = arr[idx1, :, idx2]
        assert_matches(arr7, raw[np_idx1, :, np_idx2])

        np_idx1 = [[1, 3], [3, 7], [7, 7]]
        np_idx2 = [1, 9]
        idx1 = tensor(np_idx1, chunk_size=(2, 1))
        idx2 = tensor(np_idx2)
        arr8 = arr[0, idx1, :, idx2]
        assert_matches(arr8, raw[0, np_idx1, :, np_idx2])

        # index produced by another tensor operation (argmax)
        raw_a = np.random.rand(30, 30)
        a = tensor(raw_a, chunk_size=(13, 17))
        b = a.argmax(axis=0)
        c = a[b, arange(30)]
        assert_matches(c, raw_a[raw_a.argmax(axis=0), np.arange(30)])

        # test one chunk
        arr = tensor(raw, chunk_size=20)

        np_idx = [8, 10, 3, 1, 9, 10]
        idx = tensor(np_idx, chunk_size=20)
        arr9 = arr[idx]
        assert_matches(arr9, raw[np_idx])

        np_idx1 = [[1, 3], [3, 7], [7, 7]]
        np_idx2 = [1, 9]
        idx1 = tensor(np_idx1)
        idx2 = tensor(np_idx2)
        arr10 = arr[0, idx1, :, idx2]
        assert_matches(arr10, raw[0, np_idx1, :, np_idx2])

        # test order
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        np_idx = [8, 10, 3, 1, 9, 10]
        idx = tensor(np_idx, chunk_size=4)
        arr11 = arr[idx]

        res = run(arr11)
        expected = raw[np_idx].copy('A')
        np.testing.assert_array_equal(res, expected)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(res.flags[flag], expected.flags[flag])

    def testSliceExecution(self):
        """Basic slicing (steps, negative bounds, new axis) versus numpy.

        Covers a dense array, a scipy sparse matrix and a Fortran-ordered
        array; for the latter the contiguity flags are compared as well.
        """
        def run(sliced):
            return self.executor.execute_tensor(sliced, concat=True)[0]

        def assert_same_layout(actual, reference):
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(actual.flags[flag], reference.flags[flag])

        # the same 4-d slice expression is applied to dense and
        # Fortran-ordered data
        stepped = np.s_[2:9:2, 3:7, -1:-9:-2, 12:-11:-4]

        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=3)

        arr2 = arr[stepped]
        np.testing.assert_array_equal(run(arr2), raw[stepped])

        arr3 = arr[-4, 2:]
        np.testing.assert_equal(run(arr3), raw[-4, 2:])

        # sparse input
        raw = sps.random(12, 14, density=.1)
        arr = tensor(raw, chunk_size=3)

        arr2 = arr[-1:-9:-2, 12:-11:-4]
        res = run(arr2)
        np.testing.assert_equal(res.toarray(),
                                raw.toarray()[-1:-9:-2, 12:-11:-4])

        # test order
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=3)

        arr2 = arr[stepped]
        res = run(arr2)
        expected = raw[stepped].copy('A')

        np.testing.assert_array_equal(res, expected)
        assert_same_layout(res, expected)

        # slice combined with a new axis
        with_new_axis = np.s_[0:13, :, None]
        arr3 = arr[with_new_axis]
        res = run(arr3)
        expected = raw[with_new_axis].copy('A')

        np.testing.assert_array_equal(res, expected)
        assert_same_layout(res, expected)

    def testMixedIndexingExecution(self):
        """Combine slices, boolean masks, new axes and fancy indices.

        Random data comes partly from the seeded ``rs`` and partly from the
        global ``np.random`` state, so the statement order matters for the
        values that are drawn.
        """
        rs = np.random.RandomState(0)
        raw = rs.random((11, 8, 12, 13))
        arr = tensor(raw, chunk_size=3)

        # boolean mask on axis 1, mixed with a stepped slice, a new axis,
        # an ellipsis and a trailing slice
        raw_cond = raw[0, :, 0, 0] < .5
        cond = tensor(raw[0, :, 0, 0], chunk_size=3) < .5
        arr2 = arr[10::-2, cond, None, ..., :5]
        size_res = self.executor.execute_tensor(arr2, mock=True)
        res = self.executor.execute_tensor(arr2, concat=True)[0]

        # the mock size is compared against a shape in which the
        # bool-indexed axis takes the full length of the mask
        new_shape = list(arr2.shape)
        new_shape[1] = cond.shape[0]
        self.assertEqual(sum(s[0] for s in size_res),
                         int(np.prod(new_shape) * arr2.dtype.itemsize))
        np.testing.assert_array_equal(res, raw[10::-2, raw_cond, None,
                                               ..., :5])

        # the same mask once as a numpy bool array and once as a tensor;
        # both are checked against the numpy mask
        b_raw = np.random.random(8)
        raw_cond = b_raw < .5
        conds = [raw_cond, tensor(b_raw, chunk_size=2) < .5]
        for cond in conds:
            arr3 = arr[-2::-3, cond, ...]
            res = self.executor.execute_tensor(arr3, concat=True)[0]

            np.testing.assert_array_equal(res, raw[-2::-3, raw_cond, ...])

        # test multiple bool index and fancy index
        # each mask has exactly 5 True entries, matching the 5 fancy indices
        cond1 = np.zeros(11, dtype=bool)
        cond1[rs.permutation(11)[:5]] = True
        cond2 = np.zeros(12, dtype=bool)
        cond2[rs.permutation(12)[:5]] = True
        f3 = np.random.randint(13, size=5)

        expected = raw[cond1, ..., cond2, f3]

        t = arr[cond1, ..., cond2, f3]
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_equal(res, expected)

        # same indices wrapped as tensors, executed inside the test context
        # with the executor returned alongside it
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            t = arr[tensor(cond1), ..., tensor(cond2), tensor(f3)]
            res = executor.execute_tensors([t])[0]
            np.testing.assert_array_equal(res, expected)

    def testSetItemExecution(self):
        """Assign into a chunked tensor by slice and by boolean mask.

        ``data`` is the shared source array. ``raw`` is rebound several
        times, sometimes to a copy and sometimes to ``data`` itself, so the
        order of the statements below is significant.
        """
        rs = np.random.RandomState(0)

        # ``raw`` and ``data`` start as the same object; the tensor and the
        # numpy reference are then both built from copies
        raw = data = rs.randint(0, 10, size=(11, 8, 12, 13))
        arr = tensor(raw.copy(), chunk_size=3)
        raw = raw.copy()

        # scalar assignment through slices plus an integer index
        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self.executor.execute_tensor(arr, concat=True)[0]

        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS'])

        raw = data
        shape = raw[idx].shape

        arr2 = tensor(raw.copy(), chunk_size=3)
        raw = raw.copy()

        # tensor-valued assignment: the value is float32 and its last axis
        # has length 1 instead of the target's last-axis length
        replace = rs.randint(10, 20, size=shape[:-1] + (1, )).astype('f4')
        arr2[idx] = tensor(replace, chunk_size=4)
        res = self.executor.execute_tensor(arr2, concat=True)[0]

        raw[idx] = replace
        np.testing.assert_array_equal(res, raw)

        # same scalar assignment on Fortran-ordered data
        raw = np.asfortranarray(np.random.randint(0, 10, size=(11, 8, 12, 13)))
        arr = tensor(raw.copy('A'), chunk_size=3)
        raw = raw.copy('A')

        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self.executor.execute_tensor(arr, concat=True)[0]

        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS'])

        # test bool indexing set
        # ``raw`` is ``data`` itself here (no copy), so the numpy assignment
        # below modifies ``data`` in place
        raw = data

        arr = tensor(raw.copy(), chunk_size=3)
        raw1 = rs.rand(11)
        arr[tensor(raw1, chunk_size=4) < 0.6, 2:7] = 3
        res = self.executor.execute_tileable(arr, concat=True)[0]

        raw[raw1 < 0.6, 2:7] = 3
        np.testing.assert_array_equal(res, raw)

        # two boolean tensor masks used together
        raw = np.random.randint(3, size=10).astype(np.int64)
        raw2 = np.arange(3)

        arr = zeros((10, 3))
        arr[tensor(raw) == 1, tensor(raw2) == 1] = 1
        res = self.executor.execute_tileable(arr, concat=True)[0]

        expected = np.zeros((10, 3))
        expected[raw == 1, raw2 == 1] = 1
        np.testing.assert_array_equal(res, expected)

        # NOTE(review): ``executor`` returned here is never used; the block
        # below still calls ``self.executor``. testMixedIndexingExecution
        # uses the returned executor instead -- confirm which is intended.
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            # ``data`` already carries the in-place change made above
            raw = data

            arr = tensor(raw.copy(), chunk_size=3)
            raw1 = rs.rand(11)
            # one row of replacement data per True entry of the mask
            set_data = rs.rand((raw1 < 0.8).sum(), 8, 12, 13)
            arr[tensor(raw1, chunk_size=4) < 0.8] = tensor(set_data)

            res = self.executor.execute_tileables([arr])[0]

            raw[raw1 < 0.8] = set_data
            np.testing.assert_array_equal(res, raw)

        # test error
        # assigning a 2-element tensor to a single element must raise
        with self.assertRaises(ValueError):
            t = tensor(raw, chunk_size=3)
            t[0, 0, 0, 0] = zeros(2, chunk_size=10)
            _ = self.executor.execute_tensor(t)

    def testSetItemStructuredExecution(self):
        """Assign into a tensor with a nested structured dtype.

        The same sequence of assignments is applied first to the Mars
        tensor and then to the numpy reference array.
        """
        nested_type = np.dtype([('a', np.int16), ('b', np.int64)])
        rec_type = np.dtype([('a', np.int32), ('b', np.double),
                             ('c', nested_type)])

        raw = np.zeros((4, 5), dtype=rec_type)
        arr = tensor(raw.copy(), chunk_size=3)

        # (index, value) pairs, applied in this order
        assignments = [
            (np.s_[1:4, 1], (3, 4., (5, 6))),
            (np.s_[1:4, 2], 8),
            (np.s_[1:3], np.arange(5)),
            (np.s_[2:4], np.arange(10).reshape(2, 5)),
            (np.s_[0], np.arange(5)),
        ]

        for key, value in assignments:
            arr[key] = value

        for key, value in assignments:
            raw[key] = value

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(arr.dtype, raw.dtype)
        self.assertEqual(arr.shape, raw.shape)
        np.testing.assert_array_equal(res, raw)

    def testTakeExecution(self):
        """``take`` as a method, as a function with ``axis``, and with ``out``."""
        def run(target):
            return self.executor.execute_tensor(target, concat=True)[0]

        data = np.random.rand(10, 20, 30)
        t = tensor(data, chunk_size=10)

        # method form, no axis given
        flat_indices = [4, 1, 2, 6, 200]
        a = t.take(flat_indices)
        np.testing.assert_array_equal(run(a), np.take(data, flat_indices))

        # function form along axis 1
        axis_indices = [5, 19, 2, 13]
        a = take(t, axis_indices, axis=1)
        np.testing.assert_array_equal(run(a),
                                      np.take(data, axis_indices, axis=1))

        # 3 indices with a 4-element ``out`` must be rejected
        with self.assertRaises(ValueError):
            take(t, [1, 3, 4], out=tensor(np.random.rand(4)))

        # with a matching ``out`` the result is read back from ``out``
        out = tensor([1, 2, 3, 4])
        out_indices = [4, 19, 2, 8]
        a = take(t, out_indices, out=out)

        np.testing.assert_array_equal(run(out), np.take(data, out_indices))

    def testCompressExecution(self):
        """Compare ``compress`` on a chunked tensor with ``np.compress``.

        Covers integer and boolean conditions along both axes, the method
        form without an axis, the error raised for a condition that is too
        long, and the memory order of results for Fortran-ordered input
        (with and without an ``out`` tensor).
        """
        def run(target):
            return self.executor.execute_tensor(target, concat=True)[0]

        def assert_same_layout(actual, reference):
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(actual.flags[flag], reference.flags[flag])

        data = np.array([[1, 2], [3, 4], [5, 6]])
        a = tensor(data, chunk_size=1)

        t = compress([0, 1], a, axis=0)
        np.testing.assert_array_equal(run(t),
                                      np.compress([0, 1], data, axis=0))

        t = compress([0, 1], a, axis=1)
        np.testing.assert_array_equal(run(t),
                                      np.compress([0, 1], data, axis=1))

        # method form, no axis
        t = a.compress([0, 1, 1])
        np.testing.assert_array_equal(run(t), np.compress([0, 1, 1], data))

        t = compress([False, True, True], a, axis=0)
        np.testing.assert_array_equal(
            run(t), np.compress([False, True, True], data, axis=0))

        t = compress([False, True], a, axis=1)
        np.testing.assert_array_equal(
            run(t), np.compress([False, True], data, axis=1))

        # ``AxisError`` is provided by ``numpy.exceptions`` on recent NumPy
        # and is no longer exported at top level on NumPy 2.x; fall back to
        # the top-level name when ``numpy.exceptions`` does not exist.
        axis_error = getattr(np, 'exceptions', np).AxisError
        with self.assertRaises(axis_error):
            compress([0, 1, 1], a, axis=1)

        # test order
        data = np.asfortranarray([[1, 2], [3, 4], [5, 6]])
        a = tensor(data, chunk_size=1)

        t = compress([0, 1, 1], a, axis=0)

        res = run(t)
        expected = np.compress([0, 1, 1], data, axis=0)
        np.testing.assert_array_equal(res, expected)
        assert_same_layout(res, expected)

        # Fortran-ordered ``out`` on both the Mars and the numpy side
        t = compress([0, 1, 1],
                     a,
                     axis=0,
                     out=tensor(np.empty((2, 2), order='F', dtype=int)))

        res = run(t)
        expected = np.compress([0, 1, 1],
                               data,
                               axis=0,
                               out=np.empty((2, 2), order='F', dtype=int))
        np.testing.assert_array_equal(res, expected)
        assert_same_layout(res, expected)

    def testExtractExecution(self):
        """``extract`` with a tensor condition matches ``np.extract``."""
        data = np.arange(12).reshape((3, 4))
        a = tensor(data, chunk_size=2)

        # keep the elements divisible by 3
        divisible_by_three = mod(a, 3) == 0
        t = extract(divisible_by_three, a)

        actual = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_equal(
            actual, np.extract(np.mod(data, 3) == 0, data))

    def testChooseExecution(self):
        """Compare ``choose`` with ``np.choose`` for several modes and shapes.

        The global chunk size is lowered to 2 for the whole test.
        """
        def run(target):
            return self.executor.execute_tensor(target, concat=True)[0]

        def assert_same_layout(actual, reference):
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(actual.flags[flag], reference.flags[flag])

        options.chunk_size = 2

        choices = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23],
                   [30, 31, 32, 33]]

        # every index is within range
        a = choose([2, 3, 1, 0], choices)
        np.testing.assert_array_equal(run(a),
                                      np.choose([2, 3, 1, 0], choices))

        a = choose([2, 4, 1, 0], choices, mode='clip')  # 4 goes to 3 (4-1)
        expected = np.choose([2, 4, 1, 0], choices, mode='clip')
        np.testing.assert_array_equal(run(a), expected)

        a = choose([2, 4, 1, 0], choices, mode='wrap')  # 4 goes to (4 mod 4)
        expected = np.choose([2, 4, 1, 0], choices, mode='wrap')
        np.testing.assert_array_equal(run(a), expected)

        # 2-d selector with scalar choices
        a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
        choices = [-10, 10]

        b = choose(a, choices)
        expected = np.choose(a, choices)
        np.testing.assert_array_equal(run(b), expected)

        # selector and choices with three different 3-d shapes
        a = np.array([0, 1]).reshape((2, 1, 1))
        c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
        c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))

        b = choose(a, (c1, c2))
        expected = np.choose(a, (c1, c2))
        np.testing.assert_array_equal(run(b), expected)

        # test order
        a = np.array([0, 1]).reshape((2, 1, 1), order='F')
        c1 = np.array([1, 2, 3]).reshape((1, 3, 1), order='F')
        c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5), order='F')

        b = choose(a, (c1, c2))
        expected = np.choose(a, (c1, c2))

        res = run(b)
        np.testing.assert_array_equal(res, expected)
        assert_same_layout(res, expected)

        # Fortran-ordered ``out`` sized from the previous result
        b = choose(a, (c1, c2), out=tensor(np.empty(res.shape, order='F')))
        expected = np.choose(a, (c1, c2), out=np.empty(res.shape, order='F'))

        res = run(b)
        np.testing.assert_array_equal(res, expected)
        assert_same_layout(res, expected)

    def testUnravelExecution(self):
        """Stacked ``unravel_index`` output matches numpy's."""
        flat_indices = [22, 41, 37]
        dims = (7, 6)

        a = tensor(flat_indices, chunk_size=1)
        t = stack(unravel_index(a, dims))

        actual = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_equal(
            actual, np.stack(np.unravel_index(flat_indices, dims)))

    def testNonzeroExecution(self):
        """``nonzero`` as a function and as a method, compared via ``hstack``."""
        def run(target):
            return self.executor.execute_tensor(target, concat=True)[0]

        data = np.array([[1, 0, 0], [0, 2, 0], [1, 1, 0]])
        x = tensor(data, chunk_size=2)

        # function form on the tensor itself
        t = hstack(nonzero(x))
        np.testing.assert_array_equal(run(t), np.hstack(np.nonzero(data)))

        # method form on a comparison result
        t = hstack((x > 1).nonzero())
        np.testing.assert_array_equal(run(t),
                                      np.hstack(np.nonzero(data > 1)))

    def testFlatnonzeroExecution(self):
        """``flatnonzero`` on a chunked range matches ``np.flatnonzero``."""
        x = arange(-2, 3, chunk_size=2)
        t = flatnonzero(x)

        actual = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_equal(actual, np.flatnonzero(np.arange(-2, 3)))

    def testFillDiagonalExecution(self):
        """Compare ``fill_diagonal`` on chunked tensors with ``np.fill_diagonal``.

        The 2-d part runs the same table of cases (chunk layout, fill value,
        ``wrap``) against several dense inputs and one sparse input. The 3-d
        part checks a cube with scalar, list, tensor and ndarray fill values.
        """
        def run(target):
            return self.executor.execute_tensor(target, concat=True)[0]

        def dense_copy(x):
            if hasattr(x, 'nnz'):
                # sparse: ``toarray`` is the spelled-out form of the ``.A``
                # shorthand and matches what testSliceExecution uses
                return x.toarray()
            return x.copy()

        def resolve(value):
            # tensor-valued fills are given as factories so that the tensor
            # is created only after the target tensor, once per case
            return value() if callable(value) else value

        # 2-d
        raws = [
            np.random.rand(30, 11),
            np.random.rand(15, 15),
            np.random.rand(11, 30),
            sps.random(30, 11, density=0.1, format='csr')
        ]

        # (chunk_size, value given to Mars, value given to numpy, kwargs)
        cases_2d = [
            # 1 chunk, wrap=False / wrap=True
            (30, 1, 1, {}),
            (30, 1, 1, {'wrap': True}),
            # multiple chunks, wrap=False
            ((12, 4), 1, 1, {}),
            ((4, 12), 1, 1, {}),
            # multiple chunks, val with list type
            ((12, 4), [1, 2, 3], [1, 2, 3], {}),
            # multiple chunks, val with tensor type
            ((12, 4), lambda: tensor([1, 2, 3]), [1, 2, 3], {}),
            # multiple chunks, wrap=True
            ((12, 4), 1, 1, {'wrap': True}),
            ((4, 12), 1, 1, {'wrap': True}),
            # multiple chunks, val with list type
            ((12, 4), [1, 2, 3], [1, 2, 3], {'wrap': True}),
            # multiple chunks, val with 2-d tensor type; numpy gets the
            # same values as a flat list
            ((12, 4), lambda: tensor([[1, 2], [3, 4]]), [1, 2, 3, 4],
             {'wrap': True}),
        ]

        for raw in raws:
            for chunk_size, mars_val, np_val, kwargs in cases_2d:
                t = tensor(raw, chunk_size=chunk_size)
                fill_diagonal(t, resolve(mars_val), **kwargs)

                res = run(t)
                expected = dense_copy(raw)
                np.fill_diagonal(expected, np_val, **kwargs)

                np.testing.assert_array_equal(np.asarray(res), expected)

        # 3-d
        raw = np.random.rand(11, 11, 11)

        expected = raw.copy()
        np.fill_diagonal(expected, 1)
        expected2 = raw.copy()
        np.fill_diagonal(expected2, 1, wrap=True)
        np.testing.assert_array_equal(expected, expected2)

        # 1 chunk, then multiple chunks; each without and with wrap
        # wrap = True does not take effect when ndim > 2
        for chunk_size in (30, (3, 4, 5)):
            for kwargs in ({}, {'wrap': True}):
                t = tensor(raw, chunk_size=chunk_size)
                fill_diagonal(t, 1, **kwargs)

                np.testing.assert_array_equal(run(t), expected)

        # (value given to Mars, value given to numpy)
        cases_3d = [
            # val with list type
            ([[1, 2], [3, 4]], [1, 2, 3, 4]),
            # val with tensor type
            (lambda: tensor([1, 2, 3]), [1, 2, 3]),
            # val with tensor type which ndim == 0
            (lambda: tensor([1, 2, 3]).sum(), 6),
            # val with ndarray type which size is too long
            (np.arange(20), np.arange(20)),
        ]

        for mars_val, np_val in cases_3d:
            t = tensor(raw, chunk_size=(3, 4, 5))
            fill_diagonal(t, resolve(mars_val))

            res = run(t)
            expected = raw.copy()
            np.fill_diagonal(expected, np_val)

            np.testing.assert_array_equal(res, expected)
Example #5
0
class Test(TestBase):
    """Tests for ``spawn`` that run with a test context entered in ``setUp``."""

    def setUp(self) -> None:
        """Build the executor, swap in the context executor, enter the context."""
        super().setUp()
        self.executor = ExecutorForTest('numpy')
        context, context_executor = self._create_test_context(self.executor)
        self.ctx = context
        self.executor = context_executor
        self.ctx.__enter__()

    def tearDown(self) -> None:
        """Leave the context entered in ``setUp``."""
        self.ctx.__exit__(None, None, None)

    def testRemoteFunction(self):
        """Spawn functions whose arguments are other spawned results."""
        def increment(x):
            return x + 1

        def weighted_product(x, y, z=None):
            return x * y * (z[0] + z[1])

        rng = np.random.RandomState(0)
        raw1 = rng.rand(10, 10)
        raw2 = rng.rand(10, 10)

        r1 = spawn(increment, raw1)
        r2 = spawn(increment, raw2)
        # spawned results appear both as positional args and inside a kwarg
        r3 = spawn(weighted_product, (r1, r2), {'z': [r1, r2]})

        actual = self.executor.execute_tileables([r3])[0]
        expected = (raw1 + 1) * (raw2 + 1) * (raw1 + 1 + raw2 + 1)
        np.testing.assert_almost_equal(actual, expected)

        # kwargs given as a tuple must be rejected
        with self.assertRaises(TypeError):
            spawn(weighted_product, (r1, r2), kwargs=())

        session = new_session()

        def sum_of_ones():
            # the default session seen by the function must be the one
            # used for execution
            assert Session.default.session_id == session.session_id
            return mt.ones((2, 3)).sum().to_numpy()

        fetched = spawn(sum_of_ones).execute(session=session) \
            .fetch(session=session)
        self.assertEqual(fetched, 6)

    def testMultiOutput(self):
        """Word count built from two-output mappers and two reducers."""
        sentences = ['word1 word2', 'word2 word3', 'word3 word2 word1']

        def mapper(s):
            # count words, then split the counts into two buckets by hash
            counts = defaultdict(int)
            for word in s.split():
                counts[word] += 1

            buckets = [defaultdict(int), defaultdict(int)]
            for word, count in counts.items():
                buckets[mmh3_hash(word) % 2][word] += count

            return buckets

        def reducer(word_to_count_list):
            totals = {}
            for partial in word_to_count_list:
                for word, count in partial.items():
                    totals[word] = totals.get(word, 0) + count

            return totals

        first_bucket, second_bucket = [], []
        for sentence in sentences:
            out1, out2 = spawn(mapper, sentence, n_output=2)
            first_bucket.append(out1)
            second_bucket.append(out2)

        # one reducer per bucket
        reduced = [spawn(reducer, bucket)
                   for bucket in (first_bucket, second_bucket)]

        merged = {}
        for partial in ExecutableTuple(reduced).execute().fetch():
            merged.update(partial)

        self.assertEqual(merged, {'word1': 2, 'word2': 3, 'word3': 2})

    def testChainedRemote(self):
        """Use the result of one spawned call as the argument of another."""
        def add_one(value):
            return value + 1

        def double(value):
            return value * 2

        chained = spawn(double, spawn(add_one, 2))

        # (2 + 1) * 2 == 6
        outcome = self.executor.execute_tileables([chained])[0]
        self.assertEqual(outcome, 6)
Example #6
0
class Test(unittest.TestCase):
    def setUp(self) -> None:
        """Create a ``LocalContext``-backed executor and enter the context."""
        test_case = self

        class MockSession:
            """Session stand-in that only exposes ``executor``."""

            @property
            def executor(self):
                # looked up on access: the executor is assigned to the test
                # case only after the context has been constructed
                return test_case.executor

        context = LocalContext(MockSession())
        self.ctx = context
        self.executor = ExecutorForTest('numpy', storage=context)
        context.__enter__()

    def tearDown(self) -> None:
        self.ctx.__exit__(None, None, None)

    def test__check_targets(self):
        """Run ``_check_targets`` on every ordered pair of example targets.

        For each pair the expected merged type is looked up in ``EXPECTED``
        (in either order).  Pairs mapped to ``None`` must raise
        ``ValueError``.  All other pairs must produce the expected merged
        type, with outputs that are either ``SparseNDArray`` instances
        (merged type starting with ``'multilabel'``) or equal to the squeezed
        inputs; passing a shortened first input must raise ``ValueError``.
        """
        # Check that _check_targets correctly merges target types, squeezes
        # output and fails if input lengths differ.
        IND = 'multilabel-indicator'
        MC = 'multiclass'
        BIN = 'binary'
        CNT = 'continuous'
        MMC = 'multiclass-multioutput'
        MCN = 'continuous-multioutput'
        # all of length 3
        EXAMPLES = [
            (IND, np.array([[0, 1, 1], [1, 0, 0], [0, 0, 1]])),
            # must not be considered binary
            (IND, np.array([[0, 1], [1, 0], [1, 1]])),
            (MC, [2, 3, 1]),
            (BIN, [0, 1, 1]),
            (CNT, [0., 1.5, 1.]),
            (MC, np.array([[2], [3], [1]])),
            (BIN, np.array([[0], [1], [1]])),
            (CNT, np.array([[0.], [1.5], [1.]])),
            (MMC, np.array([[0, 2], [1, 3], [2, 3]])),
            (MCN, np.array([[0.5, 2.], [1.1, 3.], [2., 3.]])),
        ]
        # expected type given input types, or None for error
        # (types will be tried in either order)
        EXPECTED = {
            (IND, IND): IND,
            (MC, MC): MC,
            (BIN, BIN): BIN,
            (MC, IND): None,
            (BIN, IND): None,
            (BIN, MC): MC,

            # Disallowed types
            (CNT, CNT): None,
            (MMC, MMC): None,
            (MCN, MCN): None,
            (IND, CNT): None,
            (MC, CNT): None,
            (BIN, CNT): None,
            (MMC, CNT): None,
            (MCN, CNT): None,
            (IND, MMC): None,
            (MC, MMC): None,
            (BIN, MMC): None,
            (MCN, MMC): None,
            (IND, MCN): None,
            (MC, MCN): None,
            (BIN, MCN): None,
        }

        for (type1, y1), (type2, y2) in product(EXAMPLES, repeat=2):
            try:
                expected = EXPECTED[type1, type2]
            except KeyError:
                expected = EXPECTED[type2, type1]

            if expected is None:
                # A single check covers both a mix of types and an
                # unsupported type: no error message is inspected, so
                # repeating the identical call per sub-case would only
                # execute the same graph twice.
                with self.assertRaises(ValueError):
                    self.executor.execute_tileables(_check_targets(y1, y2))
                continue

            merged_type, y1out, y2out = \
                self.executor.execute_tileables(_check_targets(y1, y2))
            self.assertEqual(merged_type, expected)
            if merged_type.item().startswith('multilabel'):
                self.assertIsInstance(y1out, SparseNDArray)
                self.assertIsInstance(y2out, SparseNDArray)
            else:
                np.testing.assert_array_equal(y1out, np.squeeze(y1))
                np.testing.assert_array_equal(y2out, np.squeeze(y2))

            # dropping the last sample of y1 makes the lengths differ
            with self.assertRaises(ValueError):
                self.executor.execute_tileables(_check_targets(
                    y1[:-1], y2))

    @unittest.skipIf(sklearn is None, 'scikit-learn not installed')
    def testAccuracyScore(self):
        """Compare ``accuracy_score`` with scikit-learn's implementation.

        Covers 1-D label lists and 2-D arrays, with and without
        ``normalize=False`` and ``sample_weight``, and finally the same 2-D
        inputs wrapped with ``mt.tensor``.
        """
        labels_true = [0, 1, 2, 3]
        labels_pred = [0, 2, 1, 3]
        matrix_true = np.ones((2, 2))
        matrix_pred = np.array([[0, 1], [1, 1]])
        sample_weight = [0.7, 0.3]

        # (y_true, y_pred, keyword arguments passed to both implementations)
        cases = [
            (labels_true, labels_pred, {}),
            (labels_true, labels_pred, {'normalize': False}),
            (matrix_true, matrix_pred, {}),
            (matrix_true, matrix_pred, {'sample_weight': sample_weight}),
        ]
        for y_true, y_pred, options in cases:
            score = accuracy_score(y_true, y_pred, **options)
            result = self.executor.execute_tileables([score])[0]
            expected = sklearn_accuracy_score(y_true, y_pred, **options)
            self.assertAlmostEqual(result, expected)

        # tensor inputs on the tested side, raw inputs for the reference
        score = accuracy_score(mt.tensor(matrix_true),
                               mt.tensor(matrix_pred),
                               sample_weight=mt.tensor(sample_weight),
                               normalize=False)
        result = self.executor.execute_tileables([score])[0]
        expected = sklearn_accuracy_score(matrix_true,
                                          matrix_pred,
                                          sample_weight=sample_weight,
                                          normalize=False)
        self.assertAlmostEqual(result, expected)
Example #7
0
class Test(TestBase):
    def setUp(self):
        """Create the ``'numpy'`` test executor stored on ``self.executor``."""
        executor = ExecutorForTest('numpy')
        self.executor = executor

    def testShuffleExpr(self):
        """Check the expressions built by ``shuffle`` before and after tiling.

        Covers a tensor/DataFrame pair shuffled along the default axes, a
        pair shuffled along ``axes=(0, 1)``, the errors raised for invalid
        calls, and the return type for a single tensor argument.
        """
        a = mt.random.rand(10, 3, chunk_size=2)
        b = md.DataFrame(mt.random.rand(10, 5), chunk_size=2)

        new_a, new_b = shuffle(a, b, random_state=0)

        # both outputs come from one shared op and keep the input shapes
        self.assertIs(new_a.op, new_b.op)
        self.assertIsInstance(new_a.op, LearnShuffle)
        self.assertEqual(new_a.shape, a.shape)
        self.assertEqual(new_b.shape, b.shape)
        self.assertNotEqual(b.index_value.key, new_b.index_value.key)

        new_a = new_a.tiles()
        new_b = get_tiled(new_b)

        # after tiling the chunk length along axis 0 is unknown (NaN)
        self.assertEqual(len(new_a.chunks), 10)
        self.assertTrue(np.isnan(new_a.chunks[0].shape[0]))
        self.assertEqual(len(new_b.chunks), 15)
        self.assertTrue(np.isnan(new_b.chunks[0].shape[0]))
        self.assertNotEqual(new_b.chunks[0].index_value.key, new_b.chunks[1].index_value.key)
        self.assertEqual(new_a.chunks[0].op.seeds, new_b.chunks[0].op.seeds)

        c = mt.random.rand(10, 5, 3, chunk_size=2)
        d = md.DataFrame(mt.random.rand(10, 5), chunk_size=(2, 5))

        new_c, new_d = shuffle(c, d, axes=(0, 1), random_state=0)

        self.assertIs(new_c.op, new_d.op)
        self.assertIsInstance(new_c.op, LearnShuffle)
        self.assertEqual(new_c.shape, c.shape)
        self.assertEqual(new_d.shape, d.shape)
        self.assertNotEqual(d.index_value.key, new_d.index_value.key)
        # the column labels must no longer be in strictly increasing order,
        # yet still form the same dtypes once sorted back
        self.assertFalse(np.all(new_d.dtypes.index[:-1] < new_d.dtypes.index[1:]))
        pd.testing.assert_series_equal(d.dtypes, new_d.dtypes.sort_index())

        new_c = new_c.tiles()
        new_d = get_tiled(new_d)

        self.assertEqual(len(new_c.chunks), 5 * 1 * 2)
        self.assertTrue(np.isnan(new_c.chunks[0].shape[0]))
        self.assertEqual(len(new_d.chunks), 5)
        self.assertTrue(np.isnan(new_d.chunks[0].shape[0]))
        self.assertEqual(new_d.chunks[0].shape[1], 5)
        self.assertNotEqual(new_d.chunks[0].index_value.key, new_d.chunks[1].index_value.key)
        pd.testing.assert_series_equal(new_d.chunks[0].dtypes.sort_index(), d.dtypes)
        self.assertEqual(new_c.chunks[0].op.seeds, new_d.chunks[0].op.seeds)
        self.assertEqual(len(new_c.chunks[0].op.seeds), 1)
        self.assertEqual(new_c.chunks[0].op.reduce_sizes, (5,))

        # Build the inputs outside the assertRaises block so that only the
        # shuffle() call itself can satisfy the expected exception.
        # NOTE(review): the ValueError is presumably due to the sizes along
        # axis 1 differing (5 vs. 4) -- confirm against shuffle().
        a = mt.random.rand(10, 5)
        b = mt.random.rand(10, 4, 3)
        with self.assertRaises(ValueError):
            shuffle(a, b, axes=1)

        with self.assertRaises(TypeError):
            shuffle(a, b, unknown_param=True)

        # a single input yields a single tensor
        self.assertIsInstance(shuffle(mt.random.rand(10, 5)), mt.Tensor)

    @staticmethod
    def _sort(data, axes):
        cur = data
        for ax in axes:
            if ax < data.ndim:
                cur = np.sort(cur, axis=ax)
        return cur

    def testShuffleExecution(self):
        # test consistency
        s1 = np.arange(9).reshape(3, 3)
        s2 = np.arange(1, 10).reshape(3, 3)
        ts1 = mt.array(s1, chunk_size=2)
        ts2 = mt.array(s2, chunk_size=2)

        ret = shuffle(ts1, ts2, axes=[0, 1], random_state=0)
        res1, res2 = self.executor.execute_tileables(ret)

        # calc row index
        s1_col_0 = s1[:, 0].tolist()
        rs1_col_0 = [res1[:, i] for i in range(3) if set(s1_col_0) == set(res1[:, i])][0]
        row_index = [s1_col_0.index(j) for j in rs1_col_0]
        # calc col index
        s1_row_0 = s1[0].tolist()
        rs1_row_0 = [res1[i] for i in range(3) if set(s1_row_0) == set(res1[i])][0]
        col_index = [s1_row_0.index(j) for j in rs1_row_0]
        np.testing.assert_array_equal(res2, s2[row_index][:, col_index])

        # tensor + tensor
        raw1 = np.random.rand(10, 15, 20)
        t1 = mt.array(raw1, chunk_size=8)
        raw2 = np.random.rand(10, 15, 20)
        t2 = mt.array(raw2, chunk_size=5)

        for axes in [(0,), (0, 1), (0, 2), (1, 2), (0, 1, 2)]:
            ret = shuffle(t1, t2, axes=axes, random_state=0)
            res1, res2 = self.executor.execute_tileables(ret)

            self.assertEqual(res1.shape, raw1.shape)
            self.assertEqual(res2.shape, raw2.shape)
            np.testing.assert_array_equal(Test._sort(raw1, axes), Test._sort(res1, axes))
            np.testing.assert_array_equal(Test._sort(raw2, axes), Test._sort(res2, axes))

        # tensor + tensor(more dimension)
        raw3 = np.random.rand(10, 15)
        t3 = mt.array(raw3, chunk_size=(8, 15))
        raw4 = np.random.rand(10, 15, 20)
        t4 = mt.array(raw4, chunk_size=(5, 15, 10))

        for axes in [(1,), (0, 1), (1, 2)]:
            ret = shuffle(t3, t4, axes=axes, random_state=0)
            res3, res4 = self.executor.execute_tileables(ret)

            self.assertEqual(res3.shape, raw3.shape)
            self.assertEqual(res4.shape, raw4.shape)
            np.testing.assert_array_equal(Test._sort(raw3, axes), Test._sort(res3, axes))
            np.testing.assert_array_equal(Test._sort(raw4, axes), Test._sort(res4, axes))

        # tensor + dataframe + series
        raw5 = np.random.rand(10, 15, 20)
        t5 = mt.array(raw5, chunk_size=8)
        raw6 = pd.DataFrame(np.random.rand(10, 15))
        df = md.DataFrame(raw6, chunk_size=(8, 15))
        raw7 = pd.Series(np.random.rand(10))
        series = md.Series(raw7, chunk_size=8)

        for axes in [(0,), (1,), (0, 1), (1, 2), [0, 1, 2]]:
            ret = shuffle(t5, df, series, axes=axes, random_state=0)
            # skip check nsplits because it's updated
            res5, res_df, res_series = self.executor.execute_tileables(ret, check_nsplits=False)

            self.assertEqual(res5.shape, raw5.shape)
            self.assertEqual(res_df.shape, df.shape)
            self.assertEqual(res_series.shape, series.shape)