Exemple #1
0
    def testSparseDotExecution(self):
        """Check dot, tensordot and inner on sparse tensors against scipy.

        For the two ``dot`` variants a mock executor is run as well and the
        sizes it returns are asserted on.  Every variant is then executed
        through ``self.executor`` and compared with the scipy product of the
        raw matrices.
        """
        mock_executor = Executor(
            sync_provider_type=Executor.SyncProviderType.MOCK)

        def run(t):
            # Execute with concat=True and return the single merged result.
            return self.executor.execute_tensor(t, concat=True)[0]

        lhs_raw = sps.random(5, 9, density=.1)
        rhs_raw = sps.random(9, 10, density=.2)
        lhs = tensor(lhs_raw, chunk_size=2)
        rhs = tensor(rhs_raw, chunk_size=3)
        # Reference product, shared by every matrix case below.
        expected = lhs_raw.dot(rhs_raw).toarray()

        # dot with default arguments: result is expected to stay sparse
        sparse_dot = dot(lhs, rhs)
        sizes = mock_executor.execute_tensor(sparse_dot, mock=True)
        out = run(sparse_dot)
        self.assertEqual(sum(entry[0] for entry in sizes), 0)
        self.assertGreaterEqual(sum(entry[1] for entry in sizes), 0)
        self.assertTrue(issparse(out))
        np.testing.assert_allclose(out.toarray(), expected)

        # dot with sparse=False: dense result, first size entries sum to nbytes
        dense_dot = dot(lhs, rhs, sparse=False)
        sizes = mock_executor.execute_tensor(dense_dot, mock=True)
        out = run(dense_dot)
        self.assertEqual(sum(entry[0] for entry in sizes), dense_dot.nbytes)
        self.assertFalse(issparse(out))
        np.testing.assert_allclose(out, expected)

        # tensordot over the last axes of lhs and rhs.T, dense output
        dense_tensordot = tensordot(lhs, rhs.T, (-1, -1), sparse=False)
        out = run(dense_tensordot)
        self.assertFalse(issparse(out))
        np.testing.assert_allclose(out, expected)

        # inner with default arguments: sparse output
        sparse_inner = inner(lhs, rhs.T)
        out = run(sparse_inner)
        self.assertTrue(issparse(out))
        np.testing.assert_allclose(out.toarray(), expected)

        # inner with sparse=False: dense output
        dense_inner = inner(lhs, rhs.T, sparse=False)
        out = run(dense_inner)
        self.assertFalse(issparse(out))
        np.testing.assert_allclose(out, expected)

        # test vector inner
        vec_a = np.random.rand(5)
        vec_b = np.random.rand(5)
        sparse_a = tensor(vec_a, chunk_size=2).tosparse()
        sparse_b = tensor(vec_b, chunk_size=2).tosparse()

        out = run(inner(sparse_a, sparse_b))
        self.assertTrue(np.isscalar(out))
        np.testing.assert_allclose(out, np.inner(vec_a, vec_b))
Exemple #2
0
    def testSparseDotSizeExecution(self):
        """Record tensordot size estimates during a mock run and check them.

        A recording size estimator is registered for ``TensorTensorDot`` while
        the tensor is executed in mock mode; the default registration is
        restored afterwards.  The recorded values are then compared for a
        sparse and a dense ``dot``.
        """
        from mars.tensor.linalg.tensordot import TensorTensorDot
        from mars.executor import register, register_default

        # All four mappings are keyed by the output chunk key.
        chunk_sizes = {}
        chunk_nbytes = {}
        chunk_input_sizes = {}
        chunk_input_nbytes = {}
        records = (chunk_sizes, chunk_nbytes,
                   chunk_input_sizes, chunk_input_nbytes)

        def _tensordot_size_recorder(ctx, op):
            # Let the real estimator fill ctx first, then capture its output.
            TensorTensorDot.estimate_size(ctx, op)

            out_key = op.outputs[0].key
            chunk_sizes[out_key] = ctx[out_key]
            chunk_nbytes[out_key] = op.outputs[0].nbytes

            # Keyed by the input's op key, so inputs sharing an op key
            # contribute only once to each total.
            size_by_op = {inp.op.key: ctx[inp.key][0] for inp in op.inputs}
            chunk_input_sizes[out_key] = sum(size_by_op.values())
            nbytes_by_op = {inp.op.key: inp.nbytes for inp in op.inputs}
            chunk_input_nbytes[out_key] = sum(nbytes_by_op.values())

        def execute_size(t):
            size_executor = ExecutorForTest(
                sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
            try:
                # Drop whatever a previous run recorded.
                for record in records:
                    record.clear()
                register(TensorTensorDot,
                         size_estimator=_tensordot_size_recorder)
                size_executor.execute_tensor(t, mock=True)
            finally:
                # Always restore the default handlers for the op.
                register_default(TensorTensorDot)

        lhs_raw = sps.random(5, 9, density=.1)
        rhs_raw = sps.random(9, 10, density=.2)
        lhs = tensor(lhs_raw, chunk_size=2)
        rhs = tensor(rhs_raw, chunk_size=3)

        execute_size(dot(lhs, rhs))

        for key, input_size in chunk_input_sizes.items():
            self.assertGreaterEqual(chunk_sizes[key][1], input_size)

        execute_size(dot(lhs, rhs, sparse=False))

        for key in chunk_input_sizes:
            out_nbytes = chunk_nbytes[key]
            self.assertEqual(chunk_sizes[key][0], out_nbytes)
            self.assertEqual(chunk_sizes[key][1],
                             chunk_input_nbytes[key] + out_nbytes)
Exemple #3
0
def test_sparse_dot_execution(setup):
    """Check dot, tensordot and inner on sparse tensors against scipy/numpy.

    All random inputs, including the sparse matrices, are drawn from a single
    seeded ``RandomState`` so the test data is identical on every run.

    ``setup`` is not referenced in the body; presumably it is a fixture that
    prepares the execution environment (confirm against the test module).
    """
    rs = np.random.RandomState(0)

    # Pass the seeded generator explicitly: without ``random_state`` scipy
    # falls back to the global numpy RNG and the matrices change per run.
    a_data = sps.random(5, 9, density=.1, random_state=rs)
    b_data = sps.random(9, 10, density=.2, random_state=rs)
    a = tensor(a_data, chunk_size=2)
    b = tensor(b_data, chunk_size=3)
    # Reference product used by every matrix case below.
    expected = a_data.dot(b_data).toarray()

    # dot with default arguments keeps the result sparse
    c = dot(a, b)

    res = c.execute().fetch()
    assert issparse(res) is True
    np.testing.assert_allclose(res.toarray(), expected)

    # dot with sparse=False yields a dense result
    c2 = dot(a, b, sparse=False)

    res = c2.execute().fetch()
    assert issparse(res) is False
    np.testing.assert_allclose(res, expected)

    # tensordot over the last axes of a and b.T, dense output
    c3 = tensordot(a, b.T, (-1, -1), sparse=False)

    res = c3.execute().fetch()
    assert issparse(res) is False
    np.testing.assert_allclose(res, expected)

    # inner with default arguments keeps the result sparse
    c = inner(a, b.T)

    res = c.execute().fetch()
    assert issparse(res) is True
    np.testing.assert_allclose(res.toarray(), expected)

    # inner with sparse=False yields a dense result
    c = inner(a, b.T, sparse=False)

    res = c.execute().fetch()
    assert issparse(res) is False
    np.testing.assert_allclose(res, expected)

    # test vector inner
    a_data = rs.rand(5)
    b_data = rs.rand(5)
    a = tensor(a_data, chunk_size=2).tosparse()
    b = tensor(b_data, chunk_size=2).tosparse()

    c = inner(a, b)

    res = c.execute().fetch()
    assert np.isscalar(res) is True
    np.testing.assert_allclose(res, np.inner(a_data, b_data))
Exemple #4
0
def test_randomized_svd_execution(setup):
    """Compare ``randomized_svd`` with numpy's exact SVD on a low-rank matrix.

    For each input dtype and each power-iteration normalizer the test checks
    output dtypes, output shapes, the leading ``k`` singular values and the
    rank-``k`` reconstruction.
    """
    n_samples, n_features = 100, 500
    rank, k = 5, 10

    for dtype in (np.int64, np.float64):
        # Matrix of approximate effective rank `rank` with no noise
        # component (very structured signal).
        X = make_low_rank_matrix(n_samples=n_samples,
                                 n_features=n_features,
                                 effective_rank=rank,
                                 tail_strength=0.0,
                                 random_state=0)
        X = X.astype(dtype, copy=False)
        assert X.shape == (n_samples, n_features)

        dtype = np.dtype(dtype)
        # With the dtypes iterated above the float32 branch is never taken.
        decimal = 5 if dtype == np.float32 else 7
        # Float input keeps its own dtype (f32 is not upcast to f64);
        # integer input produces float64 output.
        expected_dtype = dtype if dtype.kind == 'f' else np.float64

        # Slow exact SVD as the reference, cast to the input dtype.
        exact = np.linalg.svd(X.execute().fetch(), full_matrices=False)
        U, s, V = (part.astype(dtype, copy=False) for part in exact)
        exact_product = np.dot(U[:, :k], V[:k, :])

        for normalizer in ('auto', 'LU', 'QR'):  # 'none' would not be stable
            # Fast approximate SVD under test.
            Ua, sa, Va = randomized_svd(X,
                                        k,
                                        n_iter=1,
                                        power_iteration_normalizer=normalizer,
                                        random_state=0)

            for part in (Ua, sa, Va):
                assert part.dtype == expected_dtype

            assert Ua.shape == (n_samples, k)
            assert sa.shape == (k, )
            assert Va.shape == (k, n_features)

            # Singular values of both methods must agree up to the real
            # rank of the matrix.
            approx_s = sa.execute().fetch()
            np.testing.assert_almost_equal(s[:k], approx_s, decimal=decimal)

            # Compare the products so the sign of the singular vectors
            # does not matter.
            approx_product = dot(Ua, Va).execute().fetch()
            np.testing.assert_almost_equal(exact_product,
                                           approx_product,
                                           decimal=decimal)
Exemple #5
0
    def testSparseDotExecution(self):
        """Check dot, tensordot and inner on sparse tensors against scipy.

        Each variant is executed through ``self.executor`` and its result is
        checked for sparseness and compared with the scipy product of the raw
        matrices.  A final case covers the inner product of two sparse
        vectors.
        """
        def run(t):
            # Execute with concat=True and return the single merged result.
            return self.executor.execute_tensor(t, concat=True)[0]

        lhs_raw = sps.random(5, 9, density=.1)
        rhs_raw = sps.random(9, 10, density=.2)
        lhs = tensor(lhs_raw, chunk_size=2)
        rhs = tensor(rhs_raw, chunk_size=3)
        expected = lhs_raw.dot(rhs_raw).toarray()

        # (tensor builder, whether the result must be sparse).  Builders are
        # invoked lazily so each tensor is created right before it runs.
        cases = (
            (lambda: dot(lhs, rhs), True),
            (lambda: dot(lhs, rhs, sparse=False), False),
            (lambda: tensordot(lhs, rhs.T, (-1, -1), sparse=False), False),
            (lambda: inner(lhs, rhs.T), True),
            (lambda: inner(lhs, rhs.T, sparse=False), False),
        )
        for build, expect_sparse in cases:
            out = run(build())
            if expect_sparse:
                self.assertTrue(issparse(out))
                np.testing.assert_allclose(out.toarray(), expected)
            else:
                self.assertFalse(issparse(out))
                np.testing.assert_allclose(out, expected)

        # test vector inner
        vec_a = np.random.rand(5)
        vec_b = np.random.rand(5)
        sparse_a = tensor(vec_a, chunk_size=2).tosparse()
        sparse_b = tensor(vec_b, chunk_size=2).tosparse()

        out = run(inner(sparse_a, sparse_b))
        self.assertTrue(np.isscalar(out))
        np.testing.assert_allclose(out, np.inner(vec_a, vec_b))
Exemple #6
0
    def testTensordot(self):
        """Check result shapes and tiling of tensordot, dot and inner.

        No data is executed here: only ``shape``, ``chunks`` and ``nsplits``
        of the resulting tensors are inspected, plus one argument-validation
        error.
        """
        from mars.tensor.linalg import tensordot, dot, inner

        def check_shape_and_tiling(t, shape):
            # Shape before tiling, then consistency of shape with nsplits.
            self.assertEqual(t.shape, shape)
            t.tiles()
            self.assertEqual(t.shape, tuple(sum(s) for s in t.nsplits))

        # Contract two axes pairwise.
        left = ones((3, 4, 6), chunk_size=2)
        right = ones((4, 3, 5), chunk_size=2)
        contracted = tensordot(left, right, axes=((0, 1), (1, 0)))
        self.assertEqual(contracted.shape, (6, 5))

        contracted.tiles()

        self.assertEqual(contracted.shape, (6, 5))
        self.assertEqual(len(contracted.chunks), 9)

        # tensordot with default axes on these shapes must be rejected.
        left = ones((10000, 20000), chunk_size=5000)
        right = ones((20000, 1000), chunk_size=5000)
        with self.assertRaises(ValueError):
            tensordot(left, right)

        # vector . matrix
        check_shape_and_tiling(
            dot(ones(10, chunk_size=2), ones((10, 20), chunk_size=2)),
            (20,))

        # matrix . vector
        check_shape_and_tiling(
            dot(ones((10, 20), chunk_size=2), ones(20, chunk_size=2)),
            (10,))

        # method form, matrix . matrix
        square = ones((100, 100), chunk_size=10)
        check_shape_and_tiling(square.dot(square), (100, 100))

        # inner product of two matrices
        check_shape_and_tiling(
            inner(ones((10, 20), chunk_size=2), ones((30, 20), chunk_size=2)),
            (10, 30))
Exemple #7
0
def test_tensordot_execution(setup):
    """Execute dot, tensordot and inner on dense tensors and compare to numpy."""
    rs = np.random.RandomState(0)

    def run(t):
        return t.execute().fetch()

    # NOTE(review): the size-estimation check below is disabled in the
    # source; the reason is not visible here.
    # size_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
    #
    # a_data = np.arange(60).reshape(3, 4, 5)
    # a = tensor(a_data, chunk_size=2)
    # b_data = np.arange(24).reshape(4, 3, 2)
    # b = tensor(b_data, chunk_size=2)
    #
    # axes = ([1, 0], [0, 1])
    # c = tensordot(a, b, axes=axes)
    # size_res = size_executor.execute_tensor(c, mock=True)
    # assert sum(s[0] for s in size_res) == c.nbytes
    # assert sum(s[1] for s in size_res) == c.nbytes

    # dot() function on multi-chunk operands
    product = dot(ones((100, 200), chunk_size=50),
                  ones((200, 10), chunk_size=50))
    np.testing.assert_array_equal(
        run(product),
        np.dot(np.ones((100, 200)), np.ones((200, 10))))

    # method form with small chunks
    lhs = ones((10, 8), chunk_size=4)
    rhs = ones((8, 10), chunk_size=4)
    np.testing.assert_array_equal(run(lhs.dot(rhs)), np.full((10, 10), 8))

    # chunk_size equal to the largest dimension
    lhs = ones((500, 500), chunk_size=500)
    rhs = ones((500, 100), chunk_size=500)
    np.testing.assert_array_equal(run(lhs.dot(rhs)), np.full((500, 100), 500))

    # random 3-d operands contracted over two axes, different chunk sizes
    raw_lhs = rs.random((100, 200, 50))
    raw_rhs = rs.random((200, 10, 100))
    contracted = tensordot(tensor(raw_lhs, chunk_size=50),
                           tensor(raw_rhs, chunk_size=33),
                           axes=((0, 1), (2, 0)))
    result = run(contracted)
    reference = np.tensordot(
        raw_lhs, raw_rhs, axes=(contracted.op.a_axes, contracted.op.b_axes))
    np.testing.assert_array_almost_equal(result, reference)

    # inner product of two matrices
    inner_product = inner(ones((100, 200), chunk_size=50),
                          ones((10, 200), chunk_size=50))
    np.testing.assert_array_equal(
        run(inner_product),
        np.inner(np.ones((100, 200)), np.ones((10, 200))))

    # chunk size that does not divide the dimension evenly
    lhs = ones((100, 100), chunk_size=30)
    rhs = ones((100, 100), chunk_size=30)
    np.testing.assert_array_equal(run(lhs.dot(rhs)),
                                  np.full((100, 100), 100.0))
Exemple #8
0
    def testTensordotExecution(self):
        """Execute tensordot, dot and inner and compare the results to numpy.

        Most cases inspect the list returned by ``execute_tensor`` entry by
        entry; the first case also runs a mock executor and asserts on the
        sizes it returns.
        """
        size_executor = ExecutorForTest(
            sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        def assert_row_blocks(blocks, full, bounds):
            # blocks[i] must equal the rows full[start:stop] of the i-th bound.
            for idx, (start, stop) in enumerate(bounds):
                self.assertTrue(
                    np.array_equal(blocks[idx], full[start:stop, :]))

        # small integer operands contracted over two axes
        lhs_raw = np.arange(60).reshape(3, 4, 5)
        lhs = tensor(lhs_raw, chunk_size=2)
        rhs_raw = np.arange(24).reshape(4, 3, 2)
        rhs = tensor(rhs_raw, chunk_size=2)

        axes = ([1, 0], [0, 1])
        contracted = tensordot(lhs, rhs, axes=axes)
        sizes = size_executor.execute_tensor(contracted, mock=True)
        self.assertEqual(sum(entry[0] for entry in sizes), contracted.nbytes)
        self.assertEqual(sum(entry[1] for entry in sizes), contracted.nbytes)

        blocks = self.executor.execute_tensor(contracted)
        reference = np.tensordot(lhs_raw, rhs_raw, axes=axes)
        assert_row_blocks(blocks, reference, ((None, 2), (2, 4), (4, None)))

        # dot() function, result arrives as two row blocks
        product = dot(ones((1000, 2000), chunk_size=500),
                      ones((2000, 100), chunk_size=500))
        blocks = self.executor.execute_tensor(product)
        reference = np.dot(np.ones((1000, 2000)), np.ones((2000, 100)))
        self.assertEqual(len(blocks), 2)
        assert_row_blocks(blocks, reference, ((None, 500), (500, None)))

        # method form with 2x2 chunks: 25 identical blocks
        lhs = ones((10, 8), chunk_size=2)
        rhs = ones((8, 10), chunk_size=2)
        blocks = self.executor.execute_tensor(lhs.dot(rhs))
        self.assertEqual(len(blocks), 25)
        block_reference = np.full((2, 2), 8)
        for block in blocks:
            self.assertTrue(np.array_equal(block, block_reference))

        # chunk_size equal to the largest dimension
        lhs = ones((500, 500), chunk_size=500)
        rhs = ones((500, 100), chunk_size=500)
        blocks = self.executor.execute_tensor(lhs.dot(rhs))
        self.assertTrue(np.array_equal(blocks[0], np.full((500, 100), 500)))

        # random 3-d operands with different chunk sizes
        lhs_raw = np.random.random((100, 200, 50))
        rhs_raw = np.random.random((200, 10, 100))
        contracted = tensordot(tensor(lhs_raw, chunk_size=50),
                               tensor(rhs_raw, chunk_size=33),
                               axes=((0, 1), (2, 0)))
        blocks = self.executor.execute_tensor(contracted, concat=True)
        reference = np.tensordot(
            lhs_raw, rhs_raw,
            axes=(contracted.op.a_axes, contracted.op.b_axes))
        self.assertTrue(np.allclose(blocks[0], reference))

        # inner product, result arrives as two row blocks
        inner_product = inner(ones((1000, 2000), chunk_size=500),
                              ones((100, 2000), chunk_size=500))
        blocks = self.executor.execute_tensor(inner_product)
        reference = np.inner(np.ones((1000, 2000)), np.ones((100, 2000)))
        self.assertEqual(len(blocks), 2)
        assert_row_blocks(blocks, reference, ((None, 500), (500, None)))

        # chunk size that does not divide the dimension evenly
        lhs = ones((100, 100), chunk_size=30)
        rhs = ones((100, 100), chunk_size=30)
        merged = self.executor.execute_tensor(lhs.dot(rhs), concat=True)[0]
        np.testing.assert_array_equal(merged, np.full((100, 100), 100.0))