Beispiel #1
0
    def testWhitening(self):
        # Verify that PCA with whiten=True produces zero-mean, unit-variance
        # components for every (solver, copy) combination, and that the
        # non-whitened projection keeps strongly varying variances.
        rng = np.random.RandomState(0)
        n_samples, n_features = 100, 80
        n_components = 30
        rank = 50

        # Low-rank data with correlated features: random left factor times a
        # decaying diagonal times a random right factor.  The two rng draws
        # are kept in this order so the generated data does not change.
        left_factor = rng.randn(n_samples, rank)
        spectrum = mt.diag(mt.linspace(10.0, 1.0, rank))
        right_factor = rng.randn(rank, n_features)
        X = mt.dot(left_factor, mt.dot(spectrum, right_factor))
        # Scale the first 50 feature columns by 3 so that the per-feature
        # spread differs strongly between the two groups of columns.
        X[:, :50] *= 3

        self.assertEqual(X.shape, (n_samples, n_features))

        # The per-feature standard deviations must themselves be widely spread.
        feature_std_spread = X.std(axis=0).std().to_numpy()
        self.assertGreater(feature_std_spread, 43.8)

        for svd_solver, copy_flag in product(self.solver_list, (True, False)):
            # Keyword arguments shared by the whitening and plain estimators.
            shared_kwargs = dict(n_components=n_components,
                                 copy=copy_flag,
                                 svd_solver=svd_solver)

            # Whiten while projecting onto the lower-dimensional subspace.
            # Work on copies so the original X survives across iterations.
            data = X.copy()
            whitening_pca = PCA(whiten=True,
                                random_state=0,
                                iterated_power=7,
                                **shared_kwargs)
            # fit_transform and a subsequent transform must agree.
            X_whitened = whitening_pca.fit_transform(data.copy())
            self.assertEqual(X_whitened.shape, (n_samples, n_components))
            X_whitened2 = whitening_pca.transform(data)
            assert_array_almost_equal(X_whitened.fetch(), X_whitened2.fetch())

            # Whitened components: unit sample std (ddof=1) and zero mean.
            whitened_std = X_whitened.std(ddof=1, axis=0).to_numpy()
            assert_almost_equal(whitened_std,
                                np.ones(n_components),
                                decimal=6)
            whitened_mean = X_whitened.mean(axis=0).to_numpy()
            assert_almost_equal(whitened_mean, np.zeros(n_components))

            # Same projection without whitening, on a fresh copy of X.
            data = X.copy()
            plain_pca = PCA(whiten=False, **shared_kwargs).fit(data)
            X_unwhitened = plain_pca.transform(data)
            self.assertEqual(X_unwhitened.shape, (n_samples, n_components))

            # Without whitening the component variances still vary a lot.
            unwhitened_spread = X_unwhitened.std(axis=0).std().to_numpy()
            assert_almost_equal(unwhitened_spread, 74.1, 1)
Beispiel #2
0
    def testMultipleOutputTensorExecute(self, *_):
        # Run the multi-output SVD tensor on a local cluster in several ways:
        # all outputs together, combined into one expression, and as subsets
        # of the outputs submitted through separate sessions.
        cluster_options = dict(scheduler_n_process=2,
                               worker_n_process=2,
                               shared_memory='20M')
        with new_cluster(**cluster_options) as cluster:
            session = cluster.session

            # Fetch the input together with all three SVD outputs and check
            # that they reconstruct the input.
            t = mt.random.rand(20, 5, chunk_size=5)
            svd_outputs = mt.linalg.svd(t)

            fetched = session.run((t, ) + svd_outputs, timeout=_exec_timeout)

            U_np, s_np, V_np = fetched[1:]
            reconstructed = U_np.dot(np.diag(s_np).dot(V_np))
            np.testing.assert_allclose(fetched[0], reconstructed)

            raw = np.random.rand(20, 5)

            # Reconstruction written as a single tensor expression; per the
            # original note this is meant to exercise graph fusion.
            t = mt.array(raw)
            U, s, V = mt.linalg.svd(t)
            product_tensor = U.dot(mt.diag(s).dot(V))

            executed = product_tensor.execute()
            np.testing.assert_allclose(raw, executed)

            # Submit only part of the SVD outputs (U and s, without V).
            t = mt.array(raw)
            U, s, V = mt.linalg.svd(t)

            with new_session(cluster.endpoint) as session2:
                U_result, s_result = session2.run(U, s, timeout=_exec_timeout)
                U_expected, s_expected, _ = np.linalg.svd(
                    raw, full_matrices=False)

                np.testing.assert_allclose(U_result, U_expected)
                np.testing.assert_allclose(s_result, s_expected)

            # Submit expressions derived from part of the outputs.
            with new_session(cluster.endpoint) as session2:
                U_result, s_result = session2.run(U + 1,
                                                  s + 1,
                                                  timeout=_exec_timeout)
                U_expected, s_expected, _ = np.linalg.svd(
                    raw, full_matrices=False)

                np.testing.assert_allclose(U_result, U_expected + 1)
                np.testing.assert_allclose(s_result, s_expected + 1)

            # Submit only the singular values, after the references to the
            # other two outputs of this svd call have been dropped.
            with new_session(cluster.endpoint) as session2:
                t = mt.array(raw)
                unused_u, s, unused_v = mt.linalg.svd(t)
                del unused_u, unused_v

                s_result = session2.run(s, timeout=_exec_timeout)
                s_expected = np.linalg.svd(raw, full_matrices=False)[1]
                np.testing.assert_allclose(s_result, s_expected)
Beispiel #3
0
    def testDiag(self):
        # Check the shapes and the tiled chunk splits produced by ``diag``
        # for 2-d inputs, a 1-d input and several diagonal offsets ``k``.

        # 2-d square input, default k, uniform chunk size
        square = tensor(np.arange(16).reshape(4, 4), chunk_size=2)
        t = diag(square)

        self.assertEqual(t.shape, (4, ))
        self.assertFalse(t.op.gpu)
        t = t.tiles()
        self.assertEqual(t.nsplits, ((2, 2), ))

        # 2-d square input, default k, uneven chunk sizes per axis
        square = tensor(np.arange(16).reshape(4, 4), chunk_size=(2, 3))
        t = diag(square)

        self.assertEqual(t.shape, (4, ))
        t = t.tiles()
        self.assertEqual(t.nsplits, ((2, 1, 1), ))

        # 1-d input, default k, sparse output: result is a 3x3 matrix
        vector = tensor(np.arange(3), chunk_size=2)
        t = diag(vector, sparse=True)

        self.assertEqual(t.shape, (3, 3))
        t = t.tiles()
        self.assertEqual(t.nsplits, ((2, 1), (2, 1)))
        # exactly two chunks are expected to come from a TensorDiag op
        diag_chunk_count = sum(
            1 for c in t.chunks if c.op.__class__.__name__ == 'TensorDiag')
        self.assertEqual(diag_chunk_count, 2)
        self.assertTrue(t.chunks[0].op.sparse)

        # 2-d non-square input: shape must match numpy, and the chunk
        # splits must add up to that shape
        rect = np.arange(24).reshape(4, 6)
        v = tensor(rect, chunk_size=2)
        t = diag(v)

        self.assertEqual(t.shape, np.diag(rect).shape)
        t = t.tiles()
        self.assertEqual(tuple(map(sum, t.nsplits)), t.shape)

        # same checks for diagonals above (k > 0) and below (k < 0) the main
        v = tensor(rect, chunk_size=2)
        for k in (1, 2, -1, -2):
            t = diag(v, k=k)
            self.assertEqual(t.shape, np.diag(rect, k=k).shape)
            t = t.tiles()
            self.assertEqual(tuple(map(sum, t.nsplits)), t.shape)

        # keys of the tiled filler chunks: chunks 1 and 2 have different
        # shapes (presumably the zero-filled off-diagonal chunks -- confirm),
        # so their op keys must differ
        a = arange(5, chunk_size=2)
        t = diag(a)
        t = t.tiles()
        self.assertNotEqual(t.chunks[1].op.key, t.chunks[2].op.key)
Beispiel #4
0
def test_diag():
    """Check shapes and tiled chunk splits produced by ``diag``.

    Covers 2-d square and non-square inputs, a sparse 1-d input, several
    diagonal offsets ``k``, and the op keys of two differently shaped chunks.
    """
    # 2-d square input, default k, uniform chunk size
    square = tensor(np.arange(16).reshape(4, 4), chunk_size=2)
    t = diag(square)

    assert t.shape == (4, )
    assert t.op.gpu is False
    t = tile(t)
    assert t.nsplits == ((2, 2), )

    # 2-d square input, default k, uneven chunk sizes per axis
    square = tensor(np.arange(16).reshape(4, 4), chunk_size=(2, 3))
    t = diag(square)

    assert t.shape == (4, )
    t = tile(t)
    assert t.nsplits == ((2, 1, 1), )

    # 1-d input, default k, sparse output: result is a 3x3 matrix
    vector = tensor(np.arange(3), chunk_size=2)
    t = diag(vector, sparse=True)

    assert t.shape == (3, 3)
    t = tile(t)
    assert t.nsplits == ((2, 1), (2, 1))
    # exactly two chunks are expected to come from a TensorDiag op
    diag_chunk_count = sum(
        1 for c in t.chunks if c.op.__class__.__name__ == 'TensorDiag')
    assert diag_chunk_count == 2
    assert t.chunks[0].op.sparse is True

    # 2-d non-square input: shape must match numpy, and the chunk splits
    # must add up to that shape
    rect = np.arange(24).reshape(4, 6)
    v = tensor(rect, chunk_size=2)
    t = diag(v)

    assert t.shape == np.diag(rect).shape
    t = tile(t)
    assert tuple(map(sum, t.nsplits)) == t.shape

    # same checks for diagonals above (k > 0) and below (k < 0) the main one
    v = tensor(rect, chunk_size=2)
    for k in (1, 2, -1, -2):
        t = diag(v, k=k)
        assert t.shape == np.diag(rect, k=k).shape
        t = tile(t)
        assert tuple(map(sum, t.nsplits)) == t.shape

    # keys of the tiled filler chunks: chunks 1 and 2 have different shapes
    # (presumably the zero-filled off-diagonal chunks -- confirm), so their
    # op keys must differ
    a = arange(5, chunk_size=2)
    t = diag(a)
    t = tile(t)
    assert t.chunks[1].op.key != t.chunks[2].op.key
Beispiel #5
0
    def testDiag(self):
        # Check the shapes and the tiled chunk splits produced by ``diag``
        # for 2-d inputs, a 1-d input and several diagonal offsets ``k``.
        # ``tiles()`` is called for its effect on ``t`` itself here; its
        # return value is not used.

        # 2-d square input, default k, uniform chunk size
        square = tensor(np.arange(16).reshape(4, 4), chunks=2)
        t = diag(square)

        self.assertEqual(t.shape, (4, ))
        t.tiles()
        self.assertEqual(t.nsplits, ((2, 2), ))

        # 2-d square input, default k, uneven chunk sizes per axis
        square = tensor(np.arange(16).reshape(4, 4), chunks=(2, 3))
        t = diag(square)

        self.assertEqual(t.shape, (4, ))
        t.tiles()
        self.assertEqual(t.nsplits, ((2, 1, 1), ))

        # 1-d input, default k, sparse output: result is a 3x3 matrix
        vector = tensor(np.arange(3), chunks=2)
        t = diag(vector, sparse=True)

        self.assertEqual(t.shape, (3, 3))
        t.tiles()
        self.assertEqual(t.nsplits, ((2, 1), (2, 1)))
        # exactly two chunks are expected to come from a TensorDiag op
        diag_chunk_count = sum(
            1 for c in t.chunks if c.op.__class__.__name__ == 'TensorDiag')
        self.assertEqual(diag_chunk_count, 2)
        self.assertTrue(t.chunks[0].op.sparse)

        # 2-d non-square input: shape must match numpy, and the chunk
        # splits must add up to that shape
        rect = np.arange(24).reshape(4, 6)
        v = tensor(rect, chunks=2)
        t = diag(v)

        self.assertEqual(t.shape, np.diag(rect).shape)
        t.tiles()
        self.assertEqual(tuple(map(sum, t.nsplits)), t.shape)

        # same checks for diagonals above (k > 0) and below (k < 0) the main
        v = tensor(rect, chunks=2)
        for k in (1, 2, -1, -2):
            t = diag(v, k=k)
            self.assertEqual(t.shape, np.diag(rect, k=k).shape)
            t.tiles()
            self.assertEqual(tuple(map(sum, t.nsplits)), t.shape)