Example 1
    def test_singular_values(self):
        # Check that the PCA output has the correct singular values

        rng = np.random.RandomState(0)
        n_samples = 100
        n_features = 80

        X = mt.tensor(rng.randn(n_samples, n_features))

        pca = PCA(n_components=2, svd_solver='full', random_state=rng).fit(X)
        rpca = PCA(n_components=2, svd_solver='randomized',
                   random_state=rng).fit(X)
        assert_array_almost_equal(pca.singular_values_.fetch(),
                                  rpca.singular_values_.fetch(), 1)

        # Compare to the Frobenius norm
        X_pca = pca.transform(X)
        X_rpca = rpca.transform(X)
        assert_array_almost_equal(
            mt.sum(pca.singular_values_**2.0).to_numpy(),
            (mt.linalg.norm(X_pca, "fro")**2.0).to_numpy(), 12)
        assert_array_almost_equal(
            mt.sum(rpca.singular_values_**2.0).to_numpy(),
            (mt.linalg.norm(X_rpca, "fro")**2.0).to_numpy(), 0)

        # Compare to the 2-norms of the score vectors
        assert_array_almost_equal(
            pca.singular_values_.fetch(),
            mt.sqrt(mt.sum(X_pca**2.0, axis=0)).to_numpy(), 12)
        assert_array_almost_equal(
            rpca.singular_values_.fetch(),
            mt.sqrt(mt.sum(X_rpca**2.0, axis=0)).to_numpy(), 2)

        # Set the singular values and see what we get back
        rng = np.random.RandomState(0)
        n_samples = 100
        n_features = 110

        X = mt.tensor(rng.randn(n_samples, n_features))

        pca = PCA(n_components=3, svd_solver='full', random_state=rng)
        rpca = PCA(n_components=3, svd_solver='randomized', random_state=rng)
        X_pca = pca.fit_transform(X)

        X_pca /= mt.sqrt(mt.sum(X_pca**2.0, axis=0))
        X_pca[:, 0] *= 3.142
        X_pca[:, 1] *= 2.718

        X_hat = mt.dot(X_pca, pca.components_)
        pca.fit(X_hat)
        rpca.fit(X_hat)
        assert_array_almost_equal(pca.singular_values_.fetch(),
                                  [3.142, 2.718, 1.0], 14)
        assert_array_almost_equal(rpca.singular_values_.fetch(),
                                  [3.142, 2.718, 1.0], 14)
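
The identities this test exercises are general SVD facts: the sum of the squared singular values equals the squared Frobenius norm of the projected data, and each singular value equals the 2-norm of its score column. A minimal NumPy-only sketch of the same checks (not part of the Mars test, shown only to make the algebra concrete):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(100, 80)
Xc = X - X.mean(axis=0)                        # PCA operates on centered data

U, s, Vt = np.linalg.svd(Xc, full_matrices=False)
scores = Xc @ Vt[:2].T                         # projection onto the first two components

# sum of squared singular values == squared Frobenius norm of the scores
assert np.allclose(np.sum(s[:2] ** 2), np.linalg.norm(scores, 'fro') ** 2)
# each singular value == 2-norm of the corresponding score column
assert np.allclose(s[:2], np.sqrt(np.sum(scores ** 2, axis=0)))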
Example 2
    def testMockExecuteSize(self):
        import mars.tensor as mt
        from mars.core.graph import DAG
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        graph_add = DAG()
        input_chunks = []
        for _ in range(2):
            fetch_op = TensorFetch(dtype=np.dtype('int64'))
            inp_chunk = fetch_op.new_chunk(None, shape=(100, 100)).data
            input_chunks.append(inp_chunk)

        add_op = TensorTreeAdd(args=input_chunks, dtype=np.dtype('int64'))
        add_chunk = add_op.new_chunk(input_chunks,
                                     shape=(100, 100),
                                     dtype=np.dtype('int64')).data
        graph_add.add_node(add_chunk)
        for inp_chunk in input_chunks:
            graph_add.add_node(inp_chunk)
            graph_add.add_edge(inp_chunk, add_chunk)

        executor = Executor()
        res = executor.execute_graph(graph_add, [add_chunk.key],
                                     compose=False,
                                     mock=True)[0]
        self.assertEqual(res, (80000, 80000))
        self.assertEqual(executor.mock_max_memory, 80000)

        for _ in range(3):
            new_add_op = TensorTreeAdd(args=[add_chunk],
                                       dtype=np.dtype('int64'))
            new_add_chunk = new_add_op.new_chunk([add_chunk],
                                                 shape=(100, 100),
                                                 dtype=np.dtype('int64')).data
            graph_add.add_node(new_add_chunk)
            graph_add.add_edge(add_chunk, new_add_chunk)

            add_chunk = new_add_chunk

        executor = Executor()
        res = executor.execute_graph(graph_add, [add_chunk.key],
                                     compose=False,
                                     mock=True)[0]
        self.assertEqual(res, (80000, 80000))
        self.assertEqual(executor.mock_max_memory, 160000)

        a = mt.random.rand(10, 10, chunk_size=10)
        b = a[:, mt.newaxis, :] - a
        r = mt.triu(mt.sqrt(b**2).sum(axis=2))

        executor = Executor()
        res = executor.execute_tensor(r, concat=False, mock=True)
        # the estimates should be at least the peak memory used during the calculation
        self.assertGreaterEqual(res[0][0], 800)
        self.assertGreaterEqual(executor.mock_max_memory, 8000)
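
The expected numbers follow from simple byte arithmetic: every chunk in these graphs is a 100 x 100 int64 tensor, i.e. 80,000 bytes, so the mocked result size is (80000, 80000). Reading the 160,000-byte peak for the chained adds as two such chunks (one input plus one output) being resident at once is an assumption, not something the test states. A quick sketch of the arithmetic:

import numpy as np

# size of one 100 x 100 int64 chunk used in the graphs above
chunk_bytes = 100 * 100 * np.dtype('int64').itemsize
print(chunk_bytes)        # 80000
print(2 * chunk_bytes)    # 160000, two such chunks held at the same time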
Example 3
    def testMockExecuteSize(self):
        a = mt.random.rand(10, 10, chunk_size=10)
        b = a[:, mt.newaxis, :] - a
        r = mt.triu(mt.sqrt(b**2).sum(axis=2))

        executor = Executor()
        res = executor.execute_tensor(r, concat=False, mock=True)
        # the estimates should be at least the peak memory used during the calculation
        self.assertGreaterEqual(res[0][0], 800)
        self.assertGreaterEqual(res[0][1], 8000)
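
Both thresholds come straight from the shapes involved: the final result r is a 10 x 10 float64 tensor (800 bytes), while the broadcast intermediate b has shape (10, 10, 10) and takes 8,000 bytes, which is presumably what dominates the mocked memory estimate. A NumPy sketch of the same arithmetic:

import numpy as np

a = np.random.rand(10, 10)
b = a[:, np.newaxis, :] - a                    # broadcasts to shape (10, 10, 10)
r = np.triu(np.sqrt(b ** 2).sum(axis=2))       # final result, shape (10, 10)

print(r.nbytes)   # 800   -> lower bound asserted for the result size
print(b.nbytes)   # 8000  -> the intermediate dominating peak memory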
Example 4
    def testElementwise(self):
        t1 = ones((10000, 5000), chunk_size=500, gpu=True)
        t2 = ones(5000, chunk_size=500, gpu=True)
        t = (t1 - t2) / sqrt(t2 * (1 - t2) * len(t2))

        g = t.build_graph(tiled=True)
        RuntimeOptimizer(g, self.executor._engine).optimize([], False)
        self.assertTrue(any(n.op.__class__.__name__ == 'TensorCpFuseChunk' for n in g))

        c = next(n for n in g if n.op.__class__.__name__ == 'TensorCpFuseChunk')
        self.assertGreater(len(_evaluate(c)), 1)
Example 5
    def testElementwise(self):
        t1 = ones((10000, 5000), chunk_size=500, gpu=True)
        t2 = ones(5000, chunk_size=500, gpu=True)
        t = (t1 - t2) / sqrt(t2 * (1 - t2) * len(t2))

        g = t.build_graph(tiled=True)
        graph = self.executor._preprocess(g, [])
        self.assertTrue(any(n.op.__class__.__name__ == 'TensorCpFuseChunk' for n in graph))

        c = next(n for n in graph if n.op.__class__.__name__ == 'TensorCpFuseChunk')
        print(_evaluate(c))
Example 6
    def test_pca_check_projection(self):
        # Test that the projection of data is correct
        rng = np.random.RandomState(0)
        n, p = 100, 3
        X = mt.tensor(rng.randn(n, p) * .1)
        X[:10] += mt.array([3, 4, 5])
        Xt = 0.1 * mt.tensor(rng.randn(1, p)) + mt.array([3, 4, 5])

        for solver in self.solver_list:
            Yt = PCA(n_components=2, svd_solver=solver).fit(X).transform(Xt)
            Yt /= mt.sqrt((Yt**2).sum())

            assert_almost_equal(mt.abs(Yt[0][0]).to_numpy(), 1., 1)
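
The data are constructed so that the offset direction [3, 4, 5] dominates the variance; a test point shifted along the same direction therefore projects almost entirely onto the first principal component, which is why |Yt[0][0]| is close to 1 after normalization. The same check expressed with plain scikit-learn (a reference sketch, not the Mars code path):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
n, p = 100, 3
X = rng.randn(n, p) * .1
X[:10] += np.array([3, 4, 5])                      # strong direction in the data
Xt = 0.1 * rng.randn(1, p) + np.array([3, 4, 5])   # test point along that direction

Yt = PCA(n_components=2).fit(X).transform(Xt)
Yt /= np.sqrt((Yt ** 2).sum())
print(abs(Yt[0, 0]))   # close to 1.0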
Example 7
def test_cupy():
    t1 = mt.ones((100, 50), chunk_size=50, gpu=True)
    t2 = mt.ones(50, chunk_size=50, gpu=True)
    t = (t1 - t2) / mt.sqrt(t2 * (1 - t2) * len(t2))

    graph = TileableGraph([t.data])
    next(TileableGraphBuilder(graph).build())
    context = dict()
    chunk_graph_builder = ChunkGraphBuilder(graph,
                                            fuse_enabled=False,
                                            tile_context=context)
    chunk_graph = next(chunk_graph_builder.build())

    CupyRuntimeOptimizer(chunk_graph).optimize()
    assert any(n.op.__class__.__name__ == 'TensorCpFuseChunk'
               for n in chunk_graph)
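
A TensorCpFuseChunk node appears to stand for a run of chained elementwise GPU operations collapsed into one fused chunk, so the whole expression can be evaluated in a single pass instead of materializing every intermediate. CuPy's own fusion decorator illustrates the underlying idea; this is only a conceptual sketch, not the code the Mars optimizer generates, and it needs a CUDA device to run:

import cupy

@cupy.fuse()
def fused_expr(t1, t2, n):
    # the elementwise chain from the test, compiled into a single kernel
    return (t1 - t2) / cupy.sqrt(t2 * (1 - t2) * n)

x = cupy.random.rand(100, 50)
y = cupy.random.rand(100, 50)
out = fused_expr(x, y, 50.0)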
Example 8
def test_singular_values(setup):
    # Check that the TruncatedSVD output has the correct singular values

    # Set the singular values and see what we get back
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 110

    X = rng.randn(n_samples, n_features)

    rpca = TruncatedSVD(n_components=3,
                        algorithm='randomized',
                        random_state=rng)
    X_rpca = rpca.fit_transform(X)

    X_rpca /= mt.sqrt(mt.sum(X_rpca**2.0, axis=0))
    X_rpca[:, 0] *= 3.142
    X_rpca[:, 1] *= 2.718

    X_hat_rpca = mt.dot(X_rpca, rpca.components_)
    rpca.fit(X_hat_rpca)
    assert_array_almost_equal(rpca.singular_values_.to_numpy(),
                              [3.142, 2.718, 1.0], 14)
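
The logic of this reconstruct-and-refit check: normalize the score columns to unit norm, scale the first two to known values, rebuild the data from the components, and refit; the recovered singular values should then be exactly the values that were set, with the untouched third column keeping norm 1. A plain scikit-learn sketch of the same procedure, for reference (not part of the Mars test suite):

import numpy as np
from sklearn.decomposition import TruncatedSVD

rng = np.random.RandomState(0)
X = rng.randn(100, 110)

svd = TruncatedSVD(n_components=3, algorithm='randomized', random_state=rng)
scores = svd.fit_transform(X)
scores /= np.sqrt(np.sum(scores ** 2, axis=0))    # unit-norm score columns
scores[:, 0] *= 3.142
scores[:, 1] *= 2.718

X_hat = scores @ svd.components_                  # reconstruct with known singular values
svd.fit(X_hat)
print(svd.singular_values_)                       # ~[3.142, 2.718, 1.0]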