def testWhitening(self):
    # Whitened PCA output must have zero mean and unit variance per
    # component, while un-whitened output keeps strongly varying variances.
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 80
    n_components = 30
    rank = 50

    # Low-rank data with correlated features.  The two random factors are
    # drawn in the same order a single nested expression would draw them.
    left_factor = rng.randn(n_samples, rank)
    spectrum = mt.diag(mt.linspace(10.0, 1.0, rank))
    right_factor = rng.randn(rank, n_features)
    X = mt.dot(left_factor, mt.dot(spectrum, right_factor))

    # Scale the first 50 feature columns by 3 so their spread differs
    # markedly from that of the remaining 30 columns.
    X[:, :50] *= 3
    self.assertEqual(X.shape, (n_samples, n_features))

    # The per-feature standard deviation is therefore far from uniform.
    feature_std_spread = X.std(axis=0).std().to_numpy()
    self.assertGreater(feature_std_spread, 43.8)

    # Expected statistics of whitened output (loop invariant).
    unit_std = np.ones(n_components)
    zero_mean = np.zeros(n_components)

    for solver, copy_input in product(self.solver_list, (True, False)):
        # --- whiten while projecting onto the lower-dimensional subspace
        # Work on a copy so the original X survives every iteration.
        data = X.copy()
        model = PCA(
            n_components=n_components,
            whiten=True,
            copy=copy_input,
            svd_solver=solver,
            random_state=0,
            iterated_power=7,
        )

        # fit_transform() and a later transform() must agree.
        projected = model.fit_transform(data.copy())
        self.assertEqual(projected.shape, (n_samples, n_components))
        reprojected = model.transform(data)
        assert_array_almost_equal(projected.fetch(), reprojected.fetch())

        component_std = projected.std(ddof=1, axis=0).to_numpy()
        assert_almost_equal(component_std, unit_std, decimal=6)
        component_mean = projected.mean(axis=0).to_numpy()
        assert_almost_equal(component_mean, zero_mean)

        # --- same projection without whitening
        data = X.copy()
        model = PCA(
            n_components=n_components,
            whiten=False,
            copy=copy_input,
            svd_solver=solver,
        ).fit(data)
        unwhitened = model.transform(data)
        self.assertEqual(unwhitened.shape, (n_samples, n_components))

        # Here the output components still have varying variances.
        unwhitened_std_spread = unwhitened.std(axis=0).std().to_numpy()
        assert_almost_equal(unwhitened_std_spread, 74.1, 1)
def testMultipleOutputTensorExecute(self, *_):
    # Execute the multi-output SVD on a local cluster in several ways: all
    # outputs in one run, an expression built from the outputs, and only a
    # subset of the outputs through separate sessions.
    with new_cluster(scheduler_n_process=2,
                     worker_n_process=2,
                     shared_memory='20M') as cluster:
        session = cluster.session

        # Submit the input together with every SVD output in a single run.
        source = mt.random.rand(20, 5, chunk_size=5)
        expr = mt.linalg.svd(source)
        fetched = session.run((source, ) + expr, timeout=_exec_timeout)

        left, sigma, right = fetched[1:]
        rebuilt = left.dot(np.diag(sigma).dot(right))
        np.testing.assert_allclose(fetched[0], rebuilt)

        raw = np.random.rand(20, 5)

        # Recompose the matrix from its factors inside one expression; per
        # the original note this is meant to exercise graph fusion.
        source = mt.array(raw)
        left, sigma, right = mt.linalg.svd(source)
        expr = left.dot(mt.diag(sigma).dot(right))

        fetched = expr.execute()
        np.testing.assert_allclose(raw, fetched)

        # Submit only part of the SVD outputs.
        source = mt.array(raw)
        left, sigma, right = mt.linalg.svd(source)

        # NumPy reference factors for `raw`, shared by the checks below.
        left_ref, sigma_ref = np.linalg.svd(raw, full_matrices=False)[:2]

        with new_session(cluster.endpoint) as session2:
            left_got, sigma_got = session2.run(
                left, sigma, timeout=_exec_timeout)
            np.testing.assert_allclose(left_got, left_ref)
            np.testing.assert_allclose(sigma_got, sigma_ref)

        # Same subset, but each output feeds a further operation.
        with new_session(cluster.endpoint) as session2:
            left_got, sigma_got = session2.run(
                left + 1, sigma + 1, timeout=_exec_timeout)
            np.testing.assert_allclose(left_got, left_ref + 1)
            np.testing.assert_allclose(sigma_got, sigma_ref + 1)

        # Only the singular values, with no local name left bound to the
        # other two outputs of this SVD.
        with new_session(cluster.endpoint) as session2:
            source = mt.array(raw)
            _, sigma, _ = mt.linalg.svd(source)
            del _

            sigma_got = session2.run(sigma, timeout=_exec_timeout)
            np.testing.assert_allclose(sigma_got, sigma_ref)
def testDiag(self):
    # Exercise diag() on square 2-d, 1-d and rectangular 2-d inputs,
    # checking the result shape before tiling and the chunk splits after.
    square = np.arange(16).reshape(4, 4)
    rect = np.arange(24).reshape(4, 6)

    # 2-d, shape[0] == shape[1], k == 0, uniform chunks
    src = tensor(square, chunk_size=2)
    out = diag(src)
    self.assertEqual(out.shape, (4, ))
    self.assertFalse(out.op.gpu)
    out = out.tiles()
    self.assertEqual(out.nsplits, ((2, 2), ))

    # same data, chunked (2, 3)
    src = tensor(square, chunk_size=(2, 3))
    out = diag(src)
    self.assertEqual(out.shape, (4, ))
    out = out.tiles()
    self.assertEqual(out.nsplits, ((2, 1, 1), ))

    # 1-d, k == 0, sparse output requested
    src = tensor(np.arange(3), chunk_size=2)
    out = diag(src, sparse=True)
    self.assertEqual(out.shape, (3, 3))
    out = out.tiles()
    self.assertEqual(out.nsplits, ((2, 1), (2, 1)))
    diag_chunk_count = sum(
        1 for chunk in out.chunks
        if chunk.op.__class__.__name__ == 'TensorDiag')
    self.assertEqual(diag_chunk_count, 2)
    self.assertTrue(out.chunks[0].op.sparse)

    # 2-d, shape[0] != shape[1], main diagonal: shape must match numpy's
    src = tensor(rect, chunk_size=2)
    out = diag(src)
    self.assertEqual(out.shape, np.diag(rect).shape)
    out = out.tiles()
    self.assertEqual(tuple(map(sum, out.nsplits)), out.shape)

    # 2-d, shape[0] != shape[1], diagonals above and below the main one
    src = tensor(rect, chunk_size=2)
    for offset in (1, 2, -1, -2):
        out = diag(src, k=offset)
        self.assertEqual(out.shape, np.diag(rect, k=offset).shape)
        out = out.tiles()
        # the chunk splits along each axis must add up to the full shape
        self.assertEqual(tuple(map(sum, out.nsplits)), out.shape)

    # tiled zeros' keys: chunks 1 and 2 have different shapes, so their
    # op keys must not collide
    vec = arange(5, chunk_size=2)
    out = diag(vec)
    out = out.tiles()
    self.assertNotEqual(out.chunks[1].op.key, out.chunks[2].op.key)
def test_diag():
    # Exercise diag() on square 2-d, 1-d and rectangular 2-d inputs,
    # checking the result shape before tiling and the chunk splits after.
    square = np.arange(16).reshape(4, 4)
    rect = np.arange(24).reshape(4, 6)

    # 2-d, shape[0] == shape[1], k == 0, uniform chunks
    src = tensor(square, chunk_size=2)
    out = diag(src)
    assert out.shape == (4, )
    assert out.op.gpu is False
    out = tile(out)
    assert out.nsplits == ((2, 2), )

    # same data, chunked (2, 3)
    src = tensor(square, chunk_size=(2, 3))
    out = diag(src)
    assert out.shape == (4, )
    out = tile(out)
    assert out.nsplits == ((2, 1, 1), )

    # 1-d, k == 0, sparse output requested
    src = tensor(np.arange(3), chunk_size=2)
    out = diag(src, sparse=True)
    assert out.shape == (3, 3)
    out = tile(out)
    assert out.nsplits == ((2, 1), (2, 1))
    diag_chunk_count = sum(
        1 for chunk in out.chunks
        if chunk.op.__class__.__name__ == 'TensorDiag')
    assert diag_chunk_count == 2
    assert out.chunks[0].op.sparse is True

    # 2-d, shape[0] != shape[1], main diagonal: shape must match numpy's
    src = tensor(rect, chunk_size=2)
    out = diag(src)
    assert out.shape == np.diag(rect).shape
    out = tile(out)
    assert tuple(map(sum, out.nsplits)) == out.shape

    # 2-d, shape[0] != shape[1], diagonals above and below the main one
    src = tensor(rect, chunk_size=2)
    for offset in (1, 2, -1, -2):
        out = diag(src, k=offset)
        assert out.shape == np.diag(rect, k=offset).shape
        out = tile(out)
        # the chunk splits along each axis must add up to the full shape
        assert tuple(map(sum, out.nsplits)) == out.shape

    # tiled zeros' keys: chunks 1 and 2 have different shapes, so their
    # op keys must not collide
    vec = arange(5, chunk_size=2)
    out = diag(vec)
    out = tile(out)
    assert out.chunks[1].op.key != out.chunks[2].op.key
def testDiag(self):
    # Exercise diag() on square 2-d, 1-d and rectangular 2-d inputs,
    # checking the result shape before tiling and the chunk splits after.
    # tiles() is called for its effect on the tensor; its return value is
    # not used here.
    square = np.arange(16).reshape(4, 4)
    rect = np.arange(24).reshape(4, 6)

    # 2-d, shape[0] == shape[1], k == 0, uniform chunks
    src = tensor(square, chunks=2)
    out = diag(src)
    self.assertEqual(out.shape, (4, ))
    out.tiles()
    self.assertEqual(out.nsplits, ((2, 2), ))

    # same data, chunked (2, 3)
    src = tensor(square, chunks=(2, 3))
    out = diag(src)
    self.assertEqual(out.shape, (4, ))
    out.tiles()
    self.assertEqual(out.nsplits, ((2, 1, 1), ))

    # 1-d, k == 0, sparse output requested
    src = tensor(np.arange(3), chunks=2)
    out = diag(src, sparse=True)
    self.assertEqual(out.shape, (3, 3))
    out.tiles()
    self.assertEqual(out.nsplits, ((2, 1), (2, 1)))
    diag_chunk_count = sum(
        1 for chunk in out.chunks
        if chunk.op.__class__.__name__ == 'TensorDiag')
    self.assertEqual(diag_chunk_count, 2)
    self.assertTrue(out.chunks[0].op.sparse)

    # 2-d, shape[0] != shape[1], main diagonal: shape must match numpy's
    src = tensor(rect, chunks=2)
    out = diag(src)
    self.assertEqual(out.shape, np.diag(rect).shape)
    out.tiles()
    self.assertEqual(tuple(map(sum, out.nsplits)), out.shape)

    # 2-d, shape[0] != shape[1], diagonals above and below the main one
    src = tensor(rect, chunks=2)
    for offset in (1, 2, -1, -2):
        out = diag(src, k=offset)
        self.assertEqual(out.shape, np.diag(rect, k=offset).shape)
        out.tiles()
        # the chunk splits along each axis must add up to the full shape
        self.assertEqual(tuple(map(sum, out.nsplits)), out.shape)