def test_randomized_svd_execution(setup):
    """Compare ``randomized_svd`` with an exact NumPy SVD on a low-rank matrix.

    For every input dtype and every power-iteration normalizer the test
    checks the dtypes and shapes of the returned factors, then compares the
    first ``k`` singular values and the product of the singular vectors with
    the values obtained from ``np.linalg.svd``.
    """
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    for raw_dtype in (np.int64, np.float64):
        # Very structured signal: approximate effective rank `rank` and no
        # noise component (tail_strength is zero).
        X = make_low_rank_matrix(
            n_samples=n_samples,
            n_features=n_features,
            effective_rank=rank,
            tail_strength=0.0,
            random_state=0,
        ).astype(raw_dtype, copy=False)
        assert X.shape == (n_samples, n_features)

        dtype = np.dtype(raw_dtype)
        decimal = 5 if dtype == np.float32 else 7

        # Float inputs keep their own bit size (f32 is not upcast to f64);
        # integer inputs are expected to come back as float64.
        expected_dtype = dtype if dtype.kind == 'f' else np.float64

        # Reference decomposition from the slow exact method, converted to
        # the dtype under test.
        X_res = X.execute().fetch()
        U, s, V = (
            factor.astype(dtype, copy=False)
            for factor in np.linalg.svd(X_res, full_matrices=False)
        )
        exact_singular_values = s[:k]
        exact_product = np.dot(U[:, :k], V[:k, :])

        # 'none' would not be stable
        for normalizer in ['auto', 'LU', 'QR']:
            # Fast approximate method.
            Ua, sa, Va = randomized_svd(
                X,
                k,
                n_iter=1,
                power_iteration_normalizer=normalizer,
                random_state=0,
            )

            for approx_factor in (Ua, sa, Va):
                assert approx_factor.dtype == expected_dtype

            assert Ua.shape == (n_samples, k)
            assert sa.shape == (k, )
            assert Va.shape == (k, n_features)

            # Singular values of both methods must agree up to the real rank
            # of the matrix.
            sa_res = sa.execute().fetch()
            np.testing.assert_almost_equal(
                exact_singular_values, sa_res, decimal=decimal)

            # Compare the singular vectors through their product, which
            # avoids checking the sign of individual vectors.
            dot_res = dot(Ua, Va).execute().fetch()
            np.testing.assert_almost_equal(
                exact_product, dot_res, decimal=decimal)
def testMakeLowRankMatrix(self):
    """Check the shape and approximate rank of ``make_low_rank_matrix`` output.

    Builds a 50 x 25 matrix with ``effective_rank=5`` and asserts that the
    sum of its singular values exceeds 5 by less than 0.1.
    """
    X = make_low_rank_matrix(n_samples=50, n_features=25, effective_rank=5,
                             tail_strength=0.01, random_state=0)

    # assertEqual rather than the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(X.shape, (50, 25), "X shape mismatch")

    _, s, _ = svd(X)
    self.assertLess((s.sum() - 5).execute(n_parallel=1), 0.1,
                    "X rank is not approximately 5")
def test_make_low_rank_matrix(setup):
    """Check the shape and approximate rank of ``make_low_rank_matrix`` output.

    The generated matrix must have the requested shape, and the sum of its
    singular values must exceed 5 by less than 0.1.
    """
    n_samples, n_features = 50, 25
    matrix = make_low_rank_matrix(
        n_samples=n_samples,
        n_features=n_features,
        effective_rank=5,
        tail_strength=0.01,
        random_state=0,
    )
    assert matrix.shape == (n_samples, n_features)

    # Only the singular values are needed; the two other factors are ignored.
    _left, singular_values, _right = svd(matrix)
    excess = (singular_values.sum() - 5).to_numpy()
    assert excess < 0.1