def test_PowerMethod_std_method():
    """PowerMethod on ``make_snp_array`` output agrees with a dense truncated SVD.

    For each ``std_method`` ('norm' and 'binom') the raw array is centered and
    scaled by hand, decomposed with ``da.linalg.svd`` and truncated to ``k``
    components.  The PowerMethod factors must match that reference: subspace
    distances to 3 decimals, singular values to 2 decimals.
    """
    n_rows = 1000
    n_cols = 100
    rank = 10
    raw = da.random.randint(0, 3, size=(n_rows, n_cols))

    for std_method in ['norm', 'binom']:
        snp_array = make_snp_array(raw, std_method=std_method)
        solver = PowerMethod(k=rank, scoring_method='q-vals', tol=1e-13, factor=None)
        U_PM, S_PM, V_PM = solver.svd(array=snp_array)

        # Hand-rolled standardization used as the reference.
        col_mean = raw.mean(axis=0)
        if std_method == 'norm':
            col_std = raw.std(axis=0)
        else:
            # 'binom': sqrt(2 * p * (1 - p)) with p = column mean / 2.
            freq = col_mean / 2
            col_std = da.sqrt(2 * freq * (1 - freq))
        reference = (raw - col_mean).dot(np.diag(1 / col_std))

        U, S, V = da.linalg.svd(reference)
        U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=rank)

        u_dist = subspace_dist(U_PM, U_k, S_k)
        np.testing.assert_almost_equal(u_dist, 0, decimal=3)
        v_dist = subspace_dist(V_PM, V_k, S_k)
        np.testing.assert_almost_equal(v_dist, 0, decimal=3)
        np.testing.assert_array_almost_equal(S_k, S_PM, decimal=2)
def test_PowerMethod_all_tols_agree():
    """All three scoring methods must converge to the same decomposition.

    The same random matrix is decomposed with 'q-vals', 'rmse' and
    'v-subspace' stopping criteria; singular values and both subspaces of the
    'rmse' and 'v-subspace' runs are compared against the 'q-vals' run.
    """
    n = 100
    p = 80
    k = 10
    array = np.random.rand(n, p)

    # (scoring method, tolerance), run in this order.
    settings = (('q-vals', 1e-9), ('rmse', 1e-4), ('v-subspace', 1e-9))
    results = {}
    for scoring, tol in settings:
        solver = PowerMethod(k=k, tol=tol, scoring_method=scoring, max_iter=100, lmbd=0)
        results[scoring] = solver.svd(array)

    U_q, S_q, V_q = results['q-vals']
    U_r, S_r, V_r = results['rmse']
    U_v, S_v, V_v = results['v-subspace']

    np.testing.assert_array_almost_equal(S_q, S_r)
    np.testing.assert_array_almost_equal(S_q, S_v)

    np.testing.assert_almost_equal(subspace_dist(U_q, U_r, S_q), 0)
    np.testing.assert_almost_equal(subspace_dist(U_q, U_v, S_q), 0)

    np.testing.assert_almost_equal(subspace_dist(V_q, V_r, S_q), 0)
    np.testing.assert_almost_equal(subspace_dist(V_q, V_v, S_q), 0)
def test_PowerMethod_case1():
    """PowerMethod with 'rmse' scoring matches numpy's SVD for k = 1..9.

    The reference is ``np.linalg.svd`` of the centered and scaled matrix; the
    PowerMethod input is the same data passed through ``make_snp_array`` with
    mean and std enabled.
    """
    n, p = 100, 80
    raw = np.random.rand(n, p)

    col_mean = raw.mean(axis=0)
    inv_std = np.diag(1 / raw.std(axis=0))
    reference = (raw - col_mean).dot(inv_std)
    U, S, V = np.linalg.svd(reference, full_matrices=False)  # Ground Truth

    snp_array = make_snp_array(da.array(raw), mean=True, std=True, std_method='norm',
                               mask_nan=False, dtype='float64')

    for k in range(1, 10):
        U_k = U[:, :k]
        S_k = S[:k]
        V_k = V[:k, :]

        solver = PowerMethod(k=k, tol=1e-9, scoring_method='rmse', max_iter=100,
                             sub_svd_start=False, init_row_sampling_factor=1,
                             factor=None, lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = solver.svd(snp_array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert V_k.shape == V_k_PM.shape == (k, p)
        assert U_k.shape == U_k_PM.shape == (n, k)
        np.testing.assert_almost_equal(subspace_dist(V_k, V_k_PM, S_k_PM), 0)
        np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)
def test_v_init():
    """``v_init`` seeded with the exact V must span the same space as U.

    The original test repeated the identical assertion twice; the duplicate
    only re-ran the same dask computation and has been dropped.
    """
    N, P = 100, 40
    a = da.array(np.random.randn(N, P))
    U, S, V = da.linalg.svd(a)

    np.testing.assert_almost_equal(subspace_dist(U, v_init(a, V), S), 0)
def test_sub_svd_init_warm_start_row_factor():
    """The distance to the true top-k subspace must not grow with the row factor.

    ``sub_svd_init`` is called with ``warm_start_row_factor`` = 1, 3, 5, 7, 9
    and each result's subspace distance to the truncated SVD of the full array
    must be no larger than the previous one (starting from a bound of 1).
    """
    N, P = 100, 40
    k = 10

    a = da.array(np.random.randn(N, P))
    U, S, V = da.linalg.svd(a)
    Uk, Sk = svd_to_trunc_svd(u=U, s=S, k=k)

    previous_error = 1
    for row_factor in range(1, 11, 2):
        U1 = sub_svd_init(a, k=k, warm_start_row_factor=row_factor, log=0)
        # Evaluate the distance once and reuse it for both the check and the
        # next iteration's bound.
        error = subspace_dist(U1, Uk, Sk)
        assert error <= previous_error
        previous_error = error
def test_SSPM_case3():
    """SuccessiveBatchedPowerMethod on a diagonal matrix with growing entries.

    The diagonal is 1.01**i for i = 1..N, so the final singular values must be
    the k largest entries.  The i-th recorded iteration is checked against the
    first ``int(f * (i + 1) * N)`` diagonal entries: its singular values must
    be the k largest of those, and its V must span the matching unit vectors.
    """
    N = 500
    k = 10
    f = .1

    s_orig = np.array([1.01**power for power in range(1, N + 1)])
    array = da.diag(s_orig)

    SSPM = SuccessiveBatchedPowerMethod(k=k,
                                        sub_svd_start=True,
                                        tol=[1e-14, 1e-14],
                                        f=f,
                                        scoring_method=['q-vals', 'v-subspace'],
                                        factor=None)
    U_PM, S_PM, V_PM = SSPM.svd(array)

    np.testing.assert_array_almost_equal(s_orig[-k:][::-1], S_PM)

    recorded = zip(SSPM.history.iter['S'], SSPM.history.iter['V'])
    for batch, (sub_S, sub_V) in enumerate(recorded):
        # Number of diagonal entries expected to be covered by this iteration.
        seen = int(f * (batch + 1) * N)

        expected_S = sorted(s_orig[:seen], reverse=True)[:k]
        np.testing.assert_array_almost_equal(expected_S, sub_S)

        expected_V = np.zeros_like(sub_V)
        for row in range(k):
            expected_V[row, seen - k + row] = 1
        np.testing.assert_almost_equal(subspace_dist(expected_V, sub_V, sub_S), 0)
def test_subspace_to_V_case1():
    """``subspace_to_V`` given the exact U must reproduce the SVD's V subspace.

    Checked for every shape with 2 <= N, P <= 9 on uniform random matrices.
    """
    for n_rows in range(2, 10):
        for n_cols in range(2, 10):
            A = da.random.random(size=(n_rows, n_cols))
            U, S, V_ref = da.linalg.svd(A)
            V_rec = subspace_to_V(U, A, k=min(n_rows, n_cols))
            dist = subspace_dist(V_ref, V_rec, S)
            np.testing.assert_almost_equal(dist, 0, decimal=decimals)
def test_sub_svd_init():
    """``sub_svd_init`` with a row factor of 10 must hit the true top-k subspace."""
    n_rows, n_cols = 100, 40
    rank = 10

    a = da.array(np.random.randn(n_rows, n_cols))
    U, S, V = da.linalg.svd(a)
    U_true, S_true = svd_to_trunc_svd(u=U, s=S, k=rank)

    U_init = sub_svd_init(a, k=rank, warm_start_row_factor=10, log=0)
    dist = subspace_dist(U_init, U_true, S_true)
    np.testing.assert_almost_equal(dist, 0)
def test_SSPM_case1():
    """SuccessiveBatchedPowerMethod on a tall, mostly-zero matrix.

    The first P rows hold diag(linspace(1, 2, P)), the last row is all ones
    and every other row is zero.  The final factors must match numpy's
    truncated SVD, and every recorded iteration except the last must report
    the k largest diagonal entries as its singular values.
    """
    N, P, k = 100000, 100, 10

    s_orig = np.linspace(1, 2, P)
    array = np.zeros(shape=(N, P))
    array[:P, :] = np.diag(s_orig)
    array[N - 1, :] = 1

    U, S, V = np.linalg.svd(array, full_matrices=False)
    U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

    SSPM = SuccessiveBatchedPowerMethod(k=k,
                                        sub_svd_start=True,
                                        tol=1e-12,
                                        factor=None)
    U_PM, S_PM, V_PM = SSPM.svd(array)

    np.testing.assert_almost_equal(subspace_dist(U_PM, U_k, S_k), 0)
    np.testing.assert_almost_equal(subspace_dist(V_PM, V_k, S_k), 0)
    np.testing.assert_array_almost_equal(S_k, S_PM)

    # The last history entry is excluded from this check.
    expected_top = s_orig[::-1][:k]
    for sub_S in SSPM.history.iter['S'][:-1]:
        np.testing.assert_array_almost_equal(expected_top, sub_S)
def test_PowerMethod_scale_center():
    """PowerMethod is correct for every combination of scaling and centering.

    For each (scale, center) pair the expected matrix is built by hand, its
    ground-truth factors are taken from the SVDs of its two Gram matrices,
    and ``make_snp_array`` must reproduce the same matrix before PowerMethod
    is run on it.
    """
    raw = np.random.rand(100, 70)
    mu = raw.mean(axis=0)
    inv_std = np.diag(1 / raw.std(axis=0))
    k = 10

    for scale in [True, False]:
        for center in [True, False]:
            expected = raw - mu if center else raw
            if scale:
                expected = expected.dot(inv_std)

            row_gram = expected.dot(expected.T)
            col_gram = expected.T.dot(expected)
            U, S, _ = np.linalg.svd(row_gram, full_matrices=False)  # Ground Truth
            _, _, V = np.linalg.svd(col_gram, full_matrices=False)  # Ground Truth
            # Singular values of the Gram matrix are the squares of those of
            # the matrix itself.
            S = np.sqrt(S)
            U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

            snp_array = make_snp_array(da.array(raw), std=scale, mean=center,
                                       std_method='norm', dtype='float64')
            np.testing.assert_array_almost_equal(expected, snp_array)

            solver = PowerMethod(k=k, tol=1e-12, scoring_method='q-vals',
                                 max_iter=100, factor=None, lmbd=0)
            U_q, S_q, V_q = solver.svd(snp_array)

            assert subspace_dist(U_k, U_q, S_k) <= 1e-8
            assert subspace_dist(V_k, V_q, S_k) <= 1e-8
            assert np.linalg.norm(S_k - S_q) <= 1e-9
def test_PowerMethod_transpose_array():
    """``PowerMethod.svd`` with and without ``transpose=True`` on a wide array.

    Without ``transpose`` the result is compared to the truncated dask SVD.
    With ``transpose=True`` the returned U (transposed) must span the
    reference V subspace, and the shapes of U and V must differ.
    """
    wide = da.array(np.random.rand(100, 200))
    k = 10

    U, S, V = da.linalg.svd(wide)
    U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

    solver = PowerMethod(tol=1e-12, k=k, factor=None, scale=False, center=False, lmbd=0)

    U_PM, S_PM, V_PM = solver.svd(wide)
    np.testing.assert_array_almost_equal(S_k, S_PM)
    np.testing.assert_almost_equal(subspace_dist(V_k, V_PM, S_PM), 0)
    assert U_k.shape == U_PM.shape

    U_T, S_T, V_T = solver.svd(wide, transpose=True)
    np.testing.assert_almost_equal(subspace_dist(U_T.T, V_k, S_T), 0)
    assert V_T.shape != U_T.shape
def test_subspace_to_SVD_case3():
    """
    A = uniform random N by P matrix
    USV = SVD(A)

    The columns of U are shuffled and the first j of them (j = 2..P-1) are
    handed to subspace_to_SVD with full_v=True; the returned V_s must be at
    zero subspace distance from V.
    """
    for n_rows in range(2, 10):
        for n_cols in range(2, 10):
            A = da.random.random(size=(n_rows, n_cols))
            U, S, V = da.linalg.svd(A)
            for _ in range(num_runs):
                # permutation acts on the first axis, hence the double transpose.
                shuffled_U = da.random.permutation(U.T).T
                for width in range(2, n_cols):
                    subspace = shuffled_U[:, 0:width]
                    U_s, S_s, V_s = subspace_to_SVD(subspace, A, full_v=True)
                    dist = subspace_dist(V_s, V, S_s)
                    np.testing.assert_almost_equal(dist, 0, decimal=decimals)
def test_PowerMethod_subsvd_finds_eigenvectors():
    """PowerMethod recovers the top-k entries of a diagonal matrix.

    The diagonal is 1.01**i for i = 1..N, so the k largest singular values are
    the last k entries and the matching right singular vectors are the last k
    unit vectors.

    ``k`` is passed to ``PowerMethod`` explicitly so that the expectations
    below (which are sliced with ``k``) cannot drift from the solver's
    default.
    """
    N = 1000
    k = 10
    s_orig = np.array([1.01**i for i in range(1, N + 1)])
    array = da.diag(s_orig)

    PM = PowerMethod(k=k, tol=1e-16, factor=None, lmbd=.1, max_iter=100)
    U_PM, S_PM, V_PM = PM.svd(array)

    np.testing.assert_array_almost_equal(s_orig[-k:][::-1], S_PM, decimal=0)  # Max Q-Val is 21,000

    # Expected V: row j is the unit vector for column N - k + j.
    v = np.zeros_like(V_PM).compute()
    for j in range(k):
        v[j, N - k + j] = 1
    np.testing.assert_almost_equal(subspace_dist(V_PM, v, S_PM), 0, decimal=5)
def _solution_accuracy(self, x, **kwargs):
    """Score the iterate ``x`` with every configured scoring method.

    ``x`` is converted to a truncated SVD via ``subspace_to_SVD``, the factors
    are persisted with dask and stored under ``history.iter['last_value']``.
    One accuracy value is then produced per entry of ``self.scoring_method``:

    * 'q-vals'     -- ``q_value_converge`` against the previously stored S;
                      the current S is appended to ``history.iter['S']``.
    * 'rmse'       -- ``rmse_k`` on ``self.array`` with ``S_k**2``.
    * anything else (treated as 'v-subspace') -- ``subspace_dist`` against the
                      previously stored V; the current V is appended to
                      ``history.iter['V']``.

    When no previous S or V exists the accuracy is ``float('INF')``.

    Parameters
    ----------
    x : current iterate handed to ``subspace_to_SVD``
    **kwargs : accepted but not used here

    Returns
    -------
    list with one accuracy per scoring method, in ``self.scoring_method`` order
    """
    # The full V is requested only when 'rmse' or 'v-subspace' scoring is in use.
    needs_full_v = any(m in self.scoring_method for m in ['rmse', 'v-subspace'])
    U_k, S_k, V_k = subspace_to_SVD(x, self.array, sqrt_s=True, k=self.k,
                                    full_v=needs_full_v, log=0)

    U_k, S_k, V_k = dask.persist(U_k, S_k, V_k)
    self.history.iter['last_value'] = (U_k, S_k, V_k)

    scores = []
    for method in self.scoring_method:
        if method == 'rmse':
            score = rmse_k(self.array, U_k, S_k**2, factor=self.factor)
        elif method == 'q-vals':
            try:
                last_S = self.history.iter['S'][-1]
                score = q_value_converge(S_k, last_S)
            except IndexError:
                # Nothing to compare against yet.
                score = float('INF')
            self.history.iter['S'].append(S_k.compute())
        else:  # method == 'v-subspace'
            try:
                last_V = self.history.iter['V'][-1]
                score = subspace_dist(V_k.T, last_V.T, S_k)
            except IndexError:
                # Nothing to compare against yet.
                score = float('INF')
            self.history.iter['V'].append(V_k.compute())
        scores.append(score)
    return scores
def test_PowerMethod_factor():
    """PowerMethod honours the ``factor`` option ('n', 'p' or None).

    The reference is the SVD of ``array @ array.T`` divided by n, p or 1
    respectively, with square-rooted singular values.

    The dask input is built once, outside the loop.  The original rebound
    ``array`` inside the loop, so later iterations re-wrapped an array that
    had already been converted.
    """
    n = 100
    p = 80
    k = 10

    raw = np.random.rand(n, p)
    sym_array = raw.dot(raw.T)
    snp_array = make_snp_array(da.array(raw), mean=False, std=False, std_method='norm',
                               mask_nan=False, dtype='float64')

    # ``factor`` option -> divisor applied to the symmetric reference matrix.
    divisors = {'n': n, 'p': p, None: 1}

    for f, divisor in divisors.items():
        U, S, V = np.linalg.svd(sym_array / divisor, full_matrices=False)
        S = np.sqrt(S)
        U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

        PM = PowerMethod(k=k, tol=1e-9, scoring_method='q-vals', max_iter=100,
                         factor=f, lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(snp_array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert U_k.shape == U_k_PM.shape == (n, k)
        np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)
def test_subspace_to_SVD_case2():
    """
    A = U diag(S), i.e. A = U S V with V = I

    U (N by K): row N-1 is set to ones, then the top K by K block is
                overwritten with the identity; every other row is zero
    S = K, K-1, ..., 1
    V = I (K by K)

    subspace_to_SVD is given leading column sections of A and should return
    a V_s at zero subspace distance from I and a U_s for which the singular
    values of U_q U_s^T (U_q from the QR of U) are K ones followed by zeros
    """
    for N in range(2, 10):
        for K in range(2, N + 1):
            U = np.zeros((N, K))
            U[N - 1, :] = np.ones(K)
            U[:K, :K] = np.eye(K)
            V = da.eye(K)
            U = da.array(U)
            U_q, _ = da.linalg.qr(U)
            S = da.arange(K, 0, -1)
            A = U.dot(da.diag(S))
            for j in range(K, N + 1):
                # A has K columns, so the slice is clipped to all of A when j > K.
                subspace = A[:, 0:j]
                U_s, S_s, V_s = subspace_to_SVD(subspace, A, full_v=True)
                np.testing.assert_almost_equal(subspace_dist(V_s, V, S), 0, decimal=decimals)
                _, l, _ = da.linalg.svd(U_q.dot(U_s.T))
                np.testing.assert_almost_equal(l[:K].compute(), np.ones(K))
                np.testing.assert_almost_equal(l[K:].compute(), np.zeros(N - K))
def test_PowerMethod_case2():
    """Tightening ``tol`` must never make the PowerMethod result worse.

    The tolerance is swept over ``np.logspace(0, -12, 20)``.  At each step the
    U and V subspace distances and the singular-value error against the
    ground truth must be no larger than at the previous step, and after the
    tightest tolerance they must be below 1e-9, 1e-9 and 1e-12 respectively.

    Each error is evaluated once per tolerance and reused for both the check
    and the next bound.
    """
    raw = np.random.rand(100, 100)
    mu = raw.mean(axis=0)
    std = np.diag(1 / raw.std(axis=0))
    scaled_centered_array = (raw - mu).dot(std)
    array = make_snp_array(da.array(raw), mean=True, std=True, std_method='norm',
                           mask_nan=False, dtype='float64')

    # Ground truth from the two Gram matrices.
    U, S, V = np.linalg.svd(scaled_centered_array.dot(scaled_centered_array.T),
                            full_matrices=False)
    _, _, V = np.linalg.svd(scaled_centered_array.T.dot(scaled_centered_array),
                            full_matrices=False)
    S = np.sqrt(S)

    k = 10
    U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

    previous_S_error = float('inf')
    previous_U_error = float('inf')
    previous_V_error = float('inf')

    for t in np.logspace(0, -12, 20):
        PM = PowerMethod(k=k, tol=t, scoring_method='q-vals', max_iter=100,
                         factor=None, lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

        U_error = subspace_dist(U_k, U_k_PM, S_k)
        assert U_error <= previous_U_error
        V_error = subspace_dist(V_k, V_k_PM, S_k)
        assert V_error <= previous_V_error
        S_error = np.linalg.norm(S_k - S_k_PM)
        assert S_error <= previous_S_error

        previous_S_error = S_error
        previous_U_error = U_error
        previous_V_error = V_error

    # The bounds now hold the errors from the tightest tolerance.
    assert previous_U_error <= 1e-9
    assert previous_V_error <= 1e-9
    assert previous_S_error <= 1e-12