Beispiel #1
0
def test_PowerMethod_std_method():
    """Compare PowerMethod with a dense SVD for each ``std_method``.

    For both ``'norm'`` and ``'binom'`` the raw integer matrix is centred and
    rescaled by hand, decomposed with ``da.linalg.svd`` and truncated to ``k``
    components. The result is compared with what ``PowerMethod`` returns for
    the array built by ``make_snp_array`` with the same ``std_method``.
    """
    n_rows, n_cols, k = 1000, 100, 10
    array = da.random.randint(0, 3, size=(n_rows, n_cols))

    for method in ('norm', 'binom'):
        snp_array = make_snp_array(array, std_method=method)
        solver = PowerMethod(k=k,
                             scoring_method='q-vals',
                             tol=1e-13,
                             factor=None)
        U_PM, S_PM, V_PM = solver.svd(array=snp_array)

        # Hand-built reference scaling for this method.
        col_mean = array.mean(axis=0)
        if method == 'norm':
            col_std = array.std(axis=0)
        else:
            # 'binom' reference: sqrt(2 * f * (1 - f)) with f = column mean / 2.
            freq = col_mean / 2
            col_std = da.sqrt(2 * freq * (1 - freq))

        reference = (array - col_mean).dot(np.diag(1 / col_std))

        U, S, V = da.linalg.svd(reference)
        U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

        u_dist = subspace_dist(U_PM, U_k, S_k)
        np.testing.assert_almost_equal(u_dist, 0, decimal=3)

        v_dist = subspace_dist(V_PM, V_k, S_k)
        np.testing.assert_almost_equal(v_dist, 0, decimal=3)

        np.testing.assert_array_almost_equal(S_k, S_PM, decimal=2)
Beispiel #2
0
def test_PowerMethod_all_tols_agree():
    """All three scoring methods must converge to the same decomposition.

    The same random matrix is decomposed with ``'q-vals'``, ``'rmse'`` and
    ``'v-subspace'`` scoring. The ``'q-vals'`` run is the baseline the other
    two are compared against.
    """
    rows, cols, k = 100, 80, 10
    array = np.random.rand(rows, cols)

    # (scoring method, tolerance) in the order the solvers are run.
    settings = [('q-vals', 1e-9), ('rmse', 1e-4), ('v-subspace', 1e-9)]

    results = {}
    for scoring, tol in settings:
        solver = PowerMethod(k=k,
                             tol=tol,
                             scoring_method=scoring,
                             max_iter=100,
                             lmbd=0)
        results[scoring] = solver.svd(array)

    U_q, S_q, V_q = results['q-vals']
    others = [results['rmse'], results['v-subspace']]

    # Singular values first, then the left and finally the right subspaces.
    for _, S_other, _ in others:
        np.testing.assert_array_almost_equal(S_q, S_other)

    for U_other, _, _ in others:
        np.testing.assert_almost_equal(subspace_dist(U_q, U_other, S_q), 0)

    for _, _, V_other in others:
        np.testing.assert_almost_equal(subspace_dist(V_q, V_other, S_q), 0)
Beispiel #3
0
def test_PowerMethod_case1():
    """PowerMethod matches NumPy's SVD for k = 1..9 on a standardised matrix.

    The ground truth is the thin SVD of the hand-centred, hand-scaled matrix.
    PowerMethod is run on the array produced by ``make_snp_array`` with
    ``mean=True, std=True, std_method='norm'``.
    """
    n, p = 100, 80
    raw = np.random.rand(n, p)

    # Ground truth: centre the columns, scale by 1/std, take the thin SVD.
    inv_std = np.diag(1 / raw.std(axis=0))
    reference = (raw - raw.mean(axis=0)).dot(inv_std)
    U, S, V = np.linalg.svd(reference, full_matrices=False)

    snp_array = make_snp_array(da.array(raw),
                               mean=True,
                               std=True,
                               std_method='norm',
                               mask_nan=False,
                               dtype='float64')

    for k in range(1, 10):
        U_k = U[:, :k]
        S_k = S[:k]
        V_k = V[:k, :]

        solver = PowerMethod(k=k,
                             tol=1e-9,
                             scoring_method='rmse',
                             max_iter=100,
                             sub_svd_start=False,
                             init_row_sampling_factor=1,
                             factor=None,
                             lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = solver.svd(snp_array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert V_k.shape == V_k_PM.shape == (k, p)
        assert U_k.shape == U_k_PM.shape == (n, k)

        v_dist = subspace_dist(V_k, V_k_PM, S_k_PM)
        np.testing.assert_almost_equal(v_dist, 0)
        u_dist = subspace_dist(U_k, U_k_PM, S_k_PM)
        np.testing.assert_almost_equal(u_dist, 0)
Beispiel #4
0
def test_v_init():
    """``v_init(a, V)`` must span the same subspace as U from the SVD of ``a``.

    ``a`` is a random 100 x 40 matrix wrapped as a dask array. The result of
    ``v_init`` is compared with the ``U`` factor from ``da.linalg.svd`` using
    ``subspace_dist`` weighted by ``S``.
    """
    N, P = 100, 40
    a = da.array(np.random.randn(N, P))

    U, S, V = da.linalg.svd(a)

    # The original repeated this exact assertion twice; once is sufficient.
    np.testing.assert_almost_equal(subspace_dist(U, v_init(a, V), S), 0)
Beispiel #5
0
def test_sub_svd_init_warm_start_row_factor():
    """The ``sub_svd_init`` error must not grow as ``warm_start_row_factor`` rises.

    For factors 1, 3, 5, 7 and 9 the subspace returned by ``sub_svd_init`` is
    compared with the rank-``k`` left singular subspace of ``a``. Each
    distance must be no larger than the previous one, starting from a bound
    of 1.
    """
    N, P = 100, 40
    k = 10
    a = da.array(np.random.randn(N, P))
    U, S, V = da.linalg.svd(a)
    Uk, Sk = svd_to_trunc_svd(u=U, s=S, k=k)

    previous_error = 1
    for i in range(1, 11, 2):
        U1 = sub_svd_init(a, k=k, warm_start_row_factor=i, log=0)

        # Evaluate the distance once and reuse it for the check and the
        # bookkeeping instead of recomputing it.
        error = subspace_dist(U1, Uk, Sk)
        assert error <= previous_error
        previous_error = error
Beispiel #6
0
def test_SSPM_case3():
    """Successive batches of a diagonal matrix yield the expected partial SVDs.

    The input is ``diag(1.01**1, ..., 1.01**N)``. The final singular values
    must be the ``k`` largest diagonal entries. Entry ``i`` of the recorded
    history is checked against the first ``int(f * (i + 1) * N)`` diagonal
    entries: its singular values must be the ``k`` largest of those, and its
    V must match the corresponding unit vectors.
    """
    N, k = 500, 10
    batch_fraction = .1
    s_orig = np.array([1.01**i for i in range(1, N + 1)])
    array = da.diag(s_orig)

    SSPM = SuccessiveBatchedPowerMethod(
        k=k,
        sub_svd_start=True,
        tol=[1e-14, 1e-14],
        f=batch_fraction,
        scoring_method=['q-vals', 'v-subspace'],
        factor=None)

    U_PM, S_PM, V_PM = SSPM.svd(array)

    np.testing.assert_array_almost_equal(s_orig[-k:][::-1], S_PM)

    recorded = zip(SSPM.history.iter['S'], SSPM.history.iter['V'])
    for i, (sub_S, sub_V) in enumerate(recorded):
        # Number of leading diagonal entries this history entry is checked against.
        seen = int(batch_fraction * (i + 1) * N)

        expected_S = sorted(s_orig[:seen], reverse=True)[:k]
        np.testing.assert_array_almost_equal(expected_S, sub_S)

        # Expected V: unit vectors on the last k of the `seen` coordinates.
        expected_V = np.zeros_like(sub_V)
        for j in range(k):
            expected_V[j, seen - k + j] = 1

        np.testing.assert_almost_equal(
            subspace_dist(expected_V, sub_V, sub_S), 0)
Beispiel #7
0
def test_subspace_to_V_case1():
    """``subspace_to_V`` given the full U must reproduce V from the SVD.

    Every shape (N, P) with N and P in 2..9 is tried. The comparison uses
    ``subspace_dist`` and the module-level ``decimals`` precision.
    """
    shapes = [(rows, cols) for rows in range(2, 10) for cols in range(2, 10)]

    for N, P in shapes:
        A = da.random.random(size=(N, P))
        U, S, V_expected = da.linalg.svd(A)
        V_recovered = subspace_to_V(U, A, k=min(N, P))

        dist = subspace_dist(V_expected, V_recovered, S)
        np.testing.assert_almost_equal(dist, 0, decimal=decimals)
Beispiel #8
0
def test_sub_svd_init():
    """``sub_svd_init`` with ``warm_start_row_factor=10`` recovers the top-k subspace.

    The reference is the rank-``k`` truncation of the dask SVD of a random
    100 x 40 matrix.
    """
    n_rows, n_cols, k = 100, 40, 10
    a = da.array(np.random.randn(n_rows, n_cols))

    U, S, V = da.linalg.svd(a)
    Uk, Sk = svd_to_trunc_svd(u=U, s=S, k=k)

    initial_subspace = sub_svd_init(a, k=k, warm_start_row_factor=10, log=0)

    dist = subspace_dist(initial_subspace, Uk, Sk)
    np.testing.assert_almost_equal(dist, 0)
Beispiel #9
0
def test_SSPM_case1():
    """SSPM on a tall matrix: diagonal top block plus one all-ones last row.

    The first ``P`` rows hold ``diag(linspace(1, 2, P))``, the last row is all
    ones and every other row is zero. The final result must match NumPy's
    truncated SVD. Every history entry except the last must report the ``k``
    largest diagonal values.
    """
    N, P, k = 100000, 100, 10
    s_orig = np.linspace(1, 2, P)

    array = np.zeros(shape=(N, P))
    array[:P, :] = np.diag(s_orig)
    array[N - 1, :] = 1

    U, S, V = np.linalg.svd(array, full_matrices=False)
    U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

    SSPM = SuccessiveBatchedPowerMethod(k=k,
                                        sub_svd_start=True,
                                        tol=1e-12,
                                        factor=None)
    U_PM, S_PM, V_PM = SSPM.svd(array)

    for found, expected in ((U_PM, U_k), (V_PM, V_k)):
        np.testing.assert_almost_equal(subspace_dist(found, expected, S_k), 0)
    np.testing.assert_array_almost_equal(S_k, S_PM)

    # k largest diagonal values in descending order.
    top_diagonal = s_orig[::-1][:k]
    for sub_S in SSPM.history.iter['S'][:-1]:
        np.testing.assert_array_almost_equal(top_diagonal, sub_S)
Beispiel #10
0
def test_PowerMethod_scale_center():
    """PowerMethod agrees with NumPy for every scale/center combination.

    For each (scale, center) pair the expected matrix is built by hand and
    must equal the array returned by ``make_snp_array`` with ``std=scale`` and
    ``mean=center``. Its truncated SVD must then match PowerMethod's output.
    """
    array = np.random.rand(100, 70)
    col_mean = array.mean(axis=0)
    inv_std = np.diag(1 / array.std(axis=0))
    k = 10

    for scale in (True, False):
        for center in (True, False):
            # Centre first, then scale, each only when requested.
            expected = array - col_mean if center else array
            if scale:
                expected = expected.dot(inv_std)

            # Ground truth: U and S**2 from X X^T, V from X^T X.
            gram_rows = expected.dot(expected.T)
            gram_cols = expected.T.dot(expected)
            U, S_squared, _ = np.linalg.svd(gram_rows, full_matrices=False)
            _, _, V = np.linalg.svd(gram_cols, full_matrices=False)
            S = np.sqrt(S_squared)
            U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

            snp_array = make_snp_array(da.array(array),
                                       std=scale,
                                       mean=center,
                                       std_method='norm',
                                       dtype='float64')
            np.testing.assert_array_almost_equal(expected, snp_array)

            solver = PowerMethod(k=k,
                                 tol=1e-12,
                                 scoring_method='q-vals',
                                 max_iter=100,
                                 factor=None,
                                 lmbd=0)
            U_q, S_q, V_q = solver.svd(snp_array)

            assert subspace_dist(U_k, U_q, S_k) <= 1e-8
            assert subspace_dist(V_k, V_q, S_k) <= 1e-8
            assert np.linalg.norm(S_k - S_q) <= 1e-9
Beispiel #11
0
def test_PowerMethod_transpose_array():
    """Check ``PowerMethod.svd`` on a wide matrix, with and without ``transpose=True``.

    Without ``transpose`` the singular values and V must match the truncated
    dask SVD, and U must have the reference shape. With ``transpose=True`` the
    transposed U factor must span the reference V subspace, and the U and V
    shapes must differ.
    """
    k = 10
    array = da.array(np.random.rand(100, 200))

    U, S, V = da.linalg.svd(array)
    U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

    solver = PowerMethod(tol=1e-12,
                         k=k,
                         factor=None,
                         scale=False,
                         center=False,
                         lmbd=0)

    # Plain call.
    U_PM, S_PM, V_PM = solver.svd(array)
    np.testing.assert_array_almost_equal(S_k, S_PM)
    plain_dist = subspace_dist(V_k, V_PM, S_PM)
    np.testing.assert_almost_equal(plain_dist, 0)
    assert U_k.shape == U_PM.shape

    # Same solver, now with transpose=True.
    U_PM, S_PM, V_PM = solver.svd(array, transpose=True)
    transposed_dist = subspace_dist(U_PM.T, V_k, S_PM)
    np.testing.assert_almost_equal(transposed_dist, 0)
    assert V_PM.shape != U_PM.shape
Beispiel #12
0
def test_subspace_to_SVD_case3():
    """``subspace_to_SVD`` recovers V from column-shuffled subsets of U.

    For every shape (N, P) with N and P in 2..9:

        A = da.random.random(size=(N, P))
        U, S, V = da.linalg.svd(A)

    The columns of U are permuted ``num_runs`` times. For each permutation
    the leading ``j`` columns (j = 2 .. P - 1) are passed to
    ``subspace_to_SVD`` with ``full_v=True``, and the returned V is compared
    with the reference V at the module-level ``decimals`` precision.
    """
    shapes = [(rows, cols) for rows in range(2, 10) for cols in range(2, 10)]

    for N, P in shapes:
        A = da.random.random(size=(N, P))
        U, S, V = da.linalg.svd(A)

        for _ in range(num_runs):
            # Permute rows of U.T, i.e. the columns of U.
            shuffled = da.random.permutation(U.T).T

            for width in range(2, P):
                subspace = shuffled[:, 0:width]
                U_s, S_s, V_s = subspace_to_SVD(subspace, A, full_v=True)

                dist = subspace_dist(V_s, V, S_s)
                np.testing.assert_almost_equal(dist, 0, decimal=decimals)
Beispiel #13
0
def test_PowerMethod_subsvd_finds_eigenvectors():
    """PowerMethod on ``diag(1.01**i)`` finds the largest entries and unit vectors.

    The expected singular values are the ``k`` largest diagonal entries in
    descending order. They are compared at ``decimal=0`` because the largest
    entry is on the order of 21,000. The expected V has one unit vector per
    row on the last ``k`` coordinates.
    """
    N, k = 1000, 10
    s_orig = np.array([1.01**i for i in range(1, N + 1)])
    array = da.diag(s_orig)

    solver = PowerMethod(tol=1e-16, factor=None, lmbd=.1, max_iter=100)
    U_PM, S_PM, V_PM = solver.svd(array)

    expected_S = s_orig[-k:][::-1]
    np.testing.assert_array_almost_equal(expected_S, S_PM,
                                         decimal=0)  # Max Q-Val is 21,000

    expected_V = np.zeros_like(V_PM).compute()
    # Row j gets a single 1 in column N - k + j.
    row_idx = np.arange(k)
    expected_V[row_idx, N - k + row_idx] = 1

    dist = subspace_dist(V_PM, expected_V, S_PM)
    np.testing.assert_almost_equal(dist, 0, decimal=5)
Beispiel #14
0
    def _solution_accuracy(self, x, **kwargs):
        """Score the iterate ``x`` with every configured scoring method.

        ``x`` and ``self.array`` are passed to ``subspace_to_SVD`` to obtain
        ``(U_k, S_k, V_k)``. The triple is persisted with ``dask.persist`` and
        stored under ``self.history.iter['last_value']``.

        Returns a list with one accuracy value per entry of
        ``self.scoring_method``, in that order. For ``'q-vals'`` and the
        fallback ``'v-subspace'`` branch the value is ``float('INF')`` when an
        ``IndexError`` is raised (e.g. no previous history entry), and the
        computed ``S_k`` / ``V_k`` is appended to the history afterwards.

        ``**kwargs`` is accepted but not used in this method.
        """
        # full_v is requested only when 'rmse' or 'v-subspace' is configured.
        needs_full_v = ('rmse' in self.scoring_method
                        or 'v-subspace' in self.scoring_method)
        U_k, S_k, V_k = subspace_to_SVD(x,
                                        self.array,
                                        sqrt_s=True,
                                        k=self.k,
                                        full_v=needs_full_v,
                                        log=0)

        U_k, S_k, V_k = dask.persist(U_k, S_k, V_k)

        history = self.history.iter
        history['last_value'] = (U_k, S_k, V_k)

        scores = []
        for method in self.scoring_method:
            if method == 'rmse':
                score = rmse_k(self.array, U_k, S_k**2, factor=self.factor)
            elif method == 'q-vals':
                try:
                    score = q_value_converge(S_k, history['S'][-1])
                except IndexError:
                    score = float('INF')
                history['S'].append(S_k.compute())
            else:  # method == 'v-subspace'
                try:
                    last_V_k = history['V'][-1]
                    score = subspace_dist(V_k.T, last_V_k.T, S_k)
                except IndexError:
                    score = float('INF')
                history['V'].append(V_k.compute())
            scores.append(score)
        return scores
Beispiel #15
0
def test_PowerMethod_factor():
    """PowerMethod's ``factor`` option matches dividing ``A A^T`` by n, p or 1.

    For ``factor`` in ``'n'``, ``'p'`` and ``None`` the reference is the SVD
    of ``array.dot(array.T) / divisor``, with the square root of its singular
    values taken as the expected S. PowerMethod is run on the unscaled,
    uncentred array with the same ``factor``.
    """
    n = 100
    p = 80
    k = 10
    array = np.random.rand(n, p)
    sym_array = array.dot(array.T)

    # Divisor used for the reference decomposition under each factor option.
    divisors = {'n': n, 'p': p, None: 1}

    for f, divisor in divisors.items():
        U, S, _ = np.linalg.svd(sym_array / divisor, full_matrices=False)
        S = np.sqrt(S)
        U_k, S_k = U[:, :k], S[:k]

        # Use a separate name so every iteration wraps the original NumPy
        # matrix. The original rebound ``array`` here, so later iterations
        # re-wrapped the output of the previous iteration.
        snp_array = make_snp_array(da.array(array),
                                   mean=False,
                                   std=False,
                                   std_method='norm',
                                   mask_nan=False,
                                   dtype='float64')
        PM = PowerMethod(k=k,
                         tol=1e-9,
                         scoring_method='q-vals',
                         max_iter=100,
                         factor=f,
                         lmbd=0)

        U_k_PM, S_k_PM, V_k_PM = PM.svd(snp_array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert U_k.shape == U_k_PM.shape == (n, k)
        np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)
Beispiel #16
0
def test_subspace_to_SVD_case2():
    """``subspace_to_SVD`` recovers V = I and the column space of U.

    For N in 2..9 and K in 2..N the test builds:

        U : N x K, last row set to ones, then the top K x K block set to the
            identity (so for K == N the identity overwrites the ones row)
        S : K, K - 1, ..., 1
        V : K x K identity
        A = U . diag(S)

    Column slices ``A[:, 0:j]`` for j = K..N are passed to
    ``subspace_to_SVD``. The returned V is compared with the identity. The
    singular values of ``Q . U_s^T`` (Q from the QR of U) are compared with K
    ones followed by N - K zeros.
    """
    for N in range(2, 10):
        for K in range(2, N + 1):
            # Order matters: the identity block is written after the ones row.
            basis = np.zeros((N, K))
            basis[N - 1, :] = np.ones(K)
            basis[:K, :K] = np.eye(K)

            V = da.eye(K)
            U = da.array(basis)
            U_q, _ = da.linalg.qr(U)
            S = da.arange(K, 0, -1)
            A = U.dot(da.diag(S))

            for width in range(K, N + 1):
                subspace = A[:, 0:width]
                U_s, S_s, V_s = subspace_to_SVD(subspace, A, full_v=True)

                v_dist = subspace_dist(V_s, V, S)
                np.testing.assert_almost_equal(v_dist, 0, decimal=decimals)

                _, overlap, _ = da.linalg.svd(U_q.dot(U_s.T))
                np.testing.assert_almost_equal(overlap[:K].compute(),
                                               np.ones(K))
                np.testing.assert_almost_equal(overlap[K:].compute(),
                                               np.zeros(N - K))
Beispiel #17
0
def test_PowerMethod_case2():
    """Tightening ``tol`` must never make PowerMethod's result worse.

    The ground truth comes from NumPy SVDs of ``X X^T`` (U and S**2) and
    ``X^T X`` (V), where X is the hand-centred, hand-scaled matrix.
    PowerMethod is run for 20 tolerances from 1 down to 1e-12. The U, V and S
    errors must be non-increasing across runs, and the last run must be within
    1e-9 (subspaces) and 1e-12 (singular values) of the truth.
    """
    raw = np.random.rand(100, 100)
    mu = raw.mean(axis=0)
    std = np.diag(1 / raw.std(axis=0))
    scaled_centered_array = (raw - mu).dot(std)
    array = make_snp_array(da.array(raw),
                           mean=True,
                           std=True,
                           std_method='norm',
                           mask_nan=False,
                           dtype='float64')

    # Ground truth.
    U, S, _ = np.linalg.svd(scaled_centered_array.dot(scaled_centered_array.T),
                            full_matrices=False)
    _, _, V = np.linalg.svd(scaled_centered_array.T.dot(scaled_centered_array),
                            full_matrices=False)

    S = np.sqrt(S)
    k = 10
    U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

    previous_S_error = float('inf')
    previous_U_error = float('inf')
    previous_V_error = float('inf')
    for t in np.logspace(0, -12, 20):
        PM = PowerMethod(k=k,
                         tol=t,
                         scoring_method='q-vals',
                         max_iter=100,
                         factor=None,
                         lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

        # Compute each error once; it is needed for the check and as the
        # bound for the next iteration.
        U_error = subspace_dist(U_k, U_k_PM, S_k)
        V_error = subspace_dist(V_k, V_k_PM, S_k)
        S_error = np.linalg.norm(S_k - S_k_PM)

        assert U_error <= previous_U_error
        assert V_error <= previous_V_error
        assert S_error <= previous_S_error

        previous_U_error = U_error
        previous_V_error = V_error
        previous_S_error = S_error

    # After the loop these hold the errors of the tightest tolerance.
    assert previous_U_error <= 1e-9
    assert previous_V_error <= 1e-9
    assert previous_S_error <= 1e-12