예제 #1
0
def test_PowerMethod_std_method():
    """Compare PowerMethod with a full dask SVD for each ``std_method``.

    For both ``'norm'`` and ``'binom'`` the input is passed through
    ``make_snp_array`` and factorised with ``PowerMethod``.  The reference
    is built by hand: columns are centred by their mean and divided by
    either the column standard deviation (``'norm'``) or
    ``sqrt(2 * p * (1 - p))`` with ``p = mean / 2`` (``'binom'``), then
    decomposed with ``da.linalg.svd`` and truncated to ``k`` components.
    """
    n_rows, n_cols = 1000, 100
    k = 10
    array = da.random.randint(0, 3, size=(n_rows, n_cols))

    for method in ('norm', 'binom'):
        snp_array = make_snp_array(array, std_method=method)
        solver = PowerMethod(k=k,
                             scoring_method='q-vals',
                             tol=1e-13,
                             factor=None)
        U_PM, S_PM, V_PM = solver.svd(array=snp_array)

        # Hand-rolled standardisation used as the ground truth.
        col_mean = array.mean(axis=0)
        if method != 'norm':
            p_hat = col_mean / 2
            col_std = da.sqrt(2 * p_hat * (1 - p_hat))
        else:
            col_std = array.std(axis=0)
        inv_std = np.diag(1 / col_std)
        reference = (array - col_mean).dot(inv_std)

        U_full, S_full, V_full = da.linalg.svd(reference)
        U_k, S_k, V_k = svd_to_trunc_svd(U_full, S_full, V_full, k=k)

        left_dist = subspace_dist(U_PM, U_k, S_k)
        np.testing.assert_almost_equal(left_dist, 0, decimal=3)
        right_dist = subspace_dist(V_PM, V_k, S_k)
        np.testing.assert_almost_equal(right_dist, 0, decimal=3)
        np.testing.assert_array_almost_equal(S_k, S_PM, decimal=2)
예제 #2
0
def test_PowerMethod_case1():
    """Check PowerMethod against numpy's SVD for ranks 1 through 9.

    A random ``n x p`` matrix is centred and scaled column-wise by hand and
    decomposed with ``np.linalg.svd`` to obtain the ground truth.  The same
    data goes through ``make_snp_array`` (mean and std enabled, ``'norm'``
    method) and ``PowerMethod``; singular values, factor shapes and both
    subspace distances are then compared for every ``k``.
    """
    n = 100
    p = 80
    array = np.random.rand(n, p)

    col_mean = array.mean(axis=0)
    inv_std = np.diag(1 / array.std(axis=0))
    reference = (array - col_mean).dot(inv_std)
    # Ground truth
    U, S, V = np.linalg.svd(reference, full_matrices=False)

    array = make_snp_array(da.array(array),
                           mean=True,
                           std=True,
                           std_method='norm',
                           mask_nan=False,
                           dtype='float64')

    # Solver options that do not depend on the requested rank.
    solver_options = dict(tol=1e-9,
                          scoring_method='rmse',
                          max_iter=100,
                          sub_svd_start=False,
                          init_row_sampling_factor=1,
                          factor=None,
                          lmbd=0)

    for k in range(1, 10):
        U_k = U[:, :k]
        S_k = S[:k]
        V_k = V[:k, :]

        PM = PowerMethod(k=k, **solver_options)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert V_k.shape == V_k_PM.shape == (k, p)
        assert U_k.shape == U_k_PM.shape == (n, k)
        right_dist = subspace_dist(V_k, V_k_PM, S_k_PM)
        np.testing.assert_almost_equal(right_dist, 0)
        left_dist = subspace_dist(U_k, U_k_PM, S_k_PM)
        np.testing.assert_almost_equal(left_dist, 0)
예제 #3
0
def test_mask_snp_array_casse1():
    """Check ``utils.make_snp_array`` against manual centring and scaling.

    The expected matrix is the random input with each column's mean
    subtracted and then divided by that column's standard deviation; the
    output of ``make_snp_array`` with ``mean=True``, ``std=True`` and
    ``std_method='norm'`` must match it to the default tolerance of
    ``assert_array_almost_equal``.
    """
    raw = np.random.rand(100, 80)

    centred = raw - raw.mean(axis=0)
    inv_std = np.diag(1 / raw.std(axis=0))
    expected = centred.dot(inv_std)

    actual = utils.make_snp_array(da.array(raw),
                                  mean=True,
                                  std=True,
                                  std_method='norm',
                                  mask_nan=False,
                                  dtype='float64')

    np.testing.assert_array_almost_equal(expected, actual)
예제 #4
0
def test_PowerMethod_nan_arrays():
    """Check how PowerMethod reacts to a NaN entry, raw and after cleaning.

    With a NaN planted at position ``[0, 0]`` the test expects
    ``PowerMethod.svd`` to raise ``np.linalg.LinAlgError`` for both values
    of ``sub_svd_start``.  After the same data is passed through
    ``make_snp_array`` with ``mask_nan=True`` the decomposition is expected
    to complete and return three values.
    """
    data = np.random.randn(100, 100)
    bad_values = (float('nan'),)

    for bad_value in bad_values:
        data[0, 0] = bad_value

        for sub_svd_start in (True, False):
            solver = PowerMethod(sub_svd_start=sub_svd_start, max_iter=2)

            # Raw input still contains the bad value.
            with pytest.raises(np.linalg.LinAlgError):
                _u, _s, _v = solver.svd(da.array(data))

            # The same solver instance is reused on the cleaned input.
            cleaned = make_snp_array(da.array(data),
                                     mask_nan=True,
                                     std_method='norm',
                                     dtype='float64')
            _u, _s, _v = solver.svd(cleaned)
예제 #5
0
def test_make_snp_array_case_binom(shape, threshold):
    """Check that ``'binom'`` standardisation yields zero column means.

    ``shape`` and ``threshold`` are presumably supplied by a property-based
    testing decorator that is not visible here -- TODO confirm.  Entries of
    a uniform random array that exceed ``threshold`` are replaced by NaN
    before the array is handed to ``utils.make_snp_array``.
    """
    # Skip degenerate inputs: both of the first two dimensions must exceed 1.
    assume(min(shape[0], shape[1]) > 1)

    arr = da.random.random(size=shape)
    above_threshold = arr > threshold
    arr[above_threshold] = float('nan')

    # Skip examples in which some column consists only of NaN values.
    nan_fraction = da.mean(da.isnan(arr), axis=0)
    assume(da.mean(nan_fraction < 1) == 1)

    snp_array = utils.make_snp_array(arr,
                                     mean=True,
                                     std=True,
                                     std_method='binom',
                                     dtype='float')

    # Comparing 1 + mean with ones checks that every column mean is ~0.
    column_means = snp_array.mean(axis=0)
    expected = np.ones(shape[1])
    np.testing.assert_array_almost_equal(1 + column_means, expected)
예제 #6
0
def test_make_snp_array_case_normal(shape, threshold):
    """Check that ``'norm'`` standardisation yields zero column means.

    ``shape`` and ``threshold`` are presumably supplied by a property-based
    testing decorator that is not visible here -- TODO confirm.  Entries of
    a uniform random array that exceed ``threshold`` are replaced by NaN
    before the array is handed to ``utils.make_snp_array``.
    """
    # Skip degenerate inputs: both of the first two dimensions must exceed 1.
    assume(min(shape[0], shape[1]) > 1)

    arr = da.random.random(size=shape)
    above_threshold = arr > threshold
    arr[above_threshold] = float('nan')

    # Skip examples in which some column has a NaN-ignoring std that is not
    # strictly positive.
    column_std = da.nanstd(arr, axis=0)
    assume(da.mean(column_std > 0) == 1)

    snp_array = utils.make_snp_array(arr,
                                     mean=True,
                                     std=True,
                                     std_method='norm',
                                     dtype='float')

    # Comparing 1 + mean with ones checks that every column mean is ~0.
    column_means = snp_array.mean(axis=0)
    expected = np.ones(shape[1])
    np.testing.assert_array_almost_equal(1 + column_means, expected)
예제 #7
0
def test_make_snp_array_case_normal(shape, max_value, mask_nans):
    """Check ``'norm'`` standardisation of integer data, optionally masked.

    ``shape``, ``max_value`` and ``mask_nans`` are presumably supplied by a
    property-based testing decorator that is not visible here -- TODO
    confirm.  When ``mask_nans`` is true, every entry equal to
    ``max_value - 1`` is replaced by NaN and ``mask_nan=True`` is forwarded
    to ``utils.make_snp_array``.

    NOTE(review): a second test with this exact name appears in the same
    listing; if both live in one module the later definition shadows the
    earlier one -- verify.
    """
    # Skip degenerate inputs: both of the first two dimensions must exceed 1.
    assume(min(shape[0], shape[1]) > 1)

    arr = da.random.randint(0, max_value, size=shape)
    if mask_nans:
        masked_value = max_value - 1
        arr[arr == masked_value] = float('nan')

    # Skip examples in which some column has a NaN-ignoring std that is not
    # strictly positive.
    column_std = da.nanstd(arr, axis=0)
    assume(da.mean(column_std > 0) == 1)

    snp_array = utils.make_snp_array(arr,
                                     mean=True,
                                     std=True,
                                     std_method='norm',
                                     mask_nan=mask_nans,
                                     dtype='int8')

    # Comparing 1 + mean with ones checks that every column mean is ~0.
    column_means = snp_array.mean(axis=0)
    expected = np.ones(shape[1])
    np.testing.assert_array_almost_equal(1 + column_means, expected)
예제 #8
0
def test_PowerMethod_case2():
    """Check that PowerMethod errors do not grow as ``tol`` is tightened.

    The ground truth comes from numpy: left singular vectors and squared
    singular values from the SVD of ``X X^T``, right singular vectors from
    the SVD of ``X^T X``, where ``X`` is the hand-centred and hand-scaled
    random matrix.  ``PowerMethod`` is then run for 20 tolerances spaced
    logarithmically from ``1`` down to ``1e-12``; at each step the U and V
    subspace distances and the singular-value error must be no larger than
    at the previous step.  The errors of the last (tightest) run must also
    fall below fixed absolute bounds.
    """
    array = np.random.rand(100, 100)
    mu = array.mean(axis=0)
    std = np.diag(1 / array.std(axis=0))
    scaled_centered_array = (array - mu).dot(std)
    array = make_snp_array(da.array(array),
                           mean=True,
                           std=True,
                           std_method='norm',
                           mask_nan=False,
                           dtype='float64')

    # Ground truth.  Only U and S are taken from the first decomposition;
    # V comes from the second one.
    U, S, _ = np.linalg.svd(scaled_centered_array.dot(scaled_centered_array.T),
                            full_matrices=False)
    _, _, V = np.linalg.svd(scaled_centered_array.T.dot(scaled_centered_array),
                            full_matrices=False)

    # The Gram-matrix decomposition yields squared singular values.
    S = np.sqrt(S)
    k = 10
    U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

    previous_S_error = float('inf')
    previous_U_error = float('inf')
    previous_V_error = float('inf')
    for t in np.logspace(0, -12, 20):
        PM = PowerMethod(k=k,
                         tol=t,
                         scoring_method='q-vals',
                         max_iter=100,
                         factor=None,
                         lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

        # Evaluate each metric once and reuse it for both the comparison
        # and the bookkeeping.
        U_error = subspace_dist(U_k, U_k_PM, S_k)
        V_error = subspace_dist(V_k, V_k_PM, S_k)
        S_error = np.linalg.norm(S_k - S_k_PM)

        assert U_error <= previous_U_error
        assert V_error <= previous_V_error
        assert S_error <= previous_S_error

        previous_S_error = S_error
        previous_U_error = U_error
        previous_V_error = V_error

    # After the loop the "previous" values hold the errors of the final,
    # tightest-tolerance run.
    assert previous_U_error <= 1e-9
    assert previous_V_error <= 1e-9
    assert previous_S_error <= 1e-12
예제 #9
0
def test_PowerMethod_scale_center():
    """Check PowerMethod for every combination of centring and scaling.

    For each ``(scale, center)`` pair the reference matrix is built by hand
    (optionally subtracting the column means, then optionally dividing by
    the column standard deviations).  It must equal the output of
    ``make_snp_array`` called with ``std=scale`` and ``mean=center``, and
    its leading ``k`` singular triplets, obtained via numpy from the two
    Gram matrices, must match the ``PowerMethod`` result.
    """
    array = np.random.rand(100, 70)
    mu = array.mean(axis=0)
    std = np.diag(1 / array.std(axis=0))
    k = 10

    flags = (True, False)
    combinations = [(scale, center) for scale in flags for center in flags]

    for scale, center in combinations:
        centred = array - mu if center else array
        new_array = centred.dot(std) if scale else centred

        # Ground truth: U and squared singular values from X X^T,
        # V from X^T X.
        outer_gram = new_array.dot(new_array.T)
        inner_gram = new_array.T.dot(new_array)
        U, S, _ = np.linalg.svd(outer_gram, full_matrices=False)
        _, _, V = np.linalg.svd(inner_gram, full_matrices=False)
        S = np.sqrt(S)
        U_k = U[:, :k]
        S_k = S[:k]
        V_k = V[:k, :]

        snp_array = make_snp_array(da.array(array),
                                   std=scale,
                                   mean=center,
                                   std_method='norm',
                                   dtype='float64')
        np.testing.assert_array_almost_equal(new_array, snp_array)

        solver = PowerMethod(k=k,
                             tol=1e-12,
                             scoring_method='q-vals',
                             max_iter=100,
                             factor=None,
                             lmbd=0)
        U_q, S_q, V_q = solver.svd(snp_array)

        assert subspace_dist(U_k, U_q, S_k) <= 1e-8
        assert subspace_dist(V_k, V_q, S_k) <= 1e-8
        assert np.linalg.norm(S_k - S_q) <= 1e-9
예제 #10
0
def test_PowerMethod_factor():
    """Check the ``factor`` option of PowerMethod against numpy.

    For each ``factor`` setting (``'n'``, ``'p'`` or ``None``) the reference
    singular values are the square roots of the singular values of
    ``X X^T / d``, where ``d`` is ``n``, ``p`` or ``1`` respectively.  The
    leading ``k`` values, the shape of U and the U subspace distance are
    compared with the ``PowerMethod`` output.
    """
    n = 100
    p = 80
    k = 10
    array = np.random.rand(n, p)
    sym_array = array.dot(array.T)

    # Convert once, under a separate name, so that every iteration feeds
    # PowerMethod the same input instead of re-wrapping the result of the
    # previous iteration.
    snp_array = make_snp_array(da.array(array),
                               mean=False,
                               std=False,
                               std_method='norm',
                               mask_nan=False,
                               dtype='float64')

    # ``factor`` argument -> divisor applied to the reference Gram matrix.
    divisors = {'n': n, 'p': p, None: 1}

    for f, divisor in divisors.items():
        U, S, _ = np.linalg.svd(sym_array / divisor, full_matrices=False)
        # The Gram-matrix decomposition yields squared singular values.
        S = np.sqrt(S)
        U_k, S_k = U[:, :k], S[:k]

        PM = PowerMethod(k=k,
                         tol=1e-9,
                         scoring_method='q-vals',
                         max_iter=100,
                         factor=f,
                         lmbd=0)
        U_k_PM, S_k_PM, _ = PM.svd(snp_array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert U_k.shape == U_k_PM.shape == (n, k)
        np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)