# Example #1
0
def srm_reduce_data(X, n_components):
    """
    Reduce the number of features in X via FastSRM.

    Parameters
    ----------
    X : np array of shape (n_groups, n_features, n_samples)
        Training vector, where n_groups is the number of groups,
        n_features is the number of features and
        n_samples is the number of samples.
    n_components : int
        Number of components to extract. Forwarded as-is to
        ``IdentifiableFastSRM``; whether ``None`` disables dimension
        reduction depends on that estimator (TODO confirm).

    Returns
    -------
    projection : np array of shape (n_groups, n_components, n_features)
        The projection matrix that projects data in reduced space
        (built from ``srm.basis_list``).
    reduced : np array of shape (n_groups, n_components, n_samples)
        Reduced data (the output of ``fit_transform``).
    """
    srm = IdentifiableFastSRM(
        n_components=n_components,
        tol=1e-10,
        aggregate=None,
        identifiability="decorr",
    )
    # Split X along its leading axis so the estimator receives a list with
    # one entry per group.
    S = np.array(srm.fit_transform(list(X)))
    W = np.array(srm.basis_list)
    return W, S
def test_random_state():
    """Check that ``random_state`` controls the output of ``fit_transform``.

    Two models built with ``random_state=0`` must produce the same result on
    the same data, while a model built with ``random_state=None`` is expected
    to differ from both of them.
    """
    n_voxels = 500
    n_timeframes = 10
    n_subjects = 4
    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]

    def make_srm(random_state):
        # All models share the same configuration except for the seed.
        return IdentifiableFastSRM(
            identifiability=None,
            n_components=5,
            n_iter=1,
            n_iter_reduced=3,
            random_state=random_state,
        )

    # Build every model before fitting, then fit in the same order as before.
    srm = make_srm(None)
    srm2 = make_srm(0)
    srm3 = make_srm(0)
    S1 = srm.fit_transform(X_train)
    S2 = srm2.fit_transform(X_train)
    S3 = srm3.fit_transform(X_train)

    # Same seed -> same result.
    np.testing.assert_allclose(S2, S3)

    # The unseeded model must not match either seeded result.
    for S in (S2, S3):
        with pytest.raises(AssertionError):
            np.testing.assert_allclose(S, S1)
def test_use_pca():
    """Compare the results obtained with ``use_pca=False`` and ``use_pca=True``.

    For 20 random data sets, a model fitted with ``use_pca=False`` and
    ``n_iter=10000`` must agree to 4 decimals with a model fitted with
    ``use_pca=True``, ``n_iter=1`` and ``n_iter_reduced=10000``.
    """
    # Settings common to both models.
    shared_kwargs = dict(
        n_components=5,
        tol=1e-18,
        identifiability="decorr",
    )
    for _trial in tqdm(range(20)):
        data = [np.random.rand(100, 10) for _ in range(3)]

        without_pca = IdentifiableFastSRM(
            use_pca=False,
            n_iter=10000,
            **shared_kwargs,
        )
        A = without_pca.fit_transform(data)

        with_pca = IdentifiableFastSRM(
            use_pca=True,
            n_iter=1,
            n_iter_reduced=10000,
            **shared_kwargs,
        )
        B = with_pca.fit_transform(data)

        np.testing.assert_array_almost_equal(A, B, decimal=4)
def test_atlas():
    """Smoke-test a model configured with an atlas.

    No values are asserted: the test passes as long as ``fit_transform``
    and ``inverse_transform`` run without raising.
    """
    n_voxels = 500
    n_timeframes = 100
    n_subjects = 4
    # Labels 0..9, each repeated 50 times: 500 entries, matching n_voxels.
    atlas = np.repeat(np.arange(10), 50)

    X_train = []
    for _ in range(n_subjects):
        X_train.append(np.random.rand(n_voxels, n_timeframes))

    model = IdentifiableFastSRM(
        identifiability=None,
        atlas=atlas,
        n_components=5,
        n_iter=1,
    )
    shared = model.fit_transform(X_train)
    # The result is intentionally discarded; only the call itself is tested.
    model.inverse_transform(shared)
def test_recover_decorr_basis():
    """Check that the fitted bases match the generating bases.

    Data are produced by ``generate_decorr_friendly_data`` in the
    ``"list_of_list"`` input format with two session lengths (100 and 101).
    After fitting with ``identifiability="decorr"``, the fitted bases are
    passed through ``align_basis`` against the generating bases ``W`` and
    must then be almost equal to them, entry by entry.
    """
    n_voxels = 50
    n_timeframes = [100, 101]
    n_subjects = 4

    # Only the data and the generating bases are needed here; the third
    # returned value is not compared.
    # NOTE(review): the 4th and 5th arguments (None, 0) are presumably a
    # data directory and a noise level -- confirm against the helper.
    X, W, _ = generate_decorr_friendly_data(
        n_voxels, n_timeframes, n_subjects, None, 0, "list_of_list"
    )
    srm = IdentifiableFastSRM(
        n_components=3,
        identifiability="decorr",
        n_iter=10,
        aggregate="mean",
    )
    # The returned shared response is not checked; the call fits the model.
    srm.fit_transform(X)

    srm.basis_list = align_basis(srm.basis_list, W)
    for i, basis in enumerate(srm.basis_list):
        assert_array_almost_equal(basis, W[i])
def test_fastsrm_class_correctness(
    input_format,
    low_ram,
    tempdir,
    atlas,
    n_jobs,
    n_timeframes,
    aggregate,
    identifiability,
):
    """Run end-to-end consistency checks on ``IdentifiableFastSRM``.

    The arguments are forwarded to the data generator and to the estimator.
    ``tempdir`` is a flag: when truthy, the temporary data directory is also
    used as the estimator's ``temp_dir``; otherwise ``temp_dir`` is ``None``.

    NOTE(review): ``n_voxels``, ``n_subjects`` and ``n_components`` are not
    defined in this function and are expected to come from the enclosing
    scope; the arguments are presumably provided by pytest parametrization
    that is not visible here.
    """
    with tempfile.TemporaryDirectory() as datadir:
        np.random.seed(0)
        X, _W, _S = generate_ica_friendly_data(
            n_voxels, n_timeframes, n_subjects, datadir, 0, input_format
        )
        XX, n_sessions = apply_input_format(X, input_format)

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=1000,
            tol=1e-7,
            temp_dir=datadir if tempdir else None,
            low_ram=low_ram,
            n_jobs=n_jobs,
            aggregate=aggregate,
        )

        # The output of fit_transform must agree with a later transform
        # on the same data.
        response_fit_transform = apply_aggregate(
            srm.fit_transform(X), aggregate, input_format
        )
        # NOTE(review): the reference basis is read from the same fitted
        # model, so the bases are aligned against themselves, and no separate
        # fit() call happens before transform() -- confirm this is intended.
        reference_basis = srm.basis_list
        srm.basis_list = align_basis(srm.basis_list, reference_basis)

        basis = [safe_load(b) for b in srm.basis_list]
        response_raw = srm.transform(X)
        response = apply_aggregate(response_raw, aggregate, input_format)

        for session in range(n_sessions):
            assert_array_almost_equal(
                response_fit_transform[session], response[session], 1
            )

        # The decomposition must reproduce the data of every subject and
        # session.
        for subject in range(n_subjects):
            for session in range(n_sessions):
                assert_array_almost_equal(
                    response[session].T.dot(basis[subject]),
                    XX[subject][session].T,
                )

        # Transforming subjects 1 to 4 only (the first subject is left out)
        # must give almost the same shared response.
        response_partial_raw = srm.transform(
            X[1:5], subjects_indexes=list(range(1, 5))
        )
        response_partial = apply_aggregate(
            response_partial_raw, aggregate, input_format
        )
        for session in range(n_sessions):
            assert_array_almost_equal(
                response_partial[session], response[session]
            )

        # Add the first subject a second time: the last basis in the list
        # must then be almost equal to the first one.
        srm.add_subjects(X[:1], response_raw)
        first_basis = safe_load(srm.basis_list[0])
        last_basis = safe_load(srm.basis_list[-1])
        assert_array_almost_equal(first_basis, last_basis)