def test_addsubs_wo_fit(tempdir, identifiability):

    with tempfile.TemporaryDirectory() as datadir:
        X, W, S = generate_data(
            n_voxels,
            [24, 25],
            n_subjects,
            n_components,
            datadir,
            0,
            "list_of_list",
        )

        if tempdir:
            temp_dir = datadir
        else:
            temp_dir = None

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            n_components=n_components,
            n_iter=10,
            temp_dir=temp_dir,
        )

        srm.add_subjects(X, S)

        for i in range(len(W)):
            assert_array_almost_equal(safe_load(srm.basis_list[i]), W[i])
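
# Illustrative sketch (not part of the original test suite): the typical
# add_subjects workflow is to fit the model on an initial cohort, compute the
# shared response, and then append the basis of further subjects without
# refitting.  It relies on the module-level names used by the tests above
# (np, IdentifiableFastSRM); sizes and the function name are illustrative
# assumptions.
def _example_add_subjects():
    rng = np.random.RandomState(0)
    X = [rng.rand(100, 50) for _ in range(3)]   # (n_voxels, n_timeframes) per subject
    srm = IdentifiableFastSRM(identifiability="decorr", n_components=5, n_iter=10)
    shared = srm.fit_transform(X)
    # Append the basis of one more (here: already seen) subject to basis_list
    # without refitting the whole model
    srm.add_subjects(X[:1], shared)
    return srm.basis_list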
def srm_reduce_data(X, n_components):
    """
    Reduce the number of features in X via FastSRM
    Parameters
    ----------
    X : np array of shape (n_groups, n_features, n_samples)
        Training data, where n_groups is the number of groups,
        n_features is the number of features and
        n_samples is the number of samples.
    n_components : int
        Number of components to extract.
    Returns
    -------
    projection : np array of shape (n_groups, n_components, n_features)
        The projection matrices that map each group's data into the reduced space
    reduced : np array of shape (n_groups, n_components, n_samples)
        Reduced data
    """
    srm = IdentifiableFastSRM(
        n_components=n_components,
        tol=1e-10,
        aggregate=None,
        identifiability="decorr",
    )
    S = np.array(srm.fit_transform([x for x in X]))
    W = np.array(srm.basis_list)
    return W, S
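
# Minimal usage sketch for srm_reduce_data (illustrative, not a test).  The
# expected shapes follow the docstring above; np and IdentifiableFastSRM are
# assumed to be imported at module level, as in the rest of this file.
def _example_srm_reduce_data():
    rng = np.random.RandomState(0)
    X = rng.rand(3, 100, 50)          # (n_groups, n_features, n_samples)
    W, S = srm_reduce_data(X, n_components=5)
    assert W.shape == (3, 5, 100)     # projection matrices, per the docstring
    assert S.shape == (3, 5, 50)      # reduced data, per the docstring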
def test_recover_ica_basis():
    X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes, n_subjects,
                                         None, 0, "list_of_list")
    srm = IdentifiableFastSRM(n_components=3, identifiability="ica")
    srm.fit(X)

    srm.basis_list = align_basis(srm.basis_list, W)
    for i in range(len(srm.basis_list)):
        assert_array_almost_equal(srm.basis_list[i], W[i], 2)
def test_overwriting_memory():
    n_voxels = 500
    n_timeframes = [100, 101]
    n_subjects = 4

    with tempfile.TemporaryDirectory() as datadir:
        X_train = [np.random.rand(100, 10) for _ in range(3)]
        srm = IdentifiableFastSRM(
            identifiability=None,
            n_components=5,
            temp_dir=datadir,
            memory=datadir + "/memory",
        )
        srm.fit(X_train)
        W = [np.load(w) for w in srm.basis_list]

        srm2 = IdentifiableFastSRM(
            identifiability="decorr",
            n_components=5,
            temp_dir=datadir,
            memory=datadir + "/memory",
        )
        srm2.fit(X_train)

        # Fitting srm2 must not overwrite the basis stored on disk by srm
        W_ = [np.load(w) for w in srm.basis_list]
        np.testing.assert_array_almost_equal(W, W_)
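
# Illustrative sketch: the memory parameter points to a cache directory so
# that a second fit on the same data can reuse expensive intermediate results
# (test_memory further down checks that cached fits are faster).  Paths and
# sizes here are illustrative assumptions.
def _example_memory_caching():
    with tempfile.TemporaryDirectory() as datadir:
        X = [np.random.rand(100, 20) for _ in range(3)]
        srm = IdentifiableFastSRM(identifiability=None,
                                  n_components=5,
                                  n_iter=10,
                                  temp_dir=datadir,
                                  memory=datadir + "/memory")
        srm.fit(X)    # first fit populates the cache
        srm2 = IdentifiableFastSRM(identifiability=None,
                                   n_components=5,
                                   n_iter=10,
                                   temp_dir=datadir,
                                   memory=datadir + "/memory")
        srm2.fit(X)   # second fit can reuse the cached computations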
def test_atlas():
    n_voxels = 500
    n_timeframes = 100
    n_subjects = 4
    atlas = np.repeat(np.arange(10), 50)

    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    srm = IdentifiableFastSRM(identifiability=None,
                              atlas=atlas,
                              n_components=5,
                              n_iter=1)
    S = srm.fit_transform(X_train)
    # Smoke test: reconstruction from the shared response runs without error
    X_pred = srm.inverse_transform(S)
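
# Illustrative sketch of atlas-based reduction: the atlas is a 1D vector with
# one parcel label per voxel (here 10 parcels of 50 voxels each, as in
# test_atlas above), and the fitted model is used like any other
# IdentifiableFastSRM.  Sizes are illustrative.
def _example_atlas_reduction():
    atlas = np.repeat(np.arange(10), 50)            # 500 voxels -> 10 parcels
    X = [np.random.rand(500, 30) for _ in range(4)]
    srm = IdentifiableFastSRM(identifiability=None,
                              atlas=atlas,
                              n_components=5,
                              n_iter=1)
    S = srm.fit_transform(X)
    X_rec = srm.inverse_transform(S)                # back to voxel space
    assert X_rec[0].shape == X[0].shape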
def test_recover_decorr_basis():
    n_voxels = 50
    n_timeframes = [100, 101]
    n_subjects = 4

    X, W, S = generate_decorr_friendly_data(n_voxels, n_timeframes, n_subjects,
                                            None, 0, "list_of_list")
    srm = IdentifiableFastSRM(n_components=3,
                              identifiability="decorr",
                              n_iter=10,
                              aggregate="mean")
    S_pred = srm.fit_transform(X)

    S_pred_full = np.concatenate(S_pred, axis=1)
    S_full = np.concatenate(S, axis=1)

    srm.basis_list = align_basis(srm.basis_list, W)
    for i in range(len(srm.basis_list)):
        assert_array_almost_equal(srm.basis_list[i], W[i])
def test_temp_files():
    n_voxels = 500
    n_timeframes = 10
    n_subjects = 4
    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]

    X_train2 = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    srm = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=None,
        temp_dir="./temp",
    )
    srm.fit(X_train)
    srm2 = clone(srm)
    srm2.fit(X_train2)
    assert srm2.basis_list[0] != srm.basis_list[0]
    assert srm2.temp_dir_ != srm.temp_dir_
    srm2.clean()
    srm.clean()
    assert len(glob("./temp/*")) == 0
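
# Illustrative sketch: with temp_dir set, fitted bases are stored on disk and
# basis_list holds paths to .npy files (np.load/safe_load reads them back);
# clean() removes the temporary files afterwards.  Directory and sizes are
# illustrative.
def _example_temp_dir():
    with tempfile.TemporaryDirectory() as tmp:
        X = [np.random.rand(200, 20) for _ in range(3)]
        srm = IdentifiableFastSRM(identifiability=None,
                                  n_components=5,
                                  n_iter=1,
                                  temp_dir=tmp)
        srm.fit(X)
        first_basis = np.load(srm.basis_list[0])   # a path, not an array
        assert first_basis.shape[0] == 5           # one row per component
        srm.clean()                                # remove files under temp_dir_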
def test_ncomponents():
    X_train = [np.random.rand(100, 20) for _ in range(3)]
    X_test = [np.random.rand(100, 5) for _ in range(3)]

    srm = IdentifiableFastSRM(n_components=10, verbose=False)
    srm.fit(X_train)
    srm.transform(X_test)
def test_convergence():
    n_voxels = 500
    n_timeframes = [100, 101]
    n_subjects = 4

    X = [[np.random.rand(n_voxels, n_t) for n_t in n_timeframes]
         for _ in range(n_subjects)]

    srm = IdentifiableFastSRM(
        atlas=np.arange(n_voxels),
        n_components=3,
        tol=1e-9,
        n_iter=1000,
        n_iter_reduced=1000,
    )
    srm.fit(X)
    assert srm.grads[0][-1] < 1e-5
    assert srm.grads[1][-1] < 1e-5

    tot_loss = np.concatenate([srm.losses[0], srm.losses[1]])
    diff_tot_loss = tot_loss[:-1] - tot_loss[1:]

    # The total loss must decrease strictly at every iteration
    assert np.all(diff_tot_loss > 0)
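
# Illustrative sketch: after fitting, grads and losses expose one sequence per
# optimization stage (see n_iter and n_iter_reduced), which is what
# test_convergence above asserts on.  Sizes are illustrative.
def _example_inspect_convergence():
    X = [np.random.rand(50, 40) for _ in range(4)]
    srm = IdentifiableFastSRM(atlas=np.arange(50),
                              n_components=3,
                              tol=1e-9,
                              n_iter=100,
                              n_iter_reduced=100)
    srm.fit(X)
    total_loss = np.concatenate([srm.losses[0], srm.losses[1]])
    final_grad = srm.grads[1][-1]
    return total_loss, final_grad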
def test_random_state():
    n_voxels = 500
    n_timeframes = 10
    n_subjects = 4
    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    srm = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=None,
    )
    srm2 = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=0,
    )
    srm3 = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=0,
    )
    S1 = srm.fit_transform(X_train)
    S2 = srm2.fit_transform(X_train)
    S3 = srm3.fit_transform(X_train)

    print(S1)
    print(S2)
    print(S3)
    np.testing.assert_allclose(S2, S3)

    for S in [S2, S3]:
        with pytest.raises(AssertionError):
            np.testing.assert_allclose(S, S1)
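
# Illustrative sketch: fixing random_state makes fit_transform reproducible,
# exactly as test_random_state checks above; the data and sizes below are
# illustrative.
def _example_reproducibility():
    X = [np.random.rand(100, 20) for _ in range(3)]
    kwargs = dict(identifiability=None, n_components=5,
                  n_iter=1, n_iter_reduced=3, random_state=0)
    S_a = IdentifiableFastSRM(**kwargs).fit_transform(X)
    S_b = IdentifiableFastSRM(**kwargs).fit_transform(X)
    np.testing.assert_allclose(S_a, S_b)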
def test_use_pca():
    for i in tqdm(range(20)):
        X_train = [np.random.rand(100, 10) for _ in range(3)]
        srm = IdentifiableFastSRM(
            n_components=5,
            use_pca=False,
            tol=1e-18,
            identifiability="decorr",
            n_iter=10000,
        )
        A = srm.fit_transform(X_train)

        srm2 = IdentifiableFastSRM(
            n_components=5,
            use_pca=True,
            n_iter=1,
            identifiability="decorr",
            tol=1e-18,
            n_iter_reduced=10000,
        )
        B = srm2.fit_transform(X_train)
        np.testing.assert_array_almost_equal(A, B, 4)
def test_memory():
    n_voxels = 500
    n_timeframes = [100, 101]
    n_subjects = 4

    with tempfile.TemporaryDirectory() as datadir:
        X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                             n_subjects, datadir, 0)

        dts = []
        for (low_ram, tempdir, n_jobs, aggregate, identifiability) in [
            (True, True, 1, "mean", "decorr"),
            (False, False, 2, None, "ica"),
            (True, True, 1, "mean", None),
            (False, False, 1, None, None),
        ]:
            if tempdir:
                temp_dir = datadir
            else:
                temp_dir = None

            srm = IdentifiableFastSRM(
                identifiability=identifiability,
                n_subjects_ica=n_subjects,
                atlas=np.arange(n_voxels),
                n_components=n_components,
                n_iter=100,
                n_iter_reduced=100,
                temp_dir=temp_dir,
                low_ram=low_ram,
                tol=0,
                n_jobs=n_jobs,
                aggregate=aggregate,
                memory=datadir + "/memory",
            )
            t0 = time()
            srm.fit(X)
            t1 = time()

            dts.append(t1 - t0)

            shared_response_raw = srm.transform(X)

            # Check inverse transform
            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                sessions_indexes=[1],
                subjects_indexes=[0, 2],
            )
            for i, ii in enumerate([0, 2]):
                for j, jj in enumerate([1]):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[ii][jj]))

            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                subjects_indexes=None,
                sessions_indexes=None,
            )

            for i in range(len(X)):
                for j in range(len(X[i])):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[i][j]))

            srm.clean()

    # Because all runs share the same memory cache directory, every fit after
    # the first one should be faster than the first
    for i in range(len(dts) - 1):
        assert dts[0] > dts[i + 1]
def test_class_srm_inverse_transform(
    input_format,
    low_ram,
    tempdir,
    atlas,
    n_jobs,
    n_timeframes,
    aggregate,
    identifiability,
):

    with tempfile.TemporaryDirectory() as datadir:
        X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                             n_subjects, datadir, 0,
                                             input_format)

        if tempdir:
            temp_dir = datadir
        else:
            temp_dir = None

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=10,
            temp_dir=temp_dir,
            low_ram=low_ram,
            n_jobs=n_jobs,
            aggregate=aggregate,
        )

        srm.fit(X)
        shared_response_raw = srm.transform(X)
        # Check inverse transform
        if input_format == "list_of_array":
            reconstructed_data = srm.inverse_transform(shared_response_raw,
                                                       subjects_indexes=[0, 2])
            for i, ii in enumerate([0, 2]):
                assert_array_almost_equal(reconstructed_data[i], X[ii])

            reconstructed_data = srm.inverse_transform(shared_response_raw,
                                                       subjects_indexes=None)
            for i in range(len(X)):
                assert_array_almost_equal(reconstructed_data[i], X[i])
        else:
            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                sessions_indexes=[1],
                subjects_indexes=[0, 2],
            )
            for i, ii in enumerate([0, 2]):
                for j, jj in enumerate([1]):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[ii][jj]))

            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                subjects_indexes=None,
                sessions_indexes=None,
            )

            for i in range(len(X)):
                for j in range(len(X[i])):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[i][j]))
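
# Illustrative sketch of the single-session ("list_of_array") round trip: each
# subject is an array of shape (n_voxels, n_timeframes), transform maps it to
# the shared space and inverse_transform maps the shared response back to each
# requested subject's voxel space.  Sizes are illustrative.
def _example_round_trip():
    X = [np.random.rand(200, 30) for _ in range(4)]
    srm = IdentifiableFastSRM(identifiability=None, n_components=5, n_iter=10)
    shared = srm.fit_transform(X)
    X_rec = srm.inverse_transform(shared, subjects_indexes=[0, 2])
    assert X_rec[0].shape == X[0].shape   # reconstructions live in voxel space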
def test_fastsrm_class_correctness(
    input_format,
    low_ram,
    tempdir,
    atlas,
    n_jobs,
    n_timeframes,
    aggregate,
    identifiability,
):
    with tempfile.TemporaryDirectory() as datadir:
        np.random.seed(0)
        X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                             n_subjects, datadir, 0,
                                             input_format)

        XX, n_sessions = apply_input_format(X, input_format)

        if tempdir:
            temp_dir = datadir
        else:
            temp_dir = None

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=1000,
            tol=1e-7,
            temp_dir=temp_dir,
            low_ram=low_ram,
            n_jobs=n_jobs,
            aggregate=aggregate,
        )

        # Check that there is no difference between fit_transform
        # and fit then transform
        shared_response_fittransform = apply_aggregate(srm.fit_transform(X),
                                                       aggregate, input_format)
        prev_basis = srm.basis_list
        # Refit and align the new basis with the previous one (components may
        # only be recovered up to sign/permutation)
        srm.fit(X)
        srm.basis_list = align_basis(srm.basis_list, prev_basis)

        basis = [safe_load(b) for b in srm.basis_list]
        shared_response_raw = srm.transform(X)
        shared_response = apply_aggregate(shared_response_raw, aggregate,
                                          input_format)

        for j in range(n_sessions):
            assert_array_almost_equal(shared_response_fittransform[j],
                                      shared_response[j], 1)

        # Check that the decomposition works
        for i in range(n_subjects):
            for j in range(n_sessions):
                assert_array_almost_equal(shared_response[j].T.dot(basis[i]),
                                          XX[i][j].T)

        # Check that using all subjects but one gives almost the same
        # shared response
        shared_response_partial_raw = srm.transform(X[1:5],
                                                    subjects_indexes=list(
                                                        range(1, 5)))

        shared_response_partial = apply_aggregate(shared_response_partial_raw,
                                                  aggregate, input_format)
        for j in range(n_sessions):
            assert_array_almost_equal(shared_response_partial[j],
                                      shared_response[j])

        # Check that adding the same subject a second time yields the same
        # basis as the one estimated during fit
        srm.add_subjects(X[:1], shared_response_raw)
        assert_array_almost_equal(safe_load(srm.basis_list[0]),
                                  safe_load(srm.basis_list[-1]))
def test_fastsrm_class(identifiability):
    n_jobs = 1
    with tempfile.TemporaryDirectory() as datadir:
        np.random.seed(0)

        paths, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                                 n_subjects, datadir, 0)

        atlas = np.arange(1, n_voxels + 1)
        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=10,
            temp_dir=datadir,
            low_ram=True,
            n_jobs=n_jobs,
        )

        # Raises an error because model is not fitted yet
        with pytest.raises(NotFittedError):
            srm.transform(paths)

        srm.fit(paths)

        # create_temp_dir raises an error when the directory already exists,
        # i.e. when the filesystem was not cleaned after using a fitted model
        with pytest.raises(
                ValueError,
                match=("Path %s already exists. When a model "
                       "is used, filesystem should be "
                       r"cleaned by using the .clean\(\) "
                       "method" % srm.temp_dir),
        ):
            create_temp_dir(srm.temp_dir)
            create_temp_dir(srm.temp_dir)

        shared_response = srm.transform(paths)

        # Raise error when wrong index
        with pytest.raises(
                ValueError,
                match=("subjects_indexes should be either "
                       "a list, an array or None but "
                       "received type <class 'int'>"),
        ):
            srm.transform(paths, subjects_indexes=1000)

        with pytest.raises(
                ValueError,
                match=("subjects_indexes should be either "
                       "a list, an array or None but "
                       "received type <class 'int'>"),
        ):
            srm.inverse_transform(shared_response, subjects_indexes=1000)

        with pytest.raises(
                ValueError,
                match=("sessions_indexes should be either "
                       "a list, an array or None but "
                       "received type <class 'int'>"),
        ):
            srm.inverse_transform(shared_response, sessions_indexes=1000)

        with pytest.raises(
                ValueError,
                match=("Input data imgs has len 5 whereas "
                       "subject_indexes has len 1. "
                       "The number of basis used to compute "
                       "the shared response should be equal to "
                       "the number of subjects in imgs"),
        ):
            srm.transform(paths, subjects_indexes=[0])

        with pytest.raises(
                ValueError,
                match=("Index 1 of subjects_indexes has value 8 "
                       "whereas value should be between 0 and 4"),
        ):
            srm.transform(paths[:2], subjects_indexes=[0, 8])

        with pytest.raises(
                ValueError,
                match=("Index 1 of sessions_indexes has value 8 "
                       "whereas value should be between 0 and 1"),
        ):
            srm.inverse_transform(shared_response, sessions_indexes=[0, 8])

        # Check behavior of .clean
        assert os.path.exists(srm.temp_dir_)
        srm.clean()
        assert not os.path.exists(srm.temp_dir_)
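
# Illustrative sketch: calling transform before fit raises NotFittedError, as
# test_fastsrm_class checks above; NotFittedError and pytest are the ones
# already imported at module level.
def _example_not_fitted():
    X = [np.random.rand(100, 10) for _ in range(3)]
    srm = IdentifiableFastSRM(n_components=5)
    with pytest.raises(NotFittedError):
        srm.transform(X)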