def test_addsubs_wo_fit(tempdir, identifiability):
    with tempfile.TemporaryDirectory() as datadir:
        X, W, S = generate_data(
            n_voxels,
            [24, 25],
            n_subjects,
            n_components,
            datadir,
            0,
            "list_of_list",
        )

        if tempdir:
            temp_dir = datadir
        else:
            temp_dir = None

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            n_components=n_components,
            n_iter=10,
            temp_dir=temp_dir,
        )
        srm.add_subjects(X, S)
        for i in range(len(W)):
            assert_array_almost_equal(safe_load(srm.basis_list[i]), W[i])


def srm_reduce_data(X, n_components):
    """Reduce the number of features in X via FastSRM

    Parameters
    ----------
    X : np array of shape (n_groups, n_features, n_samples)
        Training vector, where n_groups is the number of groups,
        n_features is the number of features and n_samples is the
        number of samples.
    n_components : int, optional
        Number of components to extract.
        If None, no dimension reduction is performed.

    Returns
    -------
    projection : np array of shape (n_groups, n_components, n_features)
        The projection matrix that projects data into the reduced space.
    reduced : np array of shape (n_groups, n_components, n_samples)
        Reduced data.
    """
    srm = IdentifiableFastSRM(
        n_components=n_components,
        tol=1e-10,
        aggregate=None,
        identifiability="decorr",
    )
    S = np.array(srm.fit_transform([x for x in X]))
    W = np.array(srm.basis_list)
    return W, S


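# Minimal usage sketch for srm_reduce_data (illustrative only, not collected
# by pytest). The group/feature/sample sizes below are arbitrary assumptions;
# per the docstring, the returned arrays are expected to have shapes
# (n_groups, n_components, n_features) and (n_groups, n_components, n_samples).
def _example_srm_reduce_data():
    rng = np.random.RandomState(0)
    # 4 groups, 50 features, 30 samples of random data
    X = rng.rand(4, 50, 30)
    W, S = srm_reduce_data(X, n_components=3)
    # W: one basis (projection) per group, S: the reduced (shared) signals
    print(W.shape, S.shape)

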
def test_recover_ica_basis():
    X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes, n_subjects,
                                         None, 0, "list_of_list")
    srm = IdentifiableFastSRM(n_components=3, identifiability="ica")
    srm.fit(X)
    # The basis is only recovered up to sign/permutation, so align it first
    srm.basis_list = align_basis(srm.basis_list, W)
    for i in range(len(srm.basis_list)):
        assert_array_almost_equal(srm.basis_list[i], W[i], 2)


def test_overwriting_memory():
    n_voxels = 500
    n_timeframes = [100, 101]
    n_subjects = 4
    with tempfile.TemporaryDirectory() as datadir:
        X_train = [np.random.rand(100, 10) for _ in range(3)]
        srm = IdentifiableFastSRM(
            identifiability=None,
            n_components=5,
            temp_dir=datadir,
            memory=datadir + "/memory",
        )
        srm.fit(X_train)
        W = [np.load(w) for w in srm.basis_list]

        srm2 = IdentifiableFastSRM(
            identifiability="decorr",
            n_components=5,
            temp_dir=datadir,
            memory=datadir + "/memory",
        )
        srm2.fit(X_train)
        # Reload the first model's basis files: fitting srm2 with the same
        # temp_dir and memory should not have overwritten them
        W_ = [np.load(w) for w in srm.basis_list]
        np.testing.assert_array_almost_equal(W, W_)


def test_atlas():
    n_voxels = 500
    n_timeframes = 100
    n_subjects = 4
    atlas = np.repeat(np.arange(10), 50)
    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    srm = IdentifiableFastSRM(identifiability=None,
                              atlas=atlas,
                              n_components=5,
                              n_iter=1)
    S = srm.fit_transform(X_train)
    X_pred = srm.inverse_transform(S)


def test_recover_decorr_basis():
    n_voxels = 50
    n_timeframes = [100, 101]
    n_subjects = 4
    X, W, S = generate_decorr_friendly_data(n_voxels, n_timeframes,
                                            n_subjects, None, 0,
                                            "list_of_list")
    srm = IdentifiableFastSRM(n_components=3,
                              identifiability="decorr",
                              n_iter=10,
                              aggregate="mean")
    S_pred = srm.fit_transform(X)
    S_pred_full = np.concatenate(S_pred, axis=1)
    S_full = np.concatenate(S, axis=1)
    srm.basis_list = align_basis(srm.basis_list, W)
    for i in range(len(srm.basis_list)):
        assert_array_almost_equal(srm.basis_list[i], W[i])


def test_temp_files():
    n_voxels = 500
    n_timeframes = 10
    n_subjects = 4
    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    X_train2 = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    srm = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=None,
        temp_dir="./temp",
    )
    srm.fit(X_train)
    srm2 = clone(srm)
    srm2.fit(X_train2)
    # Each fitted model should write its basis to its own temporary directory
    assert srm2.basis_list[0] != srm.basis_list[0]
    assert srm2.temp_dir_ != srm.temp_dir_
    srm2.clean()
    srm.clean()
    # After cleaning, no temporary files should remain
    assert len(glob("./temp/*")) == 0


def test_ncomponents():
    # transform should work even when the test data have fewer timeframes
    # than n_components
    X_train = [np.random.rand(100, 20) for _ in range(3)]
    X_test = [np.random.rand(100, 5) for _ in range(3)]
    srm = IdentifiableFastSRM(n_components=10, verbose=False)
    srm.fit(X_train)
    srm.transform(X_test)


def test_convergence():
    n_voxels = 500
    n_timeframes = [100, 101]
    n_subjects = 4
    X = [[np.random.rand(n_voxels, n_t) for n_t in n_timeframes]
         for _ in range(n_subjects)]
    srm = IdentifiableFastSRM(
        atlas=np.arange(n_voxels),
        n_components=3,
        tol=1e-9,
        n_iter=1000,
        n_iter_reduced=1000,
    )
    srm.fit(X)
    # Gradients should be close to zero at convergence
    assert srm.grads[0][-1] < 1e-5
    assert srm.grads[1][-1] < 1e-5
    # The loss should decrease monotonically across both optimization phases
    tot_loss = np.concatenate([srm.losses[0], srm.losses[1]])
    diff_tot_loss = tot_loss[:-1] - tot_loss[1:]
    assert np.all(diff_tot_loss > 0)


def test_random_state():
    n_voxels = 500
    n_timeframes = 10
    n_subjects = 4
    X_train = [
        np.random.rand(n_voxels, n_timeframes) for _ in range(n_subjects)
    ]
    srm = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=None,
    )
    srm2 = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=0,
    )
    srm3 = IdentifiableFastSRM(
        identifiability=None,
        n_components=5,
        n_iter=1,
        n_iter_reduced=3,
        random_state=0,
    )
    S1 = srm.fit_transform(X_train)
    S2 = srm2.fit_transform(X_train)
    S3 = srm3.fit_transform(X_train)
    # The same seed gives the same result; no seed gives a different one
    np.testing.assert_allclose(S2, S3)
    for S in [S2, S3]:
        with pytest.raises(AssertionError):
            np.testing.assert_allclose(S, S1)


def test_use_pca():
    for i in tqdm(range(20)):
        X_train = [np.random.rand(100, 10) for _ in range(3)]
        srm = IdentifiableFastSRM(
            n_components=5,
            use_pca=False,
            tol=1e-18,
            identifiability="decorr",
            n_iter=10000,
        )
        A = srm.fit_transform(X_train)

        srm2 = IdentifiableFastSRM(
            n_components=5,
            use_pca=True,
            n_iter=1,
            identifiability="decorr",
            tol=1e-18,
            n_iter_reduced=10000,
        )
        B = srm2.fit_transform(X_train)
        # With and without the PCA-based reduction, the shared response
        # should be almost identical
        np.testing.assert_array_almost_equal(A, B, 4)


def test_memory():
    n_voxels = 500
    n_timeframes = [100, 101]
    n_subjects = 4
    with tempfile.TemporaryDirectory() as datadir:
        X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                             n_subjects, datadir, 0)
        dts = []
        for (low_ram, tempdir, n_jobs, aggregate, identifiability) in [
            (True, True, 1, "mean", "decorr"),
            (False, False, 2, None, "ica"),
            (True, True, 1, "mean", None),
            (False, False, 1, None, None),
        ]:
            if tempdir:
                temp_dir = datadir
            else:
                temp_dir = None

            srm = IdentifiableFastSRM(
                identifiability=identifiability,
                n_subjects_ica=n_subjects,
                atlas=np.arange(n_voxels),
                n_components=n_components,
                n_iter=100,
                n_iter_reduced=100,
                temp_dir=temp_dir,
                low_ram=low_ram,
                tol=0,
                n_jobs=n_jobs,
                aggregate=aggregate,
                memory=datadir + "/memory",
            )
            t0 = time()
            srm.fit(X)
            t1 = time()
            dts.append(t1 - t0)

            shared_response_raw = srm.transform(X)

            # Check inverse transform
            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                sessions_indexes=[1],
                subjects_indexes=[0, 2],
            )
            for i, ii in enumerate([0, 2]):
                for j, jj in enumerate([1]):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[ii][jj]))

            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                subjects_indexes=None,
                sessions_indexes=None,
            )
            for i in range(len(X)):
                for j in range(len(X[i])):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[i][j]))
            srm.clean()

        # Thanks to caching, every fit after the first should be faster
        for i in range(len(dts) - 1):
            assert dts[0] > dts[i + 1]


def test_class_srm_inverse_transform(
    input_format,
    low_ram,
    tempdir,
    atlas,
    n_jobs,
    n_timeframes,
    aggregate,
    identifiability,
):
    with tempfile.TemporaryDirectory() as datadir:
        X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                             n_subjects, datadir, 0,
                                             input_format)
        if tempdir:
            temp_dir = datadir
        else:
            temp_dir = None

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=10,
            temp_dir=temp_dir,
            low_ram=low_ram,
            n_jobs=n_jobs,
            aggregate=aggregate,
        )
        srm.fit(X)
        shared_response_raw = srm.transform(X)

        # Check inverse transform
        if input_format == "list_of_array":
            reconstructed_data = srm.inverse_transform(
                shared_response_raw, subjects_indexes=[0, 2])
            for i, ii in enumerate([0, 2]):
                assert_array_almost_equal(reconstructed_data[i], X[ii])

            reconstructed_data = srm.inverse_transform(
                shared_response_raw, subjects_indexes=None)
            for i in range(len(X)):
                assert_array_almost_equal(reconstructed_data[i], X[i])
        else:
            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                sessions_indexes=[1],
                subjects_indexes=[0, 2],
            )
            for i, ii in enumerate([0, 2]):
                for j, jj in enumerate([1]):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[ii][jj]))

            reconstructed_data = srm.inverse_transform(
                shared_response_raw,
                subjects_indexes=None,
                sessions_indexes=None,
            )
            for i in range(len(X)):
                for j in range(len(X[i])):
                    assert_array_almost_equal(reconstructed_data[i][j],
                                              safe_load(X[i][j]))


def test_fastsrm_class_correctness(
    input_format,
    low_ram,
    tempdir,
    atlas,
    n_jobs,
    n_timeframes,
    aggregate,
    identifiability,
):
    with tempfile.TemporaryDirectory() as datadir:
        np.random.seed(0)
        X, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                             n_subjects, datadir, 0,
                                             input_format)
        XX, n_sessions = apply_input_format(X, input_format)

        if tempdir:
            temp_dir = datadir
        else:
            temp_dir = None

        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=1000,
            tol=1e-7,
            temp_dir=temp_dir,
            low_ram=low_ram,
            n_jobs=n_jobs,
            aggregate=aggregate,
        )

        # Check that there is no difference between fit_transform
        # and fit then transform
        shared_response_fittransform = apply_aggregate(srm.fit_transform(X),
                                                       aggregate, input_format)

        prev_basis = srm.basis_list
        # we need to align both bases first, though
        srm.basis_list = align_basis(srm.basis_list, prev_basis)
        basis = [safe_load(b) for b in srm.basis_list]

        shared_response_raw = srm.transform(X)
        shared_response = apply_aggregate(shared_response_raw, aggregate,
                                          input_format)

        for j in range(n_sessions):
            assert_array_almost_equal(shared_response_fittransform[j],
                                      shared_response[j], 1)

        # Check that the decomposition works
        for i in range(n_subjects):
            for j in range(n_sessions):
                assert_array_almost_equal(shared_response[j].T.dot(basis[i]),
                                          XX[i][j].T)

        # Check that using all subjects but one gives almost the same
        # shared response
        shared_response_partial_raw = srm.transform(
            X[1:5], subjects_indexes=list(range(1, 5)))
        shared_response_partial = apply_aggregate(shared_response_partial_raw,
                                                  aggregate, input_format)
        for j in range(n_sessions):
            assert_array_almost_equal(shared_response_partial[j],
                                      shared_response[j])

        # Check that adding the same subject twice yields the same
        # decomposition
        srm.add_subjects(X[:1], shared_response_raw)
        assert_array_almost_equal(safe_load(srm.basis_list[0]),
                                  safe_load(srm.basis_list[-1]))


def test_fastsrm_class(identifiability):
    n_jobs = 1
    with tempfile.TemporaryDirectory() as datadir:
        np.random.seed(0)
        paths, W, S = generate_ica_friendly_data(n_voxels, n_timeframes,
                                                 n_subjects, datadir, 0)
        atlas = np.arange(1, n_voxels + 1)
        srm = IdentifiableFastSRM(
            identifiability=identifiability,
            n_subjects_ica=n_subjects,
            atlas=atlas,
            n_components=n_components,
            n_iter=10,
            temp_dir=datadir,
            low_ram=True,
            n_jobs=n_jobs,
        )

        # Raises an error because the model is not fitted yet
        with pytest.raises(NotFittedError):
            srm.transform(paths)

        srm.fit(paths)

        # An error can occur if the temporary directory already exists
        with pytest.raises(
                ValueError,
                match=("Path %s already exists. When a model "
                       "is used, filesystem should be "
                       r"cleaned by using the .clean\(\) "
                       "method" % srm.temp_dir),
        ):
            # Error can occur if the filesystem is uncleaned
            create_temp_dir(srm.temp_dir)
            create_temp_dir(srm.temp_dir)

        shared_response = srm.transform(paths)

        # Raise an error when the index has the wrong type
        with pytest.raises(
                ValueError,
                match=("subjects_indexes should be either "
                       "a list, an array or None but "
                       "received type <class 'int'>"),
        ):
            srm.transform(paths, subjects_indexes=1000)

        with pytest.raises(
                ValueError,
                match=("subjects_indexes should be either "
                       "a list, an array or None but "
                       "received type <class 'int'>"),
        ):
            srm.inverse_transform(shared_response, subjects_indexes=1000)

        with pytest.raises(
                ValueError,
                match=("sessions_indexes should be either "
                       "a list, an array or None but "
                       "received type <class 'int'>"),
        ):
            srm.inverse_transform(shared_response, sessions_indexes=1000)

        with pytest.raises(
                ValueError,
                match=("Input data imgs has len 5 whereas "
                       "subject_indexes has len 1. "
                       "The number of basis used to compute "
                       "the shared response should be equal to "
                       "the number of subjects in imgs"),
        ):
            srm.transform(paths, subjects_indexes=[0])

        with pytest.raises(
                ValueError,
                match=("Index 1 of subjects_indexes has value 8 "
                       "whereas value should be between 0 and 4"),
        ):
            srm.transform(paths[:2], subjects_indexes=[0, 8])

        with pytest.raises(
                ValueError,
                match=("Index 1 of sessions_indexes has value 8 "
                       "whereas value should be between 0 and 1"),
        ):
            srm.inverse_transform(shared_response, sessions_indexes=[0, 8])

        # Check the behavior of .clean
        assert os.path.exists(srm.temp_dir_)
        srm.clean()
        assert not os.path.exists(srm.temp_dir_)