def test_get_dataset_dir():
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('NILEARN_DATA', None)
    os.environ.pop('NILEARN_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/nilearn_data')
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'test_nilearn_data')
    os.environ['NILEARN_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'env_data')
    os.environ['MY_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', env_vars=['MY_DATA'],
                                         verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    no_write = os.path.join(tmpdir, 'no_write')
    os.makedirs(no_write)
    os.chmod(no_write, 0o400)

    # Verify that the default is used when the env-var directory is not
    # writeable
    os.environ['MY_DATA'] = no_write
    expected_base_dir = os.path.join(tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', env_vars=['MY_DATA'],
                                         verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    # Verify exception is raised on read-only directories
    assert_raises_regex(OSError, 'Permission denied',
                        datasets._get_dataset_dir, 'test', no_write,
                        verbose=0)

    # Verify exception for a path which exists and is a file
    test_file = os.path.join(tmpdir, 'some_file')
    with open(test_file, 'w') as out:
        out.write('abcfeg')
    assert_raises_regex(OSError, 'Not a directory',
                        datasets._get_dataset_dir, 'test', test_file,
                        verbose=0)

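# A minimal sketch (not part of the test suite) of the lookup order the test
# above exercises, assuming the same `datasets` module is importable: a custom
# env var passed via `env_vars` wins over NILEARN_SHARED_DATA, which wins over
# NILEARN_DATA, which wins over the ~/nilearn_data default, and a
# non-writeable env-var candidate falls back to the next one. The paths below
# are hypothetical.
import os

os.environ['MY_DATA'] = '/tmp/my_data'
os.environ['NILEARN_SHARED_DATA'] = '/tmp/shared_data'
# Resolves to '/tmp/my_data/my_dataset' because MY_DATA is consulted first.
path = datasets._get_dataset_dir('my_dataset', env_vars=['MY_DATA'], verbose=0)
print(path)
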
def test_get_dataset_dir():
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('NILEARN_DATA', None)
    os.environ.pop('NILEARN_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/nilearn_data')
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'test_nilearn_data')
    os.environ['NILEARN_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'env_data')
    expected_dataset_dir = os.path.join(expected_base_dir, 'test')
    data_dir = datasets._get_dataset_dir(
        'test', default_paths=[expected_dataset_dir], verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    no_write = os.path.join(tmpdir, 'no_write')
    os.makedirs(no_write)
    os.chmod(no_write, 0o400)

    expected_base_dir = os.path.join(tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', default_paths=[no_write],
                                         verbose=0)
    # Non writeable dir is returned because dataset may be in there.
    assert_equal(data_dir, no_write)
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    # Verify exception for a path which exists and is a file
    test_file = os.path.join(tmpdir, 'some_file')
    with open(test_file, 'w') as out:
        out.write('abcfeg')
    assert_raises_regex(OSError, 'Not a directory',
                        datasets._get_dataset_dir, 'test', test_file,
                        verbose=0)

def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        epi_img: the input 4D image
        paradigm: a csv file describing the paradigm
    """
    url = 'ftp://ftp.cea.fr/pub/dsv/madic/download/nipy'

    dataset_name = "localizer_first_level"
    files = dict(epi_img="s12069_swaloc1_corr.nii.gz",
                 paradigm="localizer_paradigm.csv")
    # The options needed for _fetch_files
    options = [(filename, os.path.join(url, filename), {})
               for _, filename in sorted(files.items())]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    sub_files = _fetch_files(data_dir, options, resume=True,
                             verbose=verbose)

    params = dict(zip(sorted(files.keys()), sub_files))

    return Bunch(**params)

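# A minimal usage sketch for the fetcher above, assuming network access to the
# FTP server it points at; the cache directory argument is hypothetical. The
# Bunch keys match the docstring.
data = fetch_localizer_first_level(data_dir='/tmp/nilearn_data')
print(data.epi_img)   # local path to s12069_swaloc1_corr.nii.gz
print(data.paradigm)  # local path to localizer_paradigm.csv
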
def test_get_dataset_dir():
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('NILEARN_DATA', None)
    os.environ.pop('NILEARN_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/nilearn_data')
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'test_nilearn_data')
    os.environ['NILEARN_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = datasets._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    # Verify exception is raised on read-only directories
    no_write = os.path.join(tmpdir, 'no_write')
    os.makedirs(no_write)
    os.chmod(no_write, 0o400)
    assert_raises_regexp(OSError, 'Permission denied',
                         datasets._get_dataset_dir, 'test', no_write,
                         verbose=0)

    # Verify exception for a path which exists and is a file
    test_file = os.path.join(tmpdir, 'some_file')
    with open(test_file, 'w') as out:
        out.write('abcfeg')
    assert_raises_regexp(OSError, 'Not a directory',
                         datasets._get_dataset_dir, 'test', test_file,
                         verbose=0)

def fetch_fiac_first_level(data_dir=None, verbose=1):
    """ Download a first-level fiac fMRI dataset (2 sessions)

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.
    """
    data_dir = _get_dataset_dir('', data_dir=data_dir, verbose=verbose)

    def _glob_fiac_data():
        """glob data from subject_dir."""
        _subject_data = {}
        subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0')
        for session in [1, 2]:
            # glob func data for this session
            session_func = os.path.join(subject_dir,
                                        'run%i.nii.gz' % session)
            if not os.path.isfile(session_func):
                print('Missing functional scan for session %i.' % session)
                return None
            _subject_data['func%i' % session] = session_func

            # glob design matrix .npz file
            sess_dmtx = os.path.join(subject_dir,
                                     'run%i_design.npz' % session)
            if not os.path.isfile(sess_dmtx):
                print('Missing session file: %s' % sess_dmtx)
                return None
            _subject_data['design_matrix%i' % session] = sess_dmtx

        # glob for mask data
        mask = os.path.join(subject_dir, 'mask.nii.gz')
        if not os.path.isfile(mask):
            print('Missing mask image.')
            return None
        _subject_data['mask'] = mask

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fiac_data()
    if data is not None:
        return data

    # No. Download the data
    print('Data absent, downloading...')
    url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz'

    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print('Archive corrupted, trying to download it again.')
        return fetch_fiac_first_level(data_dir=data_dir)

    return _glob_fiac_data()

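# A minimal usage sketch for the FIAC fetcher above, assuming network access
# to nipy.sourceforge.net and numpy installed. The attribute names follow the
# keys built in _glob_fiac_data.
import numpy as np

subject_data = fetch_fiac_first_level()        # downloads on the first call
design = np.load(subject_data.design_matrix1)  # run 1 design matrix (.npz)
print(subject_data.func1, subject_data.mask)
print(design.files)                            # arrays stored in the archive
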
def fetch_abide_movements(data_dir=None, n_subjects=None, sort=True,
                          verbose=0, **kwargs):
    """ Load ABIDE dataset

    The ABIDE dataset must be installed in the data_dir (or NILEARN_DATA
    env) into an 'abide_movements' folder. The Phenotypic information file
    should be in this folder too.

    Parameters
    ----------
    data_dir: string, optional
        directory where the data has been installed (defaults to the
        nilearn data directory).

    n_subjects: integer, optional
        if given, keep only the first n_subjects that have movement files.

    sort: boolean, optional
        if True, restrict subjects to the pre-selected list shipped in
        sort.csv.

    SUB_ID: list of integers in [50001, 50607], optional
        Ids of the subjects to be loaded.

    DX_GROUP: integer in {1, 2}, optional
        1 is autism, 2 is control

    DSM_IV_TR: integer in [0, 4], optional
        0 is control, 1 is autism, 2 is Asperger, 3 is PDD-NOS,
        4 is Asperger or PDD-NOS

    AGE_AT_SCAN: float in [6.47, 64], optional
        Age of the subject

    SEX: integer in {1, 2}, optional
        1 is male, 2 is female

    HANDEDNESS_CATEGORY: string in {'R', 'L', 'Mixed', 'Ambi'}, optional
        R = Right, L = Left, Ambi = Ambidextrous

    HANDEDNESS_SCORE: integer in [-100, 100], optional
        Positive = Right, Negative = Left, 0 = Ambidextrous
    """
    name_csv = 'Phenotypic_V1_0b.csv'
    dataset_dir = _get_dataset_dir('abide_movements', data_dir=data_dir)
    # path_csv = _fetch_files('abide_movements', [(name_csv,
    #     'file:' + os.path.join('dataset', name_csv), {})],
    #     data_dir=data_dir)[0]
    path_csv = _fetch_files('abide_movements', [(name_csv,
        'file:' + os.path.join('dataset', name_csv), {})])[0]

    # The situation is a bit complicated here: fetching 'sort.csv' out of the
    # abide_movements.tgz archive (with uncompress=True) is also what
    # downloads and unpacks the movement files themselves.
    sort_csv = _fetch_files('abide_movements', [('sort.csv',
        'file:' + os.path.join('dataset', 'abide_movements.tgz'),
        {'uncompress': True})])[0]
    sort_csv = np.genfromtxt(sort_csv, delimiter=',', dtype=None)

    pheno = np.genfromtxt(path_csv, names=True, delimiter=',', dtype=None)
    if sort:
        pheno = pheno[_filter_columns(pheno, {
            'SUB_ID': sort_csv[sort_csv['f2'] == 1]['f1']})]
    filt = _filter_columns(pheno, kwargs)
    pheno = pheno[filt]

    site_id_to_path = {
        'CALTECH': 'Caltech',
        'CMU': 'CMU',
        'KKI': 'KKI',
        'LEUVEN_1': 'Leuven',
        'LEUVEN_2': 'Leuven',
        'MAX_MUN': 'MaxMun',
        'NYU': 'NYU',
        'OHSU': 'OHSU',
        'OLIN': 'Olin',
        'PITT': 'Pitt',
        'SBL': 'SBL',
        'SDSU': 'SDSU',
        'STANFORD': 'Stanford',
        'TRINITY': 'Trinity',
        'UCLA_1': 'UCLA',
        'UCLA_2': 'UCLA',
        'UM_1': 'UM',
        'UM_2': 'UM',
        'USM': 'USM',
        'YALE': 'Yale'
    }

    # Get the movement files for all remaining subjects
    movement = []
    filt = np.zeros(pheno.shape, dtype=bool)
    for i, (site, id) in enumerate(pheno[['SITE_ID', 'SUB_ID']]):
        folder = site_id_to_path[site] + '_' + str(id)
        base = os.path.join(dataset_dir, folder)
        mov = os.path.join(base, 'rp_deleteorient_rest.txt')
        if os.path.exists(mov):
            movement.append(np.loadtxt(mov))
            filt[i] = True
        else:
            filt[i] = False
    pheno = pheno[filt]

    # Crop subjects if needed
    if n_subjects is not None:
        pheno = pheno[:n_subjects]
        movement = movement[:n_subjects]

    return Bunch(pheno=pheno, movement=movement)

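# A minimal usage sketch for the ABIDE movement loader above, assuming the
# movement files and Phenotypic_V1_0b.csv are already installed as described
# in the docstring. Keyword arguments are the phenotypic filters listed there.
abide = fetch_abide_movements(DX_GROUP=1, n_subjects=10)  # autism group only
print(len(abide.movement))    # one movement-parameter array per kept subject
print(abide.pheno['SUB_ID'])
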
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id="sub001", verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_auditory_data():
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None

        subject_data = {}
        for file_name in SPM_AUDITORY_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path):
                subject_data[file_name] = file_path
            else:
                print("%s missing from filelist!" % file_name)
                return None

        _subject_data = {}
        _subject_data["func"] = sorted(
            [subject_data[x] for x in subject_data.keys()
             if re.match(r"^fM00223_0\d\d\.img$", os.path.basename(x))])

        # volumes for this dataset are of shape (64, 64, 64, 1); drop the
        # trailing singleton time axis
        for x in _subject_data["func"]:
            vol = nibabel.load(x)
            if len(vol.shape) == 4:
                vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                          vol.get_affine())
                nibabel.save(vol, x)

        _subject_data["anat"] = [subject_data[x] for x in subject_data.keys()
                                 if re.match(r"^sM00223_002\.img$",
                                             os.path.basename(x))][0]

        # ... same thing for anat
        vol = nibabel.load(_subject_data["anat"])
        if len(vol.shape) == 4:
            vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                      vol.get_affine())
            nibabel.save(vol, _subject_data["anat"])

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_auditory_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)

    return _glob_spm_auditory_data()

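# A minimal usage sketch for the SPM auditory fetcher above, assuming network
# access to www.fil.ion.ucl.ac.uk and nibabel installed; the cache directory
# is hypothetical. Later calls only glob the already-downloaded files.
subject_data = fetch_spm_auditory(data_dir='/tmp/spm_data')
print(len(subject_data.func))   # number of fM00223_*.img EPI volumes
print(subject_data.anat)        # path to the sM00223_002.img structural scan
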
def fetch_spm_multimodal_fmri(data_dir=None, data_name="spm_multimodal_fmri",
                              subject_id="sub001", verbose=1):
    """Fetcher for Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_multimodal_fmri_data():
        """glob data from subject_dir."""
        _subject_data = {'slice_order': 'descending'}

        for session in range(2):
            # glob func data for session `session + 1`
            session_func = sorted(glob.glob(
                os.path.join(
                    subject_dir,
                    ("fMRI/Session%i/fMETHODS-000%i-*-01.img" % (
                        session + 1, session + 5)))))
            if len(session_func) < 390:
                print("Missing %i functional scans for session %i." % (
                    390 - len(session_func), session + 1))
                return None
            _subject_data['func%i' % (session + 1)] = session_func

            # glob trials .mat file
            sess_trials = os.path.join(
                subject_dir,
                "fMRI/trials_ses%i.mat" % (session + 1))
            if not os.path.isfile(sess_trials):
                print("Missing session file: %s" % sess_trials)
                return None
            _subject_data['trials_ses%i' % (session + 1)] = sess_trials

        # glob for anat data
        anat = os.path.join(subject_dir, "sMRI/smri.img")
        if not os.path.isfile(anat):
            print("Missing structural image.")
            return None
        _subject_data["anat"] = anat

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_multimodal_fmri_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),
        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]
    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name="",
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data()

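# A minimal usage sketch for the multimodal face fetcher above, assuming
# network access and enough disk space for the two zip archives it downloads.
# The glob helper expects 390 scans per session (see the length check above).
subject_data = fetch_spm_multimodal_fmri()
print(len(subject_data.func1), len(subject_data.func2))
print(subject_data.trials_ses1, subject_data.anat, subject_data.slice_order)
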