def test_get_dataset_dir():
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('NILEARN_DATA', None)
    os.environ.pop('NILEARN_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/nilearn_data')
    data_dir = utils._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tst.tmpdir, 'test_nilearn_data')
    os.environ['NILEARN_DATA'] = expected_base_dir
    data_dir = utils._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tst.tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = utils._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tst.tmpdir, 'env_data')
    expected_dataset_dir = os.path.join(expected_base_dir, 'test')
    data_dir = utils._get_dataset_dir(
        'test', default_paths=[expected_dataset_dir], verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    no_write = os.path.join(tst.tmpdir, 'no_write')
    os.makedirs(no_write)
    os.chmod(no_write, 0o400)

    expected_base_dir = os.path.join(tst.tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = utils._get_dataset_dir('test', default_paths=[no_write],
                                      verbose=0)
    # Non-writeable dir is returned because dataset may be in there.
    assert_equal(data_dir, no_write)
    assert os.path.exists(data_dir)
    # Set back write permissions in order to be able to remove the file
    os.chmod(no_write, 0o600)
    shutil.rmtree(data_dir)

    # Verify exception for a path which exists and is a file
    test_file = os.path.join(tst.tmpdir, 'some_file')
    with open(test_file, 'w') as out:
        out.write('abcfeg')
    assert_raises_regex(OSError,
                        'Nilearn tried to store the dataset '
                        'in the following directories, but',
                        utils._get_dataset_dir,
                        'test', test_file, verbose=0)

def load_camcan_all_without_sessions(data_dir, read=False, verbose=1):
    """Grab all timeseries paths of camcan data without any filtering.
    """
    dataset_name = 'camcan'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'participant_data.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = os.path.join(data_dir, '*.csv')
    timeseries_paths = glob.glob(paths)

    if not read:
        return Bunch(timeseries_paths=timeseries_paths,
                     phenotypic_path=phenotypic_path)

    timeseries_data = []
    for path in timeseries_paths:
        data = pd.read_csv(path)
        data = data.drop('Unnamed: 0', axis=1)
        timeseries_data.append(data)
    return Bunch(timeseries_data=timeseries_data,
                 phenotypic_data=pd.read_csv(phenotypic_path))

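# A minimal usage sketch for the CamCAN loader above, added for illustration.
# '/path/to/data' is a hypothetical stand-in for a base directory holding the
# 'camcan/participant_data.csv' and 'camcan/timeseries/*.csv' layout the
# loader expects:
def _example_load_camcan(data_dir='/path/to/data'):
    bunch = load_camcan_all_without_sessions(data_dir, read=False)
    print('%d timeseries files found' % len(bunch.timeseries_paths))
    print('phenotypic csv: %s' % bunch.phenotypic_path)
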
def fetch_hcp_rest(data_dir, n_subjects=40):
    dataset_name = 'HCP'
    source_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=0)
    extra_dir = _get_dataset_dir('HCP_extra', data_dir=data_dir, verbose=0)
    mask = join(extra_dir, 'mask_img.nii.gz')
    behavioral_df = _fetch_hcp_behavioral_data(join(extra_dir, 'behavioral'))
    func = []
    meta = []
    ids = []

    list_dir = glob.glob(join(source_dir, '*/*/MNINonLinear/Results'))
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = dirpath_split[-3]
        serie_id = dirpath_split[-4]
        subject_id = int(subject_id)
        try:
            this_behavioral = behavioral_df.loc[subject_id]
        except KeyError:
            # Ignore subjects without behavioral data
            continue
        ids.append(subject_id)

        kwargs = {'subject_id': subject_id, 'serie_id': serie_id}
        meta.append(kwargs)
        subject_func = []
        for filename in os.listdir(dirpath):
            name, ext = os.path.splitext(filename)
            if name in ('rfMRI_REST1_RL', 'rfMRI_REST1_LR',
                        'rfMRI_REST2_RL', 'rfMRI_REST2_LR'):
                filename = join(dirpath, filename, filename + '.nii.gz')
                subject_func.append(filename)
        func.append(subject_func)

    results = {'func': func, 'meta': meta,
               'mask': mask,
               'description': 'Human Connectome Project',
               'behavioral': behavioral_df.loc[ids]}
    return Bunch(**results)

def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        epi_img: the input 4D image
        paradigm: a csv file describing the paradigm
    """
    url = 'ftp://ftp.cea.fr/pub/dsv/madic/download/nipy'

    dataset_name = "localizer_first_level"
    files = dict(epi_img="s12069_swaloc1_corr.nii.gz",
                 paradigm="localizer_paradigm.csv")
    # The options needed for _fetch_files
    options = [(filename, os.path.join(url, filename), {})
               for _, filename in sorted(files.items())]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    sub_files = _fetch_files(data_dir, options, resume=True,
                             verbose=verbose)

    params = dict(zip(sorted(files.keys()), sub_files))

    return Bunch(**params)

def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        it will simply be globbed.

    Returns
    -------
    data: sklearn.utils.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(
                        os.path.join(subject_dir, 'feeds/data'))
                else:
                    print("%s missing from filelist!" % file_name)
                    return None
        return Bunch(data_dir=data_dir,
                     func=os.path.join(subject_dir, "fmri.nii.gz"),
                     anat=os.path.join(subject_dir,
                                       "structural_brain.nii.gz"))

    # maybe data_dir already contains the data ?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    # attempt the download twice, to guard against transient failures
    for i in range(2):
        _fetch_files(data_dir, [("feeds", url, {"uncompress": True,
                                                "move": "fsl.tar"})])
    return _glob_fsl_feeds_data(data_dir)

def fetch_craddock_adhd_200_parcellations(data_dir=None, verbose=1):
    """These are the parcellations from the Athena Pipeline of the ADHD 200
    preprocessing initiative. 200 and 400 ROI atlases were generated using
    2-level parcellation of 650 individuals from the ADHD 200 Sample.

    Parameters
    ----------
    data_dir : str
        Directory where the data should be downloaded.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        parcellations_200, parcellations_400
    """
    url = 'http://www.nitrc.org/frs/download.php/5906/ADHD200_parcellations.tar.gz'
    opts = {'uncompress': True}

    dataset_name = 'craddock_ADHD200_parcellations'
    filenames = [("ADHD200_parcellate_200.nii.gz", url, opts),
                 ("ADHD200_parcellate_400.nii.gz", url, opts)]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)
    keys = ("parcellations_200", "parcellations_400")
    params = dict(list(zip(keys, files)))
    return Bunch(**params)

def fetch_reduced_loadings(data_dir=None, url=None, verbose=False,
                           resume=True):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/loadings/'
    data_dir = get_data_dir(data_dir)
    dataset_name = 'loadings'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    keys = STUDY_LIST
    paths = ['data_%s.pt' % key for key in keys]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]
    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)
    params = {key: file for key, file in zip(keys, files)}

    fdescr = (
        "Z-statistic loadings over a dictionary of 453 components covering "
        "grey-matter `modl_atlas['components_512_gm']` "
        "for 35 different task fMRI studies.")

    params['description'] = fdescr
    params['data_dir'] = data_dir
    return params

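# Hypothetical usage sketch for the loadings fetcher above (assumes network
# access and a writable data directory; the study keys come from STUDY_LIST):
def _example_fetch_reduced_loadings():
    loadings = fetch_reduced_loadings()
    studies = [k for k in loadings if k not in ('description', 'data_dir')]
    print('%d studies fetched, e.g. %s' % (len(studies), studies[:3]))
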
def fetch_fiac_first_level(data_dir=None, verbose=1):
    """ Download a first-level fiac fMRI dataset (2 sessions)

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.
    """
    data_dir = _get_dataset_dir('fiac_nistats', data_dir=data_dir,
                                verbose=verbose)

    def _glob_fiac_data():
        """glob data from subject_dir."""
        _subject_data = {}
        subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0')
        for session in [1, 2]:
            # glob func data for this session
            session_func = os.path.join(subject_dir,
                                        'run%i.nii.gz' % session)
            if not os.path.isfile(session_func):
                print('Missing functional scan for session %i.' % session)
                return None
            _subject_data['func%i' % session] = session_func

            # glob design matrix .npz file
            sess_dmtx = os.path.join(subject_dir,
                                     'run%i_design.npz' % session)
            if not os.path.isfile(sess_dmtx):
                print('Missing session file: %s' % sess_dmtx)
                return None
            _subject_data['design_matrix%i' % session] = sess_dmtx

        # glob for mask data
        mask = os.path.join(subject_dir, 'mask.nii.gz')
        if not os.path.isfile(mask):
            print('Missing mask image.')
            return None
        _subject_data['mask'] = mask
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fiac_data()
    if data is not None:
        return data

    # No. Download the data
    print('Data absent, downloading...')
    url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz'
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print('Archive corrupted, trying to download it again.')
        return fetch_fiac_first_level(data_dir=data_dir)
    return _glob_fiac_data()

def test_fetch_openneuro_dataset():
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=tst.tmpdir,
                                verbose=1)
    url_file = os.path.join(data_dir, 'urls.json')

    # Prepare url files for subject and filter tests
    urls = [
        data_prefix + '/stuff.html',
        data_prefix + '/sub-xxx.html',
        data_prefix + '/sub-yyy.html',
        data_prefix + '/sub-xxx/ses-01_task-rest.txt',
        data_prefix + '/sub-xxx/ses-01_task-other.txt',
        data_prefix + '/sub-xxx/ses-02_task-rest.txt',
        data_prefix + '/sub-xxx/ses-02_task-other.txt',
        data_prefix + '/sub-yyy/ses-01.txt',
        data_prefix + '/sub-yyy/ses-02.txt'
    ]
    json.dump(urls, open(url_file, 'w'))

    # Only 1 subject and non-subject-specific files get downloaded
    datadir, dl_files = datasets.fetch_openneuro_dataset(
        urls, tst.tmpdir, dataset_version)
    assert_true(isinstance(datadir, _basestring))
    assert_true(isinstance(dl_files, list))
    assert_true(len(dl_files) == 9)

def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, with the keys:
        epi_img: the input 4D image
        events: a csv file describing the paradigm
    """
    url = 'https://osf.io/2bqxn/download'
    epi_img = 'sub-12069_task-localizer_space-MNI305.nii.gz'
    events = 'sub-12069_task-localizer_events.tsv'
    opts = {'uncompress': True}
    options = ('epi_img', 'events')
    dir_ = 'localizer_first_level'
    filenames = [(os.path.join(dir_, name), url, opts)
                 for name in [epi_img, events]]

    dataset_name = 'localizer_first_level'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)

    params = dict(list(zip(options, files)))
    return Bunch(**params)

def test_make_fresh_openneuro_dataset_urls_index():
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=tst.tmpdir,
                                verbose=1)
    url_file = os.path.join(
        data_dir, 'nistats_fetcher_openneuro_dataset_urls.json',
    )

    # Prepare url files for subject and filter tests
    file_list = [
        data_prefix + '/stuff.html',
        data_prefix + '/sub-xxx.html',
        data_prefix + '/sub-yyy.html',
        data_prefix + '/sub-xxx/ses-01_task-rest.txt',
        data_prefix + '/sub-xxx/ses-01_task-other.txt',
        data_prefix + '/sub-xxx/ses-02_task-rest.txt',
        data_prefix + '/sub-xxx/ses-02_task-other.txt',
        data_prefix + '/sub-yyy/ses-01.txt',
        data_prefix + '/sub-yyy/ses-02.txt'
    ]
    with open(url_file, 'w') as f:
        json.dump(file_list, f)

    # The complete file list is regenerated and returned
    datadir, dl_files = make_fresh_openneuro_dataset_urls_index(
        tst.tmpdir, dataset_version)
    assert_true(isinstance(datadir, _basestring))
    assert_true(isinstance(dl_files, list))
    assert_true(len(dl_files) == len(file_list))

def fetch_mist():
    """Download MIST parcellation n=122

    https://mniopenresearch.org/articles/1-3

    Returns
    -------
    maps : str
        Path to MIST parcellation

    labels : list of str
        Anatomical labels assigned to each label
    """
    url = 'https://ndownloader.figshare.com/files/9811081'
    opts = {'uncompress': True}
    data_dir = _get_dataset_dir('mist', data_dir=None, verbose=1)
    files = [(join('Release', 'Parcel_Information', 'MIST_122.csv'),
              url, opts),
             (join('Release', 'Parcellations', 'MIST_122.nii.gz'),
              url, opts)]
    files = _fetch_files(data_dir, files, resume=True, verbose=1)

    parcel_info = pd.read_csv(files[0], sep=';')
    names = parcel_info['name']
    df = pd.DataFrame(['Background'], columns=['name'])
    for i in range(names.shape[0]):
        df2 = pd.DataFrame([names[i]], columns=['name'])
        df = df.append(df2, ignore_index=True)
    return Bunch(maps=files[1], labels=df)

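# Hypothetical usage sketch for the MIST fetcher above (assumes network
# access; `maps` is the path to the 122-parcel NIfTI and `labels` a DataFrame
# with a 'Background' row prepended):
def _example_fetch_mist():
    mist = fetch_mist()
    print('atlas image: %s' % mist.maps)
    print('%d labels (including background)' % len(mist.labels))
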
def fetch_bids_langloc_dataset(data_dir=None, verbose=1):
    """Download language localizer example bids dataset.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    url = 'https://files.osf.io/v1/resources/9q7dv/providers/osfstorage/5888d9a76c613b01fc6acc4e'
    dataset_name = 'bids_langloc_example'
    main_folder = 'bids_langloc_dataset'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    if not os.path.exists(os.path.join(data_dir, main_folder)):
        downloaded_files = _fetch_files(data_dir, files_spec,
                                        resume=True, verbose=verbose)
        _uncompress_file(downloaded_files[0])
    main_path = os.path.join(data_dir, main_folder)
    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(main_path) for f in files]
    return os.path.join(data_dir, main_folder), sorted(file_list)

def load_hcp_confounds(data_dir, session, session_type, verbose=1):
    """Load confound paths of HCP data.

    `session` takes the integers 1 and 2, denoting REST1 and REST2.
    `session_type` takes 'LR' and 'RL'.
    """
    dataset_name = 'HCP'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    paths = os.path.join(data_dir, '*')
    confound_name = 'confounds'
    data_dir = os.path.join(data_dir, '*', confound_name)

    if session == 1:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST1_LR_Movement_Regressors.txt'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST1_RL_Movement_Regressors.txt'
    if session == 2:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST2_LR_Movement_Regressors.txt'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST2_RL_Movement_Regressors.txt'

    paths = os.path.join(data_dir, filename_session)
    paths = glob.glob(paths)
    return paths

def load_hcp(data_dir, session, session_type, atlas_name='msdl', verbose=1):
    """Load HCP timeseries data paths.

    `session` takes the integers 1 and 2, denoting REST1 and REST2.
    `session_type` takes 'LR' and 'RL'.
    """
    dataset_name = 'HCP'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    paths = os.path.join(data_dir, '*')
    data_dir = os.path.join(data_dir, '*', atlas_name)

    if session == 1:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST1_LR_raw'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST1_RL_raw'
    if session == 2:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST2_LR_raw'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST2_RL_raw'

    paths = os.path.join(data_dir, filename_session)
    paths = glob.glob(paths)
    return paths

def test_fetch_openneuro_dataset(request_mocker, tmp_path):
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0],
        dataset_version,
    )
    data_dir = _get_dataset_dir(
        data_prefix,
        data_dir=tmp_path,
        verbose=1,
    )
    url_file = os.path.join(data_dir, 'urls.json')

    # Prepare url files for subject and filter tests
    urls = [
        f'https://example.com/{data_prefix}/stuff.html',
        f'https://example.com/{data_prefix}/sub-xxx.html',
        f'https://example.com/{data_prefix}/sub-yyy.html',
        f'https://example.com/{data_prefix}/sub-xxx/ses-01_task-rest.txt',
        f'https://example.com/{data_prefix}/sub-xxx/ses-01_task-other.txt',
        f'https://example.com/{data_prefix}/sub-xxx/ses-02_task-rest.txt',
        f'https://example.com/{data_prefix}/sub-xxx/ses-02_task-other.txt',
        f'https://example.com/{data_prefix}/sub-yyy/ses-01.txt',
        f'https://example.com/{data_prefix}/sub-yyy/ses-02.txt',
    ]
    json.dump(urls, open(url_file, 'w'))

    # Only 1 subject and non-subject-specific files get downloaded
    datadir, dl_files = func.fetch_openneuro_dataset(
        urls, tmp_path, dataset_version)
    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)
    assert len(dl_files) == 9

    # URLs do not contain the data_prefix, which should raise a ValueError
    urls = [
        'https://example.com/stuff.html',
        'https://example.com/sub-yyy/ses-01.txt',
    ]
    with pytest.raises(ValueError, match='This indicates that the URLs'):
        func.fetch_openneuro_dataset(urls, tmp_path, dataset_version)

    # Try downloading a different dataset without providing URLs.
    # This should raise a warning and download ds000030.
    with pytest.warns(
        UserWarning,
        match='Downloading "ds000030_R1.0.4".',
    ):
        urls_path, urls = func.fetch_openneuro_dataset(
            urls=None,
            data_dir=tmp_path,
            dataset_version='ds500_v2',
            verbose=1,
        )

def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/hcp_mask.nii.gz'
    files = [('hcp_mask.nii.gz', url, {})]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume,
                         verbose=verbose)
    return files[0]

def fetch_openneuro_dataset_index(data_dir=None,
                                  dataset_version='ds000030_R1.0.4',
                                  verbose=1):
    """ Download a file with OpenNeuro BIDS dataset index.

    Downloading the index allows to explore the dataset directories
    to select specific files to download. The index is a sorted list
    of urls.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    urls_path: string
        Path to downloaded dataset index

    urls: list of string
        Sorted list of dataset directories
    """
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0],
        dataset_version,
    )
    data_dir = _get_dataset_dir(data_prefix,
                                data_dir=data_dir,
                                verbose=verbose)

    file_url = 'https://osf.io/86xj7/download'
    final_download_path = os.path.join(data_dir, 'urls.json')
    downloaded_file_path = _fetch_files(
        data_dir=data_dir,
        files=[(final_download_path,
                file_url,
                {'move': final_download_path})],
        resume=True)
    urls_path = downloaded_file_path[0]
    with open(urls_path, 'r') as json_file:
        urls = json.load(json_file)
    return urls_path, urls

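# Hypothetical usage sketch for the index fetcher above (assumes network
# access to the OSF mirror; prints a handful of file urls from the index):
def _example_fetch_openneuro_index():
    urls_path, urls = fetch_openneuro_dataset_index()
    print('index stored at %s' % urls_path)
    for url in urls[:5]:
        print(url)
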
def fetch_atlas_modl(data_dir=None,
                     url=None,
                     resume=True, verbose=1):
    """Download and load a multi-scale atlas computed using MODL over HCP900.

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a
        non-standard location. Default: None (meaning: default)

    url: string, optional
        Download URL of the dataset. Overwrite the default URL.
    """
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/modl/'

    data_dir = get_data_dir(data_dir)
    dataset_name = 'modl'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    keys = ['components_64',
            'components_128',
            'components_453_gm',
            'loadings_128_gm']
    paths = [
        'components_64.nii.gz',
        'components_128.nii.gz',
        'components_453_gm.nii.gz',
        'loadings_128_gm.npy',
    ]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    params = {key: file for key, file in zip(keys, files)}

    fdescr = ('Components computed using the MODL package, at various '
              'scales, from HCP900 data')

    params['description'] = fdescr
    params['data_dir'] = data_dir

    return Bunch(**params)

def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://www.amensch.fr/data/cogspaces/mask/'
    files = ['hcp_mask.nii.gz', 'icbm_gm_mask.nii.gz',
             'contrast_mask.nii.gz']
    if isinstance(url, str):
        url = [url] * len(files)

    files = [(f, u + f, {}) for f, u in zip(files, url)]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume,
                         verbose=verbose)
    return {'hcp': files[0], 'icbm_gm': files[1], 'contrast': files[2]}

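# Hypothetical usage sketch for the multi-mask fetcher above (assumes network
# access; the three keys mirror the `files` list inside the function):
def _example_fetch_masks():
    masks = fetch_mask()
    for name in ('hcp', 'icbm_gm', 'contrast'):
        print('%s mask: %s' % (name, masks[name]))
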
def fetch_language_localizer_demo_dataset(data_dir=None, verbose=1):
    """Download language localizer demo dataset.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    url = 'https://osf.io/nh987/download'
    main_folder = 'fMRI-language-localizer-demo-dataset'

    data_dir = _get_dataset_dir(main_folder, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    # Only download if directory is empty
    # Directory will have been created by the call to _get_dataset_dir above
    if not os.listdir(data_dir):
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
        _uncompress_file(downloaded_files[0])

    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(data_dir) for f in files]
    return data_dir, sorted(file_list)

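# Hypothetical usage sketch for the demo-dataset fetcher above (assumes
# network access on the first call; later calls just re-list the files):
def _example_fetch_language_localizer_demo():
    data_dir, files = fetch_language_localizer_demo_dataset()
    print('dataset at %s with %d files' % (data_dir, len(files)))
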
def fetch_emotion_ratings(data_dir=None, resume=True, verbose=1):
    """Download and loads emotion rating dataset from neurovault

    Args:
        data_dir: (string, optional). Path of the data directory.
                  Used to force data storage in a specified location.
                  Default: None

    Returns:
        out: (Brain_Data) Brain_Data object with downloaded data. X=metadata

    """
    collection = 1964
    dataset_name = "chang2015_emotion_ratings"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    metadata, files = download_collection(collection=collection,
                                          data_dir=data_dir, resume=resume,
                                          verbose=verbose)
    return Brain_Data(data=files, X=metadata)

def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id='sub001', verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)
    if not os.path.exists(subject_dir):
        _download_spm_auditory_data(data_dir, subject_dir, subject_id)
    spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir)
    try:
        spm_auditory_data['events']
    except KeyError:
        events_filepath = _make_path_events_file_spm_auditory_data(
            spm_auditory_data)
        if not os.path.isfile(events_filepath):
            _make_events_file_spm_auditory_data(events_filepath)
        spm_auditory_data['events'] = events_filepath
    return spm_auditory_data

def fetch_spm_multimodal_fmri(data_dir=None, data_name='spm_multimodal_fmri',
                              subject_id='sub001', verbose=1):
    """Fetcher for Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    # maybe data_dir already contains the data ?
    data = _glob_spm_multimodal_fmri_data(subject_dir)
    if data is not None:
        return data

    # No. Download the data
    return _download_data_spm_multimodal(data_dir, subject_dir, subject_id)

def download_collection(collection=None, data_dir=None, overwrite=False,
                        resume=True, verbose=1):
    """Download images and metadata from Neurovault collection

    Args:
        collection (int, optional): collection id. Defaults to None.
        data_dir (str, optional): data directory. Defaults to None.
        overwrite (bool, optional): overwrite data directory.
            Defaults to False.
        resume (bool, optional): resume download. Defaults to True.
        verbose (int, optional): print diagnostic messages. Defaults to 1.

    Returns:
        (pd.DataFrame, list): (DataFrame of image metadata, list of files
        from downloaded collection)

    """
    if data_dir is None:
        data_dir = _get_dataset_dir(str(collection), data_dir=data_dir,
                                    verbose=verbose)

    # Get collection Metadata
    metadata = get_collection_image_metadata(collection=collection,
                                             data_dir=data_dir)

    # Get images
    files = []
    for f in metadata["file"]:
        files.append(
            _fetch_file(f, data_dir, resume=resume, verbose=verbose,
                        overwrite=overwrite))

    return (metadata, files)

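# Hypothetical usage sketch for the collection downloader above (assumes
# network access; 504 is the Neurovault collection id used by fetch_pain):
def _example_download_collection():
    metadata, files = download_collection(collection=504)
    print('%d images downloaded' % len(files))
    print(metadata.head())
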
def fetch_atlas_gordon_2014(coordinate_system='MNI', resolution=2,
                            data_dir=None, url=None, resume=True, verbose=1):
    """Download and returns Gordon et al. 2014 atlas

    References
    ----------
    Gordon, E. M., Laumann, T. O., Adeyemo, B., Huckins, J. F., Kelley,
    W. M., & Petersen, S. E., "Generation and evaluation of a cortical area
    parcellation from resting-state correlations", 2014, Cerebral cortex,
    bhu239.

    See http://www.nil.wustl.edu/labs/petersen/Resources.html for more
    information on this parcellation.
    """
    if url is None:
        url = ("https://sites.wustl.edu/petersenschlaggarlab/files/"
               "2018/06/Parcels-19cwpgu.zip")
    dataset_name = "gordon_2014"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    valid_coordinate_systems = ['MNI', '711-2b']
    if coordinate_system not in valid_coordinate_systems:
        raise ValueError('Unknown coordinate system {0}. '
                         'Valid options are {1}'.format(
                             coordinate_system, valid_coordinate_systems))
    if resolution not in [1, 2, 3]:
        raise ValueError('Invalid resolution {0}. '
                         'Valid options are 1, 2 or 3.'.format(resolution))

    target_file = os.path.join('Parcels',
                               'Parcels_{0}_{1}.nii'.format(
                                   coordinate_system, str(resolution) * 3))
    atlas = _fetch_files(data_dir,
                         [(target_file, url, {"uncompress": True})],
                         resume=resume, verbose=verbose)
    return atlas

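# Hypothetical usage sketch for the Gordon atlas fetcher above (assumes
# network access; requests the 3 mm MNI parcellation, i.e.
# 'Parcels/Parcels_MNI_333.nii' inside the zip):
def _example_fetch_gordon():
    atlas = fetch_atlas_gordon_2014(coordinate_system='MNI', resolution=3)
    print(atlas)
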
def fetch_pain(data_dir=None, resume=True, verbose=1):
    """Download and loads pain dataset from neurovault

    Args:
        data_dir: (string, optional) Path of the data directory.
                  Used to force data storage in a specified location.
                  Default: None

    Returns:
        out: (Brain_Data) Brain_Data object with downloaded data. X=metadata

    """
    collection = 504
    dataset_name = 'chang2015_pain'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    metadata, files = download_collection(collection=collection,
                                          data_dir=data_dir, resume=resume,
                                          verbose=verbose)
    return Brain_Data(data=files, X=metadata)

def load_abide(data_dir, site_id='all', read=False, verbose=1):
    """Load ABIDE timeseries extracted using MSDL atlas + compcor=10

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory which should contain a folder
        named 'ABIDE'.

    site_id : str or list of str (case sensitive), optional='all'
        Site id within, 'PITT', 'OLIN', 'OHSU', 'SDSU', 'TRINITY',
        'UM_1', 'UM_2', 'USM', 'YALE', 'CMU', 'LEUVEN_1', 'LEUVEN_2',
        'KKI', 'NYU', 'STANFORD', 'UCLA_1', 'UCLA_2', 'MAX_MUN',
        'CALTECH', 'SBL'
        By default, data of all sites will be returned, site_id='all'.
        Total sites = 20

    read : bool
        Whether to read the files or not using pandas.

    verbose : int
        Verbosity level

    Returns
    -------
    data : Bunch
        if read == False
            timeseries_paths : list of str
                Paths to csv files containing timeseries data of each site.
            phenotypic_path : str
                Path to csv containing phenotypic data
        if read is set as True
            timeseries_data : list of numpy array
                Loaded with pandas and converted to numpy arrays to be
                compatible with nilearn and ConnectivityMeasure.
            file_ids : list of str
                Corresponding file ids
            dx_groups : list of int
                Corresponding DX_GROUP. 1 is autism, 2 is control
            phenotypic_data : pandas Data
                Loaded phenotypic data
    """
    VALID_IDS = ['Pitt', 'Olin', 'OHSU', 'SDSU', 'Trinity', 'UM_1', 'UM_2',
                 'USM', 'Yale', 'CMU', 'Leuven_1', 'Leuven_2', 'KKI', 'NYU',
                 'Stanford', 'UCLA_1', 'UCLA_2', 'MaxMun', 'Caltech', 'SBL']
    dataset_name = 'ABIDE'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir,
                                   'Phenotypic_V1_0b_preprocessed1.csv')

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = []

    if site_id == 'all':
        site_id = VALID_IDS

    if not isinstance(site_id, collections.abc.Iterable):
        site_id = [site_id, ]

    if isinstance(site_id, collections.abc.Iterable):
        for i, this_id in enumerate(site_id):
            if not isinstance(this_id, _basestring) \
                    or this_id not in VALID_IDS:
                raise ValueError('An invalid site_id={0} is provided. '
                                 'Valid site names are: {1}'
                                 .format(this_id, VALID_IDS))
            filepaths = glob.glob(os.path.join(data_dir, this_id + '*.csv'))
            paths.extend(filepaths)

    if read:
        phenotypic_data = pd.read_csv(phenotypic_path)
        timeseries_data = []
        file_ids = []
        dx_groups = []
        if len(paths) != 0:
            for path in paths:
                filename = os.path.splitext(os.path.split(path)[1])[0]
                this_id = filename.split('_timeseries')[0]
                file_ids.append(this_id)
                data = pd.read_csv(path)
                data = data.drop('Unnamed: 0', axis=1)
                timeseries_data.append(np.asarray(data))
                this_group = phenotypic_data[
                    phenotypic_data['FILE_ID'] == this_id]['DX_GROUP']
                dx_groups.append(this_group.values[0])
        return Bunch(timeseries_data=timeseries_data, file_ids=file_ids,
                     dx_groups=dx_groups, phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)

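# Hypothetical usage sketch for the ABIDE loader above ('/path/to/data' is a
# stand-in for a base directory containing the 'ABIDE' folder described in
# the docstring):
def _example_load_abide(data_dir='/path/to/data'):
    abide = load_abide(data_dir, site_id=['NYU', 'KKI'], read=True)
    print('%d subjects, groups: %s' % (len(abide.file_ids),
                                       set(abide.dx_groups)))
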
def fetch_openneuro_dataset_index(
        data_dir=None, dataset_version='ds000030_R1.0.4', verbose=1):
    """Download openneuro bids dataset index

    Downloading the index allows to explore the dataset directories
    to select specific files to download. The index is a sorted list
    of urls.

    Note: This function requires boto3 to be installed.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    urls_path: string
        Path to downloaded dataset index

    urls: list of string
        Sorted list of dataset directories
    """
    from botocore.handlers import disable_signing
    boto3 = _check_import_boto3("boto3")

    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir,
                                verbose=verbose)

    # First we download the url list from the uncompressed dataset version
    urls_path = os.path.join(data_dir, 'urls.json')
    urls = []
    if not os.path.exists(urls_path):

        def get_url(endpoint_url, bucket_name, file_key):
            return '{}/{}/{}'.format(endpoint_url, bucket_name, file_key)

        resource = boto3.resource('s3')
        resource.meta.client.meta.events.register('choose-signer.s3.*',
                                                  disable_signing)
        bucket = resource.Bucket('openneuro')

        for obj in bucket.objects.filter(Prefix=data_prefix):
            # get url of files (keys of directories end with '/')
            if obj.key[-1] != '/':
                urls.append(
                    get_url(bucket.meta.client.meta.endpoint_url,
                            bucket.name, obj.key))
        urls = sorted(urls)

        with open(urls_path, 'w') as json_file:
            json.dump(urls, json_file)
    else:
        with open(urls_path, 'r') as json_file:
            urls = json.load(json_file)

    return urls_path, urls

def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        it will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        subject_data = {}
        subject_data["subject_dir"] = subject_dir
        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
                subject_data[file_name] = file_path
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(
                        os.path.join(subject_dir, 'feeds/data'))
                else:
                    print("%s missing from filelist!" % file_name)
                    return None
        _subject_data = {"func": os.path.join(subject_dir,
                                              "fmri.nii.gz"),
                         "anat": os.path.join(subject_dir,
                                              "structural_brain.nii.gz")
                         }
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        os.remove(archive_path)
        return fetch_fsl_feeds(data_dir=data_dir, data_name="")
    return _glob_fsl_feeds_data(data_dir)

def fetch_kirby(subjects=range(2), sessions=[1], data_dir=None, url=None,
                resume=True, verbose=1):
    """Download and load the KIRBY multi-modal dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading 2 subjects.

    sessions: iterable of int, optional
        The sessions to load. Load only the first session by default.

    data_dir: string, optional
        Path of the data directory. Used to force data storage in a
        specified location. Default: None

    url: string, optional
        Override download URL. Used for test only (or if you setup a
        mirror of the data). Default: None

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :

        - 'anat': Paths to structural MPRAGE images
        - 'asl': Paths to ASL images
        - 'm0': Paths to ASL M0 images

    Notes
    ------
    This dataset is composed of 2 sessions of 21 participants (11 males) at
    3T. Imaging modalities include MPRAGE, FLAIR, DTI, resting state fMRI,
    B0 and B1 field maps, ASL, VASO, quantitative T1 mapping, quantitative
    T2 mapping, and magnetization transfer imaging. For each session, we
    only download MPRAGE and ASL data.

    More details about this dataset can be found here :
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3020263
    http://mri.kennedykrieger.org/databases.html

    Paper to cite
    -------------
    `Multi-Parametric Neuroimaging Reproducibility: A 3T Resource Study
    <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3020263>`_
    Bennett. A. Landman, Alan J. Huang, Aliya Gifford, Deepti S. Vikram,
    Issel Anne L. Lim, Jonathan A.D. Farrell, John A. Bogovic, Jun Hua,
    Min Chen, Samson Jarso, Seth A. Smith, Suresh Joel, Susumu Mori,
    James J. Pekar, Peter B. Barker, Jerry L. Prince, and Peter C.M.
    van Zijl. NeuroImage. (2010) NIHMS/PMC:252138
    doi:10.1016/j.neuroimage.2010.11.047

    Licence
    -------
    `BIRN Data License
    <http://www.nbirn.net/bdr/Data_Use_Agreement_09_19_07-1.pdf>`_
    """
    if url is None:
        url = 'https://www.nitrc.org/frs/downloadlink.php/'

    # Preliminary checks and declarations
    dataset_name = 'kirby'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    subject_ids = np.array([
        '849', '934', '679', '906', '913', '142', '127', '742', '422',
        '815', '906', '239', '916', '959', '814', '505', '959', '492',
        '239', '142', '815', '679', '800', '916', '849', '814', '800',
        '656', '742', '113', '913', '502', '113', '127', '505', '502',
        '934', '492', '346', '656', '346', '422'])
    nitrc_ids = np.arange(2201, 2243)
    ids = np.arange(1, 43)

    # Group indices by session
    _, indices1 = np.unique(subject_ids, return_index=True)
    subject_ids1 = subject_ids[sorted(indices1)]
    nitrc_ids1 = nitrc_ids[sorted(indices1)]
    ids1 = ids[sorted(indices1)]

    tuple_indices = [np.where(subject_ids == s)[0] for s in subject_ids1]
    indices2 = [idx1 if idx1 not in indices1 else idx2
                for (idx1, idx2) in tuple_indices]
    subject_ids2 = subject_ids[indices2]
    nitrc_ids2 = nitrc_ids[indices2]
    ids2 = ids[indices2]

    # Check arguments
    max_subjects = len(subject_ids)
    if max(subjects) > max_subjects:
        warnings.warn('Warning: there are only {0} subjects'.format(
            max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    n_subjects = len(subjects)

    archives = [
        [url + '{0}/KKI2009-{1:02}.tar.bz2'.format(nitrc_id, id)
         for (nitrc_id, id) in zip(nitrc_ids1, ids1)],
        [url + '{0}/KKI2009-{1:02}.tar.bz2'.format(nitrc_id, id)
         for (nitrc_id, id) in zip(nitrc_ids2, ids2)]
    ]
    anat1 = [os.path.join('session1', subject,
                          'KKI2009-{0:02}-MPRAGE.nii'.format(i))
             for subject, i in zip(subject_ids1, ids1)]
    anat2 = [os.path.join('session2', subject,
                          'KKI2009-{0:02}-MPRAGE.nii'.format(i))
             for subject, i in zip(subject_ids2, ids2)]
    asl1 = [os.path.join('session1', subject,
                         'KKI2009-{0:02}-ASL.nii'.format(i))
            for subject, i in zip(subject_ids1, ids1)]
    asl2 = [os.path.join('session2', subject,
                         'KKI2009-{0:02}-ASL.nii'.format(i))
            for subject, i in zip(subject_ids2, ids2)]
    m01 = [os.path.join('session1', subject,
                        'KKI2009-{0:02}-ASLM0.nii'.format(i))
           for subject, i in zip(subject_ids1, ids1)]
    m02 = [os.path.join('session2', subject,
                        'KKI2009-{0:02}-ASLM0.nii'.format(i))
           for subject, i in zip(subject_ids2, ids2)]
    target = [
        [os.path.join('session1', subject,
                      'KKI2009-{0:02}.tar.bz2'.format(id))
         for (subject, id) in zip(subject_ids1, ids1)],
        [os.path.join('session2', subject,
                      'KKI2009-{0:02}.tar.bz2'.format(id))
         for (subject, id) in zip(subject_ids2, ids2)]
    ]
    anat = [anat1, anat2]
    asl = [asl1, asl2]
    m0 = [m01, m02]

    source_anat = []
    source_asl = []
    source_m0 = []
    source_archives = []
    session = []
    target_archives = []
    for i in sessions:
        if not (i in [1, 2]):
            raise ValueError('KIRBY dataset session id must be in [1, 2]')
        source_anat += [anat[i - 1][subject] for subject in subjects]
        source_asl += [asl[i - 1][subject] for subject in subjects]
        source_m0 += [m0[i - 1][subject] for subject in subjects]
        source_archives += [archives[i - 1][subject]
                            for subject in subjects]
        target_archives += [target[i - 1][subject] for subject in subjects]

        session += [i] * n_subjects

    # Dataset description
    fdescr = _get_dataset_descr(dataset_name)

    # Call fetch_files once per subject.
    asl = []
    m0 = []
    anat = []
    for anat_u, asl_u, m0_u, archive, target in zip(
            source_anat, source_asl, source_m0, source_archives,
            target_archives):
        n, a, m = _fetch_files(
            data_dir,
            [(anat_u, archive, {'uncompress': True, 'move': target}),
             (asl_u, archive, {'uncompress': True, 'move': target}),
             (m0_u, archive, {'uncompress': True, 'move': target})],
            verbose=verbose)
        anat.append(n)
        asl.append(a)
        m0.append(m)

    return Bunch(anat=anat, asl=asl, m0=m0, session=session,
                 description=fdescr)

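# Hypothetical usage sketch for the KIRBY fetcher above (assumes network
# access to NITRC; downloads MPRAGE, ASL and M0 for the first two subjects
# of session 1):
def _example_fetch_kirby():
    kirby = fetch_kirby(subjects=range(2), sessions=[1])
    print(kirby.anat)
    print(kirby.asl)
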
def fetch_openneuro_dataset(
        urls=None, data_dir=None, dataset_version='ds000030_R1.0.4',
        verbose=1):
    """Download openneuro bids dataset.

    Note: This function requires boto3 to be installed.

    Parameters
    ----------
    urls: list of string, optional
        Openneuro url list of dataset files to download. If not specified
        all files of the specified dataset will be downloaded.

    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    boto3 = _check_import_boto3("boto3")
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir,
                                verbose=verbose)

    # if urls are not specified we download the complete dataset index
    if urls is None:
        _, urls = fetch_openneuro_dataset_index(
            data_dir=data_dir, dataset_version=dataset_version,
            verbose=verbose)

    # The files_spec needed for _fetch_files
    files_spec = []
    files_dir = []
    for url in urls:
        url_path = url.split(data_prefix + '/')[1]
        file_dir = os.path.join(data_dir, url_path)
        files_spec.append((os.path.basename(file_dir), url, {}))
        files_dir.append(os.path.dirname(file_dir))

    # download the files
    downloaded = []
    for file_spec, file_dir in zip(files_spec, files_dir):
        # Timeout errors are common in the s3 connection so we try to avoid
        # failure of the dataset download for a transient instability
        success = False
        download_attempts = 4
        while download_attempts > 0 and not success:
            try:
                downloaded_files = _fetch_files(
                    file_dir, [file_spec], resume=True, verbose=verbose)
                downloaded += downloaded_files
                success = True
            except Exception:
                download_attempts -= 1
        if not success:
            raise Exception('multiple failures downloading %s'
                            % file_spec[1])

    return data_dir, sorted(downloaded)

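# Hypothetical usage sketch for the dataset fetcher above (assumes boto3 is
# installed and network access is available; the subject label used in the
# filter is illustrative, any substring of the index urls works):
def _example_fetch_openneuro_subset():
    _, urls = fetch_openneuro_dataset_index()
    urls = [url for url in urls if 'sub-10159' in url]
    data_dir, files = fetch_openneuro_dataset(urls=urls)
    print('%d files under %s' % (len(files), data_dir))
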
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id="sub001", verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_auditory_data():
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        subject_data = {}
        for file_name in SPM_AUDITORY_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path):
                subject_data[file_name] = file_path
            else:
                print("%s missing from filelist!" % file_name)
                return None

        _subject_data = {}
        _subject_data["func"] = sorted(
            [subject_data[x] for x in subject_data.keys()
             if re.match(r"^fM00223_0\d\d\.img$", os.path.basename(x))])

        # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
        for x in _subject_data["func"]:
            vol = nibabel.load(x)
            if len(vol.shape) == 4:
                vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                          vol.get_affine())
                nibabel.save(vol, x)

        _subject_data["anat"] = [
            subject_data[x] for x in subject_data.keys()
            if re.match(r"^sM00223_002\.img$", os.path.basename(x))][0]

        # ... same thing for anat
        vol = nibabel.load(_subject_data["anat"])
        if len(vol.shape) == 4:
            vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                      vol.get_affine())
            nibabel.save(vol, _subject_data["anat"])

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_auditory_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)
    return _glob_spm_auditory_data()

def load_acpi(data_dir, site_id='all', read=False, verbose=1):
    """Load ACPI data (timeseries) extracted using the MSDL atlas
    + compcor=10.

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'ACPI'.

    site_id : int or list of int, optional (default='all')
        Site id(s) to load, among [3, 9, 20, 190, 1, 5] (6 sites in
        total). By default ('all'), data of all sites are returned.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each
            subject.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        subject_ids : list of int
            Subject ids.
        dx_groups : list of int
            DX_GROUP of each subject (1 - MJ user, 0 - no MJ).
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    VALID_IDS = [3, 9, 20, 190, 1, 5]
    dataset_name = 'ACPI'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'mta_1_phenotypic_data.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = []

    if site_id == 'all':
        site_id = VALID_IDS
    if not isinstance(site_id, (list, tuple)):
        site_id = [site_id, ]

    for this_id in site_id:
        if not isinstance(this_id, int) or this_id not in VALID_IDS:
            raise ValueError('An invalid site_id={0} is provided. '
                             'Valid site ids are: {1}'
                             .format(this_id, VALID_IDS))
        file_ids = phenotypic_data[phenotypic_data['SITE_ID'] == this_id]
        file_ids = file_ids['SUBID'].values
        for this_file_id in file_ids:
            filepath = glob.glob(os.path.join(
                data_dir, str(this_file_id) + '_timeseries.csv'))
            paths.extend(filepath)

    if read:
        timeseries_data = []
        subject_ids = []
        dx_groups = []
        for path in paths:
            filename = os.path.splitext(os.path.split(path)[1])[0]
            this_id = int(filename.split('_timeseries')[0])
            subject_ids.append(this_id)
            data = pd.read_csv(path)
            # The first (index) column of the CSV is unnamed; drop it.
            data = data.drop('Unnamed: 0', axis=1)
            timeseries_data.append(np.asarray(data))
            this_group = phenotypic_data[
                phenotypic_data['SUBID'] == this_id]['MJUser']
            dx_groups.append(this_group.values[0])
        return Bunch(timeseries_data=timeseries_data,
                     subject_ids=subject_ids,
                     dx_groups=dx_groups,
                     phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
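# Example usage of load_acpi (a sketch; '/path/to/data' is a placeholder,
# and the ACPI CSVs are assumed to already sit under
# <data_dir>/ACPI/timeseries). With read=True the arrays can go straight
# into nilearn's ConnectivityMeasure:
#
#     from nilearn.connectome import ConnectivityMeasure
#
#     acpi = load_acpi('/path/to/data', site_id=[3, 9], read=True)
#     conn = ConnectivityMeasure(kind='correlation')
#     matrices = conn.fit_transform(acpi.timeseries_data)
#     print(matrices.shape)    # (n_subjects, n_regions, n_regions)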
def load_cobre(data_dir, read=False, verbose=1):
    """Load COBRE data (timeseries) extracted using the MSDL atlas
    + (compcor=10 and motion regressors).

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'COBRE'.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each scan.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        file_ids : list of str
            File ids of each scan.
        dx_groups : list of str
            DX_GROUP of each scan (Schizophrenia, Control, Bipolar or
            Schizoaffective), or 'Did not match' when the id is absent
            from the phenotypic file.
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    dataset_name = 'COBRE'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(
        data_dir, '1139_Cobre_Neuropsych_V2_20160607.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = glob.glob(os.path.join(data_dir, '*.csv'))

    if read:
        timeseries_data = []
        file_ids = []
        dx_groups = []
        for path in paths:
            filename = os.path.splitext(os.path.split(path)[1])[0]
            this_id = filename.split('_timeseries')[0]
            file_ids.append(this_id)
            data = pd.read_csv(path)
            timeseries_data.append(np.asarray(data))
            # The phenotypic CSV has no headers for its first two columns:
            # 'Unnamed: 0' holds the scan id, 'Unnamed: 1' the diagnostic
            # group.
            this_group = phenotypic_data[
                (phenotypic_data['Unnamed: 0'] == this_id)]['Unnamed: 1']
            if np.any(this_group):
                dx_groups.append(this_group.values[0])
            else:
                dx_groups.append('Did not match')
        return Bunch(timeseries_data=timeseries_data,
                     file_ids=file_ids,
                     dx_groups=dx_groups,
                     phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
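# Example usage of load_cobre (a sketch; '/path/to/data' is a placeholder).
# The dx_groups labels can be used to split the scans by diagnosis:
#
#     cobre = load_cobre('/path/to/data', read=True)
#     controls = [ts for ts, dx in zip(cobre.timeseries_data,
#                                      cobre.dx_groups)
#                 if dx == 'Control']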
def load_camcan(data_dir, session_id='all', read=False, verbose=1):
    """Load CamCAN data (timeseries) extracted using the MSDL atlas
    + (compcor=10 and motion regressors).

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'camcan'.

    session_id : int or list of int, optional (default='all')
        Session id(s) to load, among [1, 2, 3, 4]. By default ('all'),
        data of all sessions are returned.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each
            subject.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        subject_ids : list of str
            Subject ids.
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    VALID_IDS = [1, 2, 3, 4]
    dataset_name = 'camcan'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'participant_data.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = []
    timeseries_data = []
    subject_ids = []
    session_names = {1: 'cbuid280_sess1', 2: 'cbuid280_sess2',
                     3: 'cbuid280_sess3', 4: 'cbuid280_sess4'}

    if session_id == 'all':
        session_id = VALID_IDS
    if not isinstance(session_id, (list, tuple)):
        session_id = [session_id, ]

    for this_id in session_id:
        if not isinstance(this_id, int) or this_id not in VALID_IDS:
            raise ValueError('An invalid session_id={0} is provided. '
                             'Valid session ids are: {1}'
                             .format(this_id, VALID_IDS))
        session_name = session_names[this_id]
        this_id_data = phenotypic_data[session_name]
        this_id_data = this_id_data[this_id_data.notnull()]
        for index in this_id_data.index.values:
            observation_id = phenotypic_data[
                (phenotypic_data[session_name] ==
                 this_id_data[index])]['Observations']
            filepath = glob.glob(os.path.join(
                data_dir,
                'sub-' + observation_id[index] + '_timeseries.csv'))
            if len(filepath) != 0:
                if read:
                    subject_ids.append(observation_id[index])
                    this_index_data = pd.read_csv(filepath[0])
                    # Drop the unnamed index column and convert to a
                    # numpy array, as documented above.
                    this_index_data = this_index_data.drop('Unnamed: 0',
                                                           axis=1)
                    timeseries_data.append(np.asarray(this_index_data))
                else:
                    paths.extend(filepath)

    if read:
        return Bunch(timeseries_data=timeseries_data,
                     subject_ids=subject_ids,
                     phenotypic_data=pd.read_csv(phenotypic_path))
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
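# Example usage of load_camcan (a sketch; '/path/to/data' is a
# placeholder). Sessions can be requested individually or all at once:
#
#     camcan = load_camcan('/path/to/data', session_id=[1, 2], read=True)
#     print(len(camcan.subject_ids), 'scans matched for sessions 1 and 2')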
def load_adnidod(data_dir, read=False, verbose=1):
    """Load ADNIDOD data (timeseries) extracted using the MSDL atlas
    + (compcor=10 and motion regressors).

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'ADNIDOD'.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each scan.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        scan_ids : list of str
            Scan ids.
        dx_groups : list of int
            Diagnosis of each scan (1 - PTSD, 0 - Control).
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    dataset_name = 'ADNIDOD'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'adnidod_demographic.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = glob.glob(os.path.join(data_dir, '*.csv'))

    if read:
        timeseries_data = []
        scan_ids = []
        dx_groups = []
        for path in paths:
            filename = os.path.splitext(os.path.split(path)[1])[0]
            this_id = filename.split('_timeseries')[0]
            scan_ids.append(this_id)
            data = pd.read_csv(path)
            # The first (index) column of the CSV is unnamed; drop it.
            data = data.drop('Unnamed: 0', axis=1)
            timeseries_data.append(np.asarray(data))
            this_group = phenotypic_data[
                phenotypic_data['ID_scan'] == this_id]['diagnosis']
            dx_groups.append(this_group.values[0])
        return Bunch(timeseries_data=timeseries_data,
                     scan_ids=scan_ids,
                     dx_groups=dx_groups,
                     phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
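# Example usage of load_adnidod (a sketch; '/path/to/data' is a
# placeholder). With read=True the result is classification-ready:
# timeseries as samples, dx_groups as labels (1 - PTSD, 0 - Control):
#
#     adnidod = load_adnidod('/path/to/data', read=True)
#     X, y = adnidod.timeseries_data, adnidod.dx_groups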
def fetch_spm_multimodal_fmri(data_dir=None, data_name="spm_multimodal_fmri",
                              subject_id="sub001", verbose=1):
    """Fetcher for the Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, it
        will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_multimodal_fmri_data():
        """Glob data from subject_dir."""
        _subject_data = {'slice_order': 'descending'}

        for s in range(2):
            # glob func data for session s + 1
            session_func = sorted(glob.glob(
                os.path.join(
                    subject_dir,
                    ("fMRI/Session%i/fMETHODS-000%i-*-01.img" % (
                        s + 1, s + 5)))))
            if len(session_func) < 390:
                print("Missing %i functional scans for session %i." % (
                    390 - len(session_func), s + 1))
                return None
            _subject_data['func%i' % (s + 1)] = session_func

            # glob trials .mat file
            sess_trials = os.path.join(
                subject_dir, "fMRI/trials_ses%i.mat" % (s + 1))
            if not os.path.isfile(sess_trials):
                print("Missing session file: %s" % sess_trials)
                return None
            _subject_data['trials_ses%i' % (s + 1)] = sess_trials

        # glob for anat data
        anat = os.path.join(subject_dir, "sMRI/smri.img")
        if not os.path.isfile(anat):
            print("Missing structural image.")
            return None
        _subject_data["anat"] = anat

        return Bunch(**_subject_data)

    # Maybe data_dir already contains the data?
    data = _glob_spm_multimodal_fmri_data()
    if data is not None:
        return data

    # No. Download the data.
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),
        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]
    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except Exception:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name="",
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data()
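# Example usage of fetch_spm_multimodal_fmri (a sketch; the first call
# downloads and uncompresses the two zip archives, later calls glob the
# cached copy):
#
#     data = fetch_spm_multimodal_fmri()
#     print(len(data.func1), len(data.func2))   # 390 scans per session
#     print(data.anat, data.trials_ses1)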