def test_get_data_dir(tmpdir):
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/modl_data')
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_data')
    os.environ['MODL_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_shared_data')
    os.environ['MODL_SHARED_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_data')
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)
    data_dir = get_data_dirs(expected_base_dir)[0]
    assert_equal(data_dir, expected_base_dir)
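
# Hedged sketch, not part of this module: a reference implementation of
# get_data_dirs that would satisfy the test above. The precedence order
# (explicit argument, then MODL_SHARED_DATA, then MODL_DATA, then the
# ~/modl_data default) is inferred from the assertions and mirrors
# nilearn's data-directory resolution; the real function may differ.
# The name _get_data_dirs_sketch is hypothetical.
def _get_data_dirs_sketch(data_dir=None):
    paths = []
    if data_dir is not None:
        # An explicit directory (possibly several, separated by os.pathsep)
        # always comes first
        paths.extend(str(data_dir).split(os.pathsep))
    if 'MODL_SHARED_DATA' in os.environ:
        paths.extend(os.environ['MODL_SHARED_DATA'].split(os.pathsep))
    if 'MODL_DATA' in os.environ:
        paths.extend(os.environ['MODL_DATA'].split(os.pathsep))
    # Fall back to a per-user directory
    paths.append(os.path.expanduser('~/modl_data'))
    return paths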
def get_hcp_data(raw=False, data_dir=None):
    data_dir = get_data_dirs(data_dir)[0]
    if not os.path.exists(join(data_dir, 'HCP_extra')):
        raise ValueError(
            'Please download the HCP_extra folder using'
            ' make download-hcp_extra first.')
    if raw:
        mask = join(data_dir, 'HCP_extra/mask_img.nii.gz')
        try:
            mapping = json.load(
                open(join(data_dir, 'HCP_unmasked/mapping.json'), 'r'))
        except FileNotFoundError:
            raise IOError(
                'Please unmask the data using hcp_prepare.py first.')
        func_filenames = sorted(list(mapping.values()))
    else:
        hcp_dataset = fetch_hcp_rest(data_dir=data_dir, n_subjects=500)
        mask = hcp_dataset.mask
        # List of 4D nifti files for each subject
        func_filenames = hcp_dataset.func
        # Flatten it
        func_filenames = [record for subject in func_filenames
                          for record in subject]
        # Print basic information on the dataset
        print('First functional nifti image (4D) is at: %s'
              % hcp_dataset.func[0])  # 4D data
    return mask, func_filenames
def prepare_hcp_raw_data(data_dir=None, dest_data_dir='HCP_unmasked',
                         n_jobs=1, smoothing_fwhm=3, n_subject=500):
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    dest_data_dir = join(data_dir, dest_data_dir)
    dataset = fetch_hcp_rest(data_dir=data_dir, n_subjects=n_subject)
    imgs = dataset.func
    # Flatten the per-subject lists of records
    imgs = [img for subject_imgs in imgs for img in subject_imgs]
    try:
        os.mkdir(dest_data_dir)
    except OSError:
        raise ValueError('%s already exists,'
                         ' please delete it manually before proceeding'
                         % dest_data_dir)
    mask = join(data_dir, 'HCP_extra', 'mask_img.nii.gz')
    masker = NiftiMasker(mask_img=mask, smoothing_fwhm=smoothing_fwhm,
                         standardize=True).fit()
    Parallel(n_jobs=n_jobs)(
        delayed(_single_mask)(masker, img, dest_data_dir, source_dir)
        for img in imgs)
    _gather(dest_data_dir)
def load_image(source, scale=1, gray=False, memory=Memory(cachedir=None)):
    data_dir = get_data_dirs()[0]
    if source == 'face':
        image = face(gray=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'lisboa':
        image = imread(join(data_dir, 'images', 'lisboa.jpg'), as_grey=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'aviris':
        image = open_image(
            join(data_dir, 'aviris',
                 'f100826t01p00r05rdn_b/'
                 'f100826t01p00r05rdn_b_sc01_ort_img.hdr'))
        image = np.array(image.open_memmap(), dtype=np.float32)
        good_bands = list(range(image.shape[2]))
        good_bands.remove(110)
        image = image[:, :, good_bands]
        indices = image == -50
        image[indices] = -1
        image[~indices] -= np.min(image[~indices])
        image[~indices] /= np.max(image[~indices])
        return image
    else:
        raise ValueError('Data source is not known')
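
# Hedged usage sketch: 'face' needs no local data (it ships with scipy),
# whereas 'lisboa' and 'aviris' assume the corresponding files exist under
# the data directory returned by get_data_dirs().
if __name__ == '__main__':
    img = load_image('face', gray=True)
    # Images come back as float32 arrays in [0, 1] with a trailing channel
    # axis, e.g. (768, 1024, 1) for the gray scipy face
    print(img.shape, img.dtype, img.min(), img.max())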
def fetch_hcp_task(data_dir=None, n_subjects=500):
    """Nilearn-like fetcher"""
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    extra_dir = join(data_dir, 'HCP_extra')
    mask = join(extra_dir, 'mask_img.nii.gz')
    func = []
    meta = []
    contrasts = []
    ids = []
    list_dir = sorted(glob.glob(join(source_dir,
                                     '*/*/MNINonLinear/Results')))
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = int(dirpath_split[-3])
        serie_id = dirpath_split[-4]
        ids.append(subject_id)
        kwargs = {'subject_id': subject_id, 'serie_id': serie_id}
        subject_func = []
        subject_contrasts = []
        for task_id in task_ids:
            task = tasks[task_id]
            task_name = task[0]
            contrast_idx = task[1]
            contrast = task[2]
            this_func = join(dirpath,
                             "tfMRI_%s/tfMRI_%s_hp200_s4_"
                             "level2vol.feat/cope%i.feat/"
                             "stats/zstat1.nii.gz"
                             % (task_name, task_name, contrast_idx))
            if os.path.exists(this_func):
                subject_contrasts.append(contrast)
                subject_func.append(this_func)
        meta.append(kwargs)
        contrasts.append(subject_contrasts)
        func.append(subject_func)
    results = {'func': func,
               'contrast': contrasts,
               'meta': meta,
               'mask': mask,
               'description': "Human connectome project",
               'contrasts_description': contrasts_description}
    return Bunch(**results)
def fetch_hcp_behavioral(data_dir=None, n_subjects=500):
    import pandas as pd
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    df = pd.read_csv(join(source_dir, 'restricted_scores.csv'))
    list_dir = sorted(glob.glob(join(source_dir,
                                     '*/*/MNINonLinear/Results')))
    subjects = []
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = int(dirpath_split[-3])
        subjects.append(subject_id)
    indices = [subject in subjects for subject in df['Subject']]
    df = df.loc[indices]
    return df
def load_movielens(version):
    data_home = get_data_dirs()[0]
    if version == "100k":
        path = os.path.join(data_home, "movielens100k", "movielens100k.pkl")
    elif version == "1m":
        path = os.path.join(data_home, "movielens1m", "movielens1m.pkl")
    elif version == "10m":
        path = os.path.join(data_home, "movielens10m", "movielens10m.pkl")
    else:
        raise ValueError("Invalid version of movielens.")
    # FIXME: make downloader
    if not os.path.exists(path):
        raise ValueError("Download the dataset using"
                         " 'make download-movielens%s' at project root."
                         % version)
    X = skjoblib.load(path)
    return X
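
# Hedged usage sketch: assumes the 100k pickle has already been fetched with
# 'make download-movielens100k'; the loaded object is whatever was pickled at
# preparation time (presumably a ratings matrix in this project's setup, but
# that is not guaranteed by this function alone).
if __name__ == '__main__':
    X = load_movielens('100k')
    print(type(X))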
def fetch_hcp_rest(data_dir=None, n_subjects=500):
    """Nilearn-like fetcher"""
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    extra_dir = join(data_dir, 'HCP_extra')
    mask = join(extra_dir, 'mask_img.nii.gz')
    func = []
    meta = []
    ids = []
    list_dir = sorted(glob.glob(join(source_dir,
                                     '*/*/MNINonLinear/Results')))
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = int(dirpath_split[-3])
        serie_id = dirpath_split[-4]
        ids.append(subject_id)
        kwargs = {'subject_id': subject_id, 'serie_id': serie_id}
        meta.append(kwargs)
        subject_func = []
        for filename in os.listdir(dirpath):
            name, ext = os.path.splitext(filename)
            if name in ('rfMRI_REST1_RL', 'rfMRI_REST1_LR',
                        'rfMRI_REST2_RL', 'rfMRI_REST2_LR'):
                filename = join(dirpath, filename, filename + '.nii.gz')
                subject_func.append(filename)
        func.append(subject_func)
    results = {'func': func,
               'meta': meta,
               'mask': mask,
               'description': "Human connectome project"}
    return Bunch(**results)
def load_rest_func(dataset='adhd', n_subjects=40, test_size=0.1, raw=False,
                   random_state=None):
    data_dir = get_data_dirs()[0]
    if dataset == 'adhd':
        adhd_dataset = datasets.fetch_adhd(n_subjects=n_subjects)
        mask = join(data_dir, 'ADHD_mask', 'mask_img.nii.gz')
        # List of 4D nifti files, one per subject
        data = adhd_dataset.func
    elif dataset == 'hcp':
        if not os.path.exists(join(data_dir, 'HCP_extra')):
            raise ValueError(
                'Please download the HCP_extra folder using'
                ' make download-hcp_extra first.')
        if raw:
            mask = join(data_dir, 'HCP_extra/mask_img.nii.gz')
            try:
                mapping = json.load(
                    open(join(data_dir, 'HCP_unmasked/mapping.json'), 'r'))
            except FileNotFoundError:
                raise ValueError(
                    'Please unmask the data using hcp_prepare.py first.')
            data = sorted(list(mapping.values()))
        else:
            hcp_dataset = fetch_hcp_rest(data_dir=data_dir,
                                         n_subjects=n_subjects)
            mask = hcp_dataset.mask
            # List of 4D nifti files for each subject
            data = hcp_dataset.func
            # Flatten it
            data = [record for subject in data for record in subject]
    else:
        raise NotImplementedError
    train_data, test_data = train_test_split(data, test_size=test_size,
                                             random_state=random_state)
    return train_data, test_data, mask
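
# Hedged usage sketch: split the ADHD rest runs into train and test lists of
# 4D nifti filenames. Assumes nilearn can fetch the ADHD dataset; the mask
# path is only reported, not opened.
if __name__ == '__main__':
    train, test, mask = load_rest_func('adhd', n_subjects=10, test_size=0.2,
                                       random_state=0)
    print(len(train), len(test), mask)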