Example #1
def fetch_openfmri(data_dir, dataset_id, force_download=False, verbose=1):
    files = {
        'ds001': ['ds001_raw'],
        'ds002': ['ds002_raw'],
        'ds003': ['ds003_raw'],
        'ds005': ['ds005_raw'],
        'ds006A': ['ds006A_raw'],
        'ds007': ['ds007_raw'],
        'ds008': ['ds008_raw'],
        'ds011': ['ds011_raw'],
        'ds017A': ['ds017A_raw'],
        'ds017B': ['ds017B_raw'],
        'ds051': ['ds051_raw'],
        'ds052': ['ds052_raw'],
        'ds101': ['ds101_raw'],
        'ds102': ['ds102_raw'],
        'ds105': ['ds105_raw'],
        'ds107': ['ds107_raw'],
        'ds108': ['ds108_raw_part1', 'ds108_raw_part2', 'ds108_raw_part3'],
        'ds109': ['ds109_raw'],
        'ds110': ['ds110_raw_part1', 'ds110_raw_part2', 'ds110_raw_part3',
                  'ds110_raw_part4', 'ds110_raw_part5', 'ds110_raw_part6']
        }

    if dataset_id not in files:
        raise ValueError('Unknown dataset %s' % dataset_id)

    base_url = 'http://openfmri.s3.amazonaws.com/tarballs/%s.tgz'
    urls = [(dataset_id, base_url % f, {'uncompress': True})
            for f in files[dataset_id]]
    output_dir = os.path.join(data_dir, dataset_id)
    # Download when the dataset is missing, or re-download when forced.
    if force_download or not os.path.exists(output_dir):
        _fetch_files(data_dir, urls, verbose=verbose)
    return output_dir
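
A minimal usage sketch, assuming the module-level imports used by the example (os, _fetch_files) are available and the legacy OpenFMRI S3 tarball URLs still resolve; the data directory below is purely illustrative:

    >>> out_dir = fetch_openfmri('/tmp/openfmri', 'ds105')  # hypothetical path
    >>> out_dir
    '/tmp/openfmri/ds105'
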
Example #2
def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1):
    """
    Fetches HCP standard mesh atlases for converting between FreeSurfer and HCP

    Parameters
    ----------
    data_dir : str, optional
        Path to use as data directory. If not specified, will check for
        environmental variable 'NNT_DATA'; if that is not set, will use
        `~/nnt-data` instead. Default: None
    url : str, optional
        URL from which to download data. Default: None
    resume : bool, optional
        Whether to attempt to resume partial download, if possible. Default:
        True
    verbose : int, optional
        Modifies verbosity of download, where higher numbers mean more updates.
        Default: 1

    Returns
    -------
    standards : str
        Filepath to standard_mesh_atlases directory
    """
    if url is None:
        url = 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip'
    dataset_name = 'standard_mesh_atlases'
    data_dir = _get_data_dir(data_dir=data_dir)
    opts = {'uncompress': True, 'move': '{}.zip'.format(dataset_name)}
    filenames = ['L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii']
    files = [(op.join(dataset_name, f), url, opts) for f in filenames]
    _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)

    return op.join(data_dir, dataset_name)
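
A short usage sketch, assuming the example's module-level helpers (op as an alias for os.path, _get_data_dir, _fetch_files) are importable; the returned directory is joined with the two sphere files the fetcher downloads:

    >>> standards = fetch_hcp_standards()
    >>> lh_sphere = op.join(standards, 'L.sphere.32k_fs_LR.surf.gii')
    >>> rh_sphere = op.join(standards, 'R.sphere.32k_fs_LR.surf.gii')
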
Example #3
def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.utils.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string. Path to the functional image
        - 'anat': string. Path to the anatomical (structural) image

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir.

        """

        if not os.path.exists(subject_dir):
            return None

        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(
                        os.path.join(subject_dir, 'feeds/data'))
                else:
                    print("%s missing from filelist!" % file_name)
                    return None
        return Bunch(data_dir=data_dir,
                     func=os.path.join(subject_dir, "fmri.nii.gz"),
                     anat=os.path.join(subject_dir, "structural_brain.nii.gz"))

    # maybe data_dir already contains the data?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    for i in range(2):
        _fetch_files(data_dir, [("feeds", url, {
            "uncompress": True,
            "move": "fsl.tar"
        })])
    return _glob_fsl_feeds_data(data_dir)
Example #5
def fetch_rat_waxholm(template_dir="~/.samri_files/templates/rat/waxholm/",
                      verbose=1):
    """Download and load waxholm atlas for Sprague Dawley rat

	Returns
	-------
	dict
		Dictionary containing template, atlas, labels
		template - mri anatomy file; nifti file
		atlas - pixelvalues of regions are grouped together, with corresponding labels in labels.ccv; nifti file
		labels - labels annotating brain regions for pixelgroups in atlas; csv file


	References
	----------
	.. [1] 'Papp, Eszter A., et al. "Waxholm Space atlas of the Sprague Dawley rat brain." NeuroImage 97 (2014): 374-386.'
	.. [2] https://www.nitrc.org/projects/whs-sd-atlas

	"""
    from nilearn.datasets.utils import _fetch_files
    # Get template
    url_template = 'https://www.nitrc.org/frs/download.php/9423/WHS_SD_rat_T2star_v1.01.nii.gz'
    template = _fetch_files(
        path.abspath(path.expanduser(template_dir)),
        [('WHS_SD_rat_T2star_v1.01.nii.gz', url_template, {})],
        verbose=verbose)[0]

    # Get atlas
    url_atlas = 'https://www.nitrc.org/frs/download.php/9438/WHS_SD_rat_atlas_v2.nii.gz'
    atlas = _fetch_files(path.abspath(path.expanduser(template_dir)),
                         [('WHS_SD_rat_atlas_v2.nii.gz', url_atlas, {})],
                         verbose=verbose)[0]

    # Get labels
    url_labels = 'https://www.nitrc.org/frs/download.php/9439/WHS_SD_rat_atlas_v2.label'
    labels = _fetch_files(path.abspath(path.expanduser(template_dir)),
                          [('WHS_SD_rat_atlas_v2.label', url_labels, {})],
                          verbose=verbose)[0]

    # resample template
    commands = [
        "ResampleImage 3 WHS_SD_rat_T2star_v1.01.nii.gz _200micron_WHS_SD_rat_T2star_v1.01.nii.gz 0.2x0.2x0.2 size=1 spacing=0 4",
        "SmoothImage 3 _200micron_WHS_SD_rat_T2star_v1.01.nii.gz 0.4 200micron_WHS_SD_rat_T2star_v1.01.nii.gz",
        "rm _200micron_WHS_SD_rat_T2star_v1.01.nii.gz",
        "ResampleImage 3 WHS_SD_rat_atlas_v2.nii.gz _200micron_WHS_SD_rat_atlas_v2.nii.gz 0.2x0.2x0.2 size=1 spacing=0 4",
        "SmoothImage 3 _200micron_WHS_SD_rat_atlas_v2.nii.gz 0.4 200micron_WHS_SD_rat_atlas_v2.nii.gz",
        "rm _200micron_WHS_SD_rat_atlas_v2.nii.gz",
    ]

    for command in commands:
        p = subprocess.Popen(command.split(),
                             cwd=path.abspath(path.expanduser(template_dir)),
                             stdout=subprocess.PIPE)
        p.wait()

    return dict([("template", path.abspath(path.expanduser(template_dir)) +
                  "200micron_WHS_SD_rat_T2star_v1.01.nii.gz"),
                 ("atlas", path.abspath(path.expanduser(template_dir)) +
                  "200micron_WHS_SD_rat_atlas_v2.nii.gz"), ("labels", labels)])
Example #6
def fetch_rat_waxholm(template_dir="~/.samri_files/templates/rat/waxholm/", verbose=1):
	"""
	Download and load the Waxholm Space atlas for the Sprague Dawley rat.

	Returns
	-------
	dict
		Dictionary containing template, atlas, and labels:
		template - MRI anatomy image; NIfTI file
		atlas - pixel values of regions are grouped together, with
			corresponding labels in the labels file; NIfTI file
		labels - labels annotating brain regions for the pixel groups in the
			atlas; .label file


	Notes
	-----
	Please deprecate this function.
	We should distribute dependencies such as atlases in the form of unambiguously managed packages, and not data fetched from the web at runtime.

	References
	----------
	.. [1] 'Papp, Eszter A., et al. "Waxholm Space atlas of the Sprague Dawley rat brain." NeuroImage 97 (2014): 374-386.'
	.. [2] https://www.nitrc.org/projects/whs-sd-atlas

	"""
	from nilearn.datasets.utils import _fetch_files
	# Get template
	url_template = 'https://www.nitrc.org/frs/download.php/9423/WHS_SD_rat_T2star_v1.01.nii.gz'
	template = _fetch_files(path.abspath(path.expanduser(template_dir)), [('WHS_SD_rat_T2star_v1.01.nii.gz', url_template, {})],
			verbose=verbose)[0]

	# Get atlas
	url_atlas = 'https://www.nitrc.org/frs/download.php/9438/WHS_SD_rat_atlas_v2.nii.gz'
	atlas = _fetch_files(path.abspath(path.expanduser(template_dir)), [('WHS_SD_rat_atlas_v2.nii.gz', url_atlas, {})],
			verbose=verbose)[0]

	# Get labels
	url_labels = 'https://www.nitrc.org/frs/download.php/9439/WHS_SD_rat_atlas_v2.label'
	labels = _fetch_files(path.abspath(path.expanduser(template_dir)), [('WHS_SD_rat_atlas_v2.label', url_labels, {})],
			verbose=verbose)[0]

	# resample template
	commands = ["ResampleImage 3 WHS_SD_rat_T2star_v1.01.nii.gz _200micron_WHS_SD_rat_T2star_v1.01.nii.gz 0.2x0.2x0.2 size=1 spacing=0 4",
		"SmoothImage 3 _200micron_WHS_SD_rat_T2star_v1.01.nii.gz 0.4 200micron_WHS_SD_rat_T2star_v1.01.nii.gz",
		"rm _200micron_WHS_SD_rat_T2star_v1.01.nii.gz",
		"ResampleImage 3 WHS_SD_rat_atlas_v2.nii.gz _200micron_WHS_SD_rat_atlas_v2.nii.gz 0.2x0.2x0.2 size=1 spacing=0 4",
		"SmoothImage 3 _200micron_WHS_SD_rat_atlas_v2.nii.gz 0.4 200micron_WHS_SD_rat_atlas_v2.nii.gz",
		"rm _200micron_WHS_SD_rat_atlas_v2.nii.gz",]

	for command in commands:
		p = subprocess.Popen(command.split(), cwd=path.abspath(path.expanduser(template_dir)), stdout=subprocess.PIPE)
		p.wait()

	template_path = path.abspath(path.expanduser(template_dir))
	# Join directory and filename explicitly; simple string concatenation
	# would drop the path separator after abspath().
	return dict([
			("template", path.join(template_path, "200micron_WHS_SD_rat_T2star_v1.01.nii.gz")),
			("atlas", path.join(template_path, "200micron_WHS_SD_rat_atlas_v2.nii.gz")),
			("labels", labels)])
Example #7
def _fetch_hbnssi_participants(data_dir, url, verbose):
    """
    Helper function to fetch_hbnssi.
    This function helps in downloading and loading participant data from a
    file uploaded on the Open Science Framework (OSF).

    Parameters
    ----------
    data_dir: str
        Path of the data directory. Used to force data storage in a specified
        location. If None is given, data are stored in home directory.
    url: str, optional
        Override download URL. Used for testing only (or if you have set up a
        mirror of the data). Default: None
    verbose: int
        Defines the level of verbosity of the output.

    Returns
    -------
    participants: numpy.ndarray
        Contains each subject's age, gender, and handedness.
    """
    if url is None:
        url = 'https://osf.io/wtvh3/download'

    files = [('participants.csv', url, {'move': 'participants.csv'})]
    path_to_participants = _fetch_files(data_dir, files, verbose=verbose)[0]

    # Load path to participants
    dtype = [('sid', 'U12'), ('age', '<f8'), ('Gender', 'U4'),
             ('Handedness', 'U4')]
    names = ['sid', 'age', 'gender', 'handedness']
    participants = csv_to_array(path_to_participants, skip_header=True,
                                dtype=dtype, names=names)
    return participants
Example #8
def _fetch_hbnssi_brain_mask(data_dir, url, verbose):
    """
    Helper function to fetch_hbnssi.
    This function helps in downloading and loading the brain mask
    from the Open Science Framework (OSF).

    Parameters
    ----------
    data_dir: str
        Path of the data directory. Used to force data storage in a specified
        location. If None is given, data are stored in home directory.
    url: str, optional
        Override download URL. Used for testing only (or if you have set up a
        mirror of the data). Default: None
    verbose: int
        Defines the level of verbosity of the output.

    Returns
    -------
    path_to_mask: str
        File path for the appropriate brain mask
    """
    if url is None:
        url = 'https://osf.io/kp6m9/download'

    target_fname = 'tpl-MNI152NLin2009cAsym_res-3mm_label-GM_desc-thr02_probseg.nii.gz'
    files = [(target_fname,
             url,
             {'move': target_fname})]
    path_to_mask = _fetch_files(data_dir, files, verbose=verbose)[0]

    return path_to_mask
Example #9
def _fetch_ibc_surf_masks(data_dir, url, resume, verbose):
    """Helper function to fetch_ibc.

    This function helps in downloading brain and ROI masks for use with IBC
    functional alignment and inter-subject decoding.

    The files are downloaded from Open Science Framework (OSF).

    Parameters
    ----------
    data_dir: str
        Path of the data directory. Used to force data storage in a specified
        location. If None is given, data are stored in home directory.

    url: str, optional
        Override download URL. Used for test only (or if you setup a mirror of
        the data). Default: None

    resume: bool, optional (default True)
        Whether to resume download of a partly-downloaded file.

    verbose: int
        Defines the level of verbosity of the output.

    Returns
    -------
    derivatives_dir: str
        Path on disk to the IBC masks data directory.
    """
    if url is None:
        # Download from the relevant OSF project, using hashes generated
        # from the OSF API. Note the trailing slash. For more info, see:
        # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74
        url = 'https://osf.io/download/{}/'

    # The gzip contains unique download keys per Nifti file and CSV
    # pre-extracted from OSF. Required for downloading files.
    package_directory = os.path.dirname(os.path.abspath(__file__))
    dtype = [('filename', 'U52'), ('uid', 'U24')]
    names = ['filename', 'uid']
    # csv file contains download information
    osf_data = csv_to_array(os.path.join(package_directory,
                                         "ibc_surf_masks.csv"),
                            skip_header=True,
                            dtype=dtype,
                            names=names)

    derivatives_dir = Path(data_dir, 'masks')
    masks = []

    for this_osf_id in osf_data:

        # Download mask
        mask_url = url.format(this_osf_id['uid'])
        mask_target = Path(derivatives_dir, this_osf_id['filename'])
        mask_file = [(mask_target, mask_url, {'move': mask_target})]
        path_to_mask = _fetch_files(data_dir, mask_file, verbose=verbose)[0]
        masks.append(path_to_mask)

    return derivatives_dir
Example #10
def fetch_bids_langloc_dataset(data_dir=None, verbose=1):
    """Download language localizer example bids dataset.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. If None, use the default
        nilearn data directory.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    url = 'https://files.osf.io/v1/resources/9q7dv/providers/osfstorage/5888d9a76c613b01fc6acc4e'
    dataset_name = 'bids_langloc_example'
    main_folder = 'bids_langloc_dataset'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    if not os.path.exists(os.path.join(data_dir, main_folder)):
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
        _uncompress_file(downloaded_files[0])
    main_path = os.path.join(data_dir, main_folder)
    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(main_path) for f in files]
    return os.path.join(data_dir, main_folder), sorted(file_list)
Example #11
def fetch_craddock_adhd_200_parcellations(data_dir=None, verbose=1):
    """These are the parcellations from the Athena Pipeline of the ADHD
    200 preprocessing initiative. 200 and 400 ROI atlases were generated
    using 2-level parcellation of 650 individuals from the ADHD 200 Sample.

    Parameters
    ----------
    data_dir : str
        Directory where the data should be downloaded.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        parcellations_200, parcellations_400
    """
    url = 'http://www.nitrc.org/frs/download.php/5906/ADHD200_parcellations.tar.gz'
    opts = {'uncompress': True}

    dataset_name = 'craddock_ADHD200_parcellations'
    filenames = [("ADHD200_parcellate_200.nii.gz", url, opts),
                 ("ADHD200_parcellate_400.nii.gz", url, opts)]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)
    keys = ("parcellations_200", "parcellations_400")
    params = dict(list(zip(keys, files)))
    return Bunch(**params)
Example #12
def fetch_mist():
    """Download MIST parcellation n=122
    https://mniopenresearch.org/articles/1-3

    Returns
    -------
    maps : str
        Path to MIST parcellation

    labels : pandas.DataFrame
        Anatomical labels assigned to each parcel (first entry is
        'Background')
    """
    url = 'https://ndownloader.figshare.com/files/9811081'
    opts = {'uncompress': True}
    data_dir = _get_dataset_dir('mist', data_dir=None, verbose=1)
    files = [(join('Release', 'Parcel_Information',
                   'MIST_122.csv'), url, opts),
             (join('Release', 'Parcellations', 'MIST_122.nii.gz'), url, opts)]
    files = _fetch_files(data_dir, files, resume=True, verbose=1)
    parcel_info = pd.read_csv(files[0], sep=';')
    names = parcel_info['name']
    # Prepend a 'Background' entry and build the labels frame in one call;
    # appending row by row relied on DataFrame.append, which pandas removed.
    labels = pd.DataFrame({'name': ['Background'] + names.tolist()})
    return Bunch(maps=files[1], labels=labels)
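
A usage sketch for the returned Bunch; maps is a single NIfTI path and labels is a small pandas DataFrame whose first entry is 'Background':

    >>> mist = fetch_mist()
    >>> mist.maps                      # path to MIST_122.nii.gz
    >>> mist.labels['name'].head()     # region names, starting with 'Background'
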
Example #13
def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        epi_img: the input 4D image
        paradigm: a csv file describing the paradigm
    """
    url = 'ftp://ftp.cea.fr/pub/dsv/madic/download/nipy'

    dataset_name = "localizer_first_level"
    files = dict(epi_img="s12069_swaloc1_corr.nii.gz",
                 paradigm="localizer_paradigm.csv")
    # The options needed for _fetch_files
    options = [(filename, os.path.join(url, filename), {})
               for _, filename in sorted(files.items())]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    sub_files = _fetch_files(data_dir, options, resume=True,
                             verbose=verbose)

    params = dict(zip(sorted(files.keys()), sub_files))

    return Bunch(**params)
Example #14
def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, with the keys:
        epi_img: the input 4D image
        events: a tsv file describing the paradigm
    """
    url = 'https://osf.io/2bqxn/download'
    epi_img = 'sub-12069_task-localizer_space-MNI305.nii.gz'
    events = 'sub-12069_task-localizer_events.tsv'
    opts = {'uncompress': True}
    options = ('epi_img', 'events')
    dir_ = 'localizer_first_level'
    filenames = [(os.path.join(dir_, name), url, opts)
                 for name in [epi_img, events]]

    dataset_name = 'localizer_first_level'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)

    params = dict(list(zip(options, files)))
    return Bunch(**params)
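
A usage sketch; loading the events table with pandas is my own illustration and not something the fetcher does itself:

    >>> import pandas as pd
    >>> data = fetch_localizer_first_level()
    >>> events = pd.read_csv(data.events, sep='\t')   # BIDS-style .tsv file
    >>> fmri_img = data.epi_img                       # path to the 4D NIfTI image
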
Example #15
def fetch_reduced_loadings(data_dir=None,
                           url=None,
                           verbose=False,
                           resume=True):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/loadings/'

    data_dir = get_data_dir(data_dir)
    dataset_name = 'loadings'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    keys = STUDY_LIST

    paths = ['data_%s.pt' % key for key in keys]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    params = {key: file for key, file in zip(keys, files)}

    fdescr = (
        "Z-statistic loadings over a dictionary of 453 components covering "
        "grey-matter `modl_atlas['components_512_gm']` "
        "for 35 different task fMRI studies.")

    params['description'] = fdescr
    params['data_dir'] = data_dir

    return params
Example #16
def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        epi_img: the input 4D image
        paradigm: a csv file describing the paradigm
    """
    url = 'ftp://ftp.cea.fr/pub/dsv/madic/download/nipy'

    dataset_name = "localizer_first_level"
    files = dict(epi_img="s12069_swaloc1_corr.nii.gz",
                 paradigm="localizer_paradigm.csv")
    # The options needed for _fetch_files
    options = [(filename, os.path.join(url, filename), {})
               for _, filename in sorted(files.items())]

    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)
    sub_files = _fetch_files(data_dir, options, resume=True, verbose=verbose)

    params = dict(zip(sorted(files.keys()), sub_files))

    return Bunch(**params)
Example #17
def fetch_craddock_parcellation(data_dir=None,
                                url=None,
                                resume=True,
                                verbose=1):
    """Download and load the craddock parcellation.

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a non-
        standard location. Default: None (meaning: default)
    mirror: string, optional
        By default, the dataset is downloaded from the original website of the
        atlas. Specifying "nitrc" will force download from a mirror, with
        potentially higher bandwith.
    url: string, optional
        Download URL of the dataset. Overwrite the default URL.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, contains:

        - 200-components parcelellation  (parcellate200)
        - 400-components parcelellation (parcellate400)


    References
    ----------

    ?
    """
    if url is None:
        url = 'http://www.amensch.fr/data/craddock_parcellation/'

    files = [
        'ADHD200_parcellate_200.nii.gz',
        'ADHD200_parcellate_400.nii.gz',
    ]

    if isinstance(url, str):
        url = [url] * len(files)

    files = [(f, u + f, {}) for f, u in zip(files, url)]

    dataset_name = 'craddock_parcellation'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)
    files_ = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    fdescr = 'Components from Craddock clustering atlas'

    keys = ['parcellate200', 'parcellate400']
    params = dict(zip(keys, files_))
    params['description'] = fdescr

    return Bunch(**params)
Example #18
def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1):
    """
    Fetches the von Economo-Koskinas probabilistic FreeSurfer atlas

    Parameters
    ----------
    data_dir : str, optional
        Path to use as data directory. If not specified, will check for
        environmental variable 'NNT_DATA'; if that is not set, will use
        `~/nnt-data` instead. Default: None
    url : str, optional
        URL from which to download data. Default: None
    resume : bool, optional
        Whether to attempt to resume partial download, if possible. Default:
        True
    verbose : int, optional
        Modifies verbosity of download, where higher numbers mean more updates.
        Default: 1

    Returns
    -------
    filenames : :class:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['gcs', 'ctab', 'info'], where 'gcs'
        and 'ctab' hold left/right hemisphere file pairs and 'info' is the
        path to a CSV with parcel information

    References
    ----------
    Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den
    Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170,
    249-256.

    Notes
    -----
    License: CC-BY-NC-SA 4.0
    """

    dataset_name = 'atl-voneconomo_koskinas'
    keys = ['gcs', 'ctab', 'info']

    data_dir = _get_data_dir(data_dir=data_dir)
    info = _get_dataset_info(dataset_name)
    if url is None:
        url = info['url']
    opts = {
        'uncompress': True,
        'md5sum': info['md5'],
        'move': '{}.tar.gz'.format(dataset_name)
    }
    filenames = [
        'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff)
        for hemi in ['L', 'R'] for suff in ['gcs', 'ctab']
    ] + ['atl-vonEconomoKoskinas_info.csv']
    files = [(op.join(dataset_name, f), url, opts) for f in filenames]
    data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
    data = [ANNOT(*data[:-1:2])] + [ANNOT(*data[1:-1:2])] + [data[-1]]

    return Bunch(**dict(zip(keys, data)))
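
A usage sketch; the 'gcs' and 'ctab' entries are two-element containers holding the left and right hemisphere files, so plain tuple unpacking works without relying on the ANNOT field names:

    >>> ve = fetch_voneconomo()
    >>> lh_gcs, rh_gcs = ve['gcs']
    >>> lh_ctab, rh_ctab = ve['ctab']
    >>> parcel_info_csv = ve['info']
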
Example #19
def fetch_fsaverage(data_dir=None, url=None, resume=True, verbose=1):
    """
    Downloads files for fsaverage FreeSurfer template

    Parameters
    ----------
    data_dir : str, optional
        Path to use as data directory. If not specified, will check for
        environmental variable 'NNT_DATA'; if that is not set, will use
        `~/nnt-data` instead. Default: None
    url : str, optional
        URL from which to download data. Default: None
    resume : bool, optional
        Whether to attempt to resume partial download, if possible. Default:
        True
    verbose : int, optional
        Modifies verbosity of download, where higher numbers mean more updates.
        Default: 1

    Returns
    -------
    filenames : :class:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['orig', 'white', 'smoothwm', 'pial',
        'inflated', 'sphere'], where corresponding values are length-2 lists
        of downloaded template files (each list composed of files for the
        left and right hemisphere).
    """

    dataset_name = 'tpl-fsaverage'
    keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']

    data_dir = _get_data_dir(data_dir=data_dir)
    info = _get_dataset_info(dataset_name)
    if url is None:
        url = info['url']

    opts = {
        'uncompress': True,
        'md5sum': info['md5'],
        'move': '{}.tar.gz'.format(dataset_name)
    }

    filenames = [
        'fsaverage/surf/{}.{}'.format(hemi, surf) for surf in keys
        for hemi in ['lh', 'rh']
    ]

    data = _fetch_files(data_dir,
                        files=[(f, url, opts) for f in filenames],
                        resume=resume,
                        verbose=verbose)
    data = [data[i:i + 2] for i in range(0, len(keys) * 2, 2)]

    return Bunch(**dict(zip(keys, data)))
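
A usage sketch; each key maps to a [left, right] pair of downloaded surface files:

    >>> fsavg = fetch_fsaverage()
    >>> lh_pial, rh_pial = fsavg['pial']
    >>> lh_infl, rh_infl = fsavg['inflated']
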
Example #20
def fetch_vazquez_rodriguez2019(data_dir=None,
                                url=None,
                                resume=True,
                                verbose=1):
    """
    Downloads files from Vazquez-Rodriguez et al., 2019, PNAS

    Parameters
    ----------
    data_dir : str, optional
        Path to use as data directory. If not specified, will check for
        environmental variable 'NNT_DATA'; if that is not set, will use
        `~/nnt-data` instead. Default: None
    url : str, optional
        URL from which to download data. Default: None
    resume : bool, optional
        Whether to attempt to resume partial download, if possible. Default:
        True
    verbose : int, optional
        Modifies verbosity of download, where higher numbers mean more updates.
        Default: 1

    Returns
    -------
    data : :class:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['rsquared', 'gradient'] containing
        1000 values each, loaded from the downloaded rsquared_gradient.csv

    References
    ----------
    Vazquez-Rodriguez et al., 2019, PNAS
    """

    dataset_name = 'ds-vazquez_rodriguez2019'

    data_dir = _get_data_dir(data_dir=data_dir)
    info = _get_dataset_info(dataset_name)
    if url is None:
        url = info['url']
    opts = {
        'uncompress': True,
        'md5sum': info['md5'],
        'move': '{}.tar.gz'.format(dataset_name)
    }

    filenames = [op.join(dataset_name, 'rsquared_gradient.csv')]
    data = _fetch_files(data_dir,
                        files=[(f, url, opts) for f in filenames],
                        resume=resume,
                        verbose=verbose)

    # load data
    rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T

    return Bunch(rsquared=rsq, gradient=grad)
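
A usage sketch; the correlation line is only an illustration of working with the two returned arrays, not part of the fetcher:

    >>> import numpy as np
    >>> data = fetch_vazquez_rodriguez2019()
    >>> r = np.corrcoef(data.rsquared, data.gradient)[0, 1]
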
Example #21
def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/hcp_mask.nii.gz'
    files = [('hcp_mask.nii.gz', url, {})]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name,
                                   data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume, verbose=verbose)
    return files[0]
Example #22
def fetch_rat_waxholm(template_dir="~/.samri_files/templates/rat/waxholm/",
                      verbose=1):
    """Download and load waxholm atlas for Sprague Dawley rat

	Returns
	-------
	dict
		Dictionary containing template, atlas, labels	
		template - mri anatomy file; nifti file
		atlas - pixelvalues of regions are grouped together, with corresponding labels in labels.ccv; nifti file
		labels - labels annotating brain regions for pixelgroups in atlas; csv file 


	References
	----------
	.. [1] 'Papp, Eszter A., et al. "Waxholm Space atlas of the Sprague Dawley rat brain." NeuroImage 97 (2014): 374-386.'
	.. [2] https://www.nitrc.org/projects/whs-sd-atlas

	"""
    from nilearn.datasets.utils import _fetch_files
    # Get template
    url_template = 'https://www.nitrc.org/frs/download.php/9423/WHS_SD_rat_T2star_v1.01.nii.gz'
    template = _fetch_files(
        path.abspath(path.expanduser(template_dir)),
        [('WHS_SD_rat_T2star_v1.01.nii.gz', url_template, {})],
        verbose=verbose)[0]

    # Get atlas
    url_atlas = 'https://www.nitrc.org/frs/download.php/9438/WHS_SD_rat_atlas_v2.nii.gz'
    atlas = _fetch_files(path.abspath(path.expanduser(template_dir)),
                         [('WHS_SD_rat_atlas_v2.nii.gz', url_atlas, {})],
                         verbose=verbose)[0]

    # Get labels
    url_labels = 'https://www.nitrc.org/frs/download.php/9439/WHS_SD_rat_atlas_v2.label'
    labels = _fetch_files(path.abspath(path.expanduser(template_dir)),
                          [('WHS_SD_rat_atlas_v2.label', url_labels, {})],
                          verbose=verbose)[0]

    return dict([("template", template), ("atlas", atlas), ("labels", labels)])
Example #23
def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1):
    """
    Downloads files for Pauli et al., 2018 subcortical parcellation

    Parameters
    ----------
    data_dir : str, optional
        Path to use as data directory. If not specified, will check for
        environmental variable 'NNT_DATA'; if that is not set, will use
        `~/nnt-data` instead. Default: None
    url : str, optional
        URL from which to download data. Default: None
    resume : bool, optional
        Whether to attempt to resume partial download, if possible. Default:
        True
    verbose : int, optional
        Modifies verbosity of download, where higher numbers mean more updates.
        Default: 1

    Returns
    -------
    filenames : :class:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['probabilistic', 'deterministic'],
        where corresponding values are filepaths to downloaded atlas files.

    References
    ----------
    Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution
    probabilistic in vivo atlas of human subcortical brain nuclei. Scientific
    Data, 5, 180063.

    Notes
    -----
    License: CC-BY Attribution 4.0 International
    """

    dataset_name = 'atl-pauli2018'
    keys = ['probabilistic', 'deterministic', 'info']

    data_dir = _get_data_dir(data_dir=data_dir)
    info = _get_dataset_info(dataset_name)

    # format the query how _fetch_files() wants things and then download data
    files = [
        (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name']))
        for i in info
    ]

    data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)

    return Bunch(**dict(zip(keys, data)))
Example #24
def fetch_openfmri(data_dir, dataset_id, force_download=False, verbose=1):
    files = {
        'ds001': ['ds001_raw_6'],
        'ds002': ['ds002_raw'],
        'ds003': ['ds003_raw_1'],
        'ds005': ['ds005_raw_0'],
        'ds006A': ['ds006A_raw'],
        'ds007': ['ds007_raw'],
        'ds008': ['ds008_raw_4'],
        'ds011': ['ds011_raw_0'],
        'ds017A': ['ds017A_raw_0'],
        'ds017B': ['ds017B_raw_0'],
        'ds051': ['ds051_raw_0'],
        'ds052': ['ds052_raw_0'],
        'ds101': ['ds101_raw_0'],
        'ds102': ['ds102_raw_0'],
        'ds105': ['ds105_raw_6'],
        'ds107': ['ds107_raw_0'],
        'ds108': ['ds108_raw_part1', 'ds108_raw_part2', 'ds108_raw_part3'],
        'ds109': ['ds109_raw_4'],
        'ds110': ['ds110_raw_part1', 'ds110_raw_part2', 'ds110_raw_part3',
                  'ds110_raw_part4', 'ds110_raw_part5', 'ds110_raw_part6'],
        }

    if dataset_id not in files:
        raise ValueError('Unknown dataset %s' % dataset_id)

    base_url = 'https://openfmri.org/system/files/%s.tgz'
    urls = [base_url % f for f in files[dataset_id]]
    temp_dir = os.path.join(data_dir, '_%s' % dataset_id, dataset_id)
    output_dir = os.path.join(data_dir, dataset_id)

    if not os.path.exists(output_dir) and not force_download:
        _fetch_files('_%s' % dataset_id, urls, data_dir, verbose=verbose)
        shutil.move(temp_dir, output_dir)
        shutil.rmtree(os.path.split(temp_dir)[0])
    return output_dir
Example #25
def fetch_openneuro_dataset_index(data_dir=None,
                                  dataset_version='ds000030_R1.0.4',
                                  verbose=1):
    """ Download a file with OpenNeuro BIDS dataset index.

    Downloading the index makes it possible to explore the dataset
    directories and select specific files to download. The index is a sorted
    list of urls.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. If None, use the default
        nilearn data directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    urls_path: string
        Path to downloaded dataset index

    urls: list of string
        Sorted list of dataset directories
    """
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0],
        dataset_version,
    )
    data_dir = _get_dataset_dir(data_prefix,
                                data_dir=data_dir,
                                verbose=verbose)

    file_url = 'https://osf.io/86xj7/download'
    final_download_path = os.path.join(data_dir, 'urls.json')
    downloaded_file_path = _fetch_files(data_dir=data_dir,
                                        files=[(final_download_path, file_url,
                                                {
                                                    'move': final_download_path
                                                })],
                                        resume=True)
    urls_path = downloaded_file_path[0]
    with open(urls_path, 'r') as json_file:
        urls = json.load(json_file)
    return urls_path, urls
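
A sketch showing how the index pairs with fetch_openneuro_dataset (defined in Example #33 below); the subject filter is an arbitrary, hypothetical illustration:

    >>> urls_path, urls = fetch_openneuro_dataset_index()
    >>> subset = [u for u in urls if 'sub-10159' in u]   # hypothetical filter
    >>> data_dir, files = fetch_openneuro_dataset(urls=subset)
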
Example #26
def fetch_atlas_modl(data_dir=None, url=None, resume=True, verbose=1):
    """Download and load a multi-scale atlas computed using MODL over HCP900.

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a non-
        standard location. Default: None (meaning: default)
    url: string, optional
        Download URL of the dataset. Overwrite the default URL.
    """

    if url is None:
        url = 'http://cogspaces.github.io/assets/data/modl/'

    data_dir = get_data_dir(data_dir)
    dataset_name = 'modl'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    keys = [
        'components_64', 'components_128', 'components_453_gm',
        'loadings_128_gm'
    ]

    paths = [
        'components_64.nii.gz',
        'components_128.nii.gz',
        'components_453_gm.nii.gz',
        'loadings_128_gm.npy',
    ]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    params = {key: file for key, file in zip(keys, files)}

    fdescr = ('Components computed using the MODL package, at various '
              'scales, from HCP900 data')

    params['description'] = fdescr
    params['data_dir'] = data_dir

    return Bunch(**params)
Example #27
def fetch_atlas_modl(data_dir=None, url=None, resume=True, verbose=1):
    """Download and load a multi-scale atlas computed using MODL over HCP900.

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a non-
        standard location. Default: None (meaning: default)
    url: string, optional
        Download URL of the dataset. Overwrite the default URL.
    """
    if url is None:
        url = 'http://www.amensch.fr/data/cogspaces/modl/'

    files = [
        'components_16.nii.gz',
        'components_64.nii.gz',
        'components_128.nii.gz',
        'components_512.nii.gz',
    ]

    if isinstance(url, str):
        url = [url] * len(files)

    files = [(f, u + f, {}) for f, u in zip(files, url)]

    dataset_name = 'modl'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)
    files_ = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    fdescr = ('Components computed using the MODL package, at various '
              'scales, from HCP900 data')

    keys = [
        'components16',
        'components64',
        'components128',
        'components512',
    ]

    params = dict(zip(keys, files_))
    params['description'] = fdescr

    return Bunch(**params)
Example #28
def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://www.amensch.fr/data/mask/'

    files = ['mask_img.nii.gz']

    if isinstance(url, str):
        url = [url] * len(files)

    files = [(f, u + f, {}) for f, u in zip(files, url)]

    dataset_name = 'mask'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files_ = _fetch_files(data_dir, files, resume=resume,
                          verbose=verbose)
    return files_[0]
Example #29
def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://www.amensch.fr/data/cogspaces/mask/'

    files = ['hcp_mask.nii.gz', 'icbm_gm_mask.nii.gz', 'contrast_mask.nii.gz']

    if isinstance(url, str):
        url = [url] * len(files)

    files = [(f, u + f, {}) for f, u in zip(files, url)]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name,
                                   data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume, verbose=verbose)
    return {'hcp': files[0], 'icbm_gm': files[1], 'contrast': files[2]}
Example #30
def fetch_language_localizer_demo_dataset(data_dir=None, verbose=1):
    """Download language localizer demo dataset.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. If None, use the default
        nilearn data directory.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    url = 'https://osf.io/nh987/download'
    main_folder = 'fMRI-language-localizer-demo-dataset'

    data_dir = _get_dataset_dir(main_folder,
                                data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    # Only download if directory is empty
    # Directory will have been created by the call to _get_dataset_dir above
    if not os.listdir(data_dir):
        downloaded_files = _fetch_files(data_dir,
                                        files_spec,
                                        resume=True,
                                        verbose=verbose)
        _uncompress_file(downloaded_files[0])

    file_list = [
        os.path.join(path, f) for path, dirs, files in os.walk(data_dir)
        for f in files
    ]
    return data_dir, sorted(file_list)
Example #31
def fetch_atlas_gordon_2014(coordinate_system='MNI', resolution=2,
                            data_dir=None, url=None, resume=True, verbose=1):
    """Download and returns Gordon et al. 2014 atlas

    References
    ----------
    Gordon, E. M., Laumann, T. O., Adeyemo, B., Huckins, J. F., Kelley, W. M., &
    Petersen, S. E., "Generation and evaluation of a cortical area
    parcellation from resting-state correlations", 2014, Cerebral cortex, bhu239.

    See http://www.nil.wustl.edu/labs/petersen/Resources.html for more
    information on this parcellation.
    """
    if url is None:
        url = ("https://sites.wustl.edu/petersenschlaggarlab/files/"
               "2018/06/Parcels-19cwpgu.zip")
    dataset_name = "gordon_2014"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    valid_coordinate_systems = ['MNI', '711-2b']

    if coordinate_system not in valid_coordinate_systems:
        raise ValueError('Unknown coordinate system {0}. '
                         'Valid options are {1}'.format(
                             coordinate_system, valid_coordinate_systems))

    if resolution not in [1, 2, 3]:
        raise ValueError('Invalid resolution {0}. '
                         'Valid options are 1, 2 or 3.'.format(resolution))

    target_file = os.path.join('Parcels', 'Parcels_{0}_{1}.nii'.format(
        coordinate_system, str(resolution) * 3))

    atlas = _fetch_files(data_dir, [(target_file, url, {"uncompress": True})],
                         resume=resume, verbose=verbose)

    return atlas
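
A usage sketch; _fetch_files returns a list, so the single atlas path is its first element:

    >>> atlas = fetch_atlas_gordon_2014(coordinate_system='MNI', resolution=2)
    >>> atlas_path = atlas[0]   # Parcels_MNI_222.nii from the extracted archive
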
Example #32
def fetch_kirby(subjects=range(2), sessions=[1], data_dir=None, url=None,
                resume=True, verbose=1):
    """Download and load the KIRBY multi-modal dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading 2 subjects.

    sessions: iterable of int, optional
        The sessions to load. Load only the first session by default.

    data_dir: string, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None

    url: string, optional
        Override download URL. Used for test only (or if you setup a mirror of
        the data). Default: None

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :
         - 'anat': Paths to structural MPRAGE images
         - 'asl': Paths to ASL images
         - 'm0': Paths to ASL M0 images

    Notes
    ------

    This dataset is composed of 2 sessions of 21 participants (11 males) at 3T.
    Imaging modalities include MPRAGE, FLAIR,
    DTI, resting state fMRI, B0 and B1 field maps, ASL, VASO, quantitative T1
    mapping, quantitative T2 mapping, and magnetization transfer imaging.
    For each session, we only download MPRAGE and ASL data.

    More details about this dataset can be found here :
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3020263
    http://mri.kennedykrieger.org/databases.html

    Paper to cite
    -------------
        `Multi-Parametric Neuroimaging Reproducibility: A 3T Resource Study
        <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3020263>`_
        Bennett A. Landman, Alan J. Huang, Aliya Gifford, Deepti S. Vikram,
        Issel Anne L. Lim, Jonathan A.D. Farrell, John A. Bogovic, Jun Hua,
        Min Chen, Samson Jarso, Seth A. Smith, Suresh Joel, Susumu Mori,
        James J. Pekar, Peter B. Barker, Jerry L. Prince, and
        Peter C.M. van Zijl. NeuroImage. (2010)
        NIHMS/PMC:252138 doi:10.1016/j.neuroimage.2010.11.047

    Licence
    -------
    `BIRN Data License
    <http://www.nbirn.net/bdr/Data_Use_Agreement_09_19_07-1.pdf>`_
    """

    if url is None:
        url = 'https://www.nitrc.org/frs/downloadlink.php/'

    # Preliminary checks and declarations
    dataset_name = 'kirby'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    subject_ids = np.array([
        '849', '934', '679', '906', '913', '142', '127', '742', '422', '815',
        '906', '239', '916', '959', '814', '505', '959', '492', '239', '142',
        '815', '679', '800', '916', '849', '814', '800', '656', '742', '113',
        '913', '502', '113', '127', '505', '502', '934', '492', '346', '656',
        '346', '422'])
    nitrc_ids = np.arange(2201, 2243)
    ids = np.arange(1, 43)

    # Group indices by session
    _, indices1 = np.unique(subject_ids, return_index=True)
    subject_ids1 = subject_ids[sorted(indices1)]
    nitrc_ids1 = nitrc_ids[sorted(indices1)]
    ids1 = ids[sorted(indices1)]

    tuple_indices = [np.where(subject_ids == s)[0] for s in subject_ids1]
    indices2 = [idx1 if idx1 not in indices1 else idx2
                for (idx1, idx2) in tuple_indices]
    subject_ids2 = subject_ids[indices2]
    nitrc_ids2 = nitrc_ids[indices2]
    ids2 = ids[indices2]

    # Check arguments
    max_subjects = len(subject_ids)
    if max(subjects) > max_subjects:
        warnings.warn('Warning: there are only {0} subjects'.format(
            max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    n_subjects = len(subjects)

    archives = [
        [url + '{0}/KKI2009-{1:02}.tar.bz2'.format(nitrc_id, id) for
         (nitrc_id, id) in zip(nitrc_ids1, ids1)],
        [url + '{0}/KKI2009-{1:02}.tar.bz2'.format(nitrc_id, id) for
         (nitrc_id, id) in zip(nitrc_ids2, ids2)]
                ]
    anat1 = [os.path.join('session1', subject,
                          'KKI2009-{0:02}-MPRAGE.nii'.format(i))
             for subject, i in zip(subject_ids1, ids1)]
    anat2 = [os.path.join('session2', subject,
                          'KKI2009-{0:02}-MPRAGE.nii'.format(i))
             for subject, i in zip(subject_ids2, ids2)]
    asl1 = [os.path.join('session1', subject,
                         'KKI2009-{0:02}-ASL.nii'.format(i))
            for subject, i in zip(subject_ids1, ids1)]
    asl2 = [os.path.join('session2', subject,
                         'KKI2009-{0:02}-ASL.nii'.format(i))
            for subject, i in zip(subject_ids2, ids2)]
    m01 = [os.path.join('session1', subject,
                        'KKI2009-{0:02}-ASLM0.nii'.format(i))
           for subject, i in zip(subject_ids1, ids1)]
    m02 = [os.path.join('session2', subject,
                        'KKI2009-{0:02}-ASLM0.nii'.format(i))
           for subject, i in zip(subject_ids2, ids2)]

    target = [
        [os.path.join('session1', subject, 'KKI2009-{0:02}.tar.bz2'.format(id))
         for (subject, id) in zip(subject_ids1, ids1)],
        [os.path.join('session2', subject, 'KKI2009-{0:02}.tar.bz2'.format(id))
         for (subject, id) in zip(subject_ids2, ids2)]
                ]
    anat = [anat1, anat2]
    asl = [asl1, asl2]
    m0 = [m01, m02]

    source_anat = []
    source_asl = []
    source_m0 = []
    source_archives = []
    session = []
    target_archives = []
    for i in sessions:
        if not (i in [1, 2]):
            raise ValueError('KIRBY dataset session id must be in [1, 2]')
        source_anat += [anat[i - 1][subject] for subject in subjects]
        source_asl += [asl[i - 1][subject] for subject in subjects]
        source_m0 += [m0[i - 1][subject] for subject in subjects]
        source_archives += [archives[i - 1][subject] for subject in subjects]
        target_archives += [target[i - 1][subject] for subject in subjects]

        session += [i] * n_subjects

    # Dataset description
    fdescr = _get_dataset_descr(dataset_name)

    # Call fetch_files once per subject.
    asl = []
    m0 = []
    anat = []
    for anat_u, asl_u, m0_u, archive, target in zip(source_anat, source_asl,
                                                    source_m0, source_archives,
                                                    target_archives):
        n, a, m = _fetch_files(
            data_dir,
            [(anat_u, archive, {'uncompress': True, 'move': target}),
             (asl_u, archive, {'uncompress': True, 'move': target}),
             (m0_u, archive, {'uncompress': True, 'move': target})],
            verbose=verbose)

        anat.append(n)
        asl.append(a)
        m0.append(m)

    return Bunch(anat=anat, asl=asl, m0=m0, session=session,
                 description=fdescr)
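
A usage sketch iterating over the per-subject file triplets returned by the fetcher:

    >>> kirby = fetch_kirby(subjects=range(2), sessions=[1])
    >>> for anat, asl, m0 in zip(kirby.anat, kirby.asl, kirby.m0):
    ...     print(anat, asl, m0)
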
Example #33
def fetch_openneuro_dataset(
        urls=None, data_dir=None, dataset_version='ds000030_R1.0.4',
        verbose=1):
    """Download openneuro bids dataset.

    Note: This function requires boto3 to be installed.

    Parameters
    ----------
    urls: list of string, optional
        Openneuro url list of dataset files to download. If not specified
        all files of the specified dataset will be downloaded.

    data_dir: string, optional
        Path to store the downloaded dataset. If None, use the default
        nilearn data directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    boto3 = _check_import_boto3("boto3")
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir,
                                verbose=verbose)

    # if urls are not specified we download the complete dataset index
    if urls is None:
        _, urls = fetch_openneuro_dataset_index(
            data_dir=data_dir, dataset_version=dataset_version, verbose=verbose)

    # The files_spec needed for _fetch_files
    files_spec = []
    files_dir = []
    for url in urls:
        url_path = url.split(data_prefix + '/')[1]
        file_dir = os.path.join(data_dir, url_path)
        files_spec.append((os.path.basename(file_dir), url, {}))
        files_dir.append(os.path.dirname(file_dir))

    # download the files
    downloaded = []
    for file_spec, file_dir in zip(files_spec, files_dir):
        # Timeout errors are common in the s3 connection so we try to avoid
        # failure of the dataset download for a transient instability
        success = False
        download_attempts = 4
        while download_attempts > 0 and not success:
            try:
                downloaded_files = _fetch_files(
                    file_dir, [file_spec], resume=True, verbose=verbose)
                downloaded += downloaded_files
                success = True
            except Exception:
                download_attempts -= 1
        if not success:
            raise Exception('multiple failures downloading %s' % file_spec[1])

    return data_dir, sorted(downloaded)
Example #34
def fetch_microarray(data_dir=None,
                     donors=['9861'],
                     resume=True,
                     verbose=1,
                     convert=True):
    """
    Downloads the Allen Human Brain Atlas microarray expression dataset

    Parameters
    ----------
    data_dir : str, optional
        Directory where data should be downloaded and unpacked. Default:
        current directory
    donors : list, optional
        List of donors to download; can be either donor number or UID. Can also
        specify 'all' to download all available donors. Default: 9861
    resume : bool, optional
        Whether to resume download of a partly-downloaded file. Default: True
    verbose : int, optional
        Verbosity level (0 means no message). Default: 1
    convert : bool, optional
        Whether to convert downloaded CSV files into parquet format for faster
        loading in the future; only available if ``fastparquet`` and
        ``python-snappy`` are installed. Default: True

    Returns
    -------
    data : :class:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['microarray', 'ontology', 'pacall',
        'probes', 'annotation'], where corresponding values are lists of
        filepaths to downloaded CSV files.

    References
    ----------
    Hawrylycz, M. J., Lein, E. S., Guillozet-Bongaarts, A. L., Shen, E. H., Ng,
    L., Miller, J. A., ... & Abajian, C. (2012). An anatomically comprehensive
    atlas of the adult human brain transcriptome. Nature, 489(7416), 391.
    """

    url = "https://human.brain-map.org/api/v2/well_known_file_download/{}"

    dataset_name = 'allenbrain'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    sub_files = ('MicroarrayExpression.csv', 'Ontology.csv', 'PACall.csv',
                 'Probes.csv', 'SampleAnnot.csv')
    n_files = len(sub_files)

    if donors is not None and (isinstance(donors, (list, tuple))):
        for n, sub_id in enumerate(donors):
            if sub_id not in VALID_DONORS:
                raise ValueError(
                    'You provided invalid subject id {0} in a '
                    'list. Subjects must be selected in {1}.'.format(
                        sub_id, VALID_DONORS))
            donors[n] = WELL_KNOWN_IDS[sub_id]  # convert to ID system
    elif donors == 'all':
        donors = WELL_KNOWN_IDS.value_set('subj')
    else:
        donors = []
    donors = sorted(set(donors), key=lambda x: int(x))  # avoid duplicates

    files = [
        (os.path.join('normalized_microarray_donor{}'.format(sub),
                      fname), url.format(WELL_KNOWN_IDS.url[sub]),
         dict(uncompress=True,
              move=os.path.join('normalized_microarray_donor{}'.format(sub),
                                'donor{}.zip'.format(sub)))) for sub in donors
        for fname in sub_files
    ]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    # if we want to convert files to parquet format it's good to do that now
    # this step is _already_ super long, so an extra 1-2 minutes is negligible
    if convert and io.use_parq:
        for fn in files[0::n_files] + files[2::n_files]:
            io._make_parquet(fn, convert_only=True)

    return Bunch(microarray=files[0::n_files],
                 ontology=files[1::n_files],
                 pacall=files[2::n_files],
                 probes=files[3::n_files],
                 annotation=files[4::n_files])
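
A usage sketch; reading one of the returned CSV files with pandas is my own illustration, not part of the fetcher:

    >>> import pandas as pd
    >>> abi = fetch_microarray(donors=['9861'])
    >>> probes = pd.read_csv(abi.probes[0])   # Probes.csv for the first donor
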
Example #35
def fetch_tutorial_data(n_subjects=20, data_dir=None, resume=True, verbose=1):
    """Download and load the surfstat tutorial dataset.

    Parameters
    ----------
    n_subjects: int, optional
        The number of subjects to load from maximum of 100 subjects.
        By default, 20 subjects will be loaded. If None is given,
        all 100 subjects will be loaded.
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a specified
        location. If None, data will be downloaded to ~ (home directory).
        Default: None
    resume: bool, optional
        If true, try resuming download if possible

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :
         - 'image_files': Paths to image files in mgh format
         - 'demographics': Path to CSV file containing demographic information

    References
    ----------

    :Download: https://box.bic.mni.mcgill.ca/s/wMPF2vj7EoYWELV

    """

    # set dataset url
    url = "https://box.bic.mni.mcgill.ca/s/wMPF2vj7EoYWELV"

    # set data_dir, if not directly set use ~ as default
    if data_dir is None:
        data_dir = str(Path.home())

    # set dataset name and get its corresponding directory
    dataset_name = "brainstat_tutorial"
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    # set download information for demographic file
    files = [(
        "brainstat_tutorial_df.csv",
        url + "/download?path=%2FSurfStat_tutorial_data&files=myStudy.csv",
        {
            "move": "brainstat_tutorial_df.csv"
        },
    )]

    # download demographic file
    path_to_demographics = _fetch_files(data_dir, files, verbose=verbose)[0]

    # set ids based on complete dataset from demographic file
    ids = pd.read_csv(path_to_demographics)["ID2"].tolist()

    # set and check subjects, in total and subset
    max_subjects = len(ids)
    if n_subjects is None:
        n_subjects = max_subjects
    if n_subjects > max_subjects:
        warnings.warn("Warning: there are only %d subjects" % max_subjects)
        n_subjects = max_subjects
    ids = ids[:n_subjects]

    # restrict demographic information to subset of subjects
    df_tmp = pd.read_csv(path_to_demographics)
    df_tmp = df_tmp[df_tmp["ID2"].isin(ids)]

    # set download information for image files and download them
    # for hemi in ['lh', 'rh']:
    image_files = _fetch_files(
        data_dir,
        [(
            "thickness/{}_{}2fsaverage5_20.mgh".format(subj, hemi),
            url + "/download?path=%2F&files=brainstat_tutorial.zip",
            {
                "uncompress": True,
                "move": "brainstat_tutorial.zip"
            },
        ) for subj in ids for hemi in ["lh", "rh"]],
    )

    # pack everything in a scikit-learn bunch and return it
    return Bunch(demographics=df_tmp, image_files=image_files)
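
A usage sketch; each subject contributes a left- and a right-hemisphere thickness file:

    >>> bunch = fetch_tutorial_data(n_subjects=5)
    >>> bunch.demographics.shape[0]   # 5 rows, one per subject
    >>> len(bunch.image_files)        # 10 files: lh and rh for each of 5 subjects
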