Example #1
def fetch_adhd(n_subjects=40, data_dir=None,
               url=None, resume=True,
               modl_data_dir=None,
               mask_url=None,
               verbose=1):
    dataset = nilearn_fetch_adhd(n_subjects=n_subjects,
                                 data_dir=data_dir, url=url, resume=resume,
                                 verbose=verbose)
    root_dir = dataset.func[0]
    tail_dir = ''
    while tail_dir != 'adhd':
        root_dir, tail_dir = os.path.split(root_dir)
    root_dir = os.path.join(root_dir, tail_dir)

    modl_data_dir = get_data_dirs(modl_data_dir)[0]
    mask_data_dir = join(modl_data_dir, 'adhd')
    if mask_url is None:
        mask_url = 'http://amensch.fr/data/adhd/mask_img.nii.gz'
    _fetch_file(mask_url, mask_data_dir, resume=resume)
    mask_img = join(mask_data_dir, 'mask_img.nii.gz')
    behavioral = pd.DataFrame(dataset.phenotypic)
    behavioral.loc[:, 'Subject'] = pd.to_numeric(behavioral.loc[:, 'Subject'])
    behavioral.set_index('Subject', inplace=True)
    behavioral.index.names = ['subject']
    rest = pd.DataFrame(data=list(zip(dataset.func, dataset.confounds)),
                        columns=['filename', 'confounds'],
                        index=behavioral.index)
    return Bunch(rest=rest,
                 behavioral=behavioral, description=dataset.description,
                 mask=mask_img, root=root_dir)
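
A minimal usage sketch for the fetcher above (it assumes fetch_adhd, as defined here, is in scope, e.g. imported from the module that contains it; attribute names follow the returned Bunch):

# usage sketch: fetch 10 subjects and inspect the returned Bunch
data = fetch_adhd(n_subjects=10)
print(data.rest.head())        # DataFrame of 'filename'/'confounds', indexed by subject
print(data.behavioral.head())  # phenotypic information
print(data.mask)               # path to the downloaded mask_img.nii.gz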
Example #2
def fetch_fiac_first_level(data_dir=None, verbose=1):
    """ Download a first-level fiac fMRI dataset (2 sessions)

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.
    """
    data_dir = _get_dataset_dir('fiac_nistats',
                                data_dir=data_dir,
                                verbose=verbose)

    def _glob_fiac_data():
        """glob data from subject_dir."""
        _subject_data = {}
        subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0')
        for session in [1, 2]:
            # glob func data for this session
            session_func = os.path.join(subject_dir, 'run%i.nii.gz' % session)
            if not os.path.isfile(session_func):
                print('Missing functional scan for session %i.' % session)
                return None

            _subject_data['func%i' % session] = session_func

            # glob design matrix .npz file
            sess_dmtx = os.path.join(subject_dir, 'run%i_design.npz' % session)
            if not os.path.isfile(sess_dmtx):
                print('Missing session file: %s' % sess_dmtx)
                return None

            _subject_data['design_matrix%i' % session] = sess_dmtx

        # glob for mask data
        mask = os.path.join(subject_dir, 'mask.nii.gz')
        if not os.path.isfile(mask):
            print('Missing mask image.')
            return None

        _subject_data['mask'] = mask
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fiac_data()
    if data is not None:
        return data

    # No. Download the data
    print('Data absent, downloading...')
    url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz'

    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print('Archive corrupted, trying to download it again.')
        return fetch_fiac_first_level(data_dir=data_dir)

    return _glob_fiac_data()
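
A short usage sketch (assuming fetch_fiac_first_level above is in scope); note that the fetcher prints a message and returns None if an expected file is still missing after download:

# usage sketch: download the FIAC first-level data and inspect the paths
fiac = fetch_fiac_first_level()            # downloads nipy-data-0.2 on first call
if fiac is not None:
    print(fiac.func1, fiac.func2)          # the two functional runs
    print(fiac.design_matrix1)             # precomputed design matrix (.npz)
    print(fiac.mask)                       # brain mask image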
Example #3
def fetch_fiac_first_level(data_dir=None, verbose=1):
    """ Download a first-level fiac fMRI dataset (2 sessions)

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.
    """
    data_dir = _get_dataset_dir('fiac_nistats', data_dir=data_dir,
                                verbose=verbose)
    def _glob_fiac_data():
        """glob data from subject_dir."""
        _subject_data = {}
        subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0')
        for session in [1, 2]:
            # glob func data for session
            session_func = os.path.join(subject_dir, 'run%i.nii.gz' % session)
            if not os.path.isfile(session_func):
                print('Missing functional scan for session %i.' % session)
                return None

            _subject_data['func%i' % session] = session_func

            # glob design matrix .npz file
            sess_dmtx = os.path.join(subject_dir, 'run%i_design.npz' % session)
            if not os.path.isfile(sess_dmtx):
                print('Missing session file: %s' % sess_dmtx)
                return None

            _subject_data['design_matrix%i' % session] = sess_dmtx

        # glob for mask data
        mask = os.path.join(subject_dir, 'mask.nii.gz')
        if not os.path.isfile(mask):
            print('Missing mask image.')
            return None

        _subject_data['mask'] = mask
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fiac_data()
    if data is not None:
        return data

    # No. Download the data
    print('Data absent, downloading...')
    url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz'

    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print('Archive corrupted, trying to download it again.')
        return fetch_fiac_first_level(data_dir=data_dir)

    return _glob_fiac_data()
Example #4
def fetch_hcp_mask(data_dir=None, url=None, resume=True):
    data_dir = get_data_dirs(data_dir)[0]
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    data_dir = join(data_dir, 'parietal')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if url is None:
        url = 'http://amensch.fr/data/cogspaces/mask/mask_img.nii.gz'
    _fetch_file(url, data_dir, resume=resume)
    return join(data_dir, 'mask_img.nii.gz')
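
A usage sketch for the mask fetcher (the function itself only returns a path; loading it with nibabel is an assumption for illustration):

import nibabel

mask_path = fetch_hcp_mask()        # downloads mask_img.nii.gz if needed
mask_img = nibabel.load(mask_path)  # assumes nibabel is installed
print(mask_img.shape)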
Example #5
def _download_spm_auditory_data(data_dir, subject_dir, subject_id):
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)
Example #6
def fetch_adni(data_dir=None):
    """Fetch ADNI timeseries data from Open Science Framework (OSF)

    Parameters
    ----------
    data_dir : string
        Path where data should be downloaded

    Returns
    -------
    data_dir : string
        Path to the downloaded timeseries directory
    """
    if data_dir is None:
        warnings.warn('Data downloading is requested but data_dir is not '
                      'provided. Downloading to the current directory with '
                      'folder name ADNI', stacklevel=2)
        data_dir = './ADNI'

    url = 'https://osf.io/xhrcs/download'

    # Download the zip file, first
    dl_file = _fetch_file(url, data_dir=data_dir)

    # Second, uncompress the downloaded zip file
    _uncompress_file(dl_file, verbose=2)

    return data_dir
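
A usage sketch (the target directory below is only a placeholder; the function returns the directory holding the uncompressed timeseries):

import glob
import os

ts_dir = fetch_adni(data_dir='/tmp/ADNI')   # downloads and unzips the OSF archive
print(sorted(glob.glob(os.path.join(ts_dir, '*')))[:5])  # first few extracted entries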
Example #7
def fetch_behavioral_data(data_dir=None, restricted=False, overwrite=False):
    _, _, username, password = get_credentials(data_dir=data_dir)
    data_dir = get_data_dirs(data_dir)[0]
    behavioral_dir = join(data_dir, 'behavioral')
    if not os.path.exists(behavioral_dir):
        os.makedirs(behavioral_dir)
    csv_unrestricted = join(behavioral_dir, 'hcp_unrestricted_data.csv')
    if not os.path.exists(csv_unrestricted) or overwrite:
        result = _fetch_file(data_dir=data_dir,
                             url='https://db.humanconnectome.org/REST/'
                             'search/dict/Subject%20Information/results?'
                             'format=csv&removeDelimitersFromFieldValues'
                             '=true'
                             '&restricted=0&project=HCP_900',
                             username=username,
                             password=password)
        os.rename(result, csv_unrestricted)
    csv_restricted = join(behavioral_dir, 'hcp_restricted_data.csv')
    df_unrestricted = pd.read_csv(csv_unrestricted)
    df_unrestricted.set_index('Subject', inplace=True)
    if restricted and not os.path.exists(csv_restricted):
        warnings.warn("Cannot automatically retrieve restricted data. "
                      "Please create the file '%s' manually" % csv_restricted)
        restricted = False
    if not restricted:
        df = df_unrestricted
    else:
        df_restricted = pd.read_csv(csv_restricted)
        df_restricted.set_index('Subject', inplace=True)
        df = df_unrestricted.join(df_restricted, how='outer')
    df.sort_index(ascending=True, inplace=True)
    df.index.names = ['subject']
    return df
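
A usage sketch (it assumes HCP credentials are already configured so that get_credentials succeeds; restricted data must be placed manually, as the warning above explains):

# usage sketch: load the unrestricted HCP behavioral table
df = fetch_behavioral_data(restricted=False)
print(df.shape)
print(df.columns[:5])   # phenotypic columns, rows indexed by subject id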
Example #8
def _download_data_spm_multimodal(data_dir, subject_dir, subject_id):
    print('Data absent, downloading...')
    urls = [
        # fmri
        ('http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/'
         'multimodal_fmri.zip'),
        # structural
        ('http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/'
         'multimodal_smri.zip')
    ]

    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except:  # noqa:E722
            print('Archive corrupted, trying to download it again.')
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name='',
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data(subject_dir)
Example #9
def _download_data_spm_multimodal(data_dir, subject_dir, subject_id):
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),

        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]

    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name="",
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data(subject_dir)
Example #10
def download_collection(collection=None,
                        data_dir=None,
                        overwrite=False,
                        resume=True,
                        verbose=1):
    """
    Download images and metadata from Neurovault collection

    Args:
        collection (int, optional): collection id. Defaults to None.
        data_dir (str, optional): data directory. Defaults to None.
        overwrite (bool, optional): overwrite data directory. Defaults to False.
        resume (bool, optional): resume download. Defaults to True.
        verbose (int, optional): print diagnostic messages. Defaults to 1.

    Returns:
        (pd.DataFrame, list): (DataFrame of image metadata, list of files from downloaded collection)
    """

    if data_dir is None:
        data_dir = _get_dataset_dir(str(collection),
                                    data_dir=data_dir,
                                    verbose=verbose)

    # Get collection Metadata
    metadata = get_collection_image_metadata(collection=collection,
                                             data_dir=data_dir)

    # Get images
    files = []
    for f in metadata["file"]:
        files.append(
            _fetch_file(f,
                        data_dir,
                        resume=resume,
                        verbose=verbose,
                        overwrite=overwrite))

    return (metadata, files)
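
A usage sketch (the collection id below is only a placeholder; any public Neurovault collection id works, and the data directory is hypothetical):

metadata, files = download_collection(collection=1952, data_dir='/tmp/neurovault')
print(metadata.head())   # image metadata, one row per image
print(len(files))        # local paths of the downloaded images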
Example #11
def download_collection(collection=None,
                        data_dir=None,
                        overwrite=False,
                        resume=True,
                        verbose=1):
    ''' Download images and metadata from Neurovault collection

    Args:
        collection: (int) collection id
        data_dir: (str) data directory

    Returns:
        metadata: (pd.DataFrame) DataFrame with full image metadata from
            collection
        files: (list) list of files of the downloaded collection

    '''

    if data_dir is None:
        data_dir = _get_dataset_dir(str(collection),
                                    data_dir=data_dir,
                                    verbose=verbose)

    # Get collection Metadata
    metadata = get_collection_image_metadata(collection=collection,
                                             data_dir=data_dir)

    # Get images
    files = []
    for f in metadata['file']:
        files.append(
            _fetch_file(f,
                        data_dir,
                        resume=resume,
                        verbose=verbose,
                        overwrite=overwrite))

    return (metadata, files)
Example #12
def fetch_openfmri_dataset(dataset_name='ds000001', dataset_revision=None,
                           data_dir=None, verbose=1):
    """Download latest revision of specified bids dataset.

    Compressed files are not uncompressed automatically, due to the expected
    large size of the downloaded dataset.

    Only datasets that contain preprocessed files following the official
    conventions of the future BIDS derivatives specification can be used out
    of the box with Nistats. Otherwise custom preprocessing would need to be
    performed, optionally following the BIDS derivatives specification for the
    preprocessing output files.

    Parameters
    ----------
    dataset_name: string, optional
        Accession number as published in https://openfmri.org/dataset/.
        Defaults to dataset ds000001.

    dataset_revision: string, optional
        Revision as presented in the specific dataset link accessible
        from https://openfmri.org/dataset/. Looks for the latest by default.

    data_dir: string, optional
        Path to store the downloaded dataset. If None, the nilearn
        datasets' default download directory is used.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    # We download a json file with all the api data from the openfmri server
    openfmri_api = 'https://openfmri.org/dataset/api'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_file(openfmri_api, data_dir)
    json_api = json.load(open(files, 'r'))

    dataset_url_set = []
    for i in range(len(json_api)):
        # We look for the desired dataset in the json api file
        if dataset_name == json_api[i]['accession_number']:
            # Now we look for the desired revision or the last one
            if not dataset_revision:
                revision = json_api[i]['revision_set']
                if revision:
                    dataset_revision = revision[-1]['revision_number']
            # After selecting the revision we download all its files
            link_set = json_api[i]['link_set']
            for link in link_set:
                revision = link['revision']
                if revision == dataset_revision:
                    dataset_url_set.append(link['url'])
            # If revision is specified but no file is found there is an issue
            if dataset_revision and not dataset_url_set:
                raise Exception('No files found for revision %s'
                                % dataset_revision)
            break

    if not dataset_url_set:
        raise ValueError('dataset %s not found' % dataset_name)
    else:
        # The files_spec needed for _fetch_files
        files_spec = []
        for dat_url in dataset_url_set:
            target_file = os.path.basename(dat_url)
            url = dat_url
            files_spec.append((target_file, url, {}))
        # download the files
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
    return data_dir, downloaded_files
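
A usage sketch (downloads the latest revision of ds000001 into the nilearn data directory; as the docstring notes, archives are left compressed):

data_dir, downloaded_files = fetch_openfmri_dataset(dataset_name='ds000001')
print(data_dir)               # where the dataset was stored
print(downloaded_files[:3])   # first few downloaded archive paths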
Example #13
def fetch_localizer(subject_ids=None,
                    get_anats=False,
                    data_type='raw',
                    data_dir=None,
                    url=None,
                    resume=True,
                    verbose=1):
    """ Download and load Brainomics Localizer dataset (94 subjects).
    "The Functional Localizer is a simple and fast acquisition
    procedure based on a 5-minute functional magnetic resonance
    imaging (fMRI) sequence that can be run as easily and as
    systematically as an anatomical scan. This protocol captures the
    cerebral bases of auditory and visual perception, motor actions,
    reading, language comprehension and mental calculation at an
    individual level. Individual functional maps are reliable and
    quite precise. The procedure is described in more detail on the
    Functional Localizer page." This code is modified from
    `fetch_localizer_contrasts` from nilearn.datasets.funcs.py.
    (see http://brainomics.cea.fr/localizer/)
    "Scientific results obtained using this dataset are described in
    Pinel et al., 2007" [1]

    Notes:
    It is better to perform several small requests than a big one because the
    Brainomics server has no cache (can lead to timeout while the archive
    is generated on the remote server).  For example, download the first ten
    subjects first, then the next ten, and so on.

    Args:
        subject_ids: (list) List of Subject IDs (e.g., ['S01','S02']).
                     If None is given, all 94 subjects are used.
        get_anats: (boolean) Whether individual structural images should be
                    fetched or not.
        data_type: (string) type of data to download.
                    Valid values are ['raw','preprocessed']
        data_dir: (string, optional) Path of the data directory.
                    Used to force data storage in a specified location.
        url: (string, optional) Override download URL.
             Used for test only (or if you setup a mirror of the data).
        resume: (bool) Whether to resume download of a partly-downloaded file.
        verbose: (int) Verbosity level (0 means no message).

    Returns:
        data: (Bunch)
            Dictionary-like object, the interest attributes are :
            - 'functional': string list
                Paths to nifti contrast maps
            - 'structural': string list
                Paths to nifti files corresponding to the subjects' structural images

    References
    ----------
    Pinel, Philippe, et al.
    "Fast reproducible identification and large-scale databasing of
    individual functional cognitive networks."
    BMC neuroscience 8.1 (2007): 91.

    """

    if subject_ids is None:
        subject_ids = ['S%02d' % x for x in np.arange(1, 95)]
    elif not isinstance(subject_ids, (list)):
        raise ValueError(
            "subject_ids must be a list of subject ids (e.g., ['S01','S02'])")

    if data_type == 'raw':
        dat_type = "raw fMRI"
        dat_label = "raw bold"
        anat_type = "raw T1"
        anat_label = "raw anatomy"
    elif data_type == 'preprocessed':
        dat_type = "preprocessed fMRI"
        dat_label = "bold"
        anat_type = "normalized T1"
        anat_label = "anatomy"
    else:
        raise ValueError(
            "Only ['raw','preprocessed'] data_types are currently supported.")

    root_url = "http://brainomics.cea.fr/localizer/"
    dataset_name = 'brainomics_localizer'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)
    fdescr = _get_dataset_descr(dataset_name)
    opts = {'uncompress': True}

    bold_files = []
    anat_files = []
    for subject_id in subject_ids:
        base_query = ("Any X,XT,XL,XI,XF,XD WHERE X is Scan, X type XT, "
                      "X concerns S, "
                      "X label XL, X identifier XI, "
                      "X format XF, X description XD, "
                      'S identifier = "%s", ' % (subject_id, ) +
                      'X type IN(%(types)s), X label "%(label)s"')

        file_tarball_url = "%sbrainomics_data.zip?rql=%s&vid=data-zip" % (
            root_url,
            _urllib.parse.quote(base_query % {
                "types": "\"%s\"" % dat_type,
                "label": dat_label
            },
                                safe=',()'))
        name_aux = str.replace(str.join('_', [dat_type, dat_label]), ' ', '_')
        file_path = os.path.join("brainomics_data", subject_id,
                                 "%s.nii.gz" % name_aux)
        bold_files.append(
            _fetch_files(data_dir, [(file_path, file_tarball_url, opts)],
                        verbose=verbose))

        if get_anats:
            file_tarball_url = "%sbrainomics_data_anats.zip?rql=%s&vid=data-zip" % (
                root_url,
                _urllib.parse.quote(base_query % {
                    "types": "\"%s\"" % anat_type,
                    "label": anat_label
                },
                                    safe=',()'))
            if data_type == 'raw':
                anat_name_aux = "raw_T1_raw_anat_defaced.nii.gz"
            elif data_type == 'preprocessed':
                anat_name_aux = "normalized_T1_anat_defaced.nii.gz"
            file_path = os.path.join("brainomics_data", subject_id,
                                     anat_name_aux)
            anat_files.append(
                _fetch_files(data_dir, [(file_path, file_tarball_url, opts)],
                            verbose=verbose))

    # Fetch subject characteristics (separated in two files)
    if url is None:
        url_csv = ("%sdataset/cubicwebexport.csv?rql=%s&vid=csvexport" %
                   (root_url, _urllib.parse.quote("Any X WHERE X is Subject")))
        url_csv2 = ("%sdataset/cubicwebexport2.csv?rql=%s&vid=csvexport" %
                    (root_url,
                     _urllib.parse.quote(
                         "Any X,XI,XD WHERE X is QuestionnaireRun, "
                         "X identifier XI, X datetime "
                         "XD",
                         safe=',')))
    else:
        url_csv = "%s/cubicwebexport.csv" % url
        url_csv2 = "%s/cubicwebexport2.csv" % url

    filenames = [("cubicwebexport.csv", url_csv, {}),
                 ("cubicwebexport2.csv", url_csv2, {})]
    csv_files = _fetch_files(data_dir, filenames, verbose=verbose)
    metadata = pd.merge(pd.read_csv(csv_files[0], sep=';'),
                        pd.read_csv(csv_files[1], sep=';'),
                        on='"subject_id"')
    metadata.to_csv(os.path.join(data_dir, 'metadata.csv'))
    for x in ['cubicwebexport.csv', 'cubicwebexport2.csv']:
        os.remove(os.path.join(data_dir, x))

    if not get_anats:
        anat_files = None

    return Bunch(functional=bold_files,
                 structural=anat_files,
                 ext_vars=metadata,
                 description=fdescr)
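
A usage sketch (two subjects, preprocessed data plus anatomical images; requesting small batches of subjects follows the note in the docstring):

data = fetch_localizer(subject_ids=['S01', 'S02'],
                       get_anats=True,
                       data_type='preprocessed')
print(data.functional[:2])   # paths of the downloaded contrast/bold files
print(data.ext_vars.head())  # merged subject metadata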
Example #14
def fetch_spm_auditory(data_dir=None,
                       data_name='spm_auditory',
                       subject_id="sub001",
                       verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download:
        http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_auditory_data():
        """glob data from subject_dir.

        """

        if not os.path.exists(subject_dir):
            return None

        subject_data = {}
        for file_name in SPM_AUDITORY_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path):
                subject_data[file_name] = file_path
            else:
                print("%s missing from filelist!" % file_name)
                return None

        _subject_data = {}
        _subject_data["func"] = sorted([
            subject_data[x] for x in subject_data.keys()
            if re.match("^fM00223_0\d\d\.img$", os.path.basename(x))
        ])

        # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
        for x in _subject_data["func"]:
            vol = nibabel.load(x)
            if len(vol.shape) == 4:
                vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                          vol.get_affine())
                nibabel.save(vol, x)

        _subject_data["anat"] = [
            subject_data[x] for x in subject_data.keys()
            if re.match("^sM00223_002\.img$", os.path.basename(x))
        ][0]

        # ... same thing for anat
        vol = nibabel.load(_subject_data["anat"])
        if len(vol.shape) == 4:
            vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                      vol.get_affine())
            nibabel.save(vol, _subject_data["anat"])

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_auditory_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir,
                                  data_name="",
                                  subject_id=subject_id)

    return _glob_spm_auditory_data()
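
A usage sketch (the first call downloads and unpacks MoAEpilot.zip into the nilearn data directory):

sub = fetch_spm_auditory()
print(len(sub.func))   # number of functional volumes
print(sub.anat)        # path to the structural image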
Example #15
def fetch_lemur_mircen_2019_t2(subjects=[0],
                               data_dir=None,
                               url=None,
                               resume=True,
                               verbose=1):
    """Download and loads the mouse lemur template dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading one subject.

    data_dir : string, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None

    resume : bool, optional (default True)
        If true, try resuming download if possible.

    verbose : int, optional (default 1)
        Defines the level of verbosity of the output.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :

        - 'anat': string list. Paths to T2-weighted images.
        - 'phenotypic': Participants genders, birth dates and MRI scan dates

    References
    ----------
    :Download:
        https://openneuro.org/datasets/ds001945/versions/1.0.0/download

    :Reference:
        `A 3D population-based brain atlas of the mouse lemur primate with
        examples of applications in aging studies and comparative anatomy.
        <https://doi.org/10.1016/j.neuroimage.2018.10.010>`_
        Neuroimage 185 (2019): 85-95.
        N. A. Nadkarni, S. Bougacha, C. Garin, M. Dhenain, and J. L. Picq.

    """
    if url is None:
        url = 'https://openneuro.org/crn/datasets/ds001945/snapshots/1.0.0/files'

    dataset_name = 'mircen2019_t2'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    # Check arguments
    max_subjects = 34
    if max(subjects) > max_subjects:
        warnings.warn(
            'Warning: there are only {0} subjects'.format(max_subjects))
        subjects = range(max_subjects)

    subject_ids = np.array(['sub-{0:02d}'.format(i) for i in range(1, 35)])
    subject_ids = subject_ids[subjects]

    # Generate the list of urls
    json_urls = [
        os.path.join(url, '{0}:anat:{0}_T2w.json'.format(subject_id))
        for subject_id in subject_ids
    ]
    anat_urls = [
        os.path.join(url, '{0}:anat:{0}_T2w.nii.gz'.format(subject_id))
        for subject_id in subject_ids
    ]

    # Generate the list of target files
    anat_basenames = [
        '{0}_anat_{0}_T2w.nii.gz'.format(subject_id)
        for subject_id in subject_ids
    ]
    anat_files = [
        os.path.join(animal_dir, anat_basename)
        for (animal_dir, anat_basename) in zip(subject_ids, anat_basenames)
    ]

    json_basenames = [
        '{0}_anat_{0}_T2w.json'.format(subject_id)
        for subject_id in subject_ids
    ]
    json_files = [
        os.path.join(animal_dir, json_basename)
        for (animal_dir, json_basename) in zip(subject_ids, json_basenames)
    ]

    # Call fetch_files once per subject.
    anat = []
    json = []
    for anat_u, anat_f, json_u, json_f in zip(anat_urls, anat_files, json_urls,
                                              json_files):
        a, j = _fetch_files(data_dir, [(anat_f, anat_u, {
            'move': anat_f
        }), (json_f, json_u, {
            'move': json_f
        })],
                            verbose=verbose)
        json.append(j)
        anat.append(a)

    pheno_url = os.path.join(url, 'lemur_atlas_list_t2_bids.csv')
    pheno_file = _fetch_file(pheno_url, data_dir, verbose=verbose)
    phenotypic = np.recfromcsv(
        pheno_file,
        delimiter='\t',
        skip_header=True,
        names=['animal_id', 'gender', 'birthdate', 'mri_date'],
        dtype=['U8', 'U3', 'datetime64[D]', 'datetime64[D]'],
        converters={
            2: _parse_date,
            3: _parse_date
        },
        encoding='U8')
    phenotypic = phenotypic[[
        np.where(phenotypic['animal_id'] == '"' + i + '"')[0][0]
        for i in subject_ids
    ]]
    fdescr = _get_dataset_descr(dataset_name)

    return Bunch(anat=anat, pheno=phenotypic, description=fdescr)
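
A usage sketch (loads the first two subjects; pheno carries the gender and birth/scan dates parsed above):

lemur = fetch_lemur_mircen_2019_t2(subjects=[0, 1])
print(lemur.anat)    # per-subject paths to the T2-weighted images
print(lemur.pheno)   # matching phenotypic records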
Example #16
def fetch_spm_multimodal_fmri(data_dir=None,
                              data_name="spm_multimodal_fmri",
                              subject_id="sub001",
                              verbose=1):
    """Fetcher for Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download:
        http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """

    data_dir = _get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_multimodal_fmri_data():
        """glob data from subject_dir."""
        _subject_data = {'slice_order': 'descending'}

        for session in range(2):
            # glob func data for this session (session + 1)
            session_func = sorted(
                glob.glob(
                    os.path.join(subject_dir,
                                 ("fMRI/Session%i/fMETHODS-000%i-*-01.img" %
                                  (session + 1, session + 5)))))
            if len(session_func) < 390:
                print("Missing %i functional scans for session %i." %
                      (390 - len(session_func), session))
                return None

            _subject_data['func%i' % (session + 1)] = session_func

            # glob trials .mat file
            sess_trials = os.path.join(subject_dir,
                                       "fMRI/trials_ses%i.mat" % (session + 1))
            if not os.path.isfile(sess_trials):
                print("Missing session file: %s" % sess_trials)
                return None

            _subject_data['trials_ses%i' % (session + 1)] = sess_trials

        # glob for anat data
        anat = os.path.join(subject_dir, "sMRI/smri.img")
        if not os.path.isfile(anat):
            print("Missing structural image.")
            return None

        _subject_data["anat"] = anat

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_multimodal_fmri_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),

        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]

    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name="",
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data()
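
A usage sketch (the first call downloads both zip archives; each session is expected to hold 390 functional scans, as checked by the glob above):

mm = fetch_spm_multimodal_fmri()
print(len(mm.func1), len(mm.func2))    # functional scans per session
print(mm.trials_ses1, mm.trials_ses2)  # onset (.mat) files
print(mm.anat)                         # structural image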
Example #17
def fetch_zurich_anesthesiant(
        subjects=range(30), url=None, data_dir=None, resume=True, verbose=1):
    """Download and loads the ETH-Zurich anesthesiant dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading all subjects.

    data_dir: string, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None

    resume: bool, optional (default True)
        If true, try resuming download if possible.

    verbose: int, optional (default 1)
        Defines the level of verbosity of the output.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images.
        - 'anesthesiant': string list. Information on used anesthesiant.

    Notes
    ------
    This dataset is composed of 30 male mice with different anesthesia
    protocols.

    References
    ----------
    :Download:
        https://central.xnat.org

    :Reference:
        `Optimization of anesthesia protocol for resting-state fMRI in mice
        based on differential effects of anesthetics on functional connectivity
        patterns.
        <http://dx.doi.org/10.1016/j.neuroimage.2014.08.043>`_
        NeuroImage 102 (2014): 838-847.
        J. Grandjean and A. Schroeter and I. Batata and M. Rudin.
    """
    if url is None:
        url = 'https://central.xnat.org'

    dataset_name = 'zurich_anest'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    # First, fetch the file that references all individual URLs
    json_file = _fetch_file(os.path.join(url, 'data', 'experiments.html'),
                            data_dir,
                            verbose=verbose)

    # Return the json file contents as a dictionary
    with open(json_file) as json_data:
        rows = list(json.load(json_data).values())[0]['Result']

    names = [name for name in rows[0].keys()]
    projects = {}
    for name in names:
        projects[name] = np.array([row[name] for row in rows])

    # Collect directories for all mice in the anesthesiant dataset
    iso_ids = [
        'iso2273', 'iso2274', 'iso2238', 'iso2239', 'iso2250', 'iso2270',
        'iso3294', 'iso3296'
    ]
    med_ids = [
        'med2259', 'med2241', 'med2247', 'med2251', 'med2256', 'med2257'
    ]
    mi_ids = [
        'mi272871', 'mi273299', 'mi273457', 'mi273458', 'mi273459', 'mi273460',
        'mi273461', 'mi273300'
    ]
    med_half_dose_ids = [
        'medHalfDose', 'medHalfDose1', 'medHalfDose2', 'medHalfDose3'
    ]
    iso1_c3_ids = ['iso1c3perc', 'iso1c3perc']
    iso1_c5_ids = ['iso1c5perc', 'iso2870_1c5perc']

    subjects_ids = iso_ids + med_ids + mi_ids + med_half_dose_ids + \
        iso1_c3_ids + iso1_c5_ids
    subjects_labels = ['Iso1'] * len(iso_ids) + ['Med'] * len(med_ids) + \
                      ['Med-Iso'] * len(mi_ids) + \
                      ['Med-half'] * len(med_half_dose_ids) + \
                      ['Iso1pt3'] * len(iso1_c3_ids) + \
                      ['Iso1pt5'] * len(iso1_c5_ids)

    # Check arguments
    max_subjects = len(subjects_ids)
    if subjects is None:
        subjects = range(max_subjects)
    elif max(subjects) > max_subjects:
        warnings.warn(
            'Warning: there are only {0} subjects'.format(max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    subjects_ids = [subjects_ids[subject] for subject in subjects]
    subjects_labels = [subjects_labels[subject] for subject in subjects]

    mice_uris = projects['URI'][np.in1d(projects['label'], subjects_ids)]

    # Generate the list of urls by session
    img_file = 'rsfMRI.img'
    hdr_file = 'rsfMRI.hdr'
    func_path = 'scans/rs_fMRI/resources/NULL/files'
    img_urls = [os.path.join(url + b, func_path, img_file) for b in mice_uris]
    hdr_urls = [os.path.join(url + b, func_path, hdr_file) for b in mice_uris]

    # Generate the list of target files by session
    target_img = [
        os.path.join(label, sub, img_file)
        for sub, label in zip(subjects_ids, subjects_labels)
    ]
    target_hdr = [
        os.path.join(label, sub, hdr_file)
        for sub, label in zip(subjects_ids, subjects_labels)
    ]

    # Call fetch_files once per subject.
    img = []
    for img_u, hdr_u, img_f, hdr_f in zip(img_urls, hdr_urls, target_img,
                                          target_hdr):
        f, _ = _fetch_files(data_dir, [(img_f, img_u, {
            'move': img_f
        }), (hdr_f, hdr_u, {
            'move': hdr_f
        })],
                            verbose=verbose)
        img.append(f)

    fdescr = _get_dataset_descr(dataset_name)

    return Bunch(func=img, anesthesiant=subjects_labels, description=fdescr)
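
A usage sketch (loads the first five mice; the labels map each scan to its anesthesia protocol):

zurich = fetch_zurich_anesthesiant(subjects=range(5))
print(zurich.func)          # paths to the rsfMRI .img files (the .hdr pair is fetched alongside)
print(zurich.anesthesiant)  # e.g. ['Iso1', 'Iso1', ...]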
Example #18
def fetch_zurich_test_retest(subjects=range(15),
                             sessions=[1],
                             data_dir=None,
                             url=None,
                             resume=True,
                             verbose=1,
                             correct_headers=False):
    """Download and loads the ETH-Zurich test-retest dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading all subjects.

    sessions : iterable of int, optional
        The sessions to load. Load only the first session by default.

    data_dir : string, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None

    resume : bool, optional (default True)
        If true, try resuming download if possible.

    verbose : int, optional (default 1)
        Defines the level of verbosity of the output.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :

        - 'func': string list. Paths to functional images.
        - 'anat': string list. Paths to anatomic images.
        - 'session': numpy array. List of ids corresponding to images sessions.

    Notes
    ------
    This dataset is composed of 2 sessions of 15 male mice.
    For each mice, 2 resting-state scans of continuous EPI
    functional volumes were collected, both with their anatomical scan.
    Session 2  was collected 15-20 days after Session 1.

    References
    ----------
    :Download:
        https://central.xnat.org

    :Reference:
        `Mapping the Mouse Brain with Rs-fMRI: An Optimized Pipeline for
        Functional Network Identification
        <http://dx.doi.org/10.1016/j.neuroimage.2015.07.090>`_
        NeuroImage 123 (2015): 11-21.
        V. Zerbi, J. Grandjean, M. Rudin, and N. Wenderoth.

    """
    if url is None:
        url = 'https://central.xnat.org'

    dataset_name = 'zurich_retest'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    # First, fetch the file that references all individual URLs
    json_file = _fetch_file(url + '/data/experiments.html',
                            data_dir,
                            verbose=verbose)

    # Return the json file contents as a dictionary
    with open(json_file) as json_data:
        rows = list(json.load(json_data).values())[0]['Result']

    names = [name for name in rows[0].keys()]
    projects = {}
    for name in names:
        projects[name] = np.array([row[name] for row in rows])

    # Collect directories for all mice in the test-restest dataset
    subject_ids = [
        '1366', '1367', '1368', '1369', '1371', '1378', '1380', '1402', '1403',
        '1404', '1405', '1406', '1407', '1411', '1412'
    ]
    baseline_subject_ids = [subject + '_baseline' for subject in subject_ids]
    post_subject_ids = [subject + '_post' for subject in subject_ids]
    baseline_uris = projects['URI'][np.in1d(projects['label'],
                                            baseline_subject_ids)]
    post_uris = projects['URI'][np.in1d(projects['label'], post_subject_ids)]

    # Generate the list of urls by session
    func_file = 'rsfMRI.nii.gz'
    anat_file = '3DRARE.nii.gz'
    func_path = 'scans/rsfMRI/resources/NIFTI/files'
    anat_path = 'scans/anatomical/resources/NIFTI/files'
    func_urls = [[
        os.path.join(url + b, func_path, func_file) for b in baseline_uris
    ], [os.path.join(url + p, func_path, func_file) for p in post_uris]]
    anat_urls = [[
        os.path.join(url + b, anat_path, anat_file) for b in baseline_uris
    ], [os.path.join(url + p, anat_path, anat_file) for p in post_uris]]

    # Generate the list of target files by session
    func_files = [[
        os.path.join('baseline', sub, func_file) for sub in subject_ids
    ], [os.path.join('post', sub, func_file) for sub in subject_ids]]
    anat_files = [[
        os.path.join('baseline', sub, anat_file) for sub in subject_ids
    ], [os.path.join('post', sub, anat_file) for sub in subject_ids]]

    # Check arguments
    max_subjects = len(subject_ids)
    if max(subjects) > max_subjects:
        warnings.warn(
            'Warning: there are only {0} subjects'.format(max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    n_subjects = len(subjects)
    target_anat = []
    target_func = []
    source_anat = []
    source_func = []
    session = []
    for i in sessions:
        if not (i in [1, 2]):
            raise ValueError('Zurich dataset session id must be in [1, 2]')
        source_anat += [anat_urls[i - 1][subject] for subject in subjects]
        source_func += [func_urls[i - 1][subject] for subject in subjects]
        target_anat += [anat_files[i - 1][subject] for subject in subjects]
        target_func += [func_files[i - 1][subject] for subject in subjects]
        session += [i] * n_subjects

    # Call fetch_files once per subject.
    func = []
    anat = []
    for anat_u, anat_f, func_u, func_f in zip(source_anat, target_anat,
                                              source_func, target_func):
        a, f = _fetch_files(data_dir, [(anat_f, anat_u, {
            'move': anat_f
        }), (func_f, func_u, {
            'move': func_f
        })],
                            verbose=verbose)
        func.append(f)
        anat.append(a)

    fdescr = _get_dataset_descr(dataset_name)

    # This data has wrong sforms and qforms in the headers, so we correct them.
    if correct_headers:
        corrected_anat = []
        for a in anat:
            corrected_a = os.path.join(os.path.dirname(a),
                                       '3DRARE_corrected.nii.gz')
            _reset_affines(a,
                           corrected_a,
                           axes_to_permute=[(1, 2)],
                           axes_to_flip=[0],
                           verbose=0)
            corrected_anat.append(corrected_a)
        corrected_func = []
        for f in func:
            corrected_f = os.path.join(os.path.dirname(f),
                                       'rsfMRI_corrected.nii.gz')
            _reset_affines(f,
                           corrected_f,
                           center_mass=(0, 0, 0),
                           xyzscale=.1,
                           axes_to_permute=[(1, 2)],
                           axes_to_flip=[0],
                           verbose=0)
            corrected_func.append(corrected_f)
        anat = corrected_anat
        func = corrected_func

    return Bunch(anat=anat, func=func, session=session, description=fdescr)
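
A usage sketch (both sessions for the first three mice, with the header correction applied):

retest = fetch_zurich_test_retest(subjects=range(3),
                                  sessions=[1, 2],
                                  correct_headers=True)
print(retest.func[:2])   # corrected functional images
print(retest.session)    # session id for each entry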
Example #19
def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir.

        """

        if not os.path.exists(subject_dir):
            return None

        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(os.path.join(subject_dir,
                                                             'feeds/data'))
                else:
                    print "%s missing from filelist!" % file_name
                    return None
        return Bunch(data_dir=data_dir,
                     func=os.path.join(subject_dir, "fmri.nii.gz"),
                     anat=os.path.join(
                         subject_dir, "structural_brain.nii.gz"))

    # maybe data_dir already contents the data ?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print "Archive corrupted, trying to download it again."
        os.remove(archive_path)
        return fetch_fsl_feeds(data_dir=data_dir, data_name="")
    return _glob_fsl_feeds_data(data_dir)
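
A usage sketch (the first call downloads and unpacks the FSL FEEDS archive):

feeds = fetch_fsl_feeds()
print(feeds.func)   # path to fmri.nii.gz
print(feeds.anat)   # path to structural_brain.nii.gz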
Example #20
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id="sub001", verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download:
        http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_auditory_data():
        """glob data from subject_dir.

        """

        if not os.path.exists(subject_dir):
            return None

        subject_data = {}
        for file_name in SPM_AUDITORY_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path):
                subject_data[file_name] = file_path
            else:
                print("%s missing from filelist!" % file_name)
                return None

        _subject_data = {}
        _subject_data["func"] = sorted([subject_data[x]
                                        for x in subject_data.keys()
                                        if re.match("^fM00223_0\d\d\.img$",
                                                    os.path.basename(x))])

        # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
        for x in _subject_data["func"]:
            vol = nibabel.load(x)
            if len(vol.shape) == 4:
                vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                          vol.get_affine())
                nibabel.save(vol, x)

        _subject_data["anat"] = [subject_data[x] for x in subject_data.keys()
                                 if re.match("^sM00223_002\.img$",
                                             os.path.basename(x))][0]

        # ... same thing for anat
        vol = nibabel.load(_subject_data["anat"])
        if len(vol.shape) == 4:
            vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                      vol.get_affine())
            nibabel.save(vol, _subject_data["anat"])

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_auditory_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)

    return _glob_spm_auditory_data()
Example #21
def fetch_spm_multimodal_fmri(data_dir=None, data_name="spm_multimodal_fmri",
                              subject_id="sub001", verbose=1):
    """Fetcher for Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download:
        http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """

    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_multimodal_fmri_data():
        """glob data from subject_dir."""
        _subject_data = {'slice_order': 'descending'}

        for s in range(2):
            # glob func data for session s + 1
            session_func = sorted(glob.glob(
                    os.path.join(
                        subject_dir,
                        ("fMRI/Session%i/fMETHODS-000%i-*-01.img" % (
                                s + 1, s + 5)))))
            if len(session_func) < 390:
                print "Missing %i functional scans for session %i." % (
                    390 - len(session_func), s)
                return None
            else:
                _subject_data['func%i' % (s + 1)] = session_func

            # glob trials .mat file
            sess_trials = os.path.join(
                subject_dir,
                "fMRI/trials_ses%i.mat" % (s + 1))
            if not os.path.isfile(sess_trials):
                print "Missing session file: %s" % sess_trials
                return None
            else:
                _subject_data['trials_ses%i' % (s + 1)] = sess_trials

        # glob for anat data
        anat = os.path.join(subject_dir, "sMRI/smri.img")
        if not os.path.isfile(anat):
            print "Missing structural image."
            return None
        else:
            _subject_data["anat"] = anat

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_multimodal_fmri_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),

        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]

    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name="",
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data()
Example #22
def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a specified
        location. If the data is already present there, then will simply
        glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir.

        """

        if not os.path.exists(subject_dir):
            return None

        subject_data = {}
        subject_data["subject_dir"] = subject_dir
        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
                subject_data[file_name] = file_path
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(os.path.join(subject_dir,
                                                             'feeds/data'))
                else:
                    print "%s missing from filelist!" % file_name
                    return None

        _subject_data = {"func": os.path.join(subject_dir,
                                              "fmri.nii.gz"),
                         "anat": os.path.join(subject_dir,
                                              "structural_brain.nii.gz")
                         }

        return Bunch(**_subject_data)

    # maybe data_dir already contents the data ?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except:
        print "Archive corrupted, trying to download it again."
        os.remove(archive_path)
        return fetch_fsl_feeds(data_dir=data_dir, data_name="")
    return _glob_fsl_feeds_data(data_dir)