def test_get_dataset_dir():
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('NILEARN_DATA', None)
    os.environ.pop('NILEARN_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/nilearn_data')
    data_dir = utils._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tst.tmpdir, 'test_nilearn_data')
    os.environ['NILEARN_DATA'] = expected_base_dir
    data_dir = utils._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tst.tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = utils._get_dataset_dir('test', verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    expected_base_dir = os.path.join(tst.tmpdir, 'env_data')
    expected_dataset_dir = os.path.join(expected_base_dir, 'test')
    data_dir = utils._get_dataset_dir(
        'test', default_paths=[expected_dataset_dir], verbose=0)
    assert_equal(data_dir, os.path.join(expected_base_dir, 'test'))
    assert os.path.exists(data_dir)
    shutil.rmtree(data_dir)

    no_write = os.path.join(tst.tmpdir, 'no_write')
    os.makedirs(no_write)
    os.chmod(no_write, 0o400)

    expected_base_dir = os.path.join(tst.tmpdir, 'nilearn_shared_data')
    os.environ['NILEARN_SHARED_DATA'] = expected_base_dir
    data_dir = utils._get_dataset_dir('test', default_paths=[no_write],
                                      verbose=0)
    # Non-writeable dir is returned because dataset may be in there.
    assert_equal(data_dir, no_write)
    assert os.path.exists(data_dir)
    # Set back write permissions in order to be able to remove the file
    os.chmod(no_write, 0o600)
    shutil.rmtree(data_dir)

    # Verify exception for a path which exists and is a file
    test_file = os.path.join(tst.tmpdir, 'some_file')
    with open(test_file, 'w') as out:
        out.write('abcfeg')
    assert_raises_regex(OSError,
                        'Nilearn tried to store the dataset '
                        'in the following directories, but',
                        utils._get_dataset_dir,
                        'test', test_file, verbose=0)

def load_camcan_all_without_sessions(data_dir, read=False, verbose=1):
    """Grab all timeseries paths of camcan data without any filtering.
    """
    dataset_name = 'camcan'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'participant_data.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = os.path.join(data_dir, '*.csv')
    timeseries_paths = glob.glob(paths)

    if not read:
        return Bunch(timeseries_paths=timeseries_paths,
                     phenotypic_path=phenotypic_path)

    timeseries_data = []
    for path in timeseries_paths:
        data = pd.read_csv(path)
        data = data.drop('Unnamed: 0', axis=1)
        timeseries_data.append(data)
    return Bunch(timeseries_data=timeseries_data,
                 phenotypic_data=pd.read_csv(phenotypic_path))

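# A minimal usage sketch for the CamCAN loader above, added for illustration.
# '/path/to/data' is a hypothetical stand-in for a base directory holding the
# 'camcan/participant_data.csv' and 'camcan/timeseries/*.csv' layout the
# loader expects:
def _example_load_camcan(data_dir='/path/to/data'):
    bunch = load_camcan_all_without_sessions(data_dir, read=False)
    print('%d timeseries files found' % len(bunch.timeseries_paths))
    print('phenotypic csv: %s' % bunch.phenotypic_path)
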
def fetch_hcp_rest(data_dir, n_subjects=40):
    dataset_name = 'HCP'
    source_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=0)
    extra_dir = _get_dataset_dir('HCP_extra', data_dir=data_dir, verbose=0)
    mask = join(extra_dir, 'mask_img.nii.gz')
    behavioral_df = _fetch_hcp_behavioral_data(join(extra_dir, 'behavioral'))
    func = []
    meta = []
    ids = []

    list_dir = glob.glob(join(source_dir, '*/*/MNINonLinear/Results'))
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = dirpath_split[-3]
        serie_id = dirpath_split[-4]
        subject_id = int(subject_id)
        try:
            this_behavioral = behavioral_df.loc[subject_id]
        except KeyError:
            # Ignore subjects without behavioral data
            continue
        ids.append(subject_id)

        kwargs = {'subject_id': subject_id, 'serie_id': serie_id}
        meta.append(kwargs)
        subject_func = []
        for filename in os.listdir(dirpath):
            name, ext = os.path.splitext(filename)
            if name in ('rfMRI_REST1_RL', 'rfMRI_REST1_LR',
                        'rfMRI_REST2_RL', 'rfMRI_REST2_LR'):
                filename = join(dirpath, filename, filename + '.nii.gz')
                subject_func.append(filename)
        func.append(subject_func)

    results = {'func': func, 'meta': meta,
               'mask': mask,
               'description': 'Human Connectome Project',
               'behavioral': behavioral_df.loc[ids]}
    return Bunch(**results)

def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        epi_img: the input 4D image
        paradigm: a csv file describing the paradigm
    """
    url = 'ftp://ftp.cea.fr/pub/dsv/madic/download/nipy'

    dataset_name = "localizer_first_level"
    files = dict(epi_img="s12069_swaloc1_corr.nii.gz",
                 paradigm="localizer_paradigm.csv")
    # The options needed for _fetch_files
    options = [(filename, os.path.join(url, filename), {})
               for _, filename in sorted(files.items())]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    sub_files = _fetch_files(data_dir, options, resume=True,
                             verbose=verbose)

    params = dict(zip(sorted(files.keys()), sub_files))

    return Bunch(**params)

def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        it will simply be globbed.

    Returns
    -------
    data: sklearn.utils.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(
                        os.path.join(subject_dir, 'feeds/data'))
                else:
                    print("%s missing from filelist!" % file_name)
                    return None
        return Bunch(data_dir=data_dir,
                     func=os.path.join(subject_dir, "fmri.nii.gz"),
                     anat=os.path.join(subject_dir,
                                       "structural_brain.nii.gz"))

    # maybe data_dir already contains the data ?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    # attempt the download twice, to guard against transient failures
    for i in range(2):
        _fetch_files(data_dir, [("feeds", url, {"uncompress": True,
                                                "move": "fsl.tar"})])
    return _glob_fsl_feeds_data(data_dir)

def fetch_craddock_adhd_200_parcellations(data_dir=None, verbose=1):
    """These are the parcellations from the Athena Pipeline of the ADHD 200
    preprocessing initiative. 200 and 400 ROI atlases were generated using
    2-level parcellation of 650 individuals from the ADHD 200 Sample.

    Parameters
    ----------
    data_dir : str
        Directory where the data should be downloaded.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        dictionary-like object, keys are:
        parcellations_200, parcellations_400
    """
    url = 'http://www.nitrc.org/frs/download.php/5906/ADHD200_parcellations.tar.gz'
    opts = {'uncompress': True}

    dataset_name = 'craddock_ADHD200_parcellations'
    filenames = [("ADHD200_parcellate_200.nii.gz", url, opts),
                 ("ADHD200_parcellate_400.nii.gz", url, opts)]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)
    keys = ("parcellations_200", "parcellations_400")
    params = dict(list(zip(keys, files)))
    return Bunch(**params)

def fetch_reduced_loadings(data_dir=None, url=None, verbose=False,
                           resume=True):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/loadings/'
    data_dir = get_data_dir(data_dir)
    dataset_name = 'loadings'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    keys = STUDY_LIST
    paths = ['data_%s.pt' % key for key in keys]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]
    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)
    params = {key: file for key, file in zip(keys, files)}

    fdescr = (
        "Z-statistic loadings over a dictionary of 453 components covering "
        "grey-matter `modl_atlas['components_512_gm']` "
        "for 35 different task fMRI studies.")

    params['description'] = fdescr
    params['data_dir'] = data_dir
    return params

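# Hypothetical usage sketch for the loadings fetcher above (assumes network
# access and a writable data directory; the study keys come from STUDY_LIST):
def _example_fetch_reduced_loadings():
    loadings = fetch_reduced_loadings()
    studies = [k for k in loadings if k not in ('description', 'data_dir')]
    print('%d studies fetched, e.g. %s' % (len(studies), studies[:3]))
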
def fetch_fiac_first_level(data_dir=None, verbose=1):
    """ Download a first-level fiac fMRI dataset (2 sessions)

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.
    """
    data_dir = _get_dataset_dir('fiac_nistats', data_dir=data_dir,
                                verbose=verbose)

    def _glob_fiac_data():
        """glob data from subject_dir."""
        _subject_data = {}
        subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0')
        for session in [1, 2]:
            # glob func data for this session
            session_func = os.path.join(subject_dir,
                                        'run%i.nii.gz' % session)
            if not os.path.isfile(session_func):
                print('Missing functional scan for session %i.' % session)
                return None
            _subject_data['func%i' % session] = session_func

            # glob design matrix .npz file
            sess_dmtx = os.path.join(subject_dir,
                                     'run%i_design.npz' % session)
            if not os.path.isfile(sess_dmtx):
                print('Missing session file: %s' % sess_dmtx)
                return None
            _subject_data['design_matrix%i' % session] = sess_dmtx

        # glob for mask data
        mask = os.path.join(subject_dir, 'mask.nii.gz')
        if not os.path.isfile(mask):
            print('Missing mask image.')
            return None
        _subject_data['mask'] = mask
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fiac_data()
    if data is not None:
        return data

    # No. Download the data
    print('Data absent, downloading...')
    url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz'
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print('Archive corrupted, trying to download it again.')
        return fetch_fiac_first_level(data_dir=data_dir)
    return _glob_fiac_data()

def test_fetch_openneuro_dataset():
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=tst.tmpdir,
                                verbose=1)
    url_file = os.path.join(data_dir, 'urls.json')

    # Prepare url files for subject and filter tests
    urls = [
        data_prefix + '/stuff.html',
        data_prefix + '/sub-xxx.html',
        data_prefix + '/sub-yyy.html',
        data_prefix + '/sub-xxx/ses-01_task-rest.txt',
        data_prefix + '/sub-xxx/ses-01_task-other.txt',
        data_prefix + '/sub-xxx/ses-02_task-rest.txt',
        data_prefix + '/sub-xxx/ses-02_task-other.txt',
        data_prefix + '/sub-yyy/ses-01.txt',
        data_prefix + '/sub-yyy/ses-02.txt'
    ]
    json.dump(urls, open(url_file, 'w'))

    # Only 1 subject and non-subject-specific files get downloaded
    datadir, dl_files = datasets.fetch_openneuro_dataset(
        urls, tst.tmpdir, dataset_version)
    assert_true(isinstance(datadir, _basestring))
    assert_true(isinstance(dl_files, list))
    assert_true(len(dl_files) == 9)

def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, with the keys:
        epi_img: the input 4D image
        events: a csv file describing the paradigm
    """
    url = 'https://osf.io/2bqxn/download'
    epi_img = 'sub-12069_task-localizer_space-MNI305.nii.gz'
    events = 'sub-12069_task-localizer_events.tsv'
    opts = {'uncompress': True}
    options = ('epi_img', 'events')
    dir_ = 'localizer_first_level'
    filenames = [(os.path.join(dir_, name), url, opts)
                 for name in [epi_img, events]]

    dataset_name = 'localizer_first_level'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)

    params = dict(list(zip(options, files)))
    return Bunch(**params)

def test_make_fresh_openneuro_dataset_urls_index():
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=tst.tmpdir,
                                verbose=1)
    url_file = os.path.join(
        data_dir, 'nistats_fetcher_openneuro_dataset_urls.json',
    )

    # Prepare url files for subject and filter tests
    file_list = [
        data_prefix + '/stuff.html',
        data_prefix + '/sub-xxx.html',
        data_prefix + '/sub-yyy.html',
        data_prefix + '/sub-xxx/ses-01_task-rest.txt',
        data_prefix + '/sub-xxx/ses-01_task-other.txt',
        data_prefix + '/sub-xxx/ses-02_task-rest.txt',
        data_prefix + '/sub-xxx/ses-02_task-other.txt',
        data_prefix + '/sub-yyy/ses-01.txt',
        data_prefix + '/sub-yyy/ses-02.txt'
    ]
    with open(url_file, 'w') as f:
        json.dump(file_list, f)

    # The complete file list is regenerated and returned
    datadir, dl_files = make_fresh_openneuro_dataset_urls_index(
        tst.tmpdir, dataset_version)
    assert_true(isinstance(datadir, _basestring))
    assert_true(isinstance(dl_files, list))
    assert_true(len(dl_files) == len(file_list))

def fetch_mist():
    """Download MIST parcellation n=122

    https://mniopenresearch.org/articles/1-3

    Returns
    -------
    maps : str
        Path to MIST parcellation

    labels : list of str
        Anatomical labels assigned to each label
    """
    url = 'https://ndownloader.figshare.com/files/9811081'
    opts = {'uncompress': True}
    data_dir = _get_dataset_dir('mist', data_dir=None, verbose=1)
    files = [(join('Release', 'Parcel_Information', 'MIST_122.csv'),
              url, opts),
             (join('Release', 'Parcellations', 'MIST_122.nii.gz'),
              url, opts)]
    files = _fetch_files(data_dir, files, resume=True, verbose=1)

    parcel_info = pd.read_csv(files[0], sep=';')
    names = parcel_info['name']
    df = pd.DataFrame(['Background'], columns=['name'])
    for i in range(names.shape[0]):
        df2 = pd.DataFrame([names[i]], columns=['name'])
        df = df.append(df2, ignore_index=True)
    return Bunch(maps=files[1], labels=df)

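# Hypothetical usage sketch for the MIST fetcher above (assumes network
# access; `maps` is the path to the 122-parcel NIfTI and `labels` a DataFrame
# with a 'Background' row prepended):
def _example_fetch_mist():
    mist = fetch_mist()
    print('atlas image: %s' % mist.maps)
    print('%d labels (including background)' % len(mist.labels))
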
def fetch_bids_langloc_dataset(data_dir=None, verbose=1):
    """Download language localizer example bids dataset.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    url = 'https://files.osf.io/v1/resources/9q7dv/providers/osfstorage/5888d9a76c613b01fc6acc4e'
    dataset_name = 'bids_langloc_example'
    main_folder = 'bids_langloc_dataset'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    if not os.path.exists(os.path.join(data_dir, main_folder)):
        downloaded_files = _fetch_files(data_dir, files_spec,
                                        resume=True, verbose=verbose)
        _uncompress_file(downloaded_files[0])
    main_path = os.path.join(data_dir, main_folder)
    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(main_path) for f in files]
    return os.path.join(data_dir, main_folder), sorted(file_list)

def load_hcp_confounds(data_dir, session, session_type, verbose=1):
    """Load confound paths of HCP data.

    `session` takes the integers 1 and 2, denoting REST1 and REST2.
    `session_type` takes 'LR' and 'RL'.
    """
    dataset_name = 'HCP'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    paths = os.path.join(data_dir, '*')
    confound_name = 'confounds'
    data_dir = os.path.join(data_dir, '*', confound_name)

    if session == 1:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST1_LR_Movement_Regressors.txt'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST1_RL_Movement_Regressors.txt'
    if session == 2:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST2_LR_Movement_Regressors.txt'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST2_RL_Movement_Regressors.txt'

    paths = os.path.join(data_dir, filename_session)
    paths = glob.glob(paths)
    return paths

def load_hcp(data_dir, session, session_type, atlas_name='msdl', verbose=1):
    """Load HCP timeseries data paths.

    `session` takes the integers 1 and 2, denoting REST1 and REST2.
    `session_type` takes 'LR' and 'RL'.
    """
    dataset_name = 'HCP'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    paths = os.path.join(data_dir, '*')
    data_dir = os.path.join(data_dir, '*', atlas_name)

    if session == 1:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST1_LR_raw'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST1_RL_raw'
    if session == 2:
        if session_type == 'LR':
            filename_session = 'rfMRI_REST2_LR_raw'
        elif session_type == 'RL':
            filename_session = 'rfMRI_REST2_RL_raw'

    paths = os.path.join(data_dir, filename_session)
    paths = glob.glob(paths)
    return paths

def test_fetch_openneuro_dataset(request_mocker, tmp_path):
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0],
        dataset_version,
    )
    data_dir = _get_dataset_dir(
        data_prefix,
        data_dir=tmp_path,
        verbose=1,
    )
    url_file = os.path.join(data_dir, 'urls.json')

    # Prepare url files for subject and filter tests
    urls = [
        f'https://example.com/{data_prefix}/stuff.html',
        f'https://example.com/{data_prefix}/sub-xxx.html',
        f'https://example.com/{data_prefix}/sub-yyy.html',
        f'https://example.com/{data_prefix}/sub-xxx/ses-01_task-rest.txt',
        f'https://example.com/{data_prefix}/sub-xxx/ses-01_task-other.txt',
        f'https://example.com/{data_prefix}/sub-xxx/ses-02_task-rest.txt',
        f'https://example.com/{data_prefix}/sub-xxx/ses-02_task-other.txt',
        f'https://example.com/{data_prefix}/sub-yyy/ses-01.txt',
        f'https://example.com/{data_prefix}/sub-yyy/ses-02.txt',
    ]
    json.dump(urls, open(url_file, 'w'))

    # Only 1 subject and non-subject-specific files get downloaded
    datadir, dl_files = func.fetch_openneuro_dataset(
        urls, tmp_path, dataset_version)
    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)
    assert len(dl_files) == 9

    # URLs do not contain the data_prefix, which should raise a ValueError
    urls = [
        'https://example.com/stuff.html',
        'https://example.com/sub-yyy/ses-01.txt',
    ]
    with pytest.raises(ValueError, match='This indicates that the URLs'):
        func.fetch_openneuro_dataset(urls, tmp_path, dataset_version)

    # Try downloading a different dataset without providing URLs.
    # This should raise a warning and download ds000030.
    with pytest.warns(
        UserWarning,
        match='Downloading "ds000030_R1.0.4".',
    ):
        urls_path, urls = func.fetch_openneuro_dataset(
            urls=None,
            data_dir=tmp_path,
            dataset_version='ds500_v2',
            verbose=1,
        )

def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/hcp_mask.nii.gz'
    files = [('hcp_mask.nii.gz', url, {})]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume,
                         verbose=verbose)
    return files[0]

def fetch_openneuro_dataset_index(data_dir=None,
                                  dataset_version='ds000030_R1.0.4',
                                  verbose=1):
    """ Download a file with OpenNeuro BIDS dataset index.

    Downloading the index allows to explore the dataset directories
    to select specific files to download. The index is a sorted list
    of urls.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    urls_path: string
        Path to downloaded dataset index

    urls: list of string
        Sorted list of dataset directories
    """
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0],
        dataset_version,
    )
    data_dir = _get_dataset_dir(data_prefix,
                                data_dir=data_dir,
                                verbose=verbose)

    file_url = 'https://osf.io/86xj7/download'
    final_download_path = os.path.join(data_dir, 'urls.json')
    downloaded_file_path = _fetch_files(
        data_dir=data_dir,
        files=[(final_download_path,
                file_url,
                {'move': final_download_path})],
        resume=True)
    urls_path = downloaded_file_path[0]
    with open(urls_path, 'r') as json_file:
        urls = json.load(json_file)
    return urls_path, urls

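# Hypothetical usage sketch for the index fetcher above (assumes network
# access to the OSF mirror; prints a handful of file urls from the index):
def _example_fetch_openneuro_index():
    urls_path, urls = fetch_openneuro_dataset_index()
    print('index stored at %s' % urls_path)
    for url in urls[:5]:
        print(url)
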
def fetch_atlas_modl(data_dir=None,
                     url=None,
                     resume=True, verbose=1):
    """Download and load a multi-scale atlas computed using MODL over HCP900.

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a
        non-standard location. Default: None (meaning: default)

    url: string, optional
        Download URL of the dataset. Overwrite the default URL.
    """
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/modl/'

    data_dir = get_data_dir(data_dir)
    dataset_name = 'modl'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    keys = ['components_64',
            'components_128',
            'components_453_gm',
            'loadings_128_gm']
    paths = [
        'components_64.nii.gz',
        'components_128.nii.gz',
        'components_453_gm.nii.gz',
        'loadings_128_gm.npy',
    ]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    params = {key: file for key, file in zip(keys, files)}

    fdescr = ('Components computed using the MODL package, at various '
              'scales, from HCP900 data')

    params['description'] = fdescr
    params['data_dir'] = data_dir

    return Bunch(**params)

def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://www.amensch.fr/data/cogspaces/mask/'
    files = ['hcp_mask.nii.gz', 'icbm_gm_mask.nii.gz',
             'contrast_mask.nii.gz']
    if isinstance(url, str):
        url = [url] * len(files)

    files = [(f, u + f, {}) for f, u in zip(files, url)]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume,
                         verbose=verbose)
    return {'hcp': files[0], 'icbm_gm': files[1], 'contrast': files[2]}

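# Hypothetical usage sketch for the multi-mask fetcher above (assumes network
# access; the three keys mirror the `files` list inside the function):
def _example_fetch_masks():
    masks = fetch_mask()
    for name in ('hcp', 'icbm_gm', 'contrast'):
        print('%s mask: %s' % (name, masks[name]))
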
def fetch_language_localizer_demo_dataset(data_dir=None, verbose=1):
    """Download language localizer demo dataset.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    url = 'https://osf.io/nh987/download'
    main_folder = 'fMRI-language-localizer-demo-dataset'

    data_dir = _get_dataset_dir(main_folder, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    # Only download if directory is empty
    # Directory will have been created by the call to _get_dataset_dir above
    if not os.listdir(data_dir):
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
        _uncompress_file(downloaded_files[0])

    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(data_dir) for f in files]
    return data_dir, sorted(file_list)

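# Hypothetical usage sketch for the demo-dataset fetcher above (assumes
# network access on the first call; later calls just re-list the files):
def _example_fetch_language_localizer_demo():
    data_dir, files = fetch_language_localizer_demo_dataset()
    print('dataset at %s with %d files' % (data_dir, len(files)))
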
def fetch_emotion_ratings(data_dir=None, resume=True, verbose=1):
    """Download and loads emotion rating dataset from neurovault

    Args:
        data_dir: (string, optional). Path of the data directory.
                  Used to force data storage in a specified location.
                  Default: None

    Returns:
        out: (Brain_Data) Brain_Data object with downloaded data. X=metadata

    """
    collection = 1964
    dataset_name = "chang2015_emotion_ratings"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    metadata, files = download_collection(collection=collection,
                                          data_dir=data_dir, resume=resume,
                                          verbose=verbose)
    return Brain_Data(data=files, X=metadata)

def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id='sub001', verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)
    if not os.path.exists(subject_dir):
        _download_spm_auditory_data(data_dir, subject_dir, subject_id)
    spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir)
    try:
        spm_auditory_data['events']
    except KeyError:
        events_filepath = _make_path_events_file_spm_auditory_data(
            spm_auditory_data)
        if not os.path.isfile(events_filepath):
            _make_events_file_spm_auditory_data(events_filepath)
        spm_auditory_data['events'] = events_filepath
    return spm_auditory_data

def fetch_spm_multimodal_fmri(data_dir=None, data_name='spm_multimodal_fmri',
                              subject_id='sub001', verbose=1):
    """Fetcher for Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    # maybe data_dir already contains the data ?
    data = _glob_spm_multimodal_fmri_data(subject_dir)
    if data is not None:
        return data

    # No. Download the data
    return _download_data_spm_multimodal(data_dir, subject_dir, subject_id)

def download_collection(collection=None, data_dir=None, overwrite=False,
                        resume=True, verbose=1):
    """Download images and metadata from Neurovault collection

    Args:
        collection (int, optional): collection id. Defaults to None.
        data_dir (str, optional): data directory. Defaults to None.
        overwrite (bool, optional): overwrite data directory.
            Defaults to False.
        resume (bool, optional): resume download. Defaults to True.
        verbose (int, optional): print diagnostic messages. Defaults to 1.

    Returns:
        (pd.DataFrame, list): (DataFrame of image metadata, list of files
        from downloaded collection)

    """
    if data_dir is None:
        data_dir = _get_dataset_dir(str(collection), data_dir=data_dir,
                                    verbose=verbose)

    # Get collection Metadata
    metadata = get_collection_image_metadata(collection=collection,
                                             data_dir=data_dir)

    # Get images
    files = []
    for f in metadata["file"]:
        files.append(
            _fetch_file(f, data_dir, resume=resume, verbose=verbose,
                        overwrite=overwrite))

    return (metadata, files)

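# Hypothetical usage sketch for the collection downloader above (assumes
# network access; 504 is the Neurovault collection id used by fetch_pain):
def _example_download_collection():
    metadata, files = download_collection(collection=504)
    print('%d images downloaded' % len(files))
    print(metadata.head())
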
def fetch_atlas_gordon_2014(coordinate_system='MNI', resolution=2,
                            data_dir=None, url=None, resume=True, verbose=1):
    """Download and returns Gordon et al. 2014 atlas

    References
    ----------
    Gordon, E. M., Laumann, T. O., Adeyemo, B., Huckins, J. F., Kelley,
    W. M., & Petersen, S. E., "Generation and evaluation of a cortical area
    parcellation from resting-state correlations", 2014, Cerebral cortex,
    bhu239.

    See http://www.nil.wustl.edu/labs/petersen/Resources.html for more
    information on this parcellation.
    """
    if url is None:
        url = ("https://sites.wustl.edu/petersenschlaggarlab/files/"
               "2018/06/Parcels-19cwpgu.zip")
    dataset_name = "gordon_2014"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    valid_coordinate_systems = ['MNI', '711-2b']
    if coordinate_system not in valid_coordinate_systems:
        raise ValueError('Unknown coordinate system {0}. '
                         'Valid options are {1}'.format(
                             coordinate_system, valid_coordinate_systems))
    if resolution not in [1, 2, 3]:
        raise ValueError('Invalid resolution {0}. '
                         'Valid options are 1, 2 or 3.'.format(resolution))

    target_file = os.path.join('Parcels',
                               'Parcels_{0}_{1}.nii'.format(
                                   coordinate_system, str(resolution) * 3))
    atlas = _fetch_files(data_dir,
                         [(target_file, url, {"uncompress": True})],
                         resume=resume, verbose=verbose)
    return atlas

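# Hypothetical usage sketch for the Gordon atlas fetcher above (assumes
# network access; requests the 3 mm MNI parcellation, i.e.
# 'Parcels/Parcels_MNI_333.nii' inside the zip):
def _example_fetch_gordon():
    atlas = fetch_atlas_gordon_2014(coordinate_system='MNI', resolution=3)
    print(atlas)
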
def fetch_pain(data_dir=None, resume=True, verbose=1):
    """Download and loads pain dataset from neurovault

    Args:
        data_dir: (string, optional) Path of the data directory.
                  Used to force data storage in a specified location.
                  Default: None

    Returns:
        out: (Brain_Data) Brain_Data object with downloaded data. X=metadata

    """
    collection = 504
    dataset_name = 'chang2015_pain'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    metadata, files = download_collection(collection=collection,
                                          data_dir=data_dir, resume=resume,
                                          verbose=verbose)
    return Brain_Data(data=files, X=metadata)

def load_abide(data_dir, site_id='all', read=False, verbose=1):
    """Load ABIDE timeseries extracted using MSDL atlas + compcor=10

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory which should contain a folder
        named 'ABIDE'.

    site_id : str or list of str (case sensitive), optional='all'
        Site id within, 'PITT', 'OLIN', 'OHSU', 'SDSU', 'TRINITY',
        'UM_1', 'UM_2', 'USM', 'YALE', 'CMU', 'LEUVEN_1', 'LEUVEN_2',
        'KKI', 'NYU', 'STANFORD', 'UCLA_1', 'UCLA_2', 'MAX_MUN',
        'CALTECH', 'SBL'
        By default, data of all sites will be returned, site_id='all'.
        Total sites = 20

    read : bool
        Whether to read the files or not using pandas.

    verbose : int
        Verbosity level

    Returns
    -------
    data : Bunch
        if read == False
            timeseries_paths : list of str
                Paths to csv files containing timeseries data of each site.
            phenotypic_path : str
                Path to csv containing phenotypic data
        if read is set as True
            timeseries_data : list of numpy array
                Loaded with pandas and converted to numpy arrays to be
                compatible with nilearn and ConnectivityMeasure.
            file_ids : list of str
                Corresponding file ids
            dx_groups : list of int
                Corresponding DX_GROUP. 1 is autism, 2 is control
            phenotypic_data : pandas Data
                Loaded phenotypic data
    """
    VALID_IDS = ['Pitt', 'Olin', 'OHSU', 'SDSU', 'Trinity', 'UM_1', 'UM_2',
                 'USM', 'Yale', 'CMU', 'Leuven_1', 'Leuven_2', 'KKI', 'NYU',
                 'Stanford', 'UCLA_1', 'UCLA_2', 'MaxMun', 'Caltech', 'SBL']
    dataset_name = 'ABIDE'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir,
                                   'Phenotypic_V1_0b_preprocessed1.csv')

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = []

    if site_id == 'all':
        site_id = VALID_IDS

    if not isinstance(site_id, collections.abc.Iterable):
        site_id = [site_id, ]

    if isinstance(site_id, collections.abc.Iterable):
        for i, this_id in enumerate(site_id):
            if not isinstance(this_id, _basestring) \
                    or this_id not in VALID_IDS:
                raise ValueError('An invalid site_id={0} is provided. '
                                 'Valid site names are: {1}'
                                 .format(this_id, VALID_IDS))
            filepaths = glob.glob(os.path.join(data_dir, this_id + '*.csv'))
            paths.extend(filepaths)

    if read:
        phenotypic_data = pd.read_csv(phenotypic_path)
        timeseries_data = []
        file_ids = []
        dx_groups = []
        if len(paths) != 0:
            for path in paths:
                filename = os.path.splitext(os.path.split(path)[1])[0]
                this_id = filename.split('_timeseries')[0]
                file_ids.append(this_id)
                data = pd.read_csv(path)
                data = data.drop('Unnamed: 0', axis=1)
                timeseries_data.append(np.asarray(data))
                this_group = phenotypic_data[
                    phenotypic_data['FILE_ID'] == this_id]['DX_GROUP']
                dx_groups.append(this_group.values[0])
        return Bunch(timeseries_data=timeseries_data, file_ids=file_ids,
                     dx_groups=dx_groups, phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)

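# Hypothetical usage sketch for the ABIDE loader above ('/path/to/data' is a
# stand-in for a base directory containing the 'ABIDE' folder described in
# the docstring):
def _example_load_abide(data_dir='/path/to/data'):
    abide = load_abide(data_dir, site_id=['NYU', 'KKI'], read=True)
    print('%d subjects, groups: %s' % (len(abide.file_ids),
                                       set(abide.dx_groups)))
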
def fetch_openneuro_dataset_index(
        data_dir=None, dataset_version='ds000030_R1.0.4', verbose=1):
    """Download openneuro bids dataset index

    Downloading the index allows to explore the dataset directories
    to select specific files to download. The index is a sorted list
    of urls.

    Note: This function requires boto3 to be installed.

    Parameters
    ----------
    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    urls_path: string
        Path to downloaded dataset index

    urls: list of string
        Sorted list of dataset directories
    """
    from botocore.handlers import disable_signing
    boto3 = _check_import_boto3("boto3")

    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir,
                                verbose=verbose)

    # First we download the url list from the uncompressed dataset version
    urls_path = os.path.join(data_dir, 'urls.json')
    urls = []
    if not os.path.exists(urls_path):

        def get_url(endpoint_url, bucket_name, file_key):
            return '{}/{}/{}'.format(endpoint_url, bucket_name, file_key)

        resource = boto3.resource('s3')
        resource.meta.client.meta.events.register('choose-signer.s3.*',
                                                  disable_signing)
        bucket = resource.Bucket('openneuro')

        for obj in bucket.objects.filter(Prefix=data_prefix):
            # get url of files (keys of directories end with '/')
            if obj.key[-1] != '/':
                urls.append(
                    get_url(bucket.meta.client.meta.endpoint_url,
                            bucket.name, obj.key))
        urls = sorted(urls)

        with open(urls_path, 'w') as json_file:
            json.dump(urls, json_file)
    else:
        with open(urls_path, 'r') as json_file:
            urls = json.load(json_file)

    return urls_path, urls

def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch FSL FEEDS dataset (single-subject)

    Parameters
    ----------
    data_dir: string
        path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        it will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        subject_data = {}
        subject_data["subject_dir"] = subject_dir
        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
                subject_data[file_name] = file_path
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(
                        os.path.join(subject_dir, 'feeds/data'))
                else:
                    print("%s missing from filelist!" % file_name)
                    return None
        _subject_data = {"func": os.path.join(subject_dir,
                                              "fmri.nii.gz"),
                         "anat": os.path.join(subject_dir,
                                              "structural_brain.nii.gz")
                         }
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        os.remove(archive_path)
        return fetch_fsl_feeds(data_dir=data_dir, data_name="")
    return _glob_fsl_feeds_data(data_dir)

def fetch_kirby(subjects=range(2), sessions=[1], data_dir=None, url=None,
                resume=True, verbose=1):
    """Download and load the KIRBY multi-modal dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading 2 subjects.

    sessions: iterable of int, optional
        The sessions to load. Load only the first session by default.

    data_dir: string, optional
        Path of the data directory. Used to force data storage in a
        specified location. Default: None

    url: string, optional
        Override download URL. Used for test only (or if you setup a
        mirror of the data). Default: None

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :

        - 'anat': Paths to structural MPRAGE images
        - 'asl': Paths to ASL images
        - 'm0': Paths to ASL M0 images

    Notes
    ------
    This dataset is composed of 2 sessions of 21 participants (11 males) at
    3T. Imaging modalities include MPRAGE, FLAIR, DTI, resting state fMRI,
    B0 and B1 field maps, ASL, VASO, quantitative T1 mapping, quantitative
    T2 mapping, and magnetization transfer imaging. For each session, we
    only download MPRAGE and ASL data.

    More details about this dataset can be found here :
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3020263
    http://mri.kennedykrieger.org/databases.html

    Paper to cite
    -------------
    `Multi-Parametric Neuroimaging Reproducibility: A 3T Resource Study
    <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3020263>`_
    Bennett. A. Landman, Alan J. Huang, Aliya Gifford, Deepti S. Vikram,
    Issel Anne L. Lim, Jonathan A.D. Farrell, John A. Bogovic, Jun Hua,
    Min Chen, Samson Jarso, Seth A. Smith, Suresh Joel, Susumu Mori,
    James J. Pekar, Peter B. Barker, Jerry L. Prince, and Peter C.M.
    van Zijl. NeuroImage. (2010) NIHMS/PMC:252138
    doi:10.1016/j.neuroimage.2010.11.047

    Licence
    -------
    `BIRN Data License
    <http://www.nbirn.net/bdr/Data_Use_Agreement_09_19_07-1.pdf>`_
    """
    if url is None:
        url = 'https://www.nitrc.org/frs/downloadlink.php/'

    # Preliminary checks and declarations
    dataset_name = 'kirby'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    subject_ids = np.array([
        '849', '934', '679', '906', '913', '142', '127', '742', '422',
        '815', '906', '239', '916', '959', '814', '505', '959', '492',
        '239', '142', '815', '679', '800', '916', '849', '814', '800',
        '656', '742', '113', '913', '502', '113', '127', '505', '502',
        '934', '492', '346', '656', '346', '422'])
    nitrc_ids = np.arange(2201, 2243)
    ids = np.arange(1, 43)

    # Group indices by session
    _, indices1 = np.unique(subject_ids, return_index=True)
    subject_ids1 = subject_ids[sorted(indices1)]
    nitrc_ids1 = nitrc_ids[sorted(indices1)]
    ids1 = ids[sorted(indices1)]

    tuple_indices = [np.where(subject_ids == s)[0] for s in subject_ids1]
    indices2 = [idx1 if idx1 not in indices1 else idx2
                for (idx1, idx2) in tuple_indices]
    subject_ids2 = subject_ids[indices2]
    nitrc_ids2 = nitrc_ids[indices2]
    ids2 = ids[indices2]

    # Check arguments
    max_subjects = len(subject_ids)
    if max(subjects) > max_subjects:
        warnings.warn('Warning: there are only {0} subjects'.format(
            max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    n_subjects = len(subjects)

    archives = [
        [url + '{0}/KKI2009-{1:02}.tar.bz2'.format(nitrc_id, id)
         for (nitrc_id, id) in zip(nitrc_ids1, ids1)],
        [url + '{0}/KKI2009-{1:02}.tar.bz2'.format(nitrc_id, id)
         for (nitrc_id, id) in zip(nitrc_ids2, ids2)]
    ]
    anat1 = [os.path.join('session1', subject,
                          'KKI2009-{0:02}-MPRAGE.nii'.format(i))
             for subject, i in zip(subject_ids1, ids1)]
    anat2 = [os.path.join('session2', subject,
                          'KKI2009-{0:02}-MPRAGE.nii'.format(i))
             for subject, i in zip(subject_ids2, ids2)]
    asl1 = [os.path.join('session1', subject,
                         'KKI2009-{0:02}-ASL.nii'.format(i))
            for subject, i in zip(subject_ids1, ids1)]
    asl2 = [os.path.join('session2', subject,
                         'KKI2009-{0:02}-ASL.nii'.format(i))
            for subject, i in zip(subject_ids2, ids2)]
    m01 = [os.path.join('session1', subject,
                        'KKI2009-{0:02}-ASLM0.nii'.format(i))
           for subject, i in zip(subject_ids1, ids1)]
    m02 = [os.path.join('session2', subject,
                        'KKI2009-{0:02}-ASLM0.nii'.format(i))
           for subject, i in zip(subject_ids2, ids2)]
    target = [
        [os.path.join('session1', subject,
                      'KKI2009-{0:02}.tar.bz2'.format(id))
         for (subject, id) in zip(subject_ids1, ids1)],
        [os.path.join('session2', subject,
                      'KKI2009-{0:02}.tar.bz2'.format(id))
         for (subject, id) in zip(subject_ids2, ids2)]
    ]
    anat = [anat1, anat2]
    asl = [asl1, asl2]
    m0 = [m01, m02]

    source_anat = []
    source_asl = []
    source_m0 = []
    source_archives = []
    session = []
    target_archives = []
    for i in sessions:
        if not (i in [1, 2]):
            raise ValueError('KIRBY dataset session id must be in [1, 2]')
        source_anat += [anat[i - 1][subject] for subject in subjects]
        source_asl += [asl[i - 1][subject] for subject in subjects]
        source_m0 += [m0[i - 1][subject] for subject in subjects]
        source_archives += [archives[i - 1][subject]
                            for subject in subjects]
        target_archives += [target[i - 1][subject] for subject in subjects]

        session += [i] * n_subjects

    # Dataset description
    fdescr = _get_dataset_descr(dataset_name)

    # Call fetch_files once per subject.
    asl = []
    m0 = []
    anat = []
    for anat_u, asl_u, m0_u, archive, target in zip(
            source_anat, source_asl, source_m0, source_archives,
            target_archives):
        n, a, m = _fetch_files(
            data_dir,
            [(anat_u, archive, {'uncompress': True, 'move': target}),
             (asl_u, archive, {'uncompress': True, 'move': target}),
             (m0_u, archive, {'uncompress': True, 'move': target})],
            verbose=verbose)
        anat.append(n)
        asl.append(a)
        m0.append(m)

    return Bunch(anat=anat, asl=asl, m0=m0, session=session,
                 description=fdescr)

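# Hypothetical usage sketch for the KIRBY fetcher above (assumes network
# access to NITRC; downloads MPRAGE, ASL and M0 for the first two subjects
# of session 1):
def _example_fetch_kirby():
    kirby = fetch_kirby(subjects=range(2), sessions=[1])
    print(kirby.anat)
    print(kirby.asl)
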
def fetch_openneuro_dataset(
        urls=None, data_dir=None, dataset_version='ds000030_R1.0.4',
        verbose=1):
    """Download openneuro bids dataset.

    Note: This function requires boto3 to be installed.

    Parameters
    ----------
    urls: list of string, optional
        Openneuro url list of dataset files to download. If not specified
        all files of the specified dataset will be downloaded.

    data_dir: string, optional
        Path to store the downloaded dataset. if None employ nilearn
        datasets default download directory.

    dataset_version: string, optional
        dataset version name. Assumes it is of the form [name]_[version].

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to downloaded dataset

    downloaded_files: list of string
        Absolute paths of downloaded files on disk
    """
    boto3 = _check_import_boto3("boto3")
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir,
                                verbose=verbose)

    # if urls are not specified we download the complete dataset index
    if urls is None:
        _, urls = fetch_openneuro_dataset_index(
            data_dir=data_dir, dataset_version=dataset_version,
            verbose=verbose)

    # The files_spec needed for _fetch_files
    files_spec = []
    files_dir = []
    for url in urls:
        url_path = url.split(data_prefix + '/')[1]
        file_dir = os.path.join(data_dir, url_path)
        files_spec.append((os.path.basename(file_dir), url, {}))
        files_dir.append(os.path.dirname(file_dir))

    # download the files
    downloaded = []
    for file_spec, file_dir in zip(files_spec, files_dir):
        # Timeout errors are common in the s3 connection so we try to avoid
        # failure of the dataset download for a transient instability
        success = False
        download_attempts = 4
        while download_attempts > 0 and not success:
            try:
                downloaded_files = _fetch_files(
                    file_dir, [file_spec], resume=True, verbose=verbose)
                downloaded += downloaded_files
                success = True
            except Exception:
                download_attempts -= 1
        if not success:
            raise Exception('multiple failures downloading %s'
                            % file_spec[1])

    return data_dir, sorted(downloaded)

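# Hypothetical usage sketch for the dataset fetcher above (assumes boto3 is
# installed and network access is available; the subject label used in the
# filter is illustrative, any substring of the index urls works):
def _example_fetch_openneuro_subset():
    _, urls = fetch_openneuro_dataset_index()
    urls = [url for url in urls if 'sub-10159' in url]
    data_dir, files = fetch_openneuro_dataset(urls=urls)
    print('%d files under %s' % (len(files), data_dir))
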
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id="sub001", verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then
        will simply glob it.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_auditory_data():
        """glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        subject_data = {}
        for file_name in SPM_AUDITORY_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path):
                subject_data[file_name] = file_path
            else:
                print("%s missing from filelist!" % file_name)
                return None

        _subject_data = {}
        _subject_data["func"] = sorted(
            [subject_data[x] for x in subject_data.keys()
             if re.match(r"^fM00223_0\d\d\.img$", os.path.basename(x))])

        # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
        for x in _subject_data["func"]:
            vol = nibabel.load(x)
            if len(vol.shape) == 4:
                vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                          vol.get_affine())
                nibabel.save(vol, x)

        _subject_data["anat"] = [
            subject_data[x] for x in subject_data.keys()
            if re.match(r"^sM00223_002\.img$", os.path.basename(x))][0]

        # ... same thing for anat
        vol = nibabel.load(_subject_data["anat"])
        if len(vol.shape) == 4:
            vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                      vol.get_affine())
            nibabel.save(vol, _subject_data["anat"])

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data ?
    data = _glob_spm_auditory_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)
    return _glob_spm_auditory_data()

def load_acpi(data_dir, site_id='all', read=False, verbose=1):
    """Load ACPI data (timeseries) extracted using the MSDL atlas
    + compcor=10.

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'ACPI'.

    site_id : int or list of int, optional (default='all')
        Site id(s) to load, among [3, 9, 20, 190, 1, 5] (6 sites in
        total). By default ('all'), data of all sites are returned.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each
            subject.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        subject_ids : list of int
            Subject ids.
        dx_groups : list of int
            DX_GROUP of each subject (1 - MJ user, 0 - no MJ).
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    VALID_IDS = [3, 9, 20, 190, 1, 5]
    dataset_name = 'ACPI'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'mta_1_phenotypic_data.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = []

    if site_id == 'all':
        site_id = VALID_IDS
    if not isinstance(site_id, (list, tuple)):
        site_id = [site_id, ]

    for this_id in site_id:
        if not isinstance(this_id, int) or this_id not in VALID_IDS:
            raise ValueError('An invalid site_id={0} is provided. '
                             'Valid site ids are: {1}'
                             .format(this_id, VALID_IDS))
        file_ids = phenotypic_data[phenotypic_data['SITE_ID'] == this_id]
        file_ids = file_ids['SUBID'].values
        for this_file_id in file_ids:
            filepath = glob.glob(os.path.join(
                data_dir, str(this_file_id) + '_timeseries.csv'))
            paths.extend(filepath)

    if read:
        timeseries_data = []
        subject_ids = []
        dx_groups = []
        for path in paths:
            filename = os.path.splitext(os.path.split(path)[1])[0]
            this_id = int(filename.split('_timeseries')[0])
            subject_ids.append(this_id)
            data = pd.read_csv(path)
            # The first (index) column of the CSV is unnamed; drop it.
            data = data.drop('Unnamed: 0', axis=1)
            timeseries_data.append(np.asarray(data))
            this_group = phenotypic_data[
                phenotypic_data['SUBID'] == this_id]['MJUser']
            dx_groups.append(this_group.values[0])
        return Bunch(timeseries_data=timeseries_data,
                     subject_ids=subject_ids,
                     dx_groups=dx_groups,
                     phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
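# Example usage of load_acpi (a sketch; '/path/to/data' is a placeholder,
# and the ACPI CSVs are assumed to already sit under
# <data_dir>/ACPI/timeseries). With read=True the arrays can go straight
# into nilearn's ConnectivityMeasure:
#
#     from nilearn.connectome import ConnectivityMeasure
#
#     acpi = load_acpi('/path/to/data', site_id=[3, 9], read=True)
#     conn = ConnectivityMeasure(kind='correlation')
#     matrices = conn.fit_transform(acpi.timeseries_data)
#     print(matrices.shape)    # (n_subjects, n_regions, n_regions)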
def load_cobre(data_dir, read=False, verbose=1):
    """Load COBRE data (timeseries) extracted using the MSDL atlas
    + (compcor=10 and motion regressors).

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'COBRE'.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each scan.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        file_ids : list of str
            File ids of each scan.
        dx_groups : list of str
            DX_GROUP of each scan (Schizophrenia, Control, Bipolar or
            Schizoaffective), or 'Did not match' when the id is absent
            from the phenotypic file.
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    dataset_name = 'COBRE'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(
        data_dir, '1139_Cobre_Neuropsych_V2_20160607.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = glob.glob(os.path.join(data_dir, '*.csv'))

    if read:
        timeseries_data = []
        file_ids = []
        dx_groups = []
        for path in paths:
            filename = os.path.splitext(os.path.split(path)[1])[0]
            this_id = filename.split('_timeseries')[0]
            file_ids.append(this_id)
            data = pd.read_csv(path)
            timeseries_data.append(np.asarray(data))
            # The phenotypic CSV has no headers for its first two columns:
            # 'Unnamed: 0' holds the scan id, 'Unnamed: 1' the diagnostic
            # group.
            this_group = phenotypic_data[
                (phenotypic_data['Unnamed: 0'] == this_id)]['Unnamed: 1']
            if np.any(this_group):
                dx_groups.append(this_group.values[0])
            else:
                dx_groups.append('Did not match')
        return Bunch(timeseries_data=timeseries_data,
                     file_ids=file_ids,
                     dx_groups=dx_groups,
                     phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
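# Example usage of load_cobre (a sketch; '/path/to/data' is a placeholder).
# The dx_groups labels can be used to split the scans by diagnosis:
#
#     cobre = load_cobre('/path/to/data', read=True)
#     controls = [ts for ts, dx in zip(cobre.timeseries_data,
#                                      cobre.dx_groups)
#                 if dx == 'Control']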
def load_camcan(data_dir, session_id='all', read=False, verbose=1):
    """Load CamCAN data (timeseries) extracted using the MSDL atlas
    + (compcor=10 and motion regressors).

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'camcan'.

    session_id : int or list of int, optional (default='all')
        Session id(s) to load, among [1, 2, 3, 4]. By default ('all'),
        data of all sessions are returned.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each
            subject.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        subject_ids : list of str
            Subject ids.
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    VALID_IDS = [1, 2, 3, 4]
    dataset_name = 'camcan'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'participant_data.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = []
    timeseries_data = []
    subject_ids = []
    session_names = {1: 'cbuid280_sess1', 2: 'cbuid280_sess2',
                     3: 'cbuid280_sess3', 4: 'cbuid280_sess4'}

    if session_id == 'all':
        session_id = VALID_IDS
    if not isinstance(session_id, (list, tuple)):
        session_id = [session_id, ]

    for this_id in session_id:
        if not isinstance(this_id, int) or this_id not in VALID_IDS:
            raise ValueError('An invalid session_id={0} is provided. '
                             'Valid session ids are: {1}'
                             .format(this_id, VALID_IDS))
        session_name = session_names[this_id]
        this_id_data = phenotypic_data[session_name]
        this_id_data = this_id_data[this_id_data.notnull()]
        for index in this_id_data.index.values:
            observation_id = phenotypic_data[
                (phenotypic_data[session_name] ==
                 this_id_data[index])]['Observations']
            filepath = glob.glob(os.path.join(
                data_dir,
                'sub-' + observation_id[index] + '_timeseries.csv'))
            if len(filepath) != 0:
                if read:
                    subject_ids.append(observation_id[index])
                    this_index_data = pd.read_csv(filepath[0])
                    # Drop the unnamed index column and convert to a
                    # numpy array, as documented above.
                    this_index_data = this_index_data.drop('Unnamed: 0',
                                                           axis=1)
                    timeseries_data.append(np.asarray(this_index_data))
                else:
                    paths.extend(filepath)

    if read:
        return Bunch(timeseries_data=timeseries_data,
                     subject_ids=subject_ids,
                     phenotypic_data=pd.read_csv(phenotypic_path))
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
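# Example usage of load_camcan (a sketch; '/path/to/data' is a
# placeholder). Sessions can be requested individually or all at once:
#
#     camcan = load_camcan('/path/to/data', session_id=[1, 2], read=True)
#     print(len(camcan.subject_ids), 'scans matched for sessions 1 and 2')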
def load_adnidod(data_dir, read=False, verbose=1):
    """Load ADNIDOD data (timeseries) extracted using the MSDL atlas
    + (compcor=10 and motion regressors).

    Parameters
    ----------
    data_dir : str
        Path to data. Base directory; it should contain a folder named
        'ADNIDOD'.

    read : bool
        Whether to read the CSV files with pandas.

    verbose : int
        Verbosity level.

    Returns
    -------
    data : Bunch
        If read == False:
        timeseries_paths : list of str
            Paths to the CSV files containing the timeseries of each scan.
        phenotypic_path : str
            Path to the CSV file containing the phenotypic data.

        If read == True:
        timeseries_data : list of numpy arrays
            Timeseries read with pandas and converted to numpy arrays,
            compatible with nilearn and ConnectivityMeasure.
        scan_ids : list of str
            Scan ids.
        dx_groups : list of int
            Diagnosis of each scan (1 - PTSD, 0 - Control).
        phenotypic_data : pandas.DataFrame
            Loaded phenotypic data.
    """
    dataset_name = 'ADNIDOD'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    phenotypic_path = os.path.join(data_dir, 'adnidod_demographic.csv')
    phenotypic_data = pd.read_csv(phenotypic_path)

    timeseries_name = 'timeseries'
    data_dir = _get_dataset_dir(timeseries_name, data_dir=data_dir,
                                verbose=verbose)
    paths = glob.glob(os.path.join(data_dir, '*.csv'))

    if read:
        timeseries_data = []
        scan_ids = []
        dx_groups = []
        for path in paths:
            filename = os.path.splitext(os.path.split(path)[1])[0]
            this_id = filename.split('_timeseries')[0]
            scan_ids.append(this_id)
            data = pd.read_csv(path)
            # The first (index) column of the CSV is unnamed; drop it.
            data = data.drop('Unnamed: 0', axis=1)
            timeseries_data.append(np.asarray(data))
            this_group = phenotypic_data[
                phenotypic_data['ID_scan'] == this_id]['diagnosis']
            dx_groups.append(this_group.values[0])
        return Bunch(timeseries_data=timeseries_data,
                     scan_ids=scan_ids,
                     dx_groups=dx_groups,
                     phenotypic_data=phenotypic_data)
    else:
        return Bunch(timeseries_paths=paths,
                     phenotypic_path=phenotypic_path)
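# Example usage of load_adnidod (a sketch; '/path/to/data' is a
# placeholder). With read=True the result is classification-ready:
# timeseries as samples, dx_groups as labels (1 - PTSD, 0 - Control):
#
#     adnidod = load_adnidod('/path/to/data', read=True)
#     X, y = adnidod.timeseries_data, adnidod.dx_groups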
def fetch_spm_multimodal_fmri(data_dir=None, data_name="spm_multimodal_fmri",
                              subject_id="sub001", verbose=1):
    """Fetcher for the Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, it
        will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_multimodal_fmri_data():
        """Glob data from subject_dir."""
        _subject_data = {'slice_order': 'descending'}

        for s in range(2):
            # glob func data for session s + 1
            session_func = sorted(glob.glob(
                os.path.join(
                    subject_dir,
                    ("fMRI/Session%i/fMETHODS-000%i-*-01.img" % (
                        s + 1, s + 5)))))
            if len(session_func) < 390:
                print("Missing %i functional scans for session %i." % (
                    390 - len(session_func), s + 1))
                return None
            _subject_data['func%i' % (s + 1)] = session_func

            # glob trials .mat file
            sess_trials = os.path.join(
                subject_dir, "fMRI/trials_ses%i.mat" % (s + 1))
            if not os.path.isfile(sess_trials):
                print("Missing session file: %s" % sess_trials)
                return None
            _subject_data['trials_ses%i' % (s + 1)] = sess_trials

        # glob for anat data
        anat = os.path.join(subject_dir, "sMRI/smri.img")
        if not os.path.isfile(anat):
            print("Missing structural image.")
            return None
        _subject_data["anat"] = anat

        return Bunch(**_subject_data)

    # Maybe data_dir already contains the data?
    data = _glob_spm_multimodal_fmri_data()
    if data is not None:
        return data

    # No. Download the data.
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),
        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]
    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except Exception:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir,
                                             data_name="",
                                             subject_id=subject_id)

    return _glob_spm_multimodal_fmri_data()
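# Example usage of fetch_spm_multimodal_fmri (a sketch; the first call
# downloads and uncompresses the two zip archives, later calls glob the
# cached copy):
#
#     data = fetch_spm_multimodal_fmri()
#     print(len(data.func1), len(data.func2))   # 390 scans per session
#     print(data.anat, data.trials_ses1)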