def test_get_data_dir(tmpdir):
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/modl_data')
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_data')
    os.environ['MODL_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_shared_data')
    os.environ['MODL_SHARED_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_data')
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)
    data_dir = get_data_dirs(expected_base_dir)[0]
    assert_equal(data_dir, expected_base_dir)
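For context, get_data_dirs itself is not shown on this page. Below is a minimal sketch consistent with the test above; the MODL_DATA / MODL_SHARED_DATA variables and the ~/modl_data default come from the test, while the exact lookup order is an assumption and the real modl implementation may differ.

import os

def get_data_dirs(data_dir=None):
    # Candidate data directories, highest priority first: an explicit
    # data_dir argument, then MODL_SHARED_DATA, then MODL_DATA, then the
    # per-user default ~/modl_data.
    paths = []
    if data_dir is not None:
        paths.extend(str(data_dir).split(os.pathsep))
    for env_var in ('MODL_SHARED_DATA', 'MODL_DATA'):
        if env_var in os.environ:
            paths.extend(os.environ[env_var].split(os.pathsep))
    paths.append(os.path.expanduser('~/modl_data'))
    return paths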
def load_netflix():
    # Load the pre-pickled Netflix Prize train / test data from the first
    # modl data directory.
    data_home = get_data_dirs()[0]
    path = os.path.join(data_home, "nf_prize", "X_tr.pkl")
    X_tr = load(path)
    path = os.path.join(data_home, "nf_prize", "X_te.pkl")
    X_te = load(path)
    return X_tr, X_te
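A hypothetical call, assuming the nf_prize pickles have already been placed under the data directory; the shape attribute assumes the pickles hold array-like matrices.

X_tr, X_te = load_netflix()
print(X_tr.shape, X_te.shape)  # train / test interaction matrices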
Example #3 (file: adhd.py, project: YW81/modl)
def fetch_adhd(n_subjects=40, data_dir=None,
               url=None, resume=True,
               modl_data_dir=None,
               mask_url=None,
               verbose=1):
    dataset = nilearn_fetch_adhd(n_subjects=n_subjects,
                                 data_dir=data_dir, url=url, resume=resume,
                                 verbose=verbose)
    # Walk up from the first functional image path until the 'adhd'
    # directory is reached; that directory is the dataset root.
    root_dir = dataset.func[0]
    tail_dir = ''
    while tail_dir != 'adhd':
        root_dir, tail_dir = os.path.split(root_dir)
    root_dir = os.path.join(root_dir, tail_dir)

    modl_data_dir = get_data_dirs(modl_data_dir)[0]
    mask_data_dir = join(modl_data_dir, 'adhd')
    if mask_url is None:
        mask_url = 'http://amensch.fr/data/adhd/mask_img.nii.gz'
    _fetch_file(mask_url, mask_data_dir, resume=resume)
    mask_img = join(mask_data_dir, 'mask_img.nii.gz')
    # Index the phenotypic information by numeric subject id.
    behavioral = pd.DataFrame(dataset.phenotypic)
    behavioral.loc[:, 'Subject'] = pd.to_numeric(behavioral.loc[:, 'Subject'])
    behavioral.set_index('Subject', inplace=True)
    behavioral.index.names = ['subject']
    rest = pd.DataFrame(data=list(zip(dataset.func, dataset.confounds)),
                        columns=['filename', 'confounds'],
                        index=behavioral.index)
    return Bunch(rest=rest,
                 behavioral=behavioral, description=dataset.description,
                 mask=mask_img, root=root_dir)
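A hypothetical call of the fetcher above; two subjects keeps the example small, and it assumes network access for both the nilearn ADHD download and the amensch.fr mask URL.

adhd = fetch_adhd(n_subjects=2)
print(adhd.rest.head())   # filenames and confounds, indexed by subject
print(adhd.mask)          # path to the downloaded mask_img.nii.gz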
def load_movielens(version):
    data_home = get_data_dirs()[0]

    if version == "100k":
        path = os.path.join(data_home, "movielens100k", "movielens100k.pkl")
    elif version == "1m":
        path = os.path.join(data_home, "movielens1m", "movielens1m.pkl")
    elif version == "10m":
        path = os.path.join(data_home, "movielens10m", "movielens10m.pkl")
    else:
        raise ValueError("Invalid version of movielens.")

    # FIXME: make downloader
    if not os.path.exists(path):
        raise ValueError("Dowload dataset using 'make download-movielens%s' at"
                         " project root." % version)

    X = load(path)
    return X
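A hypothetical usage of load_movielens; if the pickle is missing, the ValueError points at the make target to run first.

try:
    X = load_movielens("100k")
except ValueError as exc:
    print(exc)  # e.g. "Download the dataset using 'make download-movielens100k' ..."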
def load_image(source,
               scale=1,
               gray=False,
               memory=Memory(cachedir=None)):
    data_dir = get_data_dirs()[0]
    if source == 'face':
        image = face(gray=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'lisboa':
        image = imread(join(data_dir, 'images', 'lisboa.jpg'), as_grey=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'aviris':
        image = open_image(
            join(data_dir,
                 'aviris',
                 'f100826t01p00r05rdn_b/'
                 'f100826t01p00r05rdn_b_sc01_ort_img.hdr'))
        image = np.array(image.open_memmap(), dtype=np.float32)
        good_bands = list(range(image.shape[2]))
        good_bands.remove(110)
        image = image[:, :, good_bands]
        indices = image == -50
        image[indices] = -1
        image[~indices] -= np.min(image[~indices])
        image[~indices] /= np.max(image[~indices])
        return image
    else:
        raise ValueError('Data source is not known')
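A hypothetical call using the 'face' source, which relies only on scipy's bundled sample image and therefore needs no files in the data directory.

img = load_image('face', gray=True)
print(img.shape, img.dtype)  # roughly (768, 1024, 1), float32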
def compute_components(n_components, batch_size, learning_rate, method,
                       reduction, alpha, step_size, n_jobs, n_epochs, verbose,
                       source, _run):
    basedir = join(_run.observers[0].basedir, str(_run._id))
    artifact_dir = join(basedir, 'artifacts')
    if not os.path.exists(artifact_dir):
        os.makedirs(artifact_dir)

    if source == 'hcp':
        # Hack to recover data from TSP
        train_size = None
        smoothing_fwhm = 3
        test_size = 2
        data_dir = get_data_dirs()[0]
        mask = fetch_hcp_mask()
        masker = MultiRawMasker(mask_img=mask,
                                smoothing_fwhm=smoothing_fwhm,
                                detrend=True,
                                standardize=True)
        with open(join(data_dir, 'HCP_unmasked', 'mapping.json'), 'r') as f:
            mapping = json.load(f)
        data = sorted(list(mapping.values()))
        data = list(map(lambda x: join(data_dir, x), data))
        data = pd.DataFrame(data, columns=['filename'])
    else:
        smoothing_fwhm = 6
        train_size = 4
        test_size = 4
        raw_res_dir = join(get_output_dir(), 'unmasked', source)
        try:
            masker, data = get_raw_rest_data(raw_res_dir)
        except ValueError:  # On local machine:
            raw_res_dir = join(get_output_dir(), 'unmask', source)
            masker, data = get_raw_rest_data(raw_res_dir)

    train_imgs, test_imgs = train_test_split(data,
                                             test_size=test_size,
                                             random_state=0,
                                             train_size=train_size)
    train_imgs = train_imgs['filename'].values
    test_imgs = test_imgs['filename'].values

    cb = rfMRIDictionaryScorer(test_imgs, info=_run.info)
    dict_fact = fMRIDictFact(
        method=method,
        mask=masker,
        verbose=verbose,
        n_epochs=n_epochs,
        n_jobs=n_jobs,
        random_state=1,
        n_components=n_components,
        smoothing_fwhm=smoothing_fwhm,
        learning_rate=learning_rate,
        batch_size=batch_size,
        reduction=reduction,
        step_size=step_size,
        alpha=alpha,
        callback=cb,
    )
    dict_fact.fit(train_imgs)
    dict_fact.components_img_.to_filename(
        join(artifact_dir, 'components.nii.gz'))
    fig = plt.figure()
    display_maps(fig, dict_fact.components_img_)
    plt.savefig(join(artifact_dir, 'components.png'))

    fig, ax = plt.subplots(1, 1)
    ax.plot(cb.cpu_time, cb.score, marker='o')
    _run.info['time'] = cb.cpu_time
    _run.info['score'] = cb.score
    _run.info['iter'] = cb.iter
    plt.savefig(join(artifact_dir, 'score.png'))