Example #1
def main(mask: str = 'hcp'):
    masked_dir = join(get_data_dir(), 'masked_%s' % mask)
    reduced_dir = join(get_data_dir(), 'reduced_512_%s' % mask)
    mask_all(output_dir=masked_dir, n_jobs=30, mask=mask)
    reduce_all(output_dir=reduced_dir,
               masked_dir=masked_dir,
               n_jobs=30,
               mask=mask)
Example #2
def fetch_reduced_loadings(data_dir=None,
                           url=None,
                           verbose=False,
                           resume=True):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/loadings/'

    data_dir = get_data_dir(data_dir)
    dataset_name = 'loadings'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    keys = STUDY_LIST

    paths = ['data_%s.pt' % key for key in keys]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    params = {key: file for key, file in zip(keys, files)}

    fdescr = (
        "Z-statistic loadings over a dictionary of 453 components covering "
        "grey-matter `modl_atlas['components_512_gm']` "
        "for 35 different task fMRI studies.")

    params['description'] = fdescr
    params['data_dir'] = data_dir

    return params
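A minimal usage sketch for the fetcher above. The returned dictionary maps each study key in STUDY_LIST to the cached data file, plus 'description' and 'data_dir' entries; the 'archi' key used here is an assumption about the study list.

# Hypothetical usage (requires network access on the first call).
loadings = fetch_reduced_loadings()
print(loadings['description'])
print(loadings['data_dir'])
archi_path = loadings['archi']   # assumed study key; one path per STUDY_LIST entry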
Example #3
def fetch_hcp(data_dir=None, n_subjects=None, subjects=None, from_file=True):
    data_dir = get_data_dir(data_dir)
    BASE_CONTRASTS = [
        'FACES',
        'SHAPES',
        'PUNISH',
        'REWARD',
        'MATH',
        'STORY',
        'MATCH',
        'REL',
        'RANDOM',
        'TOM',
        'LF',
        'RF',
        'LH',
        'RH',
        'CUE',
        '0BK_BODY',
        '0BK_FACE',
        '0BK_PLACE',
        '0BK_TOOL',
        '2BK_BODY',
        '2BK_FACE',
        '2BK_PLACE',
        '2BK_TOOL',
    ]

    source_dir = join(data_dir, 'HCP900')
    if not os.path.exists(source_dir):
        raise ValueError('Please ensure that %s contains all required data.' %
                         source_dir)
    res = hcp_builder_fetch_hcp(data_dir=source_dir,
                                n_subjects=n_subjects,
                                from_file=from_file,
                                subjects=subjects,
                                on_disk=True)
    rest = res.rest.assign(confounds=[None] * res.rest.shape[0])
    task = res.task.assign(confounds=[None] * res.task.shape[0])

    task.sort_index(inplace=True)
    rest.sort_index(inplace=True)

    # Make it compatible with the other studies
    contrasts = res.contrasts.loc[idx[:, :, BASE_CONTRASTS, :], :]
    contrasts = contrasts[['z_map']]
    # Replace symlink
    contrasts['z_map'] = contrasts['z_map'].map(
        lambda x: x.replace('/storage/store/data/HCP900', source_dir))
    contrasts.reset_index(inplace=True)
    contrasts['study'] = 'hcp'
    contrasts.set_index(['study', 'subject', 'task', 'contrast', 'direction'],
                        inplace=True)
    contrasts.sort_index(inplace=True)
    return Bunch(rest=rest,
                 contrasts=contrasts,
                 task=task,
                 behavioral=res.behavioral,
                 mask=res.mask,
                 root=res.root)
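A hedged usage sketch for fetch_hcp, assuming the HCP900 directory is already populated on disk ('/path/to/data' is a placeholder). The contrasts table is indexed by (study, subject, task, contrast, direction).

# Hypothetical call with a placeholder data directory.
hcp = fetch_hcp(data_dir='/path/to/data', n_subjects=2)
contrasts = hcp.contrasts
print(contrasts.index.names)   # ['study', 'subject', 'task', 'contrast', 'direction']
math_maps = contrasts.xs('MATH', level='contrast')['z_map']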
Example #4
def fetch_la5c(data_dir=None):
    data_dir = get_data_dir(data_dir)
    source_dir = join(data_dir, 'la5c', 'ds000030', 'glm')
    if not os.path.exists(source_dir):
        raise ValueError('Please ensure that %s contains all required data.' %
                         source_dir)
    z_maps = glob.glob(join(source_dir, '*/*/*', 'z_*.nii.gz'))
    subjects = []
    contrasts = []
    tasks = []
    filtered_z_maps = []
    for z_map in z_maps:
        dirname, contrast = os.path.split(z_map)
        contrast = contrast[2:-7]
        dirname, _ = os.path.split(dirname)
        dirname, task = os.path.split(dirname)
        dirname, subject = os.path.split(dirname)
        subject = int(subject[-3:])
        subjects.append(subject)
        contrasts.append(contrast)
        tasks.append(task)
        filtered_z_maps.append(z_map)
    df = pd.DataFrame(
        data={
            'study': 'la5c',
            'subject': subjects,
            'task': tasks,
            'contrast': contrasts,
            'direction': 'level1',
            'z_map': filtered_z_maps,
        })
    df.set_index(['study', 'subject', 'task', 'contrast', 'direction'],
                 inplace=True)
    df.sort_index(inplace=True)
    return df
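The fetchers in this family (fetch_la5c here, and fetch_archi, fetch_camcan, fetch_brainomics below) all return a DataFrame with the same (study, subject, task, contrast, direction) MultiIndex. A short sketch of how such a frame is typically consumed, with a placeholder path:

df = fetch_la5c(data_dir='/path/to/data')
print(df.index.names)       # ['study', 'subject', 'task', 'contrast', 'direction']
print(df['z_map'].head())   # one z-map path per row
subjects = df.index.get_level_values('subject').unique()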
Example #5
def fetch_mask(data_dir=None, url=None, resume=True, verbose=1):
    if url is None:
        url = 'http://cogspaces.github.io/assets/data/hcp_mask.nii.gz'
    files = [('hcp_mask.nii.gz', url, {})]

    dataset_name = 'mask'
    data_dir = get_data_dir(data_dir)
    dataset_dir = _get_dataset_dir(dataset_name,
                                   data_dir=data_dir,
                                   verbose=verbose)
    files = _fetch_files(dataset_dir, files, resume=resume, verbose=verbose)
    return files[0]
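The return value is a single path to hcp_mask.nii.gz, which can be fed directly to a nilearn masker. A sketch, assuming nilearn is installed and the download succeeds:

# Newer nilearn versions expose the same class as nilearn.maskers.NiftiMasker.
from nilearn.input_data import NiftiMasker

mask_path = fetch_mask()
masker = NiftiMasker(mask_img=mask_path).fit()
# masker.transform(img) then yields a (n_samples, n_voxels) array restricted
# to the grey-matter mask.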
Example #6
def load_from_directory(dataset, data_dir=None):
    data_dir = get_data_dir(data_dir)
    dataset_dir = join(data_dir, dataset)
    Xs, ys = {}, {}
    regex = re.compile(r'data_(.*).pt')
    for file in os.listdir(dataset_dir):
        m = regex.match(file)
        if m is not None:
            study = m.group(1)
            Xs[study], ys[study] = load(join(dataset_dir, file))
    ys = add_study_contrast(ys)
    return Xs, ys
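A usage sketch, assuming a 'reduced_512' directory of per-study data files exists under the data directory and that loadings and targets both expose a shape attribute (array-like and DataFrame, respectively).

Xs, ys = load_from_directory('reduced_512')
for study in sorted(Xs):
    print(study, Xs[study].shape, ys[study].shape)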
Example #7
def baseline():
    system = dict(
        device=-1,
        seed=0,
        verbose=100,
    )
    data = dict(source_dir=join(get_data_dir(), 'reduced_512_icbm_gm'),
                studies='archi')
    model = dict(
        normalize=True,
        estimator='logistic',
        max_iter=10000,
    )
    logistic = dict(l2_penalty=1e-6, )
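baseline() (like default() and factored() below) reads as a Sacred config function: every local variable becomes a configuration entry once the function is attached to an experiment. A minimal sketch of that wiring, under that assumption; the experiment name is hypothetical and the sacred package is required.

from sacred import Experiment

exp = Experiment('baseline_logistic')
exp.config(baseline)   # registers system, data, model, logistic as config


@exp.automain
def run(model, logistic):
    # Sacred injects config entries by parameter name.
    print(model['estimator'], logistic['l2_penalty'])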
Example #8
def get_study_info():
    input_data, targets = load_reduced_loadings(data_dir=get_data_dir())
    targets = pd.concat(targets.values(), axis=0)
    targets['#subjects'] = targets.groupby(
        by=['study', 'task', 'contrast'])['subject'].transform('nunique')
    targets['#contrasts_per_task'] = targets.groupby(
        by=['study', 'task'])['contrast'].transform('nunique')
    targets['#contrasts_per_study'] = targets.groupby(
        by='study')['contrast'].transform('nunique')
    targets['chance_study'] = 1 / targets['#contrasts_per_study']
    targets['chance_task'] = 1 / targets['#contrasts_per_task']
    citations = _get_citations()
    targets = pd.merge(targets, citations, on='study', how='left')
    targets = targets.groupby(
        by=['study', 'task', 'contrast']).first().sort_index().drop(
            columns='index').reset_index()

    targets['study__task'] = targets.apply(
        lambda x: f'{x["study"]}__{x["task"]}', axis='columns')
    targets['name_task'] = targets.apply(
        lambda x: f'[{x["bibkey"]}] {x["task"]}', axis='columns')

    def apply(x):
        comment = x['comment'].iloc[0]
        if comment != '':
            tasks = comment
            tasks_lim = comment
        else:
            tasks_list = x['task'].unique()
            tasks = ' & '.join(tasks_list)
            if len(tasks) > 50:
                tasks_lim = tasks_list[0] + ' & ...'
            else:
                tasks_lim = tasks
        name = f'[{x["bibkey"].iloc[0]}] {tasks_lim}'
        latex_name = (f'\\cite{{{x["citekey"].iloc[0]}}} {tasks}'
                      .replace('&', '\\&'))
        name = pd.DataFrame(data={
            'name': name,
            'latex_name': latex_name
        },
                            index=x.index)
        return name

    name = targets.groupby(by='study').apply(apply)
    targets = pd.concat([targets, name], axis=1)
    return targets
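The chance-level columns above rely on groupby(...).transform('nunique'), which counts distinct values within each group and broadcasts the count back onto every row of that group. A small self-contained illustration with toy data (not from the corpus):

import pandas as pd

toy = pd.DataFrame({'study': ['a', 'a', 'a', 'b'],
                    'contrast': ['c1', 'c2', 'c3', 'c1']})
toy['#contrasts_per_study'] = (toy.groupby('study')['contrast']
                               .transform('nunique'))
toy['chance_study'] = 1 / toy['#contrasts_per_study']
print(toy)
# Rows of study 'a' get #contrasts_per_study == 3, hence chance_study == 1/3.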
Example #9
def fetch_archi(data_dir=None):
    INTERESTING_CONTRASTS = [
        "expression_control", "expression_intention", "expression_sex",
        "face_control", "face_sex", "face_trusty", "audio", "calculaudio",
        "calculvideo", "clicDaudio", "clicDvideo", "clicGaudio", "clicGvideo",
        "computation", "damier_H", "damier_V", "object_grasp",
        "object_orientation", "rotation_hand", "rotation_side", "saccade",
        "motor-cognitive", "false_belief_audio", "false_belief_video",
        "mecanistic_audio", "mecanistic_video", "non_speech", "speech",
        "triangle_intention", "triangle_random"
    ]

    data_dir = get_data_dir(data_dir)
    source_dir = join(data_dir, 'archi', 'glm')
    if not os.path.exists(source_dir):
        raise ValueError('Please ensure that %s contains all required data.' %
                         source_dir)
    z_maps = glob.glob(join(source_dir, '*/*/*', 'z_*.nii.gz'))
    subjects = []
    contrasts = []
    tasks = []
    filtered_z_maps = []
    for z_map in z_maps:
        dirname, contrast = os.path.split(z_map)
        contrast = contrast[2:-7]
        if contrast in INTERESTING_CONTRASTS:
            dirname, _ = os.path.split(dirname)
            dirname, task = os.path.split(dirname)
            dirname, subject = os.path.split(dirname)
            subject = int(subject[-3:])
            subjects.append(subject)
            contrasts.append(contrast)
            tasks.append(task)
            filtered_z_maps.append(z_map)
    df = pd.DataFrame(
        data={
            'subject': subjects,
            'task': tasks,
            'contrast': contrasts,
            'direction': 'level1',
            'study': 'archi',
            'z_map': filtered_z_maps,
        })
    df.set_index(['study', 'subject', 'task', 'contrast', 'direction'],
                 inplace=True)
    df.sort_index(inplace=True)
    return df
Example #10
def fetch_atlas_modl(data_dir=None, url=None, resume=True, verbose=1):
    """Download and load a multi-scale atlas computed using MODL over HCP900.

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a
        non-standard location. Default: None (use the default location).
    url: string, optional
        Download URL of the dataset. Overrides the default URL.
    """

    if url is None:
        url = 'http://cogspaces.github.io/assets/data/modl/'

    data_dir = get_data_dir(data_dir)
    dataset_name = 'modl'
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    keys = [
        'components_64', 'components_128', 'components_453_gm',
        'loadings_128_gm'
    ]

    paths = [
        'components_64.nii.gz',
        'components_128.nii.gz',
        'components_453_gm.nii.gz',
        'loadings_128_gm.npy',
    ]
    urls = [url + path for path in paths]
    files = [(path, url, {}) for path, url in zip(paths, urls)]

    files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)

    params = {key: file for key, file in zip(keys, files)}

    fdescr = ('Components computed using the MODL package, at various '
              'scales, from HCP900 data.')

    params['description'] = fdescr
    params['data_dir'] = data_dir

    return Bunch(**params)
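A hedged usage sketch for fetch_atlas_modl (requires nilearn and network access on the first call); the Bunch entries are paths to the downloaded component images and loadings.

from nilearn import plotting

modl_atlas = fetch_atlas_modl()
print(modl_atlas.description)
plotting.plot_prob_atlas(modl_atlas['components_128'],
                         title='MODL atlas, 128 components')
plotting.show()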
Example #11
def default():
    seed = 0
    system = dict(
        device=-1,
        verbose=100,
    )
    data = dict(source_dir=join(get_data_dir(), 'reduced_512_lstsq'),
                studies=['archi', 'hcp'])
    model = dict(
        normalize=True,
        estimator='factored',
        max_iter=100,
    )
    factored = dict(
        shared_embedding_size=100,
        batch_size=32,
        dropout=0.75,
        lr=1e-3,
        input_dropout=0.,
    )
    logistic = dict(l2_penalty=1e-3, )
Example #12
def factored():
    system = dict(
        device=-1,
        seed=0,
        verbose=50,
    )
    data = dict(source_dir=join(get_data_dir(), 'reduced_512'), studies='all')

    model = dict(
        normalize=True,
        estimator='factored',
        study_weight='study',
        max_iter=500,
    )
    factored = dict(
        shared_embedding_size=100,
        batch_size=32,
        dropout=0.75,
        lr=1e-3,
        input_dropout=0.25,
    )
Example #13
def fetch_camcan(data_dir=None):
    data_dir = get_data_dir(data_dir)
    source_dir = join(data_dir, 'camcan', 'camcan_smt_maps')
    if not os.path.exists(source_dir):
        raise ValueError('Please ensure that %s contains all required data.' %
                         source_dir)
    z_maps = glob.glob(join(source_dir, '*', '*_z_score.nii.gz'))
    subjects = []
    contrasts = []
    tasks = []
    filtered_z_maps = []
    for z_map in z_maps:
        dirname, contrast = os.path.split(z_map)
        _, dirname = os.path.split(dirname)
        contrast = contrast[13:-15]
        subject = int(dirname[6:])
        if contrast in [
                'AudOnly', 'VidOnly', 'AudVid1200', 'AudVid300', 'AudVid600'
        ]:
            subjects.append(subject)
            contrasts.append(contrast)
            if contrast in ['AudOnly', 'VidOnly']:
                tasks.append('audio-video')
            else:
                tasks.append('AV-freq')
            filtered_z_maps.append(z_map)
    df = pd.DataFrame(
        data={
            'subject': subjects,
            'task': tasks,
            'contrast': contrasts,
            'direction': 'level1',
            'study': 'camcan',
            'z_map': filtered_z_maps,
        })
    df.set_index(['study', 'subject', 'task', 'contrast', 'direction'],
                 inplace=True)
    df.sort_index(inplace=True)
    return df
Example #14
def fetch_brainomics(data_dir=None):
    data_dir = get_data_dir(data_dir)
    source_dir = join(data_dir, 'brainomics')
    if not os.path.exists(source_dir):
        raise ValueError('Please ensure that %s contains all required data.' %
                         source_dir)
    z_maps = glob.glob(join(source_dir, '*', 'c_*.nii.gz'))
    subjects = []
    contrasts = []
    tasks = []
    filtered_z_maps = []
    regex = re.compile('.*vs.*')
    for z_map in z_maps:
        match = re.match(regex, z_map)
        if match is None:
            dirname, contrast = os.path.split(z_map)
            # Strip the filename prefix/suffix before the contrast filter;
            # otherwise the 'effects_of_interest' comparison never matches.
            contrast = contrast[6:-7]
            if contrast != 'effects_of_interest':
                subject = int(dirname[-2:])
                subjects.append(subject)
                contrasts.append(contrast)
                tasks.append('localizer')
                filtered_z_maps.append(z_map)
    df = pd.DataFrame(
        data={
            'subject': subjects,
            'task': tasks,
            'contrast': contrasts,
            'direction': 'level1',
            'study': 'brainomics',
            'z_map': filtered_z_maps,
        })
    df.set_index(['study', 'subject', 'task', 'contrast', 'direction'],
                 inplace=True)
    df.sort_index(inplace=True)
    return df
Example #15
def init_fetch_mask() -> str:
    """For mask bootstrapping"""
    return join(get_data_dir(), 'mask', 'hcp_mask.nii.gz')
Example #16
def run_exp(output_dir, config_updates, _id):
    """Boiler plate function that has to be put in every multiple
        experiment script, as exp does not pickle."""
    exp.run_command(
        'print_config',
        config_updates=config_updates,
    )
    run = exp._create_run(config_updates=config_updates, )
    run._id = _id
    observer = OurFileStorageObserver.create(basedir=output_dir)
    run.observers.append(observer)
    run()


if __name__ == '__main__':
    source_dir = join(get_data_dir(), 'reduced_512')
    data, target = load_data_from_dir(data_dir=source_dir)
    studies = list(data.keys())
    l2_penalties = np.logspace(-4, -1, 20)

    config_updates = ParameterGrid({
        'logistic.l2_penalty': l2_penalties,
        'data.studies': studies
    })
    output_dir = join(get_output_dir(), 'baseline_logistic_icbm_gm')

    _id = get_id(output_dir)

    Parallel(n_jobs=40, verbose=100)(
        delayed(run_exp)(output_dir, config_update, _id=_id + i)
        for i, config_update in enumerate(config_updates))
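ParameterGrid (from scikit-learn) expands the cross-product of the two lists, so the Parallel loop above launches one run per (study, penalty) pair with consecutive ids. A toy illustration of the expansion:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({'logistic.l2_penalty': [1e-4, 1e-3],
                      'data.studies': ['archi', 'hcp']})
for i, config_update in enumerate(grid):
    print(i, config_update)
# 4 configurations, e.g. {'data.studies': 'archi', 'logistic.l2_penalty': 0.0001}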
Example #17
    # Excerpt from a contrast-reduction routine: masked study data are
    # projected onto the dictionary components in parallel batches, then
    # the reduced loadings are dumped to output_dir.
    masker = NiftiMasker(mask_img=mask).fit()
    components = masker.transform(dictionary)
    for study in studies:
        this_data, targets = load(join(masked_dir, 'data_%s.pt' % study))
        n_samples = this_data.shape[0]
        batches = list(gen_batches(n_samples, batch_size))
        this_data = Parallel(n_jobs=n_jobs,
                             verbose=10,
                             backend='multiprocessing',
                             mmap_mode='r')(delayed(single_reduce)(
                                 components, this_data[batch], lstsq=lstsq)
                                            for batch in batches)
        this_data = np.concatenate(this_data, axis=0)

        dump((this_data, targets), join(output_dir, 'data_%s.pt' % study))


n_jobs = 65

mask_contrasts(studies='all',
               use_raw=True,
               output_dir=join(get_data_dir(), 'loadings'),
               n_jobs=n_jobs)

reduce_contrasts(studies='all',
                 masked_dir=join(get_data_dir(), 'masked'),
                 output_dir=join(get_data_dir(), 'loadings'),
                 components='components_453_gm',
                 n_jobs=n_jobs,
                 lstsq=False)