"""Boiler plate function that has to be put in every multiple
        experiment script, as exp does not pickle."""
    exp.run_command(
        'print_config',
        config_updates=config_updates,
    )
    run = exp._create_run(config_updates=config_updates)
    run._id = _id
    observer = OurFileStorageObserver.create(basedir=output_dir)
    run.observers.append(observer)
    run()


if __name__ == '__main__':
    source_dir = join(get_data_dir(), 'reduced_512')
    data, target = load_data_from_dir(data_dir=source_dir)
    studies = list(data.keys())
    l2_penalties = np.logspace(-4, -1, 20)

    config_updates = ParameterGrid({
        'logistic.l2_penalty': l2_penalties,
        'data.studies': studies
    })
    output_dir = join(get_output_dir(), 'baseline_logistic_icbm_gm')

    _id = get_id(output_dir)

    Parallel(n_jobs=40, verbose=100)(
        delayed(run_exp)(output_dir, config_update, _id=_id + i)
        for i, config_update in enumerate(config_updates))
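
get_id is used above but never shown. A minimal sketch of what it is assumed to do (hypothetical helper, not from the source project): return the next free numeric run id under output_dir, so parallel runs get consecutive ids.

import os


def get_id(output_dir):
    """Hypothetical: next unused numeric run id inside output_dir."""
    if not os.path.exists(output_dir):
        return 1
    run_ids = [int(d) for d in os.listdir(output_dir) if d.isdigit()]
    return max(run_ids) + 1 if run_ids else 1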
Example #2
    for i, estimator in enumerate(estimators):
        if i == len(estimators) - 1:
            fontweight = 'bold'
        else:
            fontweight = 'normal'
        ax.annotate(y_labels[estimator],
                    xy=(1, i),
                    xytext=(10, 0),
                    textcoords="offset points",
                    fontweight=fontweight,
                    xycoords=('axes fraction', 'data'),
                    va='center',
                    ha='left')

    sns.despine(fig)

    plt.setp(ax.legend(), visible=False)
    plt.savefig(join(save_dir, 'accuracies.pdf'))
    plt.close(fig)


output_dir = join(get_output_dir(output_dir=None), 'normalized',
                  'split_by_task')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

save_dir = join(output_dir, 'compare')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# gather_metrics(output_dir=output_dir, save_dir=save_dir)
plot_mean_accuracies(save_dir=save_dir, split_by_task=True)
# plot_accuracies(save_dir)
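
The annotate call above mixes coordinate systems: x in axes fraction (1.0 is the right edge of the axes), y in data units, offset by 10 points. A self-contained sketch of the same pattern, with illustrative labels:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.barh([0, 1, 2], [0.6, 0.7, 0.8])
for i, label in enumerate(['model a', 'model b', 'model c']):
    # x pinned to the right edge of the axes, y at bar i, nudged 10pt right.
    ax.annotate(label, xy=(1, i), xytext=(10, 0),
                textcoords='offset points',
                xycoords=('axes fraction', 'data'),
                va='center', ha='left')
fig.savefig('annotate_sketch.png', bbox_inches='tight')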
Example #3
    _run.info['n_iter'] = train_callback.n_iter_
    _run.info['train_scores'] = train_callback.scores_
    _run.info['test_scores'] = test_callback.scores_

    estimator.fit(train_data,
                  train_targets,
                  study_weights=study_weights,
                  callback=callback)

    test_preds = estimator.predict(test_data)
    test_scores = {}
    for study in test_preds:
        test_scores[study] = accuracy_score(test_targets[study]['contrast'],
                                            test_preds[study]['contrast'])

    test_preds = target_encoder.inverse_transform(test_preds)
    test_targets = target_encoder.inverse_transform(test_targets)
    for study in test_preds:
        test_preds[study] = pd.concat([test_preds[study], test_targets[study]],
                                      axis=1,
                                      keys=['pred', 'true'],
                                      names=['target'])
    save_output(target_encoder, standard_scaler, estimator, test_preds)
    return test_scores


if __name__ == '__main__':
    output_dir = join(get_output_dir(), 'multi_studies')
    exp.observers.append(OurFileStorageObserver.create(basedir=output_dir))
    exp.run()
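
The pd.concat call in this example builds a two-level column index so that predictions and ground truth sit side by side per study. A toy illustration (data invented for the sketch):

import pandas as pd

pred = pd.DataFrame({'contrast': ['face', 'house']})
true = pd.DataFrame({'contrast': ['face', 'scene']})
both = pd.concat([pred, true], axis=1, keys=['pred', 'true'],
                 names=['target'])
print(both)          # columns: (pred, contrast), (true, contrast)
print(both['pred'])  # select the prediction block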
Example #4

def run_exp(output_dir, config_updates, _id):
    """Boiler plate function that has to be put in every multiple
        experiment script, as exp does not pickle."""
    exp.run_command(
        'print_config',
        config_updates=config_updates,
    )
    # run = exp._create_run(config_updates=config_updates, )
    # run._id = _id
    # observer = OurFileStorageObserver.create(basedir=output_dir)
    # run.observers.append(observer)
    # run()


if __name__ == '__main__':
    output_dir = join(get_output_dir(), 'factored')
    exp.config(factored)
    config_updates = []
    config_updates += list(
        ParameterGrid({
            'data.studies': [['archi'], ['archi', 'hcp'], ['brainomics'],
                             ['brainomics', 'hcp'], ['camcan'],
                             ['camcan', 'hcp'], ['la5c'], ['la5c', 'hcp']],
        }))
    _id = get_id(output_dir)
    Parallel(n_jobs=1, verbose=100)(
        delayed(run_exp)(output_dir, config_update, _id=_id + i)
        for i, config_update in enumerate(config_updates))
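
For reference, ParameterGrid expands into one plain dict per configuration, which is exactly what run_exp receives as config_updates:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({'data.studies': [['archi'], ['archi', 'hcp']]})
print(list(grid))
# [{'data.studies': ['archi']}, {'data.studies': ['archi', 'hcp']}]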
Example #5
            plot_stat_map(index_img(this_img, i), figure=fig, title=full_name)
            plt.savefig(join(output_dir, '%s.png' % full_name))
            plt.close(fig)


def compute_components(output_dir, lstsq):
    estimator = load(join(output_dir, 'estimator.pkl'))
    target_encoder = load(join(output_dir, 'target_encoder.pkl'))
    standard_scaler = load(join(output_dir, 'standard_scaler.pkl'))

    modl_atlas = fetch_atlas_modl()
    dictionary = modl_atlas['components512']
    components, names = maps_from_model(estimator,
                                        dictionary,
                                        target_encoder,
                                        standard_scaler,
                                        lstsq=lstsq)
    plot_dir = join(output_dir, 'plot')
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    dump(names, join(plot_dir, 'names.pkl'))
    for study, this_components in components.items():
        this_components.to_filename(
            join(plot_dir, 'components_%s.nii.gz' % study))
    # Plot once, after all component images have been written.
    plot_components(components, names, plot_dir)


if __name__ == '__main__':
    compute_components(join(get_output_dir(), 'multi_studies', '1'),
                       lstsq=True)
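
A hypothetical follow-up for eyeballing one of the saved maps; the path assumes the layout written by compute_components above:

from nilearn.image import index_img
from nilearn.plotting import plot_stat_map

# First component of the 4D image saved for the 'archi' study (assumed path).
img = index_img('plot/components_archi.nii.gz', 0)
plot_stat_map(img, title='archi, component 0',
              output_file='archi_component_0.png')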
Example #6
def run(estimator='multi_study',
        seed=0,
        plot=False,
        n_jobs=1,
        use_gpu=False,
        split_by_task=False,
        verbose=0):
    # Parameters
    system = dict(verbose=verbose,
                  n_jobs=n_jobs,
                  plot=plot,
                  seed=seed,
                  output_dir=None)
    data = dict(
        studies='all',
        dataset='loadings',  # Useful to override source directory
        test_size=0.5,
        train_size=0.5,
        reduced=True,
        data_dir=None,
    )
    model = dict(
        split_by_task=split_by_task,
        estimator=estimator,
        normalize=True,
        seed=100,
        target_study=None,
    )

    config = {'system': system, 'data': data, 'model': model}

    if model['estimator'] in ['multi_study', 'ensemble']:
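        # Hyper-parameters of the shared multi-study model; `lr` and
        # `max_iter` are per-phase (pretrain / train / finetune) schedules.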
        multi_study = dict(
            latent_size=128,
            weight_power=0.6,
            batch_size=128,
            init='resting-state',
            latent_dropout=0.75,
            input_dropout=0.25,
            device='cuda:0' if use_gpu else 'cpu',
            seed=100,
            lr={
                'pretrain': 1e-3,
                'train': 1e-3,
                'finetune': 1e-3
            },
            max_iter={
                'pretrain': 200,
                'train': 300,
                'finetune': 200
            },
        )
        config['multi_study'] = multi_study
        if model['estimator'] == 'ensemble':
            ensemble = dict(
                seed=100,
                n_runs=120,
                alpha=1e-5,
            )
            config['ensemble'] = ensemble
    else:
        logistic = dict(
            l2_penalty=np.logspace(-7, 0, 4).tolist(),
            max_iter=1000,
        )
        config['logistic'] = logistic

    output_dir = join(get_output_dir(config['system']['output_dir']),
                      'normalized',
                      'split_by_task' if split_by_task else 'split_by_study',
                      config['model']['estimator'],
                      str(config['system']['seed']))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    info = {}

    with open(join(output_dir, 'config.json'), 'w+') as f:
        json.dump(config, f)

    print("Loading data")
    if data['studies'] == 'all':
        studies = STUDY_LIST
    elif isinstance(data['studies'], str):
        studies = [data['studies']]
    elif isinstance(data['studies'], list):
        studies = data['studies']
    else:
        raise ValueError("studies should be a string, a list of strings, "
                         "or 'all'")

    if data['dataset'] is not None:
        input_data, targets = load_from_directory(dataset=data['dataset'],
                                                  data_dir=data['data_dir'])
    elif data['reduced']:
        input_data, targets = load_reduced_loadings(data_dir=data['data_dir'])
    else:
        input_data, targets = load_masked_contrasts(data_dir=data['data_dir'])

    input_data = {study: input_data[study] for study in studies}
    targets = {study: targets[study] for study in studies}

    if model['split_by_task']:
        _, split_targets = split_studies(input_data, targets)
        target_encoder = MultiTargetEncoder().fit(split_targets)
    else:
        # Fit on the unsplit targets so that `target_encoder` is always
        # bound for compute_metrics() and model saving below.
        target_encoder = MultiTargetEncoder().fit(targets)
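    # train_test_split is assumed to be the project's dict-aware, per-study
    # variant rather than sklearn's.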
    train_data, test_data, train_targets, test_targets = \
        train_test_split(input_data, targets, random_state=system['seed'],
                         test_size=data['test_size'],
                         train_size=data['train_size'])
    if model['split_by_task']:
        train_data, train_targets = split_studies(train_data, train_targets)
        test_data, test_targets = split_studies(test_data, test_targets)
        train_targets = target_encoder.transform(train_targets)
        test_targets = target_encoder.transform(test_targets)

    print("Setting up model")
    if model['normalize']:
        standard_scaler = MultiStandardScaler().fit(train_data)
        train_data = standard_scaler.transform(train_data)
        test_data = standard_scaler.transform(test_data)
    else:
        standard_scaler = None

    if model['estimator'] in ['multi_study', 'ensemble']:
        estimator = MultiStudyClassifier(verbose=system['verbose'],
                                         n_jobs=system['n_jobs'],
                                         **multi_study)
        if model['estimator'] == 'ensemble':
            # joblib's Memory now takes `location`; `cachedir` is deprecated.
            memory = Memory(location=None)
            estimator = EnsembleClassifier(estimator,
                                           n_jobs=system['n_jobs'],
                                           memory=memory,
                                           **ensemble)
            callback = None
        else:
            # Set some callback to obtain useful verbosity
            test_callback = ScoreCallback(Xs=test_data,
                                          ys=test_targets,
                                          score_function=accuracy_score)
            train_callback = ScoreCallback(Xs=train_data,
                                           ys=train_targets,
                                           score_function=accuracy_score)
            callback = MultiCallback({
                'train': train_callback,
                'test': test_callback
            })
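            # `n_iter_` and `scores_` are assumed to be lists that the
            # callbacks fill in place during fit(), so storing references
            # here is enough for them to end up in `info` below.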
            info['n_iter'] = train_callback.n_iter_
            info['train_scores'] = train_callback.scores_
            info['test_scores'] = test_callback.scores_
    elif model['estimator'] == 'logistic':
        estimator = MultiLogisticClassifier(verbose=system['verbose'],
                                            n_jobs=n_jobs,
                                            **logistic)
        callback = None
    else:
        raise ValueError("Unknown estimator '%s'" % model['estimator'])

    print("Training model")
    estimator.fit(train_data, train_targets, callback=callback)

    print("Evaluating model")
    test_preds = estimator.predict(test_data)
    metrics = compute_metrics(test_preds, test_targets, target_encoder)
    print(metrics['accuracy'])

    print("Saving model")
    # Save model for further analysis
    dump(target_encoder, join(output_dir, 'target_encoder.pkl'))
    if model['normalize']:
        dump(standard_scaler, join(output_dir, 'standard_scaler.pkl'))
    dump(estimator, join(output_dir, 'estimator.pkl'))
    with open(join(output_dir, 'metrics.json'), 'w+') as f:
        json.dump(metrics, f)
    with open(join(output_dir, 'info.json'), 'w+') as f:
        json.dump(info, f)

    if config['system']['plot']:
        from utils.plotting import make_plots, prepare_plots
        print('Preparing plots')
        prepare_plots(output_dir)
        print("Plotting model")
        plot_components = config['model']['estimator'] in [
            'multi_study', 'ensemble'
        ]
        make_plots(output_dir,
                   plot_classifs=True,
                   plot_components=plot_components,
                   plot_surface=False,
                   plot_wordclouds=True,
                   n_jobs=config['system']['n_jobs'])
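
The snippet ends with run() itself; a minimal entry-point sketch (argument values are illustrative only):

if __name__ == '__main__':
    run(estimator='multi_study', seed=0, n_jobs=2, verbose=1)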
Example #7
    for i, estimator in enumerate(estimators):
        if i == len(estimators) - 1:
            fontweight = 'bold'
        else:
            fontweight = 'normal'
        ax.annotate(y_labels[estimator], xy=(1, i), xytext=(10, 0),
                    textcoords="offset points",
                    fontweight=fontweight,
                    xycoords=('axes fraction', 'data'),
                    va='center',
                    ha='left')

    sns.despine(fig)

    plt.setp(ax.legend(), visible=False)
    plt.savefig(join(save_dir, 'accuracies.pdf'))
    plt.close(fig)


output_dir = get_output_dir(output_dir=None)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

save_dir = join(output_dir, 'compare')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

gather_metrics(output_dir=output_dir, save_dir=save_dir)
plot_mean_accuracies(save_dir=save_dir)
plot_accuracies(save_dir)