Example #1
0
def test_bin_prop_ci():
    skip_if_no_external('scipy')
    n = 100
    succ_thresh = np.random.randint(n)
    acc = 1 - (float(succ_thresh) / n)
    bl = np.random.random(n) < acc
    ds = Dataset(bl)
    m95 = BinomialProportionCI()
    m50 = BinomialProportionCI(width=0.5)
    cids = m95(ds)
    assert_equal(cids.shape, (2, 1))
    # accuracy is in the CI
    maxdist = cids.samples[1, 0] - acc
    mindist = acc - cids.samples[1, 0]
    # but allow for numerical uncertainty proportional to the sample size
    assert_true(maxdist > 0 or maxdist <= 1. / n)
    assert_true(mindist > 0 or mindist <= 1. / n)
    # more than one feature
    ds = Dataset(np.transpose([bl, np.logical_not(bl)]))
    ci95 = m95(ds)
    assert_equal(ci95.shape, (2, 2))
    # CIs should be inverse
    assert_array_almost_equal(1 - ci95.samples[0, ::-1], ci95.samples[1])
    ci50 = m50(ds)
    assert_array_almost_equal(1 - ci50.samples[0, ::-1], ci50.samples[1])
    # 50% interval is smaller than 95%
    assert_true(np.all(ci95.samples[0] < ci50.samples[0]))
    assert_true(np.all(ci95.samples[1] > ci50.samples[1]))
    assert_equal(list(ci50.sa.ci_boundary), ['lower', 'upper'])
    def test_connectivity_hyperalignment(self):
        skip_if_no_external('scipy')
        skip_if_no_external('hdf5')  # needed for default results backend hdf5

        dss_train, dss_test, surface = self.get_testdata()
        qe = SurfaceQueryEngine(surface, 10, fa_node_key='node_indices')
        cha = ConnectivityHyperalignment(mask_ids=[0, 3, 6, 9],
                                         seed_indices=[0, 3, 6, 9],
                                         seed_queryengines=qe,
                                         queryengine=qe)
        mappers = cha(dss_train)
        aligned_train = [
            mapper.forward(ds) for ds, mapper in zip(dss_train, mappers)
        ]
        aligned_test = [
            mapper.forward(ds) for ds, mapper in zip(dss_test, mappers)
        ]
        for ds in aligned_train + aligned_test:
            zscore(ds, chunks_attr=None)
        sim_train_before = self.compute_connectivity_profile_similarity(
            dss_train)
        sim_train_after = self.compute_connectivity_profile_similarity(
            aligned_train)
        sim_test_before = self.compute_connectivity_profile_similarity(
            dss_test)
        sim_test_after = self.compute_connectivity_profile_similarity(
            aligned_test)
        # ISC should be higher after CHA for both training and testing data
        self.assertTrue(sim_train_after.mean() > sim_train_before.mean())
        self.assertTrue(sim_test_after.mean() > sim_test_before.mean())
Example #3
0
def test_bin_prop_ci():
    skip_if_no_external('scipy')
    n = 100
    succ_thresh = np.random.randint(n)
    acc = 1 - (float(succ_thresh) / n)
    bl = np.random.random(n) < acc
    ds = Dataset(bl)
    m95 = BinomialProportionCI()
    m50 = BinomialProportionCI(width=0.5)
    cids = m95(ds)
    assert_equal(cids.shape, (2, 1))
    # accuracy is in the CI
    maxdist = cids.samples[1, 0] - acc
    mindist = acc - cids.samples[1, 0]
    # but allow for numerical uncertainty proportional to the sample size
    assert_true(maxdist > 0 or maxdist <= 1. / n)
    assert_true(mindist > 0 or mindist <= 1. / n)
    # more than one feature
    ds = Dataset(np.transpose([bl, np.logical_not(bl)]))
    ci95 = m95(ds)
    assert_equal(ci95.shape, (2, 2))
    # CIs should be inverse
    assert_array_almost_equal(1 - ci95.samples[0, ::-1], ci95.samples[1])
    ci50 = m50(ds)
    assert_array_almost_equal(1 - ci50.samples[0, ::-1], ci50.samples[1])
    # 50% interval is smaller than 95%
    assert_true(np.all(ci95.samples[0] < ci50.samples[0]))
    assert_true(np.all(ci95.samples[1] > ci50.samples[1]))
    assert_equal(list(ci50.sa.ci_boundary), ['lower', 'upper'])
Example #4
0
def test_cosmo_dataset(fn):
    skip_if_no_external('scipy', min_version='0.8')
    mat = _create_small_mat_dataset_dict()
    ds_mat = cosmo.from_any(mat)
    savemat(fn, mat)

    # test Dataset, filename, dict in matlab form, and input from loadmat
    for input in (ds_mat, fn, mat, loadmat(fn)):

        # check dataset creation
        ds = cosmo.from_any(mat)

        # ensure dataset has expected vlaues
        assert_array_equal(ds.samples, mat['samples'])

        _assert_set_equal(ds.sa.keys(), ['chunks', 'labels', 'targets'])
        _assert_set_equal(ds.sa.keys(), ['chunks', 'labels', 'targets'])
        _assert_set_equal(ds.a.keys(), ['name'])

        assert_array_equal(ds.a.name, 'input')
        assert_array_equal(ds.sa.chunks, [2, 2])
        assert_array_equal(ds.sa.targets, [1, 2])
        assert_array_equal(ds.sa.labels, ['yin', 'yan'])
        assert_array_equal(ds.fa.i, [3, 2, 1])
        assert_array_equal(ds.fa.j, [1, 2, 2])

        # check mapping to matlab format
        mat_mapped = cosmo.map2cosmo(ds)

        for m in (mat, mat_mapped):
            assert_array_equal(ds_mat.samples, m['samples'])
            _assert_ds_mat_attributes_equal(ds_mat, m)
Example #5
0
def test_fmri_to_cosmo():
    skip_if_no_external('nibabel')
    from mvpa2.datasets.mri import fmri_dataset
    # test exporting an fMRI dataset to CoSMoMVPA
    pymvpa_ds = fmri_dataset(
        samples=pathjoin(pymvpa_dataroot, 'example4d.nii.gz'),
        targets=[1, 2], sprefix='voxel')
    cosmomvpa_struct = cosmo.map2cosmo(pymvpa_ds)
    _assert_set_equal(cosmomvpa_struct.keys(), ['a', 'fa', 'sa', 'samples'])

    a_dict = dict(_obj2tup(cosmomvpa_struct['a']))
    mri_keys = ['imgaffine', 'voxel_eldim', 'voxel_dim']
    _assert_subset(mri_keys, a_dict.keys())

    for k in mri_keys:
        c_value = a_dict[k]
        p_value = pymvpa_ds.a[k].value

        if isinstance(p_value, tuple):
            c_value = c_value.ravel()
            p_value = np.asarray(p_value).ravel()

        assert_array_almost_equal(c_value, p_value)

    fa_dict = dict(_obj2tup(cosmomvpa_struct['fa']))
    fa_keys = ['voxel_indices']
    _assert_set_equal(fa_dict.keys(), fa_keys)
    for k in fa_keys:
        assert_array_almost_equal(fa_dict[k].T, pymvpa_ds.fa[k].value)
Example #6
0
def test_cosmo_queryengine(fn):
    skip_if_no_external('scipy', min_version='0.8')
    nbrhood_mat = _create_small_mat_nbrhood_dict()
    neighbors = nbrhood_mat['neighbors']
    savemat(fn, nbrhood_mat)

    # test dict in matlab form, filename, and through QueryEngine loader
    for input in (nbrhood_mat, fn, cosmo.CosmoQueryEngine.from_mat(neighbors)):
        qe = cosmo.from_any(input)
        assert_array_equal(qe.ids, [0, 1, 2, 3])

        for i in qe.ids:
            nbr_fids_base0 = neighbors[0, i][0] - 1
            assert_array_equal(qe.query_byid(i), nbr_fids_base0)

        _assert_ds_mat_attributes_equal(qe, nbrhood_mat, ('fa', 'a'))
Example #7
0
def test_cosmo_queryengine(fn):
    skip_if_no_external('scipy', min_version='0.8')
    nbrhood_mat = _create_small_mat_nbrhood_dict()
    neighbors = nbrhood_mat['neighbors']
    savemat(fn, nbrhood_mat)

    # test dict in matlab form, filename, and through QueryEngine loader
    for input in (nbrhood_mat, fn, cosmo.CosmoQueryEngine.from_mat(neighbors)):
        qe = cosmo.from_any(input)
        assert_array_equal(qe.ids, [0, 1, 2, 3])

        for i in qe.ids:
            nbr_fids_base0 = neighbors[0, i][0] - 1
            assert_array_equal(qe.query_byid(i), nbr_fids_base0)

        _assert_ds_mat_attributes_equal(qe, nbrhood_mat, ('fa', 'a'))
Example #8
0
def test_cosmo_io_h5py(fn):
    skip_if_no_external('h5py')
    from mvpa2.base.hdf5 import h5save, h5load

    # Dataset from cosmo
    ds = cosmo.from_any(_create_small_mat_dataset_dict())
    h5save(fn, ds)
    ds_loaded = h5load(fn)

    _assert_ds_equal(ds, ds_loaded)

    # Queryengine
    qe = cosmo.from_any(_create_small_mat_nbrhood_dict())
    h5save(fn, qe)
    qe_loaded = h5load(fn)

    assert_array_equal(qe.ids, qe_loaded.ids)
    _assert_array_collectable_equal(qe.a, qe_loaded.a)
    _assert_array_collectable_equal(qe.fa, qe_loaded.fa)
Example #9
0
def test_cosmo_io_h5py(fn):
    skip_if_no_external('h5py')
    from mvpa2.base.hdf5 import h5save, h5load

    # Dataset from cosmo
    ds = cosmo.from_any(_create_small_mat_dataset_dict())
    h5save(fn, ds)
    ds_loaded = h5load(fn)

    _assert_ds_equal(ds, ds_loaded)

    # Queryengine
    qe = cosmo.from_any(_create_small_mat_nbrhood_dict())
    h5save(fn, qe)
    qe_loaded = h5load(fn)

    assert_array_equal(qe.ids, qe_loaded.ids)
    _assert_array_collectable_equal(qe.a, qe_loaded.a)
    _assert_array_collectable_equal(qe.fa, qe_loaded.fa)
Example #10
0
def test_cosmo_dataset(fn):
    skip_if_no_external('scipy', min_version='0.8')
    mat = _create_small_mat_dataset_dict()
    ds_mat = cosmo.from_any(mat)
    savemat(fn, mat)

    # test Dataset, filename, dict in matlab form, and input from loadmat
    for input in (ds_mat, fn, mat, loadmat(fn)):

        # check dataset creation
        ds = cosmo.from_any(mat)

        # ensure dataset has expected vlaues
        assert_array_equal(ds.samples, mat['samples'])

        _assert_set_equal(ds.sa.keys(), ['chunks', 'labels', 'targets'])
        _assert_set_equal(ds.sa.keys(), ['chunks', 'labels', 'targets'])
        _assert_set_equal(ds.a.keys(), ['name', 'size'])

        assert_array_equal(ds.a.name, 'input')
        assert_array_equal(ds.a.size, [3, 2, 1])
        assert_array_equal(ds.sa.chunks, [2, 2])
        assert_array_equal(ds.sa.targets, [1, 2])
        assert_array_equal(ds.sa.labels, ['yin', 'yan'])
        assert_array_equal(ds.fa.i, [3, 2, 1])
        assert_array_equal(ds.fa.j, [1, 2, 2])

        for convert_tuples in (True, False):
            ds_copy = ds.copy(deep=True)

            if convert_tuples:
                # use dataset with tuple data
                ds_copy.a.size = tuple(ds_copy.a.size)

            # check mapping to matlab format
            mat_mapped = cosmo.map2cosmo(ds_copy)

            for m in (mat, mat_mapped):
                assert_array_equal(ds_mat.samples, m['samples'])
                _assert_ds_mat_attributes_equal(ds_mat, m)
    def test_connectivity_hyperalignment(self):
        skip_if_no_external('scipy')
        skip_if_no_external('hdf5')  # needed for default results backend hdf5

        dss_train, dss_test, surface = self.get_testdata()
        qe = SurfaceQueryEngine(surface, 10, fa_node_key='node_indices')
        cha = ConnectivityHyperalignment(
            mask_ids=[0, 3, 6, 9],
            seed_indices=[0, 3, 6, 9],
            seed_queryengines=qe,
            queryengine=qe)
        mappers = cha(dss_train)
        aligned_train = [mapper.forward(ds) for ds, mapper in zip(dss_train, mappers)]
        aligned_test = [mapper.forward(ds) for ds, mapper in zip(dss_test, mappers)]
        for ds in aligned_train + aligned_test:
            zscore(ds, chunks_attr=None)
        sim_train_before = self.compute_connectivity_profile_similarity(dss_train)
        sim_train_after = self.compute_connectivity_profile_similarity(aligned_train)
        sim_test_before = self.compute_connectivity_profile_similarity(dss_test)
        sim_test_after = self.compute_connectivity_profile_similarity(aligned_test)
        # ISC should be higher after CHA for both training and testing data
        self.assertTrue(sim_train_after.mean() > sim_train_before.mean())
        self.assertTrue(sim_test_after.mean() > sim_test_before.mean())
Example #12
0
def test_simple_cluster_level_thresholding():
    nf = 13
    nperms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(nperms, nf))
    acc = np.random.normal(size=(1, nf))

    # Step 1 is to "fit" "Nonparametrics" per each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(samples) for samples in rand_acc.T]
    # we should be able to assert "p" value for each random sample for each feature
    rand_acc_p = np.array([dist.rcdf(v)
                           for dist, v in zip(dists, rand_acc.T)]).T

    rand_acc_p_slow = np.array(
        [[dist.rcdf(v) for dist, v in zip(dists, sample)]
         for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)

    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert (np.all(rand_acc_p <= 1))
    assert (np.all(rand_acc_p > 0))

    # 2: apply the same to our acc
    acc_p = np.array([dist.rcdf(v) for dist, v in zip(dists, acc[0])])[None, :]
    assert (np.all(acc_p <= 1))
    assert (np.all(acc_p > 0))

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster level madness
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    rand_acc_p_thr = rand_acc_p < pthr_feature
    acc_p_thr = acc_p < pthr_feature

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p_thr)
    acc_cluster_sizes = get_cluster_sizes(acc_p_thr)

    # This is how we can compute it within present implementation.
    # It will be a bit different (since it doesn't account for target value if
    # I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_ = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_ = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_)

    #print rand_cluster_sizes
    #print acc_cluster_sizes

    # That is how it is done in group_clusterthr atm
    # store cluster size histogram for later p-value evaluation
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    scl = dok_matrix((1, nf + 1), dtype=int)
    for s in rand_cluster_sizes:
        scl[0, s] = rand_cluster_sizes[s]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    test_pvals = _transform_to_pvals(test_count_sizes, scl.astype('float'))
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(test_pvals)
    # critical cluster_level threshold (without FW correction between clusters)
    # would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]

    if len(clusters_passed_threshold):
        thr_cluster_size = min(clusters_passed_threshold)
        #print("Min cluster size which passed threshold: %d" % thr_cluster_size)
    else:
        #print("No clusters passed threshold")
        pass
    #print test_count_sizes, test_pvals

    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)

    for test_pval, test_count_size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[test_count_size], test_pval)
Example #13
0
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##

"""Unit tests for CoSMoMVPA dataset (http://cosmomvpa.org)"""

from mvpa2.testing.tools import assert_raises, ok_, assert_true, \
        assert_equal, assert_array_equal, with_tempfile
from mvpa2.testing import skip_if_no_external

skip_if_no_external('scipy')
from scipy.io import loadmat, savemat, matlab
from mvpa2.datasets import cosmo

from mvpa2.measures.base import Measure
from mvpa2.datasets.base import Dataset
from mvpa2.mappers.fx import mean_feature

import numpy as np

arr = np.asarray

#########################
# helper functions

Example #14
0
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##

"""Unit tests for CoSMoMVPA dataset (http://cosmomvpa.org)"""

from mvpa2.testing.tools import assert_raises, ok_, assert_true, \
    assert_equal, assert_array_equal, with_tempfile
from mvpa2.testing import skip_if_no_external

skip_if_no_external('scipy')
from scipy.io import loadmat, savemat, matlab
from mvpa2.datasets import cosmo

from mvpa2.measures.base import Measure
from mvpa2.datasets.base import Dataset
from mvpa2.mappers.fx import mean_feature

import numpy as np

arr = np.asarray

#########################
# helper functions

Example #15
0
def test_simple_cluster_level_thresholding():
    nf = 13
    nperms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(nperms, nf))
    acc = np.random.normal(size=(1, nf))

    # Step 1 is to "fit" "Nonparametrics" per each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(samples) for samples in rand_acc.T]
    # we should be able to assert "p" value for each random sample for each feature
    rand_acc_p = np.array(
        [dist.rcdf(v) for dist, v in zip(dists, rand_acc.T)]
        ).T

    rand_acc_p_slow = np.array([
        [dist.rcdf(v) for dist, v in zip(dists, sample)]
         for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)

    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert(np.all(rand_acc_p <= 1))
    assert(np.all(rand_acc_p > 0))

    # 2: apply the same to our acc
    acc_p = np.array([dist.rcdf(v) for dist, v in zip(dists, acc[0])])[None, :]
    assert(np.all(acc_p <= 1))
    assert(np.all(acc_p > 0))

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster level madness
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    rand_acc_p_thr = rand_acc_p < pthr_feature
    acc_p_thr = acc_p < pthr_feature

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p_thr)
    acc_cluster_sizes = get_cluster_sizes(acc_p_thr)

    # This is how we can compute it within present implementation.
    # It will be a bit different (since it doesn't account for target value if
    # I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_ = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_ = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_)

    #print rand_cluster_sizes
    #print acc_cluster_sizes

    # That is how it is done in group_clusterthr atm
    # store cluster size histogram for later p-value evaluation
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    scl = dok_matrix((1, nf + 1), dtype=int)
    for s in rand_cluster_sizes:
        scl[0, s] = rand_cluster_sizes[s]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    test_pvals = _transform_to_pvals(test_count_sizes, scl.astype('float'))
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(test_pvals)
    # critical cluster_level threshold (without FW correction between clusters)
    # would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]

    if len(clusters_passed_threshold):
        thr_cluster_size = min(clusters_passed_threshold)
        #print("Min cluster size which passed threshold: %d" % thr_cluster_size)
    else:
        #print("No clusters passed threshold")
        pass
    #print test_count_sizes, test_pvals


    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)

    for test_pval, test_count_size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[test_count_size], test_pval)
Example #16
0
 def test_to_df_smoke(self, ds):
     skip_if_no_external('pandas')
     df = ds.to_df()
     print(df)