def test_bin_prop_ci():
    skip_if_no_external('scipy')
    n = 100
    succ_thresh = np.random.randint(n)
    acc = 1 - (float(succ_thresh) / n)
    bl = np.random.random(n) < acc
    ds = Dataset(bl)
    m95 = BinomialProportionCI()
    m50 = BinomialProportionCI(width=0.5)
    cids = m95(ds)
    assert_equal(cids.shape, (2, 1))
    # the empirical accuracy must be contained in the CI
    emp_acc = np.mean(bl)
    maxdist = cids.samples[1, 0] - emp_acc
    mindist = emp_acc - cids.samples[0, 0]
    # but allow for numerical uncertainty proportional to the sample size
    assert_true(maxdist > 0 or abs(maxdist) <= 1. / n)
    assert_true(mindist > 0 or abs(mindist) <= 1. / n)

    # more than one feature
    ds = Dataset(np.transpose([bl, np.logical_not(bl)]))
    ci95 = m95(ds)
    assert_equal(ci95.shape, (2, 2))
    # CIs of complementary accuracies should mirror each other
    assert_array_almost_equal(1 - ci95.samples[0, ::-1], ci95.samples[1])
    ci50 = m50(ds)
    assert_array_almost_equal(1 - ci50.samples[0, ::-1], ci50.samples[1])
    # the 50% interval is nested inside the 95% interval
    assert_true(np.all(ci95.samples[0] < ci50.samples[0]))
    assert_true(np.all(ci95.samples[1] > ci50.samples[1]))
    assert_equal(list(ci50.sa.ci_boundary), ['lower', 'upper'])
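
# For reference, a minimal sketch of the kind of interval BinomialProportionCI
# is expected to produce, assuming the common normal-approximation ("Wald")
# formula; the exact method the measure implements may differ, and the helper
# name below is purely illustrative.
def _demo_binomial_proportion_ci(successes=55, n=100, width=0.95):
    """Illustrative sketch, not part of the test suite."""
    from scipy.stats import norm
    p = float(successes) / n
    # two-sided z quantile for the requested coverage
    z = norm.ppf(0.5 + width / 2.)
    half_width = z * np.sqrt(p * (1 - p) / n)
    # boundaries ordered as ('lower', 'upper'), matching sa.ci_boundary above
    return p - half_width, p + half_width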
def test_connectivity_hyperalignment(self):
    skip_if_no_external('scipy')
    skip_if_no_external('hdf5')  # needed for default results backend hdf5
    dss_train, dss_test, surface = self.get_testdata()
    qe = SurfaceQueryEngine(surface, 10, fa_node_key='node_indices')
    cha = ConnectivityHyperalignment(
        mask_ids=[0, 3, 6, 9],
        seed_indices=[0, 3, 6, 9],
        seed_queryengines=qe,
        queryengine=qe)
    mappers = cha(dss_train)
    aligned_train = [mapper.forward(ds)
                     for ds, mapper in zip(dss_train, mappers)]
    aligned_test = [mapper.forward(ds)
                    for ds, mapper in zip(dss_test, mappers)]
    for ds in aligned_train + aligned_test:
        zscore(ds, chunks_attr=None)
    sim_train_before = self.compute_connectivity_profile_similarity(dss_train)
    sim_train_after = self.compute_connectivity_profile_similarity(aligned_train)
    sim_test_before = self.compute_connectivity_profile_similarity(dss_test)
    sim_test_after = self.compute_connectivity_profile_similarity(aligned_test)
    # ISC should be higher after CHA for both training and testing data
    self.assertTrue(sim_train_after.mean() > sim_train_before.mean())
    self.assertTrue(sim_test_after.mean() > sim_test_before.mean())
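
# compute_connectivity_profile_similarity() is a helper defined elsewhere on
# this test class. A minimal sketch of the idea, assuming a "connectivity
# profile" is each feature's correlation with all features of the same
# dataset, and similarity is the correlation of matching profiles across
# dataset pairs; the real helper may differ in detail.
def _demo_connectivity_profile_similarity(dss):
    """Illustrative sketch, not part of the test suite."""
    # features x features correlation matrix per dataset
    profiles = [np.corrcoef(ds.samples.T) for ds in dss]
    sims = []
    for i in range(len(profiles)):
        for j in range(i + 1, len(profiles)):
            # correlate the two datasets' profiles feature by feature
            for f in range(profiles[i].shape[0]):
                sims.append(np.corrcoef(profiles[i][f],
                                        profiles[j][f])[0, 1])
    return np.asarray(sims)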
def test_fmri_to_cosmo():
    skip_if_no_external('nibabel')
    from mvpa2.datasets.mri import fmri_dataset
    # test exporting an fMRI dataset to CoSMoMVPA
    pymvpa_ds = fmri_dataset(
        samples=pathjoin(pymvpa_dataroot, 'example4d.nii.gz'),
        targets=[1, 2], sprefix='voxel')
    cosmomvpa_struct = cosmo.map2cosmo(pymvpa_ds)
    _assert_set_equal(cosmomvpa_struct.keys(), ['a', 'fa', 'sa', 'samples'])

    a_dict = dict(_obj2tup(cosmomvpa_struct['a']))
    mri_keys = ['imgaffine', 'voxel_eldim', 'voxel_dim']
    _assert_subset(mri_keys, a_dict.keys())
    for k in mri_keys:
        c_value = a_dict[k]
        p_value = pymvpa_ds.a[k].value
        if isinstance(p_value, tuple):
            c_value = c_value.ravel()
            p_value = np.asarray(p_value).ravel()
        assert_array_almost_equal(c_value, p_value)

    fa_dict = dict(_obj2tup(cosmomvpa_struct['fa']))
    fa_keys = ['voxel_indices']
    _assert_set_equal(fa_dict.keys(), fa_keys)
    for k in fa_keys:
        assert_array_almost_equal(fa_dict[k].T, pymvpa_ds.fa[k].value)
def test_cosmo_queryengine(fn):
    skip_if_no_external('scipy', min_version='0.8')
    nbrhood_mat = _create_small_mat_nbrhood_dict()
    neighbors = nbrhood_mat['neighbors']
    savemat(fn, nbrhood_mat)

    # test dict in matlab form, filename, and through QueryEngine loader
    for input in (nbrhood_mat, fn,
                  cosmo.CosmoQueryEngine.from_mat(neighbors)):
        qe = cosmo.from_any(input)
        assert_array_equal(qe.ids, [0, 1, 2, 3])
        for i in qe.ids:
            # MATLAB uses base-1 indexing, PyMVPA base-0
            nbr_fids_base0 = neighbors[0, i][0] - 1
            assert_array_equal(qe.query_byid(i), nbr_fids_base0)
        _assert_ds_mat_attributes_equal(qe, nbrhood_mat, ('fa', 'a'))
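
# The neighborhood dict round-trips the way scipy.io represents a CoSMoMVPA
# .neighbors cell array: a (1, n) object array whose cells hold base-1 feature
# indices. A hand-rolled sketch of such a structure (the actual
# _create_small_mat_nbrhood_dict() helper may differ):
def _demo_neighbors_cell_array(n=4):
    """Illustrative sketch, not part of the test suite."""
    neighbors = np.empty((1, n), dtype=object)
    for i in range(n):
        # MATLAB-style: base-1 indices, each cell a (1, k) array
        neighbors[0, i] = np.asarray([[i + 1, (i + 1) % n + 1]])
    # converting back to the base-0 ids that query_byid() returns
    return [neighbors[0, i][0] - 1 for i in range(n)]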
def test_cosmo_io_h5py(fn):
    skip_if_no_external('h5py')
    from mvpa2.base.hdf5 import h5save, h5load

    # Dataset from cosmo
    ds = cosmo.from_any(_create_small_mat_dataset_dict())
    h5save(fn, ds)
    ds_loaded = h5load(fn)
    _assert_ds_equal(ds, ds_loaded)

    # QueryEngine
    qe = cosmo.from_any(_create_small_mat_nbrhood_dict())
    h5save(fn, qe)
    qe_loaded = h5load(fn)
    assert_array_equal(qe.ids, qe_loaded.ids)
    _assert_array_collectable_equal(qe.a, qe_loaded.a)
    _assert_array_collectable_equal(qe.fa, qe_loaded.fa)
def test_cosmo_dataset(fn):
    skip_if_no_external('scipy', min_version='0.8')
    mat = _create_small_mat_dataset_dict()
    ds_mat = cosmo.from_any(mat)
    savemat(fn, mat)

    # test Dataset, filename, dict in matlab form, and input from loadmat
    for input in (ds_mat, fn, mat, loadmat(fn)):
        # check dataset creation
        ds = cosmo.from_any(input)

        # ensure dataset has expected values
        assert_array_equal(ds.samples, mat['samples'])
        _assert_set_equal(ds.sa.keys(), ['chunks', 'labels', 'targets'])
        _assert_set_equal(ds.a.keys(), ['name', 'size'])
        assert_array_equal(ds.a.name, 'input')
        assert_array_equal(ds.a.size, [3, 2, 1])
        assert_array_equal(ds.sa.chunks, [2, 2])
        assert_array_equal(ds.sa.targets, [1, 2])
        assert_array_equal(ds.sa.labels, ['yin', 'yan'])
        assert_array_equal(ds.fa.i, [3, 2, 1])
        assert_array_equal(ds.fa.j, [1, 2, 2])

        for convert_tuples in (True, False):
            ds_copy = ds.copy(deep=True)
            if convert_tuples:
                # use dataset with tuple data
                ds_copy.a.size = tuple(ds_copy.a.size)

            # check mapping to matlab format
            mat_mapped = cosmo.map2cosmo(ds_copy)
            for m in (mat, mat_mapped):
                assert_array_equal(ds_mat.samples, m['samples'])
                _assert_ds_mat_attributes_equal(ds_mat, m)
def test_simple_cluster_level_thresholding():
    nf = 13
    nperms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(nperms, nf))
    acc = np.random.normal(size=(1, nf))

    # Step 1 is to "fit" a Nonparametric distribution for each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(samples) for samples in rand_acc.T]
    # we should be able to assess a "p" value for each random sample of each
    # feature
    rand_acc_p = np.array([dist.rcdf(v)
                           for dist, v in zip(dists, rand_acc.T)]).T
    rand_acc_p_slow = np.array([[dist.rcdf(v)
                                 for dist, v in zip(dists, sample)]
                                for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)
    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert np.all(rand_acc_p <= 1)
    assert np.all(rand_acc_p > 0)

    # Step 2: apply the same to our acc
    acc_p = np.array([dist.rcdf(v) for dist, v in zip(dists, acc[0])])[None, :]
    assert np.all(acc_p <= 1)
    assert np.all(acc_p > 0)

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster-level madness
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    rand_acc_p_thr = rand_acc_p < pthr_feature
    acc_p_thr = acc_p < pthr_feature

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p_thr)
    acc_cluster_sizes = get_cluster_sizes(acc_p_thr)

    # This is how we can compute it within the present implementation.
    # It will be a bit different (since it doesn't account for the target
    # value, if I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_ = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_ = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_)

    # That is how it is done in group_clusterthr atm:
    # store the cluster size histogram for later p-value evaluation;
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. the biggest possible cluster)
    from scipy.sparse import dok_matrix
    scl = dok_matrix((1, nf + 1), dtype=int)
    for s in rand_cluster_sizes:
        scl[0, s] = rand_cluster_sizes[s]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    test_pvals = _transform_to_pvals(test_count_sizes, scl.astype('float'))
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(test_pvals)
    # the critical cluster-level threshold (without FW correction between
    # clusters) would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]
    if len(clusters_passed_threshold):
        thr_cluster_size = min(clusters_passed_threshold)
    else:
        # no clusters passed the threshold
        pass

    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)
    for test_pval, test_count_size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[test_count_size], test_pval)
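
# The machinery above reduces to two empirical right-tail probabilities. A
# numpy-only sketch of both, assuming rcdf(v) amounts to the fraction of null
# samples >= v, and a cluster p-value to the fraction of null clusters at
# least as large; the library versions may apply small-sample corrections,
# and the function names here are illustrative.
def _demo_right_tail_p(null_samples, value):
    """P(null >= value), estimated from permutation samples."""
    return np.mean(np.asarray(null_samples) >= value)


def _demo_cluster_p(cluster_size, null_cluster_sizes):
    """Fraction of null clusters at least as large as the observed one."""
    return np.mean(np.asarray(null_cluster_sizes) >= cluster_size)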
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### #
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### #
"""Unit tests for CoSMoMVPA dataset (http://cosmomvpa.org)"""

from mvpa2.testing.tools import assert_raises, ok_, assert_true, \
    assert_equal, assert_array_equal, with_tempfile

from mvpa2.testing import skip_if_no_external

skip_if_no_external('scipy')

from scipy.io import loadmat, savemat, matlab

from mvpa2.datasets import cosmo
from mvpa2.measures.base import Measure
from mvpa2.datasets.base import Dataset
from mvpa2.mappers.fx import mean_feature

import numpy as np

arr = np.asarray


#########################
# helper functions
def test_to_df_smoke(self, ds):
    skip_if_no_external('pandas')
    df = ds.to_df()
    print(df)