def test_surface_face_normal(self, do_deterioriate_surface):
    """Check the face normals of a plane spanned by two random vectors.

    A plane is generated from two random 3-vectors and, if
    ``do_deterioriate_surface`` is true, passed through
    ``SurfingSurfaceTests.deterioriate_surface``.  Every face normal
    without NaN entries must point in the direction of the negated cross
    product of the spanning vectors and must match the normalized
    ``nanmean_face_normal`` of the plane.  At least one such face is
    required, otherwise the test fails.
    """
    span_a = np.random.normal(size=(3,))
    span_b = np.random.normal(size=(3,))
    expected_normal = -np.cross(span_a, span_b)

    plane = generate_plane((0, 0, 0), span_a, span_b, 10, 10)
    if do_deterioriate_surface:
        plane = SurfingSurfaceTests.deterioriate_surface(plane)

    n_checked = 0
    for normal in plane.face_normals:
        if np.isnan(normal).any():
            # faces whose normal contains NaN are not checked
            continue

        assert_vector_direction_almost_equal(normal, expected_normal,
                                             decimal=0)
        assert_almost_equal(normal,
                            surf.normalized(plane.nanmean_face_normal),
                            decimal=0)
        n_checked += 1

    # guard against a vacuous pass when every face normal was skipped
    assert n_checked > 0, "Test should include faces with non-NaN normals"
def test_balancer():
    """Exercise ``Balancer`` on the dataset returned by ``give_data()``.

    Covered: marking the selection in the ``balanced_set`` sample
    attribute, applying the selection, use as a generator, ``limit``
    with and without ``include_offlimit``, a fixed and a fractional
    ``amount``, and balancing on a feature attribute.
    """
    ds = give_data()

    def nelements(collectable):
        # Materialize the per-value counts as a list: on Python 3
        # ``dict.values()`` is a view that never compares equal to a list
        # and that ``np.array`` would wrap into a 0-d object array.
        return list(get_nelements_per_value(collectable).values())

    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)

    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(nelements(res.sa.targets), [2] * 4)

    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples
    # included
    assert_equal(nelements(res[res.sa.chunks == 3].sa.targets), [2] * 4)
    assert_equal(nelements(res.sa.chunks),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])

    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.sa.targets), [1] * 4)

    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(np.array(nelements(ds.sa.targets)) * 0.5),
        np.array(nelements(res.sa.targets)))

    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.fa.one), [4] * 2)
def test_balancer():
    """Exercise ``Balancer`` on the dataset returned by ``give_data()``.

    Covered: marking the selection in the ``balanced_set`` sample
    attribute, applying the selection, use as a generator, ``limit``
    with and without ``include_offlimit``, a fixed and a fractional
    ``amount``, and balancing on a feature attribute.
    """
    ds = give_data()

    def nelements(collectable):
        # Materialize the per-value counts as a list: on Python 3
        # ``dict.values()`` is a view that never compares equal to a list
        # and that ``np.array`` would wrap into a 0-d object array.
        return list(get_nelements_per_value(collectable).values())

    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)

    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(nelements(res.sa.targets), [2] * 4)

    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples
    # included
    assert_equal(nelements(res[res.sa.chunks == 3].sa.targets), [2] * 4)
    assert_equal(nelements(res.sa.chunks),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])

    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.sa.targets), [1] * 4)

    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(np.array(nelements(ds.sa.targets)) * 0.5),
        np.array(nelements(res.sa.targets)))

    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.fa.one), [4] * 2)
def test_average_node_edge_length_tiny(self):
    """Check ``average_node_edge_length`` on a single right-angled triangle.

    The triangle has its right angle at the origin with legs of random
    length (drawn from [2, 5)) along the z and y axes.  For every node
    the expected value is the mean length of the two edges meeting there.
    """
    leg_a = np.random.uniform(low=2, high=5)
    leg_b = np.random.uniform(low=2, high=5)
    hypotenuse = (leg_a ** 2 + leg_b ** 2) ** .5

    triangle = Surface([(0, 0, 0), (0, 0, leg_a), (0, leg_b, 0)],
                       [(0, 1, 2)])

    # edges meeting at node 0, node 1 and node 2 respectively
    edges_per_node = [(leg_a, leg_b),
                      (leg_a, hypotenuse),
                      (leg_b, hypotenuse)]
    expected_avg = [(first + second) / 2
                    for first, second in edges_per_node]

    assert_almost_equal(triangle.average_node_edge_length, expected_avg)
def test_mean_tpr():
    """Check ``mean_tpr`` on empty and disbalanced label sets."""
    # Argument pairs for which mean_tpr has to raise ValueError: first the
    # ones with an empty side, then the interesting ones where there were
    # no target when it was in predicted
    invalid_args = (
        ([1], []),
        ([], [1]),
        ([], []),
        ([1], [0]),
        ([0, 1], [0, 0]),
    )
    for args in invalid_args:
        assert_raises(ValueError, mean_tpr, *args)

    # but it should be ok to have some targets not present in prediction
    assert_equal(mean_tpr([0, 0], [0, 1]), .5)

    # the same regardless how many samples in 0-class, if all misclassified
    # (winner by # of samples takes all)
    assert_equal(mean_tpr([0, 0, 0], [0, 0, 1]), .5)

    # whenever mean-accuracy would be different
    assert_almost_equal(mean_match_accuracy([0, 0, 0], [0, 0, 1]), 2 / 3.)
def test_mean_tpr():
    """Check ``mean_tpr`` on empty and disbalanced label sets."""
    # Argument pairs for which mean_tpr has to raise ValueError: first the
    # ones with an empty side, then the interesting ones where there were
    # no target when it was in predicted
    invalid_args = (
        ([1], []),
        ([], [1]),
        ([], []),
        ([1], [0]),
        ([0, 1], [0, 0]),
    )
    for args in invalid_args:
        assert_raises(ValueError, mean_tpr, *args)

    # but it should be ok to have some targets not present in prediction
    assert_equal(mean_tpr([0, 0], [0, 1]), .5)

    # the same regardless how many samples in 0-class, if all misclassified
    # (winner by # of samples takes all)
    assert_equal(mean_tpr([0, 0, 0], [0, 0, 1]), .5)

    # whenever mean-accuracy would be different
    assert_almost_equal(mean_match_accuracy([0, 0, 0], [0, 0, 1]), 2 / 3.)
def test_mean_tpr_balanced():
    """On balanced sets ``mean_tpr`` has to match ``mean_match_accuracy``.

    Runs for 2 and 3 classes with 1 and 2 samples per class, checking a
    perfect match, a perfect mismatch and five random shuffles; for the
    shuffles ``mean_tpr`` is also compared against ``1 - mean_fnr``.
    """
    for n_classes in range(2, 4):
        for n_per_class in range(1, 3):
            labels = np.repeat(np.arange(n_classes), n_per_class)

            # perfect match
            assert_equal(mean_match_accuracy(labels, labels), 1.0)
            assert_equal(mean_tpr(labels, labels), 1.0)

            # perfect mismatch -- shift by n_per_class, so no label matches
            guess = np.roll(labels, n_per_class)
            assert_equal(mean_match_accuracy(labels, guess), 0)
            assert_equal(mean_tpr(labels, guess), 0)

            # do few permutations and see if both match
            for _ in range(5):
                np.random.shuffle(guess)
                assert_equal(mean_tpr(labels, guess),
                             mean_match_accuracy(labels, guess))
                assert_almost_equal(mean_tpr(labels, guess),
                                    1 - mean_fnr(labels, guess))
def test_mean_tpr_balanced():
    """On balanced sets ``mean_tpr`` has to match ``mean_match_accuracy``.

    Runs for 2 and 3 classes with 1 and 2 samples per class, checking a
    perfect match, a perfect mismatch and five random shuffles; for the
    shuffles ``mean_tpr`` is also compared against ``1 - mean_fnr``.
    """
    for n_classes in range(2, 4):
        for n_per_class in range(1, 3):
            labels = np.repeat(np.arange(n_classes), n_per_class)

            # perfect match
            assert_equal(mean_match_accuracy(labels, labels), 1.0)
            assert_equal(mean_tpr(labels, labels), 1.0)

            # perfect mismatch -- shift by n_per_class, so no label matches
            guess = np.roll(labels, n_per_class)
            assert_equal(mean_match_accuracy(labels, guess), 0)
            assert_equal(mean_tpr(labels, guess), 0)

            # do few permutations and see if both match
            for _ in range(5):
                np.random.shuffle(guess)
                assert_equal(mean_tpr(labels, guess),
                             mean_match_accuracy(labels, guess))
                assert_almost_equal(mean_tpr(labels, guess),
                                    1 - mean_fnr(labels, guess))
def _assert_rotation_maps_vector(r, x, y):
    """Assert that ``r`` is a 3x3 array mapping ``x`` onto the direction of ``y``.

    Parameters
    ----------
    r
      Candidate rotation; must be a ``np.ndarray`` of shape (3, 3).
    x
      Vector that ``r`` is applied to (via ``r.dot(x)``).
    y
      Vector whose direction the rotated ``x`` must have.

    Besides the direction, the L2 norm of ``x`` must be preserved by
    ``r`` (modulo rounding errors).
    """
    # rotation must be 3x3 numpy array
    assert_equal(r.shape, (3, 3))
    assert_is_instance(r, np.ndarray)

    def unit(vec):
        # scale to unit length; a zero-length vector is mapped to scalar 0
        length = np.linalg.norm(vec)
        if length == 0:
            return 0
        return vec / length

    rotated = r.dot(x)

    # rotation applied to x must yield direction of y
    assert_vector_direction_almost_equal(unit(rotated), unit(y))

    # since it is a rotation, the result must have the same
    # L2 norm as the input
    assert_almost_equal(np.linalg.norm(x), np.linalg.norm(rotated))
def test_gifti_dataset_with_anatomical_surface(fn, format_, include_nodes):
    """Check ``map2gifti`` output when an anatomical surface is attached.

    The image is expected to hold, in this order: an optional node index
    array (if ``include_nodes``), one float32 array per sample, the
    float32 vertices and the int32 faces.  Reading the image back with
    ``gifti_dataset`` must reproduce the dataset, i.e. ignore the
    vertices and faces.

    ``fn`` and ``format_`` are not used inside the body.
    """
    ds = _get_test_dataset(include_nodes)
    _, n_features = ds.shape

    # random geometry: one vertex per feature, faces wrap around modulo
    # the number of features
    vertices = np.random.normal(size=(n_features, 3))
    faces = np.asarray([start + np.arange(3)
                        for start in range(2 * n_features)]) % n_features
    surf = Surface(vertices, faces)
    img = map2gifti(ds, surface=surf)

    def assert_stored_as(darray, np_type, type_name):
        # both the numpy dtype and the declared GIFTI datatype must agree
        assert_equal(darray.data.dtype, np_type)
        assert_equal(darray.datatype, data_type_codes[type_name])

    position = 0

    if include_nodes:
        # check node indices
        node_arr = img.darrays[position]
        assert_equal(node_arr.intent,
                     intent_codes.code['NIFTI_INTENT_NODE_INDEX'])
        assert_equal(node_arr.coordsys, None)
        assert_stored_as(node_arr, np.int32, 'int32')
        position += 1

    for sample in ds.samples:
        # check sample content
        sample_arr = img.darrays[position]
        assert_almost_equal(sample_arr.data, sample)
        assert_equal(sample_arr.coordsys, None)
        assert_stored_as(sample_arr, np.float32, 'float32')
        position += 1

    # check vertices
    vertex_arr = img.darrays[position]
    assert_almost_equal(vertex_arr.data, vertices)
    assert_stored_as(vertex_arr, np.float32, 'float32')

    # check faces
    position += 1
    face_arr = img.darrays[position]
    assert_almost_equal(face_arr.data, faces)
    assert_stored_as(face_arr, np.int32, 'int32')

    # getting the functional data should ignore faces and vertices
    ds_again = gifti_dataset(img)
    assert_datasets_almost_equal(ds, ds_again)
def test_gifti_dataset_with_anatomical_surface(fn, format_, include_nodes):
    """Check ``map2gifti`` output when an anatomical surface is attached.

    The image is expected to hold, in this order: an optional node index
    array (if ``include_nodes``), one float32 array per sample, the
    float32 vertices and the int32 faces.  Reading the image back with
    ``gifti_dataset`` must reproduce the dataset, i.e. ignore the
    vertices and faces.

    ``fn`` and ``format_`` are not used inside the body.
    """
    ds = _get_test_dataset(include_nodes)
    _, nfeatures = ds.shape

    vertices = np.random.normal(size=(nfeatures, 3))
    # ``range`` instead of the Python-2-only ``xrange`` so the test runs
    # unchanged on Python 2 and 3
    faces = np.asarray([i + np.arange(3)
                        for i in range(2 * nfeatures)]) % nfeatures
    surf = Surface(vertices, faces)

    img = map2gifti(ds, surface=surf)

    arr_index = 0

    if include_nodes:
        # check node indices
        node_arr = img.darrays[arr_index]
        assert_equal(node_arr.intent,
                     intent_codes.code['NIFTI_INTENT_NODE_INDEX'])
        assert_equal(node_arr.coordsys, None)
        assert_equal(node_arr.data.dtype, np.int32)
        assert_equal(node_arr.datatype, data_type_codes['int32'])

        arr_index += 1

    for sample in ds.samples:
        # check sample content
        arr = img.darrays[arr_index]
        data = arr.data
        assert_almost_equal(data, sample)
        assert_equal(arr.coordsys, None)
        assert_equal(arr.data.dtype, np.float32)
        assert_equal(arr.datatype, data_type_codes['float32'])

        arr_index += 1

    # check vertices
    vertex_arr = img.darrays[arr_index]
    assert_almost_equal(vertex_arr.data, vertices)
    assert_equal(vertex_arr.data.dtype, np.float32)
    assert_equal(vertex_arr.datatype, data_type_codes['float32'])

    # check faces
    arr_index += 1
    face_arr = img.darrays[arr_index]
    assert_almost_equal(face_arr.data, faces)
    assert_equal(face_arr.data.dtype, np.int32)
    assert_equal(face_arr.datatype, data_type_codes['int32'])

    # getting the functional data should ignore faces and vertices
    ds_again = gifti_dataset(img)
    assert_datasets_almost_equal(ds, ds_again)
def test_balancer():
    """Exercise ``Balancer`` on the dataset returned by ``give_data()``.

    Covered: marking the selection in the ``balanced_set`` sample
    attribute, applying the selection, use as a generator, behavior with
    and without an integer ``rng`` seed (including interleaved
    ``__call__`` and ``generate`` use), ``limit`` with and without
    ``include_offlimit``, a fixed and a fractional ``amount``, and
    balancing on a feature attribute.
    """
    ds = give_data()
    ds.sa['ids'] = np.arange(len(ds))  # some sa to ease tracking of samples

    def nelements(collectable):
        # Materialize the per-value counts as a list: on Python 3
        # ``dict.values()`` is a view that never compares equal to a list
        # and that ``np.array`` would wrap into a 0-d object array.
        return list(get_nelements_per_value(collectable).values())

    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)

    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # if we rerun again, it would be a different selection
    # (compare against the rerun result instead of discarding it and
    # running the balancer yet another time)
    res2 = bal(ds)
    assert_true(np.any(res.sa.ids != res2.sa.ids))

    # but if we create a balancer providing seed rng int,
    # should be identical results
    bal = Balancer(apply_selection=True, count=5, rng=1)
    assert_false(np.any(bal(ds).sa.ids != bal(ds).sa.ids))

    # But results should differ if we use .generate to produce those
    # multiple balanced datasets
    b = Balancer(apply_selection=True, count=3, rng=1)
    balanced = list(b.generate(ds))
    assert_false(all(balanced[0].sa.ids == balanced[1].sa.ids))
    assert_false(all(balanced[0].sa.ids == balanced[2].sa.ids))
    assert_false(all(balanced[1].sa.ids == balanced[2].sa.ids))

    # And should be exactly the same
    for ds_a, ds_b in zip(balanced, b.generate(ds)):
        assert_datasets_equal(ds_a, ds_b)

    # Contribution by Chris Markiewicz
    # And interleaving __call__ and generator fetches
    gen1 = b.generate(ds)
    gen2 = b.generate(ds)

    seq1, seq2, seq3 = [], [], []

    # builtin next() and range() work on both Python 2 and 3, unlike
    # the generator's .next() method and xrange()
    for _ in range(3):
        seq1.append(next(gen1))
        seq2.append(next(gen2))
        seq3.append(b(ds))

    # Produces expected sequences
    for i in range(3):
        assert_datasets_equal(balanced[i], seq1[i])
        assert_datasets_equal(balanced[i], seq2[i])

    # And all __call__s return the same result
    ds_a = seq3[0]
    for ds_b in seq3[1:]:
        assert_array_equal(ds_a.sa.ids, ds_b.sa.ids)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(nelements(res.sa.targets), [2] * 4)

    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples
    # included
    assert_equal(nelements(res[res.sa.chunks == 3].sa.targets), [2] * 4)
    assert_equal(nelements(res.sa.chunks),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])

    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.sa.targets), [1] * 4)

    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(np.array(nelements(ds.sa.targets)) * 0.5),
        np.array(nelements(res.sa.targets)))

    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.fa.one), [4] * 2)
def assert_vector_direction_almost_equal(x, y, *args, **kwargs):
    """Assert that vectors ``x`` and ``y`` point in the same direction.

    The check is that the dot product of ``x`` and ``y`` almost equals
    the product of their norms.  Any additional positional and keyword
    arguments are handed over to ``assert_almost_equal``.
    """
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    inner_product = np.dot(x, y)
    assert_almost_equal(inner_product, norm_product, *args, **kwargs)
def test_simple_cluster_level_thresholding():
    """Cross-check cluster-level p-values against a manual computation.

    Random "accuracies" are turned into per-feature p-values through
    ``Nonparametric`` distributions, thresholded and clustered.  The
    cluster sizes must agree with those obtained via
    ``get_thresholding_map``, and the p-values returned by
    ``get_cluster_pvals`` must agree with those computed by hand from a
    sparse cluster size histogram and ``_transform_to_pvals``.
    """
    n_features = 13
    n_perms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(n_perms, n_features))
    acc = np.random.normal(size=(1, n_features))

    # Step 1 is to "fit" "Nonparametrics" per each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(column) for column in rand_acc.T]

    def rcdf_per_feature(values):
        # pair the i-th distribution with the i-th item of `values`
        return [dist.rcdf(value) for dist, value in zip(dists, values)]

    # we should be able to assert "p" value for each random sample for each
    # feature: once with a whole column per distribution, once value by value
    rand_acc_p = np.array(rcdf_per_feature(rand_acc.T)).T
    rand_acc_p_slow = np.array(
        [rcdf_per_feature(sample) for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)

    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert np.all(rand_acc_p <= 1)
    assert np.all(rand_acc_p > 0)

    # 2: apply the same to our acc
    acc_p = np.array(rcdf_per_feature(acc[0]))[None, :]
    assert np.all(acc_p <= 1)
    assert np.all(acc_p > 0)

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster level madness
    from mvpa2.algorithms.group_clusterthr import (
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals,
        get_thresholding_map, repeat_cluster_vals)

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p < pthr_feature)
    acc_cluster_sizes = get_cluster_sizes(acc_p < pthr_feature)

    # This is how we can compute it within present implementation.
    # It will be a bit different (since it doesn't account for target value
    # if I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_via_map = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_via_map = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_via_map)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_via_map)

    # That is how it is done in group_clusterthr atm
    # store cluster size histogram for later p-value evaluation
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    size_histogram = dok_matrix((1, n_features + 1), dtype=int)
    for size in rand_cluster_sizes:
        size_histogram[0, size] = rand_cluster_sizes[size]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(
        _transform_to_pvals(test_count_sizes, size_histogram.astype('float')))

    # critical cluster_level threshold (without FW correction between
    # clusters) would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]
    if len(clusters_passed_threshold):
        # min cluster size which passed threshold; not asserted on
        thr_cluster_size = min(clusters_passed_threshold)

    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)

    for pval, size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[size], pval)
def test_balancer():
    """Exercise ``Balancer`` on the dataset returned by ``give_data()``.

    Covered: marking the selection in the ``balanced_set`` sample
    attribute, applying the selection, use as a generator, behavior with
    and without an integer ``rng`` seed (including interleaved
    ``__call__`` and ``generate`` use), ``limit`` with and without
    ``include_offlimit``, a fixed and a fractional ``amount``, and
    balancing on a feature attribute.
    """
    ds = give_data()
    ds.sa['ids'] = np.arange(len(ds))  # some sa to ease tracking of samples

    def nelements(collectable):
        # Materialize the per-value counts as a list: on Python 3
        # ``dict.values()`` is a view that never compares equal to a list
        # and that ``np.array`` would wrap into a 0-d object array.
        return list(get_nelements_per_value(collectable).values())

    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)

    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # if we rerun again, it would be a different selection
    # (compare against the rerun result instead of discarding it and
    # running the balancer yet another time)
    res2 = bal(ds)
    assert_true(np.any(res.sa.ids != res2.sa.ids))

    # but if we create a balancer providing seed rng int,
    # should be identical results
    bal = Balancer(apply_selection=True, count=5, rng=1)
    assert_false(np.any(bal(ds).sa.ids != bal(ds).sa.ids))

    # But results should differ if we use .generate to produce those
    # multiple balanced datasets
    b = Balancer(apply_selection=True, count=3, rng=1)
    balanced = list(b.generate(ds))
    assert_false(all(balanced[0].sa.ids == balanced[1].sa.ids))
    assert_false(all(balanced[0].sa.ids == balanced[2].sa.ids))
    assert_false(all(balanced[1].sa.ids == balanced[2].sa.ids))

    # And should be exactly the same
    for ds_a, ds_b in zip(balanced, b.generate(ds)):
        assert_datasets_equal(ds_a, ds_b)

    # Contribution by Chris Markiewicz
    # And interleaving __call__ and generator fetches
    gen1 = b.generate(ds)
    gen2 = b.generate(ds)

    seq1, seq2, seq3 = [], [], []

    # builtin next() and range() work on both Python 2 and 3, unlike
    # the generator's .next() method and xrange()
    for _ in range(3):
        seq1.append(next(gen1))
        seq2.append(next(gen2))
        seq3.append(b(ds))

    # Produces expected sequences
    for i in range(3):
        assert_datasets_equal(balanced[i], seq1[i])
        assert_datasets_equal(balanced[i], seq2[i])

    # And all __call__s return the same result
    ds_a = seq3[0]
    for ds_b in seq3[1:]:
        assert_array_equal(ds_a.sa.ids, ds_b.sa.ids)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(nelements(res.sa.targets), [2] * 4)

    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples
    # included
    assert_equal(nelements(res[res.sa.chunks == 3].sa.targets), [2] * 4)
    assert_equal(nelements(res.sa.chunks),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])

    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.sa.targets), [1] * 4)

    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(np.array(nelements(ds.sa.targets)) * 0.5),
        np.array(nelements(res.sa.targets)))

    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(nelements(res.fa.one), [4] * 2)
def test_simple_cluster_level_thresholding():
    """Cross-check cluster-level p-values against a manual computation.

    Random "accuracies" are turned into per-feature p-values through
    ``Nonparametric`` distributions, thresholded and clustered.  The
    cluster sizes must agree with those obtained via
    ``get_thresholding_map``, and the p-values returned by
    ``get_cluster_pvals`` must agree with those computed by hand from a
    sparse cluster size histogram and ``_transform_to_pvals``.
    """
    n_features = 13
    n_perms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(n_perms, n_features))
    acc = np.random.normal(size=(1, n_features))

    # Step 1 is to "fit" "Nonparametrics" per each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(column) for column in rand_acc.T]

    def rcdf_per_feature(values):
        # pair the i-th distribution with the i-th item of `values`
        return [dist.rcdf(value) for dist, value in zip(dists, values)]

    # we should be able to assert "p" value for each random sample for each
    # feature: once with a whole column per distribution, once value by value
    rand_acc_p = np.array(rcdf_per_feature(rand_acc.T)).T
    rand_acc_p_slow = np.array(
        [rcdf_per_feature(sample) for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)

    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert np.all(rand_acc_p <= 1)
    assert np.all(rand_acc_p > 0)

    # 2: apply the same to our acc
    acc_p = np.array(rcdf_per_feature(acc[0]))[None, :]
    assert np.all(acc_p <= 1)
    assert np.all(acc_p > 0)

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster level madness
    from mvpa2.algorithms.group_clusterthr import (
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals,
        get_thresholding_map, repeat_cluster_vals)

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p < pthr_feature)
    acc_cluster_sizes = get_cluster_sizes(acc_p < pthr_feature)

    # This is how we can compute it within present implementation.
    # It will be a bit different (since it doesn't account for target value
    # if I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_via_map = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_via_map = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_via_map)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_via_map)

    # That is how it is done in group_clusterthr atm
    # store cluster size histogram for later p-value evaluation
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    size_histogram = dok_matrix((1, n_features + 1), dtype=int)
    for size in rand_cluster_sizes:
        size_histogram[0, size] = rand_cluster_sizes[size]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(
        _transform_to_pvals(test_count_sizes, size_histogram.astype('float')))

    # critical cluster_level threshold (without FW correction between
    # clusters) would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]
    if len(clusters_passed_threshold):
        # min cluster size which passed threshold; not asserted on
        thr_cluster_size = min(clusters_passed_threshold)

    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)

    for pval, size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[size], pval)