def test_repeat_cluster_vals():
    """Exercise ``gct.repeat_cluster_vals`` with and without a value mapping.

    Every case pairs the positional arguments of one call with the sequence
    its result must equal.  The first argument maps a key to a repeat count;
    the optional second argument maps the same keys to the value expected in
    the output in place of the key itself.
    """
    cases = (
        # counts only: each key is expected as many times as its count
        (({1: 2, 3: 1},), [1, 1, 3]),
        # keys given out of order are still expected in ascending order
        (({1: 2, 3: 2, 2: 1},), [1, 1, 2, 3, 3]),
        # with a value mapping, the mapped value is expected instead of the key
        (({1: 2, 3: 1}, {1: 0.2, 3: 0.5}), [0.2, 0.2, 0.5]),
        (({1: 2, 3: 2, 2: 1}, {1: 'a', 2: 'b', 3: 'c'}),
         ['a', 'a', 'b', 'c', 'c']),
    )
    for call_args, expected in cases:
        assert_array_equal(gct.repeat_cluster_vals(*call_args), expected)
def test_repeat_cluster_vals():
    """Check the output of ``gct.repeat_cluster_vals`` for four inputs.

    Two calls pass only a ``{key: count}`` mapping and expect the keys
    themselves to be repeated; two calls additionally pass a ``{key: value}``
    mapping and expect the mapped values to be repeated instead.
    """
    repeat = gct.repeat_cluster_vals

    # Keys only -- expected output lists each key ``count`` times, ascending.
    expected_two_keys = [1, 1, 3]
    assert_array_equal(repeat({1: 2, 3: 1}), expected_two_keys)

    expected_three_keys = [1, 1, 2, 3, 3]
    assert_array_equal(repeat({1: 2, 3: 2, 2: 1}), expected_three_keys)

    # Keys plus replacement values (numeric, then string).
    expected_floats = [0.2, 0.2, 0.5]
    assert_array_equal(repeat({1: 2, 3: 1}, {1: 0.2, 3: 0.5}),
                       expected_floats)

    expected_letters = ['a', 'a', 'b', 'c', 'c']
    assert_array_equal(
        repeat({1: 2, 3: 2, 2: 1}, {1: 'a', 2: 'b', 3: 'c'}),
        expected_letters)
def test_simple_cluster_level_thresholding():
    """Cross-check the cluster-level thresholding helpers on random data.

    The test draws a ``(nperms, nf)`` array of random "permutation" values and
    a single ``(1, nf)`` test sample, then verifies that:

    * per-feature ``Nonparametric.rcdf`` values are identical whether they
      are evaluated column-wise or sample by sample, and lie in ``(0, 1]``;
    * thresholding those values at ``pthr_feature`` gives the same cluster
      sizes as thresholding the raw values against ``get_thresholding_map``;
    * ``get_cluster_pvals`` agrees with ``_transform_to_pvals`` applied to a
      sparse histogram of the permutation cluster sizes.
    """
    n_features = 13
    n_perms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    # Draw order (permutations first, then the test sample) is kept so that
    # a seeded RNG would produce the same data as before.
    null_acc = np.random.normal(size=(n_perms, n_features))
    obs_acc = np.random.normal(size=(1, n_features))

    # Step 1: "fit" one Nonparametric distribution per feature (column).
    from mvpa2.clfs.stats import Nonparametric
    feature_dists = [Nonparametric(column) for column in null_acc.T]

    def rcdf_per_feature(values):
        """Evaluate each feature's ``rcdf`` on the matching item of values."""
        return [dist.rcdf(value)
                for dist, value in zip(feature_dists, values)]

    # A "p" value for every random sample of every feature: once with whole
    # columns handed to rcdf, once value by value for each sample.
    null_p = np.array(rcdf_per_feature(null_acc.T)).T
    null_p_by_sample = np.array([rcdf_per_feature(row) for row in null_acc])
    assert_array_equal(null_p_by_sample, null_p)

    assert_equal(null_p.shape, null_acc.shape)
    assert np.all(null_p <= 1)
    assert np.all(null_p > 0)

    # Step 2: apply the same distributions to the test sample.
    obs_p = np.array(rcdf_per_feature(obs_acc[0]))[None, :]
    assert np.all(obs_p <= 1)
    assert np.all(obs_p > 0)

    # The skip check runs before the cluster helpers are imported.
    skip_if_no_external('scipy')
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    # Cluster sizes obtained by thresholding the "p" values ...
    null_sizes = get_cluster_sizes(null_p < pthr_feature)
    obs_sizes = get_cluster_sizes(obs_p < pthr_feature)

    # ... and the way the present implementation computes them: from the raw
    # values and a per-feature threshold map.  (Original author's note: this
    # does not account for the target value and works only for accuracies.)
    thr_map = get_thresholding_map(null_acc, pthr_feature)
    null_sizes_from_map = get_cluster_sizes(null_acc > thr_map)
    obs_sizes_from_map = get_cluster_sizes(obs_acc > thr_map)

    assert_equal(null_sizes, null_sizes_from_map)
    assert_equal(obs_sizes, obs_sizes_from_map)

    # As done in group_clusterthr at the moment: keep the cluster size
    # histogram in a sparse row vector; nf + 1 columns because the biggest
    # possible cluster spans all features.
    from scipy.sparse import dok_matrix
    size_hist = dok_matrix((1, n_features + 1), dtype=int)
    for size in null_sizes:
        size_hist[0, size] = null_sizes[size]

    obs_size_per_cluster = repeat_cluster_vals(obs_sizes)
    # converted to an array so it can be compared element-wise below
    obs_pvals = np.asanyarray(
        _transform_to_pvals(obs_size_per_cluster, size_hist.astype('float')))

    # Critical cluster-level threshold (without FW correction between
    # clusters) would be the smallest size that passes; it is computed for
    # illustration only and not asserted on.
    passed = obs_size_per_cluster[obs_pvals <= pthr_cluster]
    if len(passed):
        thr_cluster_size = min(passed)

    cluster_ps = get_cluster_pvals(obs_sizes, null_sizes)
    for size, pval in zip(obs_size_per_cluster, obs_pvals):
        assert_almost_equal(cluster_ps[size], pval)
def test_simple_cluster_level_thresholding():
    """Compare alternative routes to cluster sizes and cluster p-values.

    Random normal data stands in for permutation results (``nperms`` rows,
    ``nf`` columns) and for one test sample.  Feature-wise ``rcdf`` values
    from ``Nonparametric`` are thresholded and clustered, and the outcome is
    compared against ``get_thresholding_map`` based clustering.  Finally the
    p-values from ``_transform_to_pvals`` (fed with a sparse cluster-size
    histogram) are compared with those returned by ``get_cluster_pvals``.
    """
    nf, nperms = 13, 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    perm_acc = np.random.normal(size=(nperms, nf))
    test_acc = np.random.normal(size=(1, nf))

    # Step 1 is to "fit" a Nonparametric per feature
    from mvpa2.clfs.stats import Nonparametric
    dists = []
    for feature_samples in perm_acc.T:
        dists.append(Nonparametric(feature_samples))

    # "p" value of every permutation sample for every feature, computed by
    # passing each feature's full column to its distribution
    column_ps = []
    for dist, column in zip(dists, perm_acc.T):
        column_ps.append(dist.rcdf(column))
    perm_p = np.array(column_ps).T

    # the same quantity computed one sample (row) at a time
    row_ps = []
    for sample in perm_acc:
        row_ps.append([dist.rcdf(v) for dist, v in zip(dists, sample)])
    perm_p_slow = np.array(row_ps)
    assert_array_equal(perm_p_slow, perm_p)

    assert_equal(perm_p.shape, perm_acc.shape)
    assert np.all(perm_p <= 1)
    assert np.all(perm_p > 0)

    # Step 2: the same for the test sample, kept 2D (one row)
    test_p = np.array(
        [dist.rcdf(v) for dist, v in zip(dists, test_acc[0])]
    )[None, :]
    assert np.all(test_p <= 1)
    assert np.all(test_p > 0)

    skip_if_no_external('scipy')
    # cluster-level helpers are imported only after the skip check
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    perm_below_thr = perm_p < pthr_feature
    test_below_thr = test_p < pthr_feature
    perm_cluster_sizes = get_cluster_sizes(perm_below_thr)
    test_cluster_sizes = get_cluster_sizes(test_below_thr)

    # How it can be computed within the present implementation.  Per the
    # original note it is a bit different (it does not account for the
    # target value) and would work only for accuracies.
    thr_map = get_thresholding_map(perm_acc, pthr_feature)
    perm_above_map = perm_acc > thr_map
    test_above_map = test_acc > thr_map
    assert_equal(perm_cluster_sizes, get_cluster_sizes(perm_above_map))
    assert_equal(test_cluster_sizes, get_cluster_sizes(test_above_map))

    # That is how it is done in group_clusterthr atm: store the cluster size
    # histogram in a sparse matrix (max dim is the number of features,
    # i.e. the biggest possible cluster)
    from scipy.sparse import dok_matrix
    histogram = dok_matrix((1, nf + 1), dtype=int)
    for cluster_size in perm_cluster_sizes:
        histogram[0, cluster_size] = perm_cluster_sizes[cluster_size]

    sizes_flat = repeat_cluster_vals(test_cluster_sizes)
    pvals_flat = _transform_to_pvals(sizes_flat, histogram.astype('float'))
    # needs conversion to array for comparisons
    pvals_flat = np.asanyarray(pvals_flat)

    # critical cluster-level threshold (without FW correction between
    # clusters) would be the minimum below; nothing is asserted on it
    significant_sizes = sizes_flat[pvals_flat <= pthr_cluster]
    if len(significant_sizes) != 0:
        thr_cluster_size = min(significant_sizes)

    expected_ps = get_cluster_pvals(test_cluster_sizes, perm_cluster_sizes)
    for pval, cluster_size in zip(pvals_flat, sizes_flat):
        assert_almost_equal(expected_ps[cluster_size], pval)