예제 #1
0
def test_pval():
    def not_inplace_shuffle(x):
        x = list(x)
        random.shuffle(x)
        return x

    x = range(100000) * 20
    x = np.array(x)
    x = x.reshape(20, 100000)
    x = x.T
    x = np.apply_along_axis(not_inplace_shuffle, axis=0, arr=x)
    expected_result = [100000 - 100000 * 0.001] * 20

    thresholds = gct.get_thresholding_map(x, p=0.001)
    assert_array_equal(thresholds, expected_result)
    # works with datasets too
    dsthresholds = gct.get_thresholding_map(Dataset(x), p=0.001)
    assert_almost_equal(thresholds, dsthresholds)
    assert_raises(ValueError,
                  gct.get_thresholding_map, x, p=0.00000001)

    x = range(0, 100, 5)
    null_dist = np.repeat(1, 100).astype(float)[None]
    pvals = gct._transform_to_pvals(x, null_dist)
    desired_output = np.array([1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6,
                               0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15,
                               0.1, 0.05])
    assert_array_almost_equal(desired_output, pvals)
예제 #2
0
def test_pval():
    def not_inplace_shuffle(x):
        x = list(x)
        random.shuffle(x)
        return x

    x = range(100000) * 20
    x = np.array(x)
    x = x.reshape(20, 100000)
    x = x.T
    x = np.apply_along_axis(not_inplace_shuffle, axis=0, arr=x)
    expected_result = [100000 - 100000 * 0.001] * 20

    thresholds = gct.get_thresholding_map(x, p=0.001)
    assert_array_equal(thresholds, expected_result)
    # works with datasets too
    dsthresholds = gct.get_thresholding_map(Dataset(x), p=0.001)
    assert_almost_equal(thresholds, dsthresholds)
    assert_raises(ValueError, gct.get_thresholding_map, x, p=0.00000001)

    x = range(0, 100, 5)
    null_dist = np.repeat(1, 100).astype(float)[None]
    pvals = gct._transform_to_pvals(x, null_dist)
    desired_output = np.array([
        1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5, 0.45, 0.4,
        0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05
    ])
    assert_array_almost_equal(desired_output, pvals)
예제 #3
0
def test_simple_cluster_level_thresholding():
    nf = 13
    nperms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(nperms, nf))
    acc = np.random.normal(size=(1, nf))

    # Step 1 is to "fit" "Nonparametrics" per each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(samples) for samples in rand_acc.T]
    # we should be able to assert "p" value for each random sample for each feature
    rand_acc_p = np.array([dist.rcdf(v)
                           for dist, v in zip(dists, rand_acc.T)]).T

    rand_acc_p_slow = np.array(
        [[dist.rcdf(v) for dist, v in zip(dists, sample)]
         for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)

    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert (np.all(rand_acc_p <= 1))
    assert (np.all(rand_acc_p > 0))

    # 2: apply the same to our acc
    acc_p = np.array([dist.rcdf(v) for dist, v in zip(dists, acc[0])])[None, :]
    assert (np.all(acc_p <= 1))
    assert (np.all(acc_p > 0))

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster level madness
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    rand_acc_p_thr = rand_acc_p < pthr_feature
    acc_p_thr = acc_p < pthr_feature

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p_thr)
    acc_cluster_sizes = get_cluster_sizes(acc_p_thr)

    # This is how we can compute it within present implementation.
    # It will be a bit different (since it doesn't account for target value if
    # I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_ = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_ = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_)

    #print rand_cluster_sizes
    #print acc_cluster_sizes

    # That is how it is done in group_clusterthr atm
    # store cluster size histogram for later p-value evaluation
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    scl = dok_matrix((1, nf + 1), dtype=int)
    for s in rand_cluster_sizes:
        scl[0, s] = rand_cluster_sizes[s]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    test_pvals = _transform_to_pvals(test_count_sizes, scl.astype('float'))
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(test_pvals)
    # critical cluster_level threshold (without FW correction between clusters)
    # would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]

    if len(clusters_passed_threshold):
        thr_cluster_size = min(clusters_passed_threshold)
        #print("Min cluster size which passed threshold: %d" % thr_cluster_size)
    else:
        #print("No clusters passed threshold")
        pass
    #print test_count_sizes, test_pvals

    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)

    for test_pval, test_count_size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[test_count_size], test_pval)
예제 #4
0
clthr = gct.GroupClusterThreshold(n_bootstrap = 100000,
                                  feature_thresh_prob=0.001,
                                  chunk_attr='SN',
                                  fwe_rate=0.05,
                                  multicomp_correction='fdr_bh',
                                  n_blocks=800, n_proc=4)

print('bootstrapping...')
clthr.train(sl_all_map) #bootstrapping group-level chance map
print('Estimate significance & threshold the results... ')
res = clthr(sl_real_map)

#compute p-values for specific sized clusters
clustr_area = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])
#clustr_area = np.array([50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350])
cluster_prob_raw = np.round(gct._transform_to_pvals(clustr_area, clthr._null_cluster_sizes.astype('float')),4)
cluster_prob_raw = np.round(gct._transform_to_pvals(clustr_area, clthr._null_cluster_sizes.astype('float')),4)

null_cluster_sizes = sp.dok_matrix.toarray(clthr._null_cluster_sizes)

#store the outputs...
thresmap_nifti = map2nifti(fds, data=res.fa.featurewise_thresh)
sig_nifti = map2nifti(fds, data=res.fa.clusters_featurewise_thresh)
fwesig_nifti = map2nifti(fds, data=res.fa.clusters_fwe_thresh)

thresmap_nifti.to_filename(slc_path2 + '/' + cond[xCond] + '_sl_thresmap.nii')
sig_nifti.to_filename(slc_path2 + '/' + cond[xCond] + '_sl_sigmap.nii')
fwesig_nifti.to_filename(slc_path2 + '/' + cond[xCond] + '_sl_fwesigmap.nii')


#draw histogram: # of voxels within a random cluster
예제 #5
0
def test_simple_cluster_level_thresholding():
    nf = 13
    nperms = 100
    pthr_feature = 0.5  # just for testing
    pthr_cluster = 0.5
    rand_acc = np.random.normal(size=(nperms, nf))
    acc = np.random.normal(size=(1, nf))

    # Step 1 is to "fit" "Nonparametrics" per each of the features
    from mvpa2.clfs.stats import Nonparametric
    dists = [Nonparametric(samples) for samples in rand_acc.T]
    # we should be able to assert "p" value for each random sample for each feature
    rand_acc_p = np.array(
        [dist.rcdf(v) for dist, v in zip(dists, rand_acc.T)]
        ).T

    rand_acc_p_slow = np.array([
        [dist.rcdf(v) for dist, v in zip(dists, sample)]
         for sample in rand_acc])
    assert_array_equal(rand_acc_p_slow, rand_acc_p)

    assert_equal(rand_acc_p.shape, rand_acc.shape)
    assert(np.all(rand_acc_p <= 1))
    assert(np.all(rand_acc_p > 0))

    # 2: apply the same to our acc
    acc_p = np.array([dist.rcdf(v) for dist, v in zip(dists, acc[0])])[None, :]
    assert(np.all(acc_p <= 1))
    assert(np.all(acc_p > 0))

    skip_if_no_external('scipy')
    # Now we need to do our fancy cluster level madness
    from mvpa2.algorithms.group_clusterthr import \
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals, \
        get_thresholding_map, repeat_cluster_vals

    rand_acc_p_thr = rand_acc_p < pthr_feature
    acc_p_thr = acc_p < pthr_feature

    rand_cluster_sizes = get_cluster_sizes(rand_acc_p_thr)
    acc_cluster_sizes = get_cluster_sizes(acc_p_thr)

    # This is how we can compute it within present implementation.
    # It will be a bit different (since it doesn't account for target value if
    # I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(rand_acc, pthr_feature)
    rand_cluster_sizes_ = get_cluster_sizes(rand_acc > thr_map)
    acc_cluster_sizes_ = get_cluster_sizes(acc > thr_map)

    assert_equal(rand_cluster_sizes, rand_cluster_sizes_)
    assert_equal(acc_cluster_sizes, acc_cluster_sizes_)

    #print rand_cluster_sizes
    #print acc_cluster_sizes

    # That is how it is done in group_clusterthr atm
    # store cluster size histogram for later p-value evaluation
    # use a sparse matrix for easy consumption (max dim is the number of
    # features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    scl = dok_matrix((1, nf + 1), dtype=int)
    for s in rand_cluster_sizes:
        scl[0, s] = rand_cluster_sizes[s]

    test_count_sizes = repeat_cluster_vals(acc_cluster_sizes)
    test_pvals = _transform_to_pvals(test_count_sizes, scl.astype('float'))
    # needs conversion to array for comparisons
    test_pvals = np.asanyarray(test_pvals)
    # critical cluster_level threshold (without FW correction between clusters)
    # would be
    clusters_passed_threshold = test_count_sizes[test_pvals <= pthr_cluster]

    if len(clusters_passed_threshold):
        thr_cluster_size = min(clusters_passed_threshold)
        #print("Min cluster size which passed threshold: %d" % thr_cluster_size)
    else:
        #print("No clusters passed threshold")
        pass
    #print test_count_sizes, test_pvals


    acc_cluster_ps = get_cluster_pvals(acc_cluster_sizes, rand_cluster_sizes)

    for test_pval, test_count_size in zip(test_pvals, test_count_sizes):
        assert_almost_equal(acc_cluster_ps[test_count_size], test_pval)