def wardf_test(n=100, k=5):
    """Run Ward field segmentation on two shifted Gaussian blobs and check it.

    Parameters
    ----------
    n : int, optional
        Number of 2D samples; the first ``int(0.7 * n)`` are shifted by 3.
    k : int, optional
        Number of neighbours handed to ``knn`` when building the graph.
    """
    np.random.seed(0)  # fixed seed so that the drawn samples are reproducible
    n_shifted = int(0.7 * n)
    x = np.random.randn(n, 2)
    x[:n_shifted] += 3
    # Honour the `k` argument (it used to be ignored in favour of a literal 5;
    # the default value keeps the historical behaviour).
    G = knn(x, k)
    F = field_from_graph_and_data(G, x)
    u, cost = ward_field_segment(F, qmax=2)
    # Reference labelling: 1 for the shifted samples, 0 for the others.
    v = np.zeros(n)
    v[:n_shifted] = 1
    # Each term w * (1 - w) is zero when the corresponding entry of w is 0 or 1.
    w = np.absolute(u - v)
    assert_equal(np.sum(w * (1 - w)), 0)
def wardf_test(n=100, k=5):
    """Segment a two-blob point cloud with Ward's method and check the labels.

    Parameters
    ----------
    n : int, optional
        Total number of 2D points drawn from a standard normal distribution.
        The first 70% of them are translated by 3 along both axes.
    k : int, optional
        Neighbourhood size used to build the k-nearest-neighbour graph.
    """
    np.random.seed(0)  # deterministic data
    split = int(0.7 * n)

    x = np.random.randn(n, 2)
    x[:split] += 3

    # The neighbourhood size now follows `k`; previously the argument was
    # silently ignored and 5 was always used (5 is still the default).
    G = knn(x, k)
    F = field_from_graph_and_data(G, x)
    u, cost = ward_field_segment(F, qmax=2)

    # Expected partition: shifted points versus the rest.
    v = np.zeros(n)
    v[:split] = 1

    # The summed quantity is zero when every |u - v| entry is either 0 or 1.
    w = np.absolute(u - v)
    assert_equal(np.sum(w * (1 - w)), 0)
def _extract_clusters_from_diam(labels, T, XYZ, th, diam, k, nCC, CClabels):
    """
    Label diameter-constrained clusters inside each connected component.

    The `labels` input array is modified in place and is also returned.
    Components for which the constraint cannot be met around their root blob
    are handed over to `extract_clusters_from_diam` for a further split.
    """
    max_extent = (diam + 1) ** 3
    next_label = 0
    for cc in range(nCC):
        members = np.where(CClabels == cc)[0]
        n_members = len(members)
        # Components with more than (diam + 1) ** 3 voxels are not measured;
        # they are directly given a diameter of diam + 1.
        if n_members <= max_extent:
            cc_diam = max_dist(XYZ, members, members)
        else:
            cc_diam = diam + 1

        if cc_diam <= diam:
            # The whole component satisfies the constraint.
            labels[members] = next_label
            next_label += 1
            continue

        # Build the field restricted to this component.
        values = T[members]
        field = field_from_graph_and_data(
            wgraph_from_3d_grid(XYZ[:, members].T, k),
            np.reshape(values, (len(values), 1)))

        # Compute the blobs.
        idx, parent, label = field.threshold_bifurcations(0, th)
        n_blobs = np.size(idx)
        height = np.array([np.ceil(np.sum(label == b) ** (1.0 / 3))
                           for b in np.arange(n_blobs)])
        root = np.where(np.arange(n_blobs) == parent)[0]
        root_height = height[root]

        # Can the constraint be met within the current region?
        core = members[values >= root_height]
        if len(core) <= max_extent:
            core_diam = max_dist(XYZ, core, core)
        else:
            core_diam = diam + 1

        if not core_diam <= diam:
            # It cannot: search inside sub-regions.
            inner = members[values > root_height]
            inner_labels = extract_clusters_from_diam(
                T[inner], XYZ[:, inner], th, diam, k)
            inner_labels[inner_labels >= 0] += next_label
            next_label = inner_labels.max() + 1
            labels[inner] = inner_labels
            continue

        # It can: grow the smallest cluster with the remaining voxels, taken
        # by decreasing T value, for as long as the diameter stays admissible.
        cluster = core
        remaining = members[values < root_height]
        by_decreasing_T = np.argsort(T[remaining])[::-1]
        position = 0
        candidate = np.array([remaining[by_decreasing_T[position]]], int)
        diameter = core_diam
        grown_diameter = max(core_diam, max_dist(XYZ, cluster, candidate))
        while grown_diameter <= diam:
            cluster = np.concatenate((cluster, candidate))
            diameter = grown_diameter
            position += 1
            candidate = np.array([remaining[by_decreasing_T[position]]], int)
            grown_diameter = max(diameter,
                                 max_dist(XYZ, cluster, candidate))
        labels[cluster] = next_label
        next_label += 1
    return labels
def extract_clusters_from_diam(T, XYZ, th, diam, k=18):
    """
    Extract clusters from a statistical map
    under diameter constraint
    and above given threshold

    In:  T      (p)     statistical map
         XYZ    (3,p)   voxels coordinates
         th     <float> minimum threshold
         diam   <int>   maximal diameter (in voxels)
         k      <int>   the number of neighbours considered. (6,18 or 26)
    Out: labels (p)     cluster labels

    Entries of `labels` that are never assigned to a cluster keep the
    value -1.
    """
    CClabels = extract_clusters_from_thresh(T, XYZ, th, k)
    nCC = CClabels.max() + 1
    labels = -np.ones(len(CClabels), int)
    clust_label = 0
    # `range` instead of `xrange`: the latter does not exist on Python 3.
    for i in range(nCC):
        I = np.where(CClabels == i)[0]
        extCC = len(I)
        # The diameter is only computed for components of at most
        # (diam + 1) ** 3 voxels; larger ones are given diam + 1 directly.
        if extCC <= (diam + 1) ** 3:
            diamCC = max_dist(XYZ, I, I)
        else:
            diamCC = diam + 1
        if diamCC <= diam:
            # The whole connected component meets the constraint.
            labels[I] = np.zeros(extCC, int) + clust_label
            clust_label += 1
        else:
            # build the field
            p = len(T[I])
            F = field_from_graph_and_data(
                wgraph_from_3d_grid(XYZ[:, I].T, k),
                np.reshape(T[I], (p, 1)))
            # compute the blobs
            idx, parent, label = F.threshold_bifurcations(0, th)
            nidx = np.size(idx)
            # The comprehension uses its own index name so that it cannot
            # clobber the component index `i` on Python 2.
            height = np.array([np.ceil(np.sum(label == j) ** (1. / 3))
                               for j in np.arange(nidx)])
            root = np.where(np.arange(nidx) == parent)[0]
            # Can constraint be met within current region?
            Imin = I[T[I] >= height[root]]
            extmin = len(Imin)
            if extmin <= (diam + 1) ** 3:
                dmin = max_dist(XYZ, Imin, Imin)
            else:
                dmin = diam + 1
            if dmin <= diam:
                # If so, search for the largest cluster meeting the constraint
                Iclust = Imin  # Smallest cluster
                J = I[T[I] < height[root]]  # Remaining voxels
                argsortTJ = np.argsort(T[J])[::-1]  # Decreasing T values
                l = 0
                L = np.array([J[argsortTJ[l]]], int)
                diameter = dmin
                new_diameter = max(dmin, max_dist(XYZ, Iclust, L))
                # Add voxels one at a time while the diameter stays admissible.
                while new_diameter <= diam:
                    Iclust = np.concatenate((Iclust, L))
                    diameter = new_diameter
                    l += 1
                    L = np.array([J[argsortTJ[l]]], int)
                    new_diameter = max(diameter, max_dist(XYZ, Iclust, L))
                labels[Iclust] = np.zeros(len(Iclust), int) + clust_label
                clust_label += 1
            else:
                # If not, search inside sub-regions
                Irest = I[T[I] > height[root]]
                rest_labels = extract_clusters_from_diam(
                    T[Irest], XYZ[:, Irest], th, diam, k)
                # Shift the sub-region labels past the ones already in use.
                rest_labels[rest_labels >= 0] += clust_label
                clust_label = rest_labels.max() + 1
                labels[Irest] = rest_labels
    return labels
def _extract_clusters_from_diam(labels, T, XYZ, th, diam, k, nCC, CClabels):
    """
    Fill `labels` with diameter-constrained cluster labels.

    This function modifies the `labels` input array and returns it.  When a
    component has to be split further, the work is delegated to
    `extract_clusters_from_diam` on the voxels above the root blob height.
    """

    def capped_diameter(voxels):
        # Sets of more than (diam + 1) ** 3 voxels are not measured and are
        # given diam + 1 instead.
        if len(voxels) <= (diam + 1) ** 3:
            return max_dist(XYZ, voxels, voxels)
        return diam + 1

    current = 0
    for comp in range(nCC):
        comp_idx = np.where(CClabels == comp)[0]
        if capped_diameter(comp_idx) <= diam:
            labels[comp_idx] = np.zeros(len(comp_idx), int) + current
            current += 1
        else:
            # build the field
            comp_T = T[comp_idx]
            graph = wgraph_from_3d_grid(XYZ[:, comp_idx].T, k)
            field = field_from_graph_and_data(
                graph, np.reshape(comp_T, (len(comp_T), 1)))
            # compute the blobs
            idx, parent, label = field.threshold_bifurcations(0, th)
            blob_ids = np.arange(np.size(idx))
            height = np.array(
                [np.ceil(np.sum(label == b) ** (1. / 3)) for b in blob_ids])
            root = np.where(blob_ids == parent)[0]
            # Can constraint be met within current region?
            seed = comp_idx[comp_T >= height[root]]
            seed_diam = capped_diameter(seed)
            if seed_diam <= diam:
                # If so, search for the largest cluster meeting the constraint
                grown = seed  # Smallest cluster
                pool = comp_idx[comp_T < height[root]]  # Remaining voxels
                # Remaining voxels sorted by decreasing T values
                ranked = pool[np.argsort(T[pool])[::-1]]
                cursor = 0
                nxt = np.array([ranked[cursor]], int)
                diameter = seed_diam
                trial = max(seed_diam, max_dist(XYZ, grown, nxt))
                while trial <= diam:
                    grown = np.concatenate((grown, nxt))
                    diameter = trial
                    cursor += 1
                    nxt = np.array([ranked[cursor]], int)
                    trial = max(diameter, max_dist(XYZ, grown, nxt))
                labels[grown] = np.zeros(len(grown), int) + current
                current += 1
            else:
                # If not, search inside sub-regions
                sub_idx = comp_idx[comp_T > height[root]]
                sub_labels = extract_clusters_from_diam(
                    T[sub_idx], XYZ[:, sub_idx], th, diam, k)
                sub_labels[sub_labels >= 0] += current
                current = sub_labels.max() + 1
                labels[sub_idx] = sub_labels
    return labels
def get_3d_peaks(image, mask=None, threshold=0., nn=18, order_th=0,
                 verbose=False):
    """Return all the peaks of image that are within the mask and above the
    provided threshold.

    Parameters
    ----------
    image: (3d) test image
    mask: (3d) mask image, optional
        By default no masking is performed
    threshold: float, optional
        threshold value above which peaks are considered
    nn: int, optional
        number of neighbours of the topological spatial model
    order_th: int, optional
        threshold on topological order to validate the peaks
    verbose: bool, optional
        if True, print a message when no voxel exceeds the threshold

    Returns
    -------
    peaks: list of dict, or None
        None is returned when no voxel exceeds `threshold` or when no local
        maximum has an order above `order_th`.  Otherwise the list holds one
        dictionary per peak, sorted by decreasing peak value, with the keys:
        'val', map value at the peak
        'order', topological order of the peak
        'ijk', grid coordinates of the peak
        'pos', coordinates of the peak mapped by the image affine
    """
    # Masking
    shape = image.shape
    if mask is not None:
        data = image.get_data() * mask.get_data()
        xyz = np.array(np.where(data > threshold)).T
        data = data[data > threshold]
    else:
        data = image.get_data().ravel()
        xyz = np.reshape(np.indices(shape), (3, np.prod(shape))).T
    affine = get_affine(image)

    if not (data > threshold).any():
        if verbose:
            print('no suprathreshold voxels found')
        return None

    # Extract local maxima and connex components above some threshold.
    # The neighbourhood system follows `nn` (it used to be hard-coded to 18,
    # which is still the default value).
    ff = field_from_graph_and_data(wgraph_from_3d_grid(xyz, k=nn), data)
    maxima, order = ff.get_local_maxima(th=threshold)

    # retain only the maxima greater than the specified order
    keep = order > order_th
    maxima = maxima[keep]
    order = order[keep]

    n_maxima = len(maxima)
    if n_maxima == 0:
        # should not occur ?
        return None

    # reorder the maxima to have decreasing peak value
    idx = np.argsort(-data[maxima])
    maxima = maxima[idx]
    order = order[idx]

    vals = data[maxima]
    ijk = xyz[maxima]
    # Homogeneous coordinates times the affine, keeping the spatial columns.
    pos = np.dot(np.hstack((ijk, np.ones((n_maxima, 1)))), affine.T)[:, :3]
    peaks = [{'val': vals[i], 'order': order[i], 'ijk': ijk[i],
              'pos': pos[i]} for i in range(n_maxima)]
    return peaks
def cluster_stats(zimg, mask, height_th, height_control='fpr',
                  cluster_th=0, nulls=None):
    """
    Return a list of clusters, each cluster being represented by a
    dictionary. Clusters are sorted by descending size order. Within
    each cluster, local maxima are sorted by descending depth order.

    Parameters
    ----------
    zimg: z-score image
    mask: mask image
    height_th: cluster forming threshold
    height_control: string
        false positive control meaning of cluster forming threshold:
        'fpr'|'fdr'|'bonferroni'|'none'
    cluster_th: cluster size threshold
    nulls: dict, optional
        cluster-level calibration methods, read from the keys
        'zmax' ('bonferroni' by default, or an array), 'smax' and 's'
        (None by default, or an array).  The dictionary passed by the
        caller is not modified.

    Returns
    -------
    clusters: list of dict, or None
    info: dict, or None
        Both are None when no voxel lies above the height threshold.

    Note
    ----
    This works only with three dimensional data
    """
    # Masking
    if len(mask.get_shape()) > 3:
        xyz = np.where((mask.get_data() > 0).squeeze())
        zmap = zimg.get_data().squeeze()[xyz]
    else:
        xyz = np.where(mask.get_data() > 0)
        zmap = zimg.get_data()[xyz]

    xyz = np.array(xyz).T
    nvoxels = np.size(xyz, 0)

    # Thresholding
    if height_control == 'fpr':
        zth = sp_stats.norm.isf(height_th)
    elif height_control == 'fdr':
        zth = empirical_pvalue.gaussian_fdr_threshold(zmap, height_th)
    elif height_control == 'bonferroni':
        zth = sp_stats.norm.isf(height_th / nvoxels)
    else:
        # Brute-force thresholding
        zth = height_th
    pth = sp_stats.norm.sf(zth)
    above_th = zmap > zth
    if not above_th.any():
        return None, None  # FIXME
    zmap_th = zmap[above_th]
    xyz_th = xyz[above_th]

    # Clustering
    # Extract local maxima and connex components above some threshold
    ff = field_from_graph_and_data(wgraph_from_3d_grid(xyz_th, k=18), zmap_th)
    maxima, depth = ff.get_local_maxima(th=zth)
    labels = ff.cc()

    # Make list of clusters, each cluster being a dictionary
    clusters = []
    for k in range(labels.max() + 1):
        s = np.sum(labels == k)
        if s >= cluster_th:
            in_cluster = labels[maxima] == k
            m = maxima[in_cluster]
            d = depth[in_cluster]
            # Named `order` so that the builtin `sorted` is not shadowed.
            order = d.argsort()[::-1]
            clusters.append({'size': s,
                             'maxima': m[order],
                             'depth': d[order]})

    # Sort clusters by descending size order.  A key function replaces the
    # `cmp=` argument, which Python 3 no longer accepts; the sort is stable,
    # so clusters of equal size keep their relative order.
    clusters.sort(key=lambda c: c['size'], reverse=True)

    # FDR-corrected p-values
    fdr_pvalue = empirical_pvalue.all_fdr_gaussian(zmap)[above_th]

    # Default "nulls": work on a private copy so that neither the caller's
    # dictionary nor a shared default object is ever mutated.
    nulls = {} if nulls is None else dict(nulls)
    nulls.setdefault('zmax', 'bonferroni')
    nulls.setdefault('smax', None)
    nulls.setdefault('s', None)
    zmax_null = nulls['zmax']
    smax_null = nulls['smax']
    s_null = nulls['s']

    # Report significance levels in each cluster
    for c in clusters:
        maxima = c['maxima']
        zscore = zmap_th[maxima]
        pval = sp_stats.norm.sf(zscore)
        # Replace array indices with real coordinates
        c['maxima'] = apply_affine(zimg.get_affine(), xyz_th[maxima])
        c['zscore'] = zscore
        c['pvalue'] = pval
        c['fdr_pvalue'] = fdr_pvalue[maxima]

        # Voxel-level corrected p-values.  The array case is tested first so
        # that an array is never compared with the string 'bonferroni'.
        p = None
        if isinstance(zmax_null, np.ndarray):
            p = simulated_pvalue(zscore, zmax_null)
        elif zmax_null == 'bonferroni':
            p = bonferroni(pval, nvoxels)
        c['fwer_pvalue'] = p

        # Cluster-level p-values (corrected)
        p = None
        if isinstance(smax_null, np.ndarray):
            p = simulated_pvalue(c['size'], smax_null)
        c['cluster_fwer_pvalue'] = p

        # Cluster-level p-values (uncorrected)
        p = None
        if isinstance(s_null, np.ndarray):
            p = simulated_pvalue(c['size'], s_null)
        c['cluster_pvalue'] = p

    # General info
    info = {'nvoxels': nvoxels,
            'threshold_z': zth,
            'threshold_p': pth,
            'threshold_pcorr': bonferroni(pth, nvoxels)}

    return clusters, info
def get_3d_peaks(image, mask=None, threshold=0., nn=18, order_th=0):
    """Return all the peaks of image that are within the mask and above the
    provided threshold.

    Parameters
    ----------
    image: (3d) test image
    mask: (3d) mask image, optional
        By default no masking is performed
    threshold: float, optional
        threshold value above which peaks are considered
    nn: int, optional
        number of neighbours of the topological spatial model
    order_th: int, optional
        threshold on topological order to validate the peaks

    Returns
    -------
    peaks: list of dict, or None
        None is returned when no voxel exceeds `threshold` or when no local
        maximum has an order above `order_th`.  Otherwise the list holds one
        dictionary per peak, sorted by decreasing peak value, with the keys:
        'val', map value at the peak
        'order', topological order of the peak
        'ijk', grid coordinates of the peak
        'pos', coordinates of the peak mapped by the image affine
    """
    # Masking
    if mask is not None:
        in_mask = mask.get_data() > 0
        data = image.get_data().ravel()[in_mask.ravel()]
        # Grid coordinates must come from the un-raveled mask: np.where on a
        # flattened mask yields a single column of flat indices instead of
        # one (i, j, k) triplet per voxel.  Both selections follow the same
        # C ordering, so `data` and `xyz` stay aligned.
        xyz = np.array(np.where(in_mask)).T
    else:
        shape = image.get_shape()
        data = image.get_data().ravel()
        xyz = np.reshape(np.indices(shape), (3, np.prod(shape))).T
    affine = image.get_affine()

    if not (data > threshold).any():
        return None

    # Extract local maxima and connex components above some threshold.
    # The neighbourhood system follows `nn` (it used to be hard-coded to 18,
    # which is still the default value).
    ff = field_from_graph_and_data(wgraph_from_3d_grid(xyz, k=nn), data)
    maxima, order = ff.get_local_maxima(th=threshold)

    # retain only the maxima greater than the specified order
    keep = order > order_th
    maxima = maxima[keep]
    order = order[keep]

    n_maxima = len(maxima)
    if n_maxima == 0:
        # should not occur ?
        return None

    # reorder the maxima to have decreasing peak value
    idx = np.argsort(-data[maxima])
    maxima = maxima[idx]
    order = order[idx]

    vals = data[maxima]
    ijk = xyz[maxima]
    # Homogeneous coordinates times the affine, keeping the spatial columns.
    pos = np.dot(np.hstack((ijk, np.ones((n_maxima, 1)))), affine.T)[:, :3]
    peaks = [{'val': vals[i], 'order': order[i], 'ijk': ijk[i],
              'pos': pos[i]} for i in range(n_maxima)]
    return peaks
def extract_clusters_from_diam(T, XYZ, th, diam, k=18):
    """
    Extract clusters from a statistical map
    under diameter constraint
    and above given threshold

    In:  T      (p)     statistical map
         XYZ    (3,p)   voxels coordinates
         th     <float> minimum threshold
         diam   <int>   maximal diameter (in voxels)
         k      <int>   the number of neighbours considered. (6,18 or 26)
    Out: labels (p)     cluster labels

    Entries of `labels` that are never assigned to a cluster keep the
    value -1.
    """
    CClabels = extract_clusters_from_thresh(T, XYZ, th, k)
    nCC = CClabels.max() + 1
    labels = -np.ones(len(CClabels), int)
    clust_label = 0
    # Components larger than this are assumed to violate the constraint and
    # their diameter is not computed.
    max_extent = (diam + 1) ** 3
    # `range` works on both Python 2 and 3, unlike `xrange`.
    for i in range(nCC):
        I = np.where(CClabels == i)[0]
        extCC = len(I)
        if extCC <= max_extent:
            diamCC = max_dist(XYZ, I, I)
        else:
            diamCC = diam + 1
        if diamCC <= diam:
            # The whole connected component meets the constraint.
            labels[I] = np.zeros(extCC, int) + clust_label
            clust_label += 1
        else:
            # build the field
            p = len(T[I])
            F = field_from_graph_and_data(
                wgraph_from_3d_grid(XYZ[:, I].T, k),
                np.reshape(T[I], (p, 1)))
            # compute the blobs
            idx, parent, label = F.threshold_bifurcations(0, th)
            nidx = np.size(idx)
            # A dedicated index name keeps the comprehension from rebinding
            # the component index `i` on Python 2.
            height = np.array([np.ceil(np.sum(label == blob) ** (1. / 3))
                               for blob in np.arange(nidx)])
            root = np.where(np.arange(nidx) == parent)[0]
            # Can constraint be met within current region?
            Imin = I[T[I] >= height[root]]
            extmin = len(Imin)
            if extmin <= max_extent:
                dmin = max_dist(XYZ, Imin, Imin)
            else:
                dmin = diam + 1
            if dmin <= diam:
                # If so, search for the largest cluster meeting the constraint
                Iclust = Imin  # Smallest cluster
                J = I[T[I] < height[root]]  # Remaining voxels
                argsortTJ = np.argsort(T[J])[::-1]  # Decreasing T values
                l = 0
                L = np.array([J[argsortTJ[l]]], int)
                diameter = dmin
                new_diameter = max(dmin, max_dist(XYZ, Iclust, L))
                # Add voxels one at a time while the diameter stays admissible.
                while new_diameter <= diam:
                    Iclust = np.concatenate((Iclust, L))
                    diameter = new_diameter
                    l += 1
                    L = np.array([J[argsortTJ[l]]], int)
                    new_diameter = max(diameter, max_dist(XYZ, Iclust, L))
                labels[Iclust] = np.zeros(len(Iclust), int) + clust_label
                clust_label += 1
            else:
                # If not, search inside sub-regions
                Irest = I[T[I] > height[root]]
                rest_labels = extract_clusters_from_diam(
                    T[Irest], XYZ[:, Irest], th, diam, k)
                # Shift the sub-region labels past the ones already in use.
                rest_labels[rest_labels >= 0] += clust_label
                clust_label = rest_labels.max() + 1
                labels[Irest] = rest_labels
    return labels