def process_image_nn_based_on_radius(img, img_class, radius=None):
    """Convert an image into a pixel-neighbourhood graph.

    Every pixel becomes one node whose feature vector is the pixel's channel
    values. Two pixels are connected when their (row, column) grid
    coordinates are at most ``radius`` apart (Euclidean distance): a radius
    of 1 gives the 4-neighbourhood, a radius of sqrt(2) the 8-neighbourhood.

    Parameters
    ----------
    img : array-like, shape (height, width, channels)
        Image to convert.
    img_class
        Label of the image; stored unchanged as ``y`` of the returned graph.
    radius : float, optional
        Neighbourhood radius. When omitted, the module-level ``dmax`` is
        used, which is what this function always did before.

    Returns
    -------
    torch_geometric.data.Data
        Graph with ``x`` of shape (height * width, channels) and
        ``edge_index`` of shape (2, n_edges).

    Notes
    -----
    Each neighbouring pair appears once (``i < j``), exactly as produced by
    ``KDTree.query_pairs``.
    NOTE(review): consumers that expect both edge directions must
    symmetrise ``edge_index`` themselves - confirm against the callers.
    """
    img = np.asarray(img)
    img_height, img_width, ch = img.shape
    if radius is None:
        radius = dmax

    # indexing='ij' makes the flattened coordinate list row-major, i.e. node
    # k sits at (k // width, k % width). That is the same order in which
    # img.reshape() lays out the node features below. The default 'xy'
    # indexing is column-major and attaches edges to the wrong pixels as
    # soon as height != width.
    rows, cols = np.meshgrid(np.arange(img_height), np.arange(img_width),
                             indexing='ij')
    coord = np.stack((rows, cols), axis=2).reshape(-1, 2)

    pairs = KDTree(coord).query_pairs(radius)
    # reshape(-1, 2) keeps a well-formed (2, 0) edge index when no pixel
    # pair lies within the radius.
    res = np.array(list(pairs), dtype=np.int64).reshape(-1, 2).T

    x = torch.Tensor(img.reshape(img_height * img_width, ch))
    edge_index = torch.LongTensor(res)
    return torch_geometric.data.Data(x=x, edge_index=edge_index, y=img_class)
def clusters(points, radius):
    """
    Group points that are chained together by neighbours within `radius`.

    Parameters
    ---------
    points : (n, d) float
      Points of dimension d
    radius : float
      Max distance between points in a cluster

    Returns
    ----------
    groups : (m,) sequence of int
      Indices of points in a cluster
    """
    from . import graph

    # request an ndarray explicitly: some versions would otherwise
    # hand the pairs back as a set of tuples
    neighbour_pairs = KDTree(points).query_pairs(
        r=radius, output_type='ndarray')
    # each connected component of the neighbour graph is one cluster
    return graph.connected_components(neighbour_pairs)
def find_pairs(cutoff, X, Y=None):
    """
    Find pairs with euclidean distance below C{cutoff}. Either between
    C{X} and C{Y}, or within C{X} if C{Y} is C{None}.

    Uses a KDTree and thus is memory efficient and reasonable fast.
    Empty inputs are accepted and yield no pairs.

    @type cutoff: float
    @type X: (m,n) numpy.array
    @type Y: (k,n) numpy.array
    @return: set of index tuples C{(i, j)} with C{i < j} when C{Y} is
             C{None}; otherwise an iterator over C{(i, j)} where C{i}
             indexes C{X} and C{j} indexes C{Y}
    @rtype: iterable
    """
    try:
        from scipy.spatial import cKDTree as KDTree
        # some scipy releases ship a cKDTree without these methods;
        # touching them triggers the fallback below
        KDTree.query_pairs
        KDTree.query_ball_tree
    except (ImportError, AttributeError):
        from scipy.spatial import KDTree

    # Nothing can pair with an empty point set. Answering here also avoids
    # building a tree from data that has no usable dimension.
    if len(X) == 0:
        return set() if Y is None else iter(())

    # Use the default leaf size. Passing len(X) as leaf size put every point
    # into a single leaf, which degrades each query to a brute-force scan.
    tree = KDTree(X)
    if Y is None:
        return tree.query_pairs(cutoff)

    if len(Y) == 0:
        return iter(())

    other = KDTree(Y)
    contacts = tree.query_ball_tree(other, cutoff)
    return ((i, j) for (i, js) in enumerate(contacts) for j in js)
def get_clusters(vectors, metric, cutoff):
    """Cluster `vectors` by linking every pair closer than `cutoff`.

    Pairs are found with a KD-tree using the Chebyshev (p=inf) distance and
    merged by `disjoint_sets_cluster`. Progress is printed along the way.

    :param vectors: indexable sequence of points accepted by KDTree
    :param metric: unused; kept so existing callers keep working. The
        distance is always Chebyshev.
    :param cutoff: maximum distance between two linked points
    :return: view of lists, one list of member vectors per cluster.
        NOTE(review): this assumes `disjoint_sets_cluster` yields
        (point_index, cluster_id) tuples - confirm against its definition.
    """
    print("Making KD tree. len(vectors) ==", len(vectors))
    KDT = KDTree(vectors)
    print("KD tree done!")
    pairs = KDT.query_pairs(r=cutoff, p=inf)
    print("pairs done!")
    print("Making DJSet")
    print("DJSet done! Making clusters...")
    print("Making list")
    pairs = list(pairs)
    print("calling DSC. len(pairs) ==", len(pairs))
    assignments = disjoint_sets_cluster(pairs)
    print("Clustered, left numba")

    # The clustering step returns point -> cluster; invert it here into
    # cluster -> member vectors.
    members = defaultdict(list)
    for pt, idp in assignments:
        members[idp].append(vectors[pt])
    print("All done")
    return members.values()
def test_crystal_gel():
    """Experimental data from a crystallizing gel.

    Loads particle coordinates from the example file, bonds every pair
    closer than ``maxbondlength`` and checks the number of crystalline and
    surface particles found by ``boo`` against reference counts.
    """
    pos = np.loadtxt('examples/AR-Res06A_scan2_t890.xyz', skiprows=1)
    maxbondlength = 12.5
    # spatial indexing
    tree = KDTree(pos, 12)
    # query
    bonds = tree.query_pairs(maxbondlength, output_type='ndarray')
    # Particles further than one bond length from every face of the
    # bounding box. The upper bound must be taken per axis (max(0)), like
    # the lower bound. A global scalar maximum would use the largest
    # coordinate of any axis for all of them.
    inside = np.all(
        (pos - pos.min(0) > maxbondlength)
        & (pos.max(0) - pos > maxbondlength),
        -1)
    # number of neighbours per particle
    Nngb = np.zeros(len(pos), int)
    np.add.at(Nngb, bonds.ravel(), 1)
    inside[Nngb < 4] = False
    # tensorial boo
    q6m = boo.bonds2qlm(pos, bonds, l=6)
    q4m = boo.bonds2qlm(pos, bonds, l=4)
    # coarse-graining
    Q6m, inside2 = boo.coarsegrain_qlm(q6m, bonds, inside)
    Q4m, inside3 = boo.coarsegrain_qlm(q4m, bonds, inside)
    assert np.all(inside2 == inside3)
    # crystals
    xpos = boo.x_particles(q6m, bonds)
    assert xpos.sum() == 14188
    # surface particles
    surf = boo.x_particles(q6m, bonds, nb_thr=2) & np.bitwise_not(xpos)
    assert surf.sum() == 9288
def _fast_construct_edges(G, radius, p):
    """Construct edges for random geometric graph.

    Adds an edge to `G` between every two nodes whose ``'pos'`` attributes
    are within `radius` of each other under the Minkowski `p`-norm. Nodes
    without a ``'pos'`` attribute are ignored. A graph with no positioned
    nodes is left untouched.

    Requires scipy to be installed.
    """
    pos = nx.get_node_attributes(G, 'pos')
    if not pos:
        # zip(*...) of nothing cannot be unpacked into two names
        return
    nodes, coords = zip(*pos.items())
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    G.add_edges_from((nodes[u], nodes[v]) for u, v in edge_indexes)
def _fast_edges(G, radius, p):
    """Returns an iterator over node pairs within `radius` of each other
    using scipy KDTree and Minkowski distance metric `p`

    Only nodes carrying a ``'pos'`` attribute take part. When no node has
    one, an empty iterator is returned.

    Requires scipy to be installed.
    """
    pos = nx.get_node_attributes(G, 'pos')
    if not pos:
        # zip(*...) of nothing cannot be unpacked into two names
        return iter(())
    nodes, coords = zip(*pos.items())
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    return ((nodes[u], nodes[v]) for u, v in edge_indexes)
def clusters(points, radius):
    """
    Group points that are linked by neighbours closer than radius

    :param points: nxd points
    :param radius: max distance between points in a cluster
    :return: [point_list, ...], one entry per connected component of the
        neighbour graph

    author: revised by weiwei
    date: 20210120
    """
    # every pair of points within radius becomes an edge of the graph
    neighbour_graph = from_edgelist(KDTree(points).query_pairs(radius))
    return list(connected_components(neighbour_graph))
def cleanup_pairs_KDTree(xyz, kind, data_shape, dmin, grad):
    """Return a boolean mask of the points to keep after pair cleanup.

    Points closer than `dmin` (Chebyshev distance, in a box of side
    ``data_shape[0]`` passed to the tree as ``boxsize``) are paired up and
    handed to ``_cleanup_pairs_KDTree``, which flags the points to skip.
    The returned mask is the negation of that flag.

    Raises ``Exception`` when the axes of `data_shape` differ in length.
    """
    # The unpacking also rejects any xyz that is not two-dimensional.
    _n_points, _n_dims = xyz.shape
    box = data_shape[0]
    logger.debug('Building KDTree')

    # TODO: support non square domains
    if np.any(np.asarray(data_shape) != box):
        raise Exception('All axis should have the same dimension.')

    if len(xyz) == 0:
        return np.ones(0, dtype=bool)

    close_pairs = KDTree(xyz, boxsize=box, copy_data=True).query_pairs(
        dmin, p=np.inf, output_type='ndarray')

    logger.debug('Removing close pairs')
    # centre of the unit cell each point falls into
    cell_centres = np.round(xyz + 0.5) - 0.5
    grad_norm = np.linalg.norm(grad, axis=1)
    skip = _cleanup_pairs_KDTree(xyz, cell_centres, kind, close_pairs, box,
                                 data_shape, grad_norm)
    return ~skip.astype(bool)
def clusters(points, radius):
    '''
    Group points that are linked by neighbours closer than radius

    Arguments
    ---------
    points: (n, d) points (of dimension d)
    radius: max distance between points in a cluster

    Returns:
    groups: (m) sequence of indices for points
    '''
    close_pairs = KDTree(points).query_pairs(radius)
    # clusters are the connected components of the "is close to" graph
    components = connected_components(from_edgelist(close_pairs))
    return list(components)
def dedup(particles, radius):
    """Remove near-duplicate particles, one micrograph at a time.

    Particles are grouped by their ``'rlnMicrographName'`` field. Within a
    group, particles closer than `radius` are paired, and only the rows
    selected by ``connected_components(len(group), pairs)`` are kept.
    NOTE(review): that helper presumably returns one index per cluster -
    confirm against its definition.

    Returns a new array with the dtype of `particles`.
    """
    by_micrograph = ddict(list)
    for row in particles:
        by_micrograph[row['rlnMicrographName']].append(tuple(row))

    survivors = []
    for rows in by_micrograph.values():
        group = np.array(rows, dtype=particles.dtype)
        close_pairs = KDTree(positions(group)).query_pairs(radius)
        kept = connected_components(len(group), close_pairs)
        survivors.extend(tuple(group[idx]) for idx in kept)

    return np.array(survivors, dtype=particles.dtype)
def clusters(points, radius):
    """
    Group points that are linked by neighbours closer than radius

    Parameters
    ---------
    points: (n, d) points (of dimension d)
    radius: max distance between points in a cluster

    Returns
    ----------
    groups: (m) sequence of indices for points
    """
    from . import graph

    # pairs of point indices that lie within radius of each other
    close_pairs = KDTree(points).query_pairs(radius)
    return graph.connected_components(close_pairs)
def geometric_edges(G, radius, p):
    """Returns edge list of node pairs within `radius` of each other

    Radius uses Minkowski distance metric `p`.
    If scipy available, use scipy KDTree to speed computation.

    Node positions are read from the ``"pos"`` node attribute. A graph
    without nodes yields an empty list.
    """
    nodes_pos = list(G.nodes(data="pos"))
    if not nodes_pos:
        # zip(*...) of nothing cannot be unpacked into nodes and coords
        return []

    try:
        from scipy.spatial import cKDTree as KDTree
    except ImportError:
        # no scipy KDTree so compute by for-loop
        radius_p = radius ** p
        return [
            (u, v)
            for (u, pu), (v, pv) in combinations(nodes_pos, 2)
            if sum(abs(a - b) ** p for a, b in zip(pu, pv)) <= radius_p
        ]

    # scipy KDTree is available
    nodes, coords = zip(*nodes_pos)
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    return [(nodes[u], nodes[v]) for u, v in edge_indexes]
class Spade:
    """
    Class implementing Peng Qiu's SPADE algorithm, following S8 in the
    supplemental methods of his Nature Paper.

    Only the density-dependent downsampling step is implemented. `data` is
    transposed on construction, and every row of ``self.data`` is then
    treated as one point (cell).
    """

    # number of points sampled when estimating the median distance
    nsamples = 2000
    # order p of the Minkowski norm used for all distances
    distance_metric = 1
    # if distance_threshold is None, estimate_median_dist() sets it to
    # median_min_dist * alpha
    distance_threshold = None
    alpha = 5

    def __init__(self, data, use_KD_tree=True):
        # We assume that data comes in the format stored in Flowdata class
        self.data = data.transpose()
        self.use_KD_tree = use_KD_tree
        if self.use_KD_tree:
            self._init_KD_tree()
        else:
            self.kd_tree = None

    def run(self):
        """ Apply SPADE algorithm
        """
        # Step 1: apply density dependent downsampling
        self.estimate_median_dist()
        self.compute_local_density()
        self.downsample()

    def _init_KD_tree(self):
        self.kd_tree = KDTree(self.data)

    def _distances_from(self, point):
        """Minkowski distance of every data point to `point`."""
        p = self.distance_metric
        diff = np.abs(self.data - point)
        if np.isinf(p):
            return diff.max(axis=1)
        return np.sum(diff ** p, axis=1) ** (1.0 / p)

    def _brute_force_density(self):
        """Neighbour counts within distance_threshold, without a KD-tree."""
        n_points = self.data.shape[0]
        density = np.zeros(n_points)
        for j in range(n_points):
            within = self._distances_from(self.data[j]) <= self.distance_threshold
            # the point itself is always within the threshold
            density[j] = np.count_nonzero(within) - 1
        return density

    def estimate_median_dist(self):
        """Estimate the median nearest-neighbour distance between cells.

        At most `nsamples` randomly chosen cells are used. The result is
        stored in ``median_dist`` and returned. When ``distance_threshold``
        is still None it is set to ``alpha * median_dist``.
        """
        n_points = self.data.shape[0]
        # Sample only when there are more points than requested samples.
        # Drawing nsamples without replacement from fewer points is
        # impossible.
        if self.nsamples < n_points:
            index = np.random.choice(n_points, self.nsamples, replace=False)
        else:
            index = np.arange(n_points)
        x = self.data[index, :]

        if self.use_KD_tree:
            # We need to take the first two points (k=2), since distance of
            # the point to itself is zero.
            dist, _ = self.kd_tree.query(x, k=2, p=self.distance_metric)
            dist = dist[:, 1]
        else:
            dist = np.empty(len(index))
            for j, i in enumerate(index):
                d = self._distances_from(self.data[i])
                # give infinite distance to the point with itself
                d[i] = np.inf
                dist[j] = d.min()

        self.median_dist = np.median(dist)
        if self.distance_threshold is None:
            self.distance_threshold = self.alpha * self.median_dist
        return self.median_dist

    def compute_local_density_using_pairs(self):
        """Count neighbours within distance_threshold from KD-tree pairs.

        Returns the per-point counts. ``self.local_density`` is not
        modified. Falls back to a brute-force count without a KD-tree.
        """
        if not self.use_KD_tree:
            return self._brute_force_density()
        local_density = np.zeros(self.data.shape[0])
        pairs = self.kd_tree.query_pairs(self.distance_threshold,
                                         p=self.distance_metric)
        print("Found {} pairs".format(len(pairs)))
        for a, b in pairs:
            local_density[a] += 1
            local_density[b] += 1
        print(local_density.max())
        return local_density

    def compute_local_density(self):
        """Count, for every point, the other points within
        distance_threshold. Stores and returns the counts.
        """
        print(self.distance_threshold)
        if self.use_KD_tree:
            # Querying point by point seems slightly faster than one bulk
            # query, likely due to decreased memory requirements.
            n_points = self.data.shape[0]
            local_density = np.zeros(n_points)
            for j in range(n_points):
                neighbours = self.kd_tree.query_ball_point(
                    self.data[j], self.distance_threshold,
                    p=self.distance_metric)
                local_density[j] = len(neighbours) - 1
        else:
            local_density = self._brute_force_density()
        print(local_density)
        self.local_density = local_density
        return local_density

    def downsample(self):
        """Density-dependent downsampling into ``downsampled_data``.

        Points with density in [outlier_density, target_density) are all
        kept. Points denser than target_density are kept with probability
        target_density / density. Everything else is dropped.
        """
        target_density = 10
        outlier_density = 3
        local_density = np.asarray(self.local_density, dtype=float)

        # events between the outlier range and the target density
        keep_all = ((outlier_density <= local_density)
                    & (local_density < target_density))
        downsampled_data = self.data[keep_all, :]

        # events that are in high density regions
        keep_prob = ((target_density < local_density)
                     * (target_density / (local_density + 1e-14)))
        total = keep_prob.sum()
        if total > 0:
            downsample_index = np.random.choice(
                self.data.shape[0], int(math.ceil(total)), replace=False,
                p=keep_prob / total)
            # axis=0 keeps one row per event; without it np.append
            # flattens both arrays
            downsampled_data = np.append(
                downsampled_data, self.data[downsample_index, :], axis=0)

        print(downsampled_data.shape)
        self.downsampled_data = downsampled_data
def merge_tips(mesh, all_paths, roots, tot_path_lengths,
               large_skel_path_threshold=5000, max_tip_d=2000):
    """Connect separate skeleton components through nearby tips.

    Builds a graph whose nodes are the tips (first vertex of every path)
    and the root of every component, joins tips of different components
    that are closer than `max_tip_d`, and lets `pcst_fast` pick which of
    those joins to keep.

    Parameters
    ----------
    mesh : object with a `vertices` array indexable as ``vertices[i, :]``
    all_paths : per component, a list of paths; ``path[0]`` is used as the
        tip vertex index of each path
    roots : per component, the root vertex index
    tot_path_lengths : per component, the path lengths that are summed into
        the component's total length
    large_skel_path_threshold : components whose total length exceeds this
        get a prize equal to their length and zero internal edge cost;
        all others get zero prize and an internal cost equal to their length
    max_tip_d : maximum distance between two tips for them to be joined

    Returns
    -------
    Edge array in mesh vertex indices: the edges of all input paths (from
    ``utils.paths_to_edges``) stacked with the selected inter-component
    edges.
    """
    # collect all the tips of the skeletons (including roots)
    skel_tips = []
    all_tip_indices = []
    for paths, root in zip(all_paths, roots):
        tips = []
        tip_indices = []
        for path in paths:
            tip_ind = path[0]
            tip = mesh.vertices[tip_ind, :]
            tips.append(tip)
            tip_indices.append(tip_ind)
        # the root is appended last, so it is always the final tip
        # of its component
        root_tip = mesh.vertices[root, :]
        tips.append(root_tip)
        tip_indices.append(root)
        skel_tips.append(np.vstack(tips))
        all_tip_indices.append(np.array(tip_indices))
    # this is our overall tip matrix merged together
    all_tips = np.vstack(skel_tips)
    # and the vertex index of those tips in the original mesh
    all_tip_indices = np.concatenate(all_tip_indices)
    # variable to keep track of what component each tip was from
    tip_component = np.zeros(all_tips.shape[0])
    # counter to keep track of an overall tip index as we go through
    # the components with different numbers of tips
    ind_counter = 0

    # setup the prize collection steiner forest problem variables
    # prizes will be related to path length of the tip components
    tip_prizes = []
    # where to collect all the tip<>tip edges
    all_edges = []
    # where to collect all the tip<>tip edge weights
    all_edge_weights = []

    # loop over all the components and their tips
    for k, tips, path_lengths in zip(range(len(tot_path_lengths)), skel_tips, tot_path_lengths):
        # how many tips in this component
        ntips = tips.shape[0]
        # calculate the total path length in this component
        path_len = np.sum(np.array(path_lengths))
        # the prize is 0 if this is small, and the path length if big
        prize = path_len if path_len > large_skel_path_threshold else 0
        # the cost of traveling within a skeleton is 0 if big, and the path_len if small
        cost = path_len if path_len <= large_skel_path_threshold else 0
        # add a block of prizes to the tip prizes for this component
        tip_prizes.append(prize * np.ones(ntips))
        # make an array of overall tip index for this component
        comp_tips = np.arange(ind_counter, ind_counter + ntips, dtype=np.int64)
        # add edges between this components root and each of the tips
        # (the root is the last tip, so this includes a root<>root edge)
        root_tips = (ind_counter + ntips - 1) * np.ones(ntips, dtype=np.int64)
        in_tip_edges = np.hstack([root_tips[:, np.newaxis], comp_tips[:, np.newaxis]])
        all_edges.append(in_tip_edges)
        # add a block for the cost of these edges
        all_edge_weights.append(cost * np.ones(ntips))
        # note what component each of these tips is from
        tip_component[comp_tips] = k
        # increment our overall index counter
        ind_counter += ntips

    # gather all the prizes into a single block
    tip_prizes = np.concatenate(tip_prizes)
    # make a kdtree with all the tips
    tip_tree = KDTree(all_tips)
    # find the tips near one another
    close_tips = tip_tree.query_pairs(max_tip_d, output_type='ndarray')
    # filter out close tips from the same component
    diff_comp = ~(tip_component[close_tips[:, 0]] == tip_component[close_tips[:, 1]])
    filt_close_tips = close_tips[diff_comp]
    # add these as edges
    all_edges.append(filt_close_tips)
    # with weights equal to their euclidean distance
    dv = np.linalg.norm(all_tips[filt_close_tips[:, 0], :] - all_tips[filt_close_tips[:, 1]], axis=1)
    all_edge_weights.append(dv)
    # consolidate the edges and weights into a single array
    inter_tip_weights = np.concatenate(all_edge_weights)
    inter_tip_edges = np.concatenate(all_edges)

    # run the prize collecting steiner forest optimization
    # NOTE(review): the trailing arguments are presumably root=-1
    # (unrooted), one cluster, 'gw' pruning and verbosity 1 - confirm
    # against the pcst_fast documentation
    mst_verts, mst_edges = pcst_fast.pcst_fast(inter_tip_edges, tip_prizes, inter_tip_weights, -1, 1, 'gw', 1)
    # find the set of mst edges that are between connected components;
    # mst_edges is used as an index into inter_tip_edges
    new_mst_edges = mst_edges[tip_component[inter_tip_edges[mst_edges, 0]] != tip_component[inter_tip_edges[mst_edges, 1]]]
    good_inter_tip_edges = inter_tip_edges[new_mst_edges, :]
    # get these in the original index
    new_edges_orig_ind = all_tip_indices[good_inter_tip_edges]

    # collect all the edges for all the paths into a single list
    # with the original indices of the mesh
    orig_edges = []
    for paths, root in zip(all_paths, roots):
        edges = utils.paths_to_edges(paths)
        orig_edges.append(edges)
    orig_edges = np.vstack(orig_edges)
    # and add our new mst edges
    tot_edges = np.vstack([orig_edges, new_edges_orig_ind])
    return tot_edges
def _build_correspondence_rows(point_idx, ch, y, x, im_th,
                               normalized_images, cycle, max_columns):
    """Build the max_df row and the inputs_df row for one group of points.

    `point_idx` selects entries of the parallel arrays `ch`, `y`, `x`.
    Returns ``(max_row, input_row)``: a Series indexed by `max_columns` and
    a dict with the inputs_df fields.
    """
    max_row = pd.Series(np.nan, index=max_columns, dtype=float)
    max_row['cycle'] = cycle
    for k in point_idx:
        # column layout: intensities at 0..3, then (x, y) pairs per channel
        max_row.iloc[ch[k]] = im_th[cycle, ch[k] + 2, y[k], x[k]]
        max_row.iloc[ch[k] * 2 + 4] = x[k]
        max_row.iloc[ch[k] * 2 + 4 + 1] = y[k]

    # channel with the strongest intensity; its position in the row is
    # also its channel index
    intensities = max_row['I_T':'I_A']
    brightest = intensities[intensities == np.nanmax(intensities)].index[0]
    ch_idx = max_row.index.get_loc(brightest)
    x_ch = int(max_row.iloc[ch_idx * 2 + 4])
    y_ch = int(max_row.iloc[ch_idx * 2 + 4 + 1])

    # 5x5 window around the brightest point. It may come out empty near
    # the image border, in which case it is stored as is.
    rect = normalized_images[cycle, ch_idx + 2,
                             y_ch - 2:y_ch + 3, x_ch - 2:x_ch + 3]
    if rect.size != 0:
        rect = (rect - np.amin(rect)) / (np.amax(rect) - np.amin(rect))
        rect = rect - np.mean(rect)

    input_row = {
        'cycle': int(cycle),
        'ch': ch_idx + 2,
        'x': x_ch,
        'y': y_ch,
        'Intensities_window_5x5': rect,
    }
    return max_row, input_row


def Execute_Correspondences_CreateInputs(candidates, normalized_images, im_th,
                                         cycle, channels, nbit):
    """Collect per-signal maxima and 5x5 intensity windows for one cycle.

    Candidate pixels of `cycle` are labelled into connected components
    (computed over channels ``2:channels``). A component whose points are
    all within Manhattan distance 2 of each other is treated as a single
    signal. A larger component is split into one signal per distinct
    (x, y) pixel.

    Parameters
    ----------
    candidates : array indexed as ``[cycle, channel, y, x]``
    normalized_images : array indexed like `candidates`; source of the
        5x5 windows
    im_th : array indexed like `candidates`; source of the intensities
    cycle : int
        Cycle to process.
    channels : int
        Exclusive upper channel index used for labelling.
    nbit
        Unused; kept for interface compatibility.

    Returns
    -------
    dict
        ``'max_df'`` has one row per signal with the intensity and
        position in each of the four channels. ``'inputs_df'`` has one row
        per signal with the brightest channel, its position and the
        normalised window.
    """
    input_columns = ['cycle', 'ch', 'x', 'y', 'Intensities_window_5x5']
    max_columns = [
        'I_T', 'I_G', 'I_C', 'I_A', 'x_T', 'y_T', 'x_G', 'y_G', 'x_C', 'y_C',
        'x_A', 'y_A', 'cycle'
    ]

    cc, n_c = label(np.amax(candidates[cycle, 2:channels, :, :], axis=0),
                    return_num=True,
                    connectivity=1)
    conn_components = np.zeros((4, candidates.shape[-2], candidates.shape[-1]))
    for ch in range(4):
        conn_components[ch, :, :] = np.multiply(
            cc, candidates[cycle, ch + 2, :, :])

    # Rows are gathered in lists and turned into frames once at the end.
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.
    max_rows = []
    input_rows = []
    for i in tqdm(range(1, n_c + 1)):
        ch, y, x = np.where(conn_components == i)
        n_points = len(x)
        if n_points == 0:
            # component has no pixel in the four base channels
            continue
        kdT_tmp = KDTree(np.array([x, y]).T)
        n_all_pairs = n_points * (n_points - 1) // 2
        if n_all_pairs == len(kdT_tmp.query_pairs(2, p=1)):
            # compact component: every point is close to every other one
            point_groups = [range(n_points)]
        else:
            # if connected components is too large (likely cover more
            # signals) then split it into one group per distinct pixel
            coords = np.vstack((x, y))
            coords_unique = np.unique(coords, axis=1)
            point_groups = [
                np.argwhere(
                    np.all(coords == coords_unique[:, j][:, np.newaxis],
                           axis=0)).reshape((-1, ))
                for j in range(coords_unique.shape[-1])
            ]

        for point_idx in point_groups:
            max_row, input_row = _build_correspondence_rows(
                point_idx, ch, y, x, im_th, normalized_images, cycle,
                max_columns)
            max_rows.append(max_row)
            input_rows.append(input_row)

    max_df = pd.DataFrame(max_rows, columns=max_columns)
    inputs_df = pd.DataFrame(input_rows, columns=input_columns)
    return {'max_df': max_df, 'inputs_df': inputs_df}
class Spade:
    """
    Class implementing Peng Qiu's SPADE algorithm, following S8 in the
    supplemental methods of his Nature Paper.

    Only the density-dependent downsampling step is implemented. `data` is
    transposed on construction, and every row of ``self.data`` is then
    treated as one point (cell).
    """

    # number of points sampled when estimating the median distance
    nsamples = 2000
    # order p of the Minkowski norm used for all distances
    distance_metric = 1
    # if distance_threshold is None, estimate_median_dist() sets it to
    # median_min_dist * alpha
    distance_threshold = None
    alpha = 5

    def __init__(self, data, use_KD_tree=True):
        # We assume that data comes in the format stored in Flowdata class
        self.data = data.transpose()
        self.use_KD_tree = use_KD_tree
        if self.use_KD_tree:
            self._init_KD_tree()
        else:
            self.kd_tree = None

    def run(self):
        """ Apply SPADE algorithm
        """
        # Step 1: apply density dependent downsampling
        self.estimate_median_dist()
        self.compute_local_density()
        self.downsample()

    def _init_KD_tree(self):
        self.kd_tree = KDTree(self.data)

    def _distances_from(self, point):
        """Minkowski distance of every data point to `point`."""
        p = self.distance_metric
        diff = np.abs(self.data - point)
        if np.isinf(p):
            return diff.max(axis=1)
        return np.sum(diff ** p, axis=1) ** (1.0 / p)

    def _brute_force_density(self):
        """Neighbour counts within distance_threshold, without a KD-tree."""
        n_points = self.data.shape[0]
        density = np.zeros(n_points)
        for j in range(n_points):
            within = self._distances_from(self.data[j]) <= self.distance_threshold
            # the point itself is always within the threshold
            density[j] = np.count_nonzero(within) - 1
        return density

    def estimate_median_dist(self):
        """Estimate the median nearest-neighbour distance between cells.

        At most `nsamples` randomly chosen cells are used. The result is
        stored in ``median_dist`` and returned. When ``distance_threshold``
        is still None it is set to ``alpha * median_dist``.
        """
        n_points = self.data.shape[0]
        # Sample only when there are more points than requested samples.
        # Drawing nsamples without replacement from fewer points is
        # impossible.
        if self.nsamples < n_points:
            index = np.random.choice(n_points, self.nsamples, replace=False)
        else:
            index = np.arange(n_points)
        x = self.data[index, :]

        if self.use_KD_tree:
            # We need to take the first two points (k=2), since distance of
            # the point to itself is zero.
            dist, _ = self.kd_tree.query(x, k=2, p=self.distance_metric)
            dist = dist[:, 1]
        else:
            dist = np.empty(len(index))
            for j, i in enumerate(index):
                d = self._distances_from(self.data[i])
                # give infinite distance to the point with itself
                d[i] = np.inf
                dist[j] = d.min()

        self.median_dist = np.median(dist)
        if self.distance_threshold is None:
            self.distance_threshold = self.alpha * self.median_dist
        return self.median_dist

    def compute_local_density_using_pairs(self):
        """Count neighbours within distance_threshold from KD-tree pairs.

        Returns the per-point counts. ``self.local_density`` is not
        modified. Falls back to a brute-force count without a KD-tree.
        """
        if not self.use_KD_tree:
            return self._brute_force_density()
        local_density = np.zeros(self.data.shape[0])
        pairs = self.kd_tree.query_pairs(self.distance_threshold,
                                         p=self.distance_metric)
        print("Found {} pairs".format(len(pairs)))
        for a, b in pairs:
            local_density[a] += 1
            local_density[b] += 1
        print(local_density.max())
        return local_density

    def compute_local_density(self):
        """Count, for every point, the other points within
        distance_threshold. Stores and returns the counts.
        """
        print(self.distance_threshold)
        if self.use_KD_tree:
            # Querying point by point seems slightly faster than one bulk
            # query, likely due to decreased memory requirements.
            n_points = self.data.shape[0]
            local_density = np.zeros(n_points)
            for j in range(n_points):
                neighbours = self.kd_tree.query_ball_point(
                    self.data[j], self.distance_threshold,
                    p=self.distance_metric)
                local_density[j] = len(neighbours) - 1
        else:
            local_density = self._brute_force_density()
        print(local_density)
        self.local_density = local_density
        return local_density

    def downsample(self):
        """Density-dependent downsampling into ``downsampled_data``.

        Points with density in [outlier_density, target_density) are all
        kept. Points denser than target_density are kept with probability
        target_density / density. Everything else is dropped.
        """
        target_density = 10
        outlier_density = 3
        local_density = np.asarray(self.local_density, dtype=float)

        # events between the outlier range and the target density
        keep_all = ((outlier_density <= local_density)
                    & (local_density < target_density))
        downsampled_data = self.data[keep_all, :]

        # events that are in high density regions
        keep_prob = ((target_density < local_density)
                     * (target_density / (local_density + 1e-14)))
        total = keep_prob.sum()
        if total > 0:
            downsample_index = np.random.choice(
                self.data.shape[0], int(math.ceil(total)), replace=False,
                p=keep_prob / total)
            # axis=0 keeps one row per event; without it np.append
            # flattens both arrays
            downsampled_data = np.append(
                downsampled_data, self.data[downsample_index, :], axis=0)

        print(downsampled_data.shape)
        self.downsampled_data = downsampled_data
class KDicTree(dict):
    '''
    Wrapper around the scipy.spatial.KDTree for labelled points.

    Use like dict to register or update points:

        tree = KDicTree({'1':(0,0), 2:(2,2), '3':(45,45)})
        tree['1'] = (1, 1)
        tree['2'] = (5, 5)
        tree['3'] = (50, 50)

    Then use KDTree querys; results carry the dict keys instead of
    positional indices:

        tree.query_ball_point( (3, 3), 10 )
        ('1', 2, '2')

    The underlying tree is rebuilt lazily on the first query after any
    change to the dictionary.

    Parameters
    ----------
    data : labelled (N,K) dict
        The data points to be indexed, labelled in a dictionary.
    leafsize : int, optional
        The number of points at which the algorithm switches over to
        brute-force.  Has to be positive.

    See Also
    --------
    scipy.spatial.KDTree
    scipy.spatial.cKDTree
    '''

    def __init__(self, data, leafsize=16):
        self.tree = None
        self.ids = []  # maps tree to dict keys
        self.altered = True
        self.leafsize = leafsize
        super().__init__(data)

    # dict's own methods do not route through __setitem__/__delitem__, so
    # every mutator has to flag the tree as stale itself.

    def __setitem__(self, key, point):
        '''Set point for self[key]'''
        super().__setitem__(key, point)
        self.altered = True

    def __delitem__(self, key):
        '''Delete self[key].'''
        super().__delitem__(key)
        self.altered = True

    def __ior__(self, other):
        '''Merge other into self (self |= other).'''
        self.update(other)
        return self

    def update(self, *args, **kwargs):
        '''Register or update several points at once.'''
        super().update(*args, **kwargs)
        self.altered = True

    def setdefault(self, key, default=None):
        '''Return self[key], registering default first if key is missing.'''
        point = super().setdefault(key, default)
        self.altered = True
        return point

    def pop(self, *args):
        '''Remove a key and return its point.'''
        point = super().pop(*args)
        self.altered = True
        return point

    def popitem(self):
        '''Remove and return one (key, point) pair.'''
        item = super().popitem()
        self.altered = True
        return item

    def clear(self):
        '''Remove all points.'''
        super().clear()
        self.altered = True

    def build_tree(self):
        '''Gets called automatically by a query.'''
        if not self.altered:
            return
        self.tree = KDTree(list(self.values()), leafsize=self.leafsize)
        self.ids = list(self.keys())
        self.altered = False

    def map_ids(self, ids):
        '''Maps the result of Querys to dict keys.

        Sequences are mapped element-wise into (nested) tuples.  The index
        scipy uses for a missing neighbour (the number of points) is
        mapped to None.
        '''
        if isinstance(ids, (tuple, list, ndarray)):
            return tuple(map(self.map_ids, ids))
        if ids >= len(self.ids):
            return None
        return self.ids[ids]

    def query(self, x, k=1, eps=0, p=2, distance_upper_bound=float("inf")):
        '''Query the kd-tree for nearest neighbors.'''
        self.build_tree()
        dists, ids = self.tree.query(x, k, eps, p, distance_upper_bound)
        return (dists, self.map_ids(ids))

    def query_ball_point(self, x, r, p=2., eps=0):
        '''Find all points within distance r of point(s) x.'''
        self.build_tree()
        return self.map_ids(self.tree.query_ball_point(x, r, p, eps))

    def query_pairs(self, r, p=2., eps=0):
        '''Find all pairs of points within a distance r.'''
        self.build_tree()
        return [
            tuple(self.map_ids(pair))
            for pair in self.tree.query_pairs(r, p=p, eps=eps)
        ]
def extract_edges_in_block(db_name, db_host, soft_mask_container,
                           soft_mask_dataset, distance_threshold,
                           evidence_threshold, graph_number, block):
    """Find candidate edges in one block and write them to the graph store.

    Reads the candidate nodes of ``block.read_roi``, pairs all candidates
    closer than `distance_threshold`, scores every pair with
    ``cpp_get_evidence`` on the soft mask and writes the weighted edges for
    ``block.write_roi`` into the ``edges_g<graph_number>`` collection.

    The soft mask must be uint8, or float32/float64 within [0, 1] (which is
    scaled by 255). When `evidence_threshold` is not None the mask is
    binarised to 0/255 at ``evidence_threshold * 255``.

    Returns 0 in every case. Raises ValueError for a soft mask with an
    unsupported dtype or a float mask outside [0, 1].
    """
    graph_provider = MongoDbGraphProvider(
        db_name,
        db_host,
        mode='r+',
        position_attribute=['z', 'y', 'x'],
        directed=False,
        edges_collection='edges_g{}'.format(graph_number))

    # NOTE(review): check_function presumably reports whether this block
    # was already processed - confirm against its definition
    if check_function(graph_provider.database, block, "edges_g{}".format(graph_number)):
        return 0

    logger.debug("Finding edges in %s, reading from %s", block.write_roi, block.read_roi)

    start = time.time()
    soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset)
    graph = graph_provider[block.read_roi.intersect(soft_mask_array.roi)]

    if graph.number_of_nodes() == 0:
        logger.info("No nodes in roi %s. Skipping", block.read_roi)
        write_done(graph_provider.database, block, 'edges_g{}'.format(graph_number))
        return 0

    logger.debug("Read %d candidates in %.3fs", graph.number_of_nodes(), time.time() - start)

    start = time.time()
    """ candidates = [(candidate_id, np.array([data[d] for d in ['z', 'y', 'x']])) for candidate_id, data in graph.nodes(data=True) if 'z' in data] """
    # one row per node that has a position: [id, z, y, x], all as uint64
    candidates = np.array(
        [[candidate_id] + [data[d] for d in ['z', 'y', 'x']] for candidate_id, data in graph.nodes(data=True) if 'z' in data],
        dtype=np.uint64)

    kdtree_start = time.time()
    # the tree is built on the (z, y, x) columns only
    kdtree = KDTree([[candidate[1], candidate[2], candidate[3]] for candidate in candidates])
    #kdtree = KDTree(candidates[])
    pairs = kdtree.query_pairs(distance_threshold, p=2.0, eps=0)
    logger.debug("Query pairs in %.3fs", time.time() - kdtree_start)

    soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset)
    voxel_size = np.array(soft_mask_array.voxel_size, dtype=np.uint32)
    soft_mask_roi = block.read_roi.snap_to_grid(
        voxel_size=voxel_size).intersect(soft_mask_array.roi)
    soft_mask_array_data = soft_mask_array.to_ndarray(roi=soft_mask_roi)

    sm_dtype = soft_mask_array_data.dtype
    if sm_dtype == np.uint8:  # standard pipeline pm 0-255
        pass
    elif sm_dtype == np.float32 or sm_dtype == np.float64:
        if not (soft_mask_array_data.min() >= 0 and soft_mask_array_data.max() <= 1):
            raise ValueError(
                "Provided soft_mask has dtype float but not in range [0,1], abort"
            )
        else:
            # bring a [0, 1] float mask onto the 0-255 scale
            soft_mask_array_data *= 255
    else:
        raise ValueError("Soft mask dtype {} not understood".format(sm_dtype))

    soft_mask_array_data = soft_mask_array_data.astype(np.float64)

    if evidence_threshold is not None:
        # binarise: 255 at or above the threshold, 0 below
        soft_mask_array_data = (soft_mask_array_data >= evidence_threshold * 255).astype(np.float64) * 255

    # offset of the mask ROI expressed in voxels
    offset = np.array(np.array(soft_mask_roi.get_offset()) / voxel_size, dtype=np.uint64)

    evidence_start = time.time()
    if pairs:
        pairs = np.array(list(pairs), dtype=np.uint64)
        evidence_array = cpp_get_evidence(candidates, pairs, soft_mask_array_data, offset, voxel_size)
        graph.add_weighted_edges_from(evidence_array, weight='evidence')

        logger.debug("Accumulate evidence in %.3fs", time.time() - evidence_start)
        logger.debug("Found %d edges", graph.number_of_edges())
        logger.debug("Extracted edges in %.3fs", time.time() - start)

        start = time.time()
        graph.write_edges(block.write_roi)
        logger.debug("Wrote edges in %.3fs", time.time() - start)
    else:
        logger.debug("No pairs in block, skip")

    write_done(graph_provider.database, block, 'edges_g{}'.format(graph_number))
    return 0