def __init__(self):
    self.voxel_size = Coordinate((40, 4, 4))
    self.nodes = [
        # corners
        Node(id=1, location=np.array((-200, -200, -200))),
        Node(id=2, location=np.array((-200, -200, 199))),
        Node(id=3, location=np.array((-200, 199, -200))),
        Node(id=4, location=np.array((-200, 199, 199))),
        Node(id=5, location=np.array((199, -200, -200))),
        Node(id=6, location=np.array((199, -200, 199))),
        Node(id=7, location=np.array((199, 199, -200))),
        Node(id=8, location=np.array((199, 199, 199))),
        # center
        Node(id=9, location=np.array((0, 0, 0))),
        Node(id=10, location=np.array((-1, -1, -1))),
    ]
    self.graph_spec = GraphSpec(roi=Roi((-100, -100, -100), (300, 300, 300)))
    self.array_spec = ArraySpec(
        roi=Roi((-200, -200, -200), (400, 400, 400)), voxel_size=self.voxel_size
    )
    self.graph = Graph(self.nodes, [], self.graph_spec)
def __init__(self, graph):
    self.graph = graph
    self.graph_spec = GraphSpec(roi=Roi((-10, -10, -10), (30, 30, 30)), directed=False)
    self.component_1_nodes = [
        Node(i, np.array([0, i, 0])) for i in range(10)
    ] + [Node(i + 10, np.array([i, 5, 0])) for i in range(10)]
    self.component_1_edges = (
        [Edge(i, i + 1) for i in range(9)]
        + [Edge(i + 10, i + 11) for i in range(9)]
        + [Edge(5, 15)]
    )
    self.component_2_nodes = [
        Node(i + 20, np.array([i, 4, 0])) for i in range(10)
    ]
    self.component_2_edges = [Edge(i + 20, i + 21) for i in range(9)]
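# A minimal usage sketch for the fixture above, assuming the Graph class here
# exposes to_nx_graph() (as the last process() in this file does) and that it
# returns an undirected networkx graph for an undirected GraphSpec. It checks
# that component 1 (two chains joined by Edge(5, 15)) and component 2 stay
# disconnected. check_fixture_components is a hypothetical helper name.
import networkx as nx

def check_fixture_components(fixture):
    g = Graph(
        fixture.component_1_nodes + fixture.component_2_nodes,
        fixture.component_1_edges + fixture.component_2_edges,
        fixture.graph_spec,
    )
    components = list(nx.connected_components(g.to_nx_graph()))
    assert len(components) == 2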
def __init__(self):
    self.voxel_size = Coordinate((1, 1, 1))
    self.nodes = [
        # end points
        Node(id=1, location=np.array((0, 4, 4))),
        Node(id=2, location=np.array((9, 4, 4))),
    ]
    self.edges = [Edge(1, 2)]
    self.graph_spec = GraphSpec(roi=Roi((0, 0, 0), (10, 10, 10)))
    self.graph = Graph(self.nodes, self.edges, self.graph_spec)
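# A small usage sketch for the fixture above: restrict the graph to a sub-ROI.
# crop() is an assumed API here, mirroring the trim() call in the kd-tree
# provider further below; both come from the gunpowder-style Graph class.
def crop_fixture_example(fixture):
    sub_roi = Roi((0, 0, 0), (5, 10, 10))
    # node 2 at (9, 4, 4) lies outside sub_roi; the edge crosses the boundary
    cropped = fixture.graph.crop(sub_roi)
    return cropped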
def process(self, batch, request: BatchRequest):
    outputs = Batch()
    voxel_size = batch[self.embeddings].spec.voxel_size
    offset = batch[self.embeddings].spec.roi.get_begin()
    embeddings = batch[self.embeddings].data
    candidates = batch[self.mask].data
    _, depth, height, width = embeddings.shape

    # Append scaled spatial coordinates as extra channels so that the
    # euclidean MST trades off embedding distance against spatial distance.
    coordinates = np.meshgrid(
        np.arange(0, (depth - 0.5) * self.coordinate_scale[0], self.coordinate_scale[0]),
        np.arange(0, (height - 0.5) * self.coordinate_scale[1], self.coordinate_scale[1]),
        np.arange(0, (width - 0.5) * self.coordinate_scale[2], self.coordinate_scale[2]),
        indexing="ij",
    )
    for i in range(len(coordinates)):
        coordinates[i] = coordinates[i].astype(np.float32)

    embedding = np.concatenate([embeddings, coordinates], 0)
    embedding = np.transpose(embedding, axes=[1, 2, 3, 0])
    embedding = embedding.reshape(depth * width * height, -1)
    candidates = candidates.reshape(depth * width * height)
    embedding = embedding[candidates == 1, :]

    emst = mlp.emst(embedding)["output"]

    nodes = set()
    edges = []
    for u, v, distance in emst:
        u = int(u)
        pos_u = embedding[u][-3:] / self.coordinate_scale * voxel_size
        v = int(v)
        pos_v = embedding[v][-3:] / self.coordinate_scale * voxel_size
        nodes.add(Node(u, location=pos_u + offset))
        nodes.add(Node(v, location=pos_v + offset))
        edges.append(Edge(u, v, attrs={self.distance_attr: distance}))

    graph_spec = request[self.mst]
    graph_spec.directed = False
    outputs[self.mst] = Graph(nodes, edges, graph_spec)
    logger.debug(
        f"OUTPUTS CONTAINS MST WITH {len(list(outputs[self.mst].nodes))} NODES"
    )
    return outputs
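# Standalone sketch of the coordinate-augmented EMST idea used above, with
# scipy's minimum_spanning_tree standing in for the mlpack emst call (mlp.emst
# in the node above). The sizes and coordinate_scale are made-up illustration
# values; only the augmentation pattern itself is taken from the code above.
import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree
from scipy.spatial.distance import pdist, squareform

embeddings = np.random.rand(5, 4)              # 5 candidates, 4 embedding channels
positions = np.random.rand(5, 3) * 10          # their spatial positions
coordinate_scale = 0.1                         # weights space vs. embedding distance
augmented = np.concatenate([embeddings, positions * coordinate_scale], axis=1)

mst = minimum_spanning_tree(squareform(pdist(augmented)))
for u, v in zip(*mst.nonzero()):
    print(u, v, mst[u, v])                     # edge (u, v) and its length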
def array_to_graph(self, array):
    # Override with local function
    sklearn.feature_extraction.image._make_edges_3d = _make_edges_3d

    s = array.data.shape

    # Identify connectivity
    t1 = time.time()
    adj_mat = grid_to_graph(n_x=s[0], n_y=s[1], n_z=s[2], mask=array.data)
    t2 = time.time()
    logger.debug(f"GRID TO GRAPH TOOK {t2-t1} SECONDS!")

    # Identify order of the voxels
    t1 = time.time()
    voxel_locs = compute_voxel_locs(
        mask=array.data,
        offset=array.spec.roi.get_begin(),
        scale=array.spec.voxel_size,
    )
    t2 = time.time()
    logger.debug(f"COMPUTING VOXEL LOCS TOOK {t2-t1} SECONDS!")

    t1 = time.time()
    nodes = [
        Node(node_id, voxel_loc) for node_id, voxel_loc in enumerate(voxel_locs)
    ]

    for a, b in zip(adj_mat.row, adj_mat.col):
        assert all(
            abs(voxel_locs[a] - voxel_locs[b]) <= array.spec.voxel_size
        ), f"{voxel_locs[a] - voxel_locs[b]}, {array.spec.voxel_size}"

    edges = [Edge(a, b) for a, b in zip(adj_mat.row, adj_mat.col) if a != b]
    graph = Graph(nodes, edges, GraphSpec(array.spec.roi, directed=False))
    t2 = time.time()
    logger.debug(f"BUILDING GRAPH TOOK {t2-t1} SECONDS!")
    return graph
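# compute_voxel_locs is referenced above but not defined in this file. A
# minimal sketch of what it presumably does, assuming foreground voxels are
# enumerated in the same C order that grid_to_graph uses: world location =
# voxel index scaled by the voxel size, shifted by the ROI offset.
import numpy as np

def compute_voxel_locs(mask, offset, scale):
    locs = np.argwhere(mask)  # (n, 3) voxel indices of foreground voxels, C order
    return locs * np.array(scale) + np.array(offset)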
def process(self, batch, request: BatchRequest):
    outputs = Batch()
    voxel_size = batch[self.embeddings].spec.voxel_size
    roi = batch[self.embeddings].spec.roi
    offset = batch[self.embeddings].spec.roi.get_begin()
    spatial_dims = len(voxel_size)

    embeddings = batch[self.embeddings].data
    embeddings = embeddings.reshape((-1,) + embeddings.shape[-spatial_dims:])
    maxima = batch[self.mask].data
    maxima = maxima.reshape((-1,) + maxima.shape[-spatial_dims:])[0]

    try:
        minimax_edges = maximin.maximin_tree_query_hd(
            embeddings.astype(np.float64),
            maxima.astype(np.uint8),
            decimate=self.decimate,
        )
    except OSError as e:
        logger.warning(
            f"embeddings have shape: {embeddings.shape} and mask has shape: {maxima.shape}"
        )
        raise e

    maximin_id = itertools.count(start=0)

    nodes = set()
    edges = []
    ids = {}
    for a, b, cost in minimax_edges:
        a_id = ids.setdefault(a, next(maximin_id))
        b_id = ids.setdefault(b, next(maximin_id))
        a_loc = np.array(a) * voxel_size + offset
        b_loc = np.array(b) * voxel_size + offset
        assert roi.contains(a_loc), f"Roi {roi} does not contain {a_loc}"
        assert roi.contains(b_loc), f"Roi {roi} does not contain {b_loc}"
        nodes.add(Node(a_id, location=a_loc))
        nodes.add(Node(b_id, location=b_loc))
        edges.append(Edge(a_id, b_id, attrs={self.distance_attr: cost}))

    graph_spec = request[self.mst]
    graph_spec.directed = False
    outputs[self.mst] = Graph(nodes, edges, graph_spec)
    return outputs
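# A tiny numpy sketch of the reshape used above: collapse any leading
# batch/channel dims into one axis, keeping the trailing spatial dims intact.
# The shape is a made-up example.
import numpy as np

data = np.zeros((1, 3, 10, 20, 30))  # e.g. (batch, channel, z, y, x)
spatial_dims = 3
flat = data.reshape((-1,) + data.shape[-spatial_dims:])
assert flat.shape == (3, 10, 20, 30)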
def provide(self, request: BatchRequest) -> Batch:
    timing = Timing(self, "provide")
    timing.start()
    batch = Batch()
    for points_key in self.points:
        if points_key not in request:
            continue

        # Retrieve all points in the requested region using a kd-tree for speed
        point_ids = self._query_kdtree(
            self.data.tree,
            (
                np.array(request[points_key].roi.get_begin()),
                np.array(request[points_key].roi.get_end()),
            ),
        )

        # To account for boundary crossings we must retrieve the neighbors of
        # all points in the graph. This is too slow for large queries, where
        # it also matters less, so only do it for small ones.
        points_subgraph = self._subgraph_points(
            point_ids,
            with_neighbors=len(point_ids) < len(self._graph.nodes) // 2,
        )
        nodes = [
            Node(id=node, location=attrs["location"], attrs=attrs)
            for node, attrs in points_subgraph.nodes.items()
        ]
        edges = [Edge(u, v) for u, v in points_subgraph.edges]
        return_graph = Graph(nodes, edges, GraphSpec(roi=request[points_key].roi))

        # Handle boundary cases
        return_graph = return_graph.trim(request[points_key].roi)

        batch.points[points_key] = return_graph

        logger.debug(
            "Graph points source provided {} points for roi: {}".format(
                len(list(batch.points[points_key].nodes)), request[points_key].roi
            )
        )
        logger.debug(
            f"Providing {len(list(points_subgraph.nodes))} nodes to {points_key}"
        )

    timing.stop()
    batch.profiling_stats.add(timing)
    return batch
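# _query_kdtree is assumed above; a minimal stand-in under the assumption that
# the tree is a scipy cKDTree built on the node locations. scipy has no direct
# axis-aligned box query, so this sketch queries a ball covering the ROI box
# and filters exactly. query_kdtree_box is a hypothetical helper name.
import numpy as np
from scipy.spatial import cKDTree

def query_kdtree_box(tree, points, begin, end):
    center = (begin + end) / 2
    radius = np.linalg.norm(end - begin) / 2
    candidate_ids = tree.query_ball_point(center, radius)
    return [
        i for i in candidate_ids
        if np.all(points[i] >= begin) and np.all(points[i] < end)
    ]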
def _get_points(self, point_filter):
    filtered = self.data[point_filter][:, :self.ndims]
    if self.id_dim is not None:
        ids = self.data[point_filter][:, self.id_dim]
    else:
        ids = np.arange(len(self.data))[point_filter]
    return [Node(id=i, location=p) for i, p in zip(ids, filtered)]
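# Toy illustration of the filtering above, assuming rows are laid out as
# [z, y, x, id] with ndims == 3 and id_dim == 3 (a hypothetical layout).
import numpy as np

data = np.array([[0, 0, 0, 7], [1, 2, 3, 9], [4, 5, 6, 11]])
point_filter = data[:, 0] > 0           # boolean mask over rows
locations = data[point_filter][:, :3]   # first ndims columns are the location
ids = data[point_filter][:, 3]          # id column; row index is the fallback
# -> ids [9, 11] with locations [[1, 2, 3], [4, 5, 6]]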
def setup(self):
    self.points = [
        Node(0, np.array([0, 10, 0])),
        Node(1, np.array([0, 30, 0])),
        Node(2, np.array([0, 50, 0])),
        Node(3, np.array([0, 70, 0])),
        Node(4, np.array([0, 90, 0])),
    ]
    self.provides(
        GraphKeys.TEST_POINTS,
        GraphSpec(roi=Roi((-100, -100, -100), (300, 300, 300))),
    )
    self.provides(
        ArrayKeys.TEST_LABELS,
        ArraySpec(
            roi=Roi((-100, -100, -100), (300, 300, 300)),
            voxel_size=Coordinate((4, 1, 1)),
            interpolatable=False,
        ),
    )
def provide(self, request: BatchRequest) -> Batch:
    random.seed(request.random_seed)
    np.random.seed(request.random_seed)
    timing = Timing(self, "provide")
    timing.start()
    batch = Batch()

    roi = request[self.points].roi
    region_shape = roi.get_shape()
    trees = []
    for _ in range(self.n_obj):
        # retry until the tree has an acceptable number of nodes
        for _ in range(100):
            root = np.random.random(len(region_shape)) * region_shape
            tree = self._grow_tree(
                root, Roi((0,) * len(region_shape), region_shape)
            )
            if self.num_nodes[0] <= len(tree.nodes) <= self.num_nodes[1]:
                break
        trees.append(tree)
    # logger.info("{} trees got, expected {}".format(len(trees), self.n_obj))

    trees_graph = nx.disjoint_union_all(trees)
    points = {
        node_id: Node(node_id, np.floor(node_attrs["pos"]) + roi.get_begin())
        for node_id, node_attrs in trees_graph.nodes.items()
    }
    batch[self.points] = Graph(
        points.values(),
        [Edge(u, v) for u, v in trees_graph.edges],
        request[self.points],
    )
    timing.stop()
    batch.profiling_stats.add(timing)
    # self._plot_tree(tree)
    return batch
def _parse_swc(self, filename: Path):
    """Parse a single SWC file into Nodes and Edges and add them to this
    source. Returns the number of connected components in the parsed tree;
    files with "cube" in their name are skipped and count as 0.
    """
    if "cube" in filename.name:
        return 0
    tree = parse_swc(
        filename,
        self.transform_file,
        resolution=[self.scale[i] for i in self.transpose],
        transpose=self.transpose,
    )
    assert len(list(nx.weakly_connected_components(tree))) == 1

    points = []
    for node, attrs in tree.nodes.items():
        if not self.ignore_human_nodes or attrs["human_placed"]:
            points.append(
                Node(
                    id=node,
                    location=attrs["location"],
                    attrs=attrs,
                )
            )
    point_ids = set(p.id for p in points)

    human_edges = set()
    if self.ignore_human_nodes:
        for u, v in tree.edges:
            if u not in point_ids or v not in point_ids:
                human_edges.add(Edge(u, v))
    edges = set(Edge(u, v) for u, v in tree.edges)
    if not self.directed:
        edges = edges | set(Edge(v, u) for u, v in tree.edges)
    self._add_points_to_source(points, edges - human_edges)
    return len(list(nx.weakly_connected_components(tree)))
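# parse_swc is assumed above; a minimal sketch of reading the standard SWC
# format (one "id type x y z radius parent_id" row per line, '#' comments)
# into a directed networkx graph. The (z, y, x) location convention is an
# assumption, and the transform/transpose handling of the real helper is
# omitted. parse_swc_minimal is a hypothetical name.
import networkx as nx
import numpy as np

def parse_swc_minimal(filename):
    tree = nx.DiGraph()
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            node_id, _, x, y, z, _, parent = line.split()[:7]
            tree.add_node(
                int(node_id), location=np.array([float(z), float(y), float(x)])
            )
            if int(parent) != -1:
                tree.add_edge(int(parent), int(node_id))
    return tree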
def setup(self):
    self.nodes = [
        Node(id=0, location=np.array([0, 0, 0])),
        Node(id=1, location=np.array([0, 10, 0])),
        Node(id=2, location=np.array([0, 20, 0])),
        Node(id=3, location=np.array([0, 30, 0])),
        Node(id=4, location=np.array([0, 40, 0])),
        Node(id=5, location=np.array([0, 50, 0])),
    ]
    self.provides(
        GraphKeys.TEST_GRAPH,
        GraphSpec(roi=Roi((-100, -100, -100), (200, 200, 200))),
    )
    self.provides(
        ArrayKeys.TEST_LABELS,
        ArraySpec(
            roi=Roi((-100, -100, -100), (200, 200, 200)),
            voxel_size=Coordinate((4, 1, 1)),
            interpolatable=False,
        ),
    )
def __read_syn_points(self, roi):
    """Read synapse annotations in JSON format from the DVID source and
    create a point for every location within the given ROI."""
    if GraphKeys.PRESYN in self.points_voxel_size:
        voxel_size = self.points_voxel_size[GraphKeys.PRESYN]
    elif GraphKeys.POSTSYN in self.points_voxel_size:
        voxel_size = self.points_voxel_size[GraphKeys.POSTSYN]

    syn_file_json = self.__load_json_annotations(
        array_shape_voxel=roi.get_shape() // voxel_size,
        array_offset_voxel=roi.get_offset() // voxel_size,
        array_name=self.datasets[GraphKeys.PRESYN],
    )

    presyn_points_dict, postsyn_points_dict = {}, {}
    location_to_location_id_dict, location_id_to_partner_locations = {}, {}
    for node_nr, node in enumerate(syn_file_json):
        # collect information
        kind = str(node['Kind'])
        location = np.asarray(
            (node['Pos'][2], node['Pos'][1], node['Pos'][0])
        ) * voxel_size
        location_id = int(node_nr)
        # some synapses are wrongly annotated in the DVID source
        # ('Tags': null); skip them
        try:
            syn_id = int(node['Tags'][0][3:])
        except (TypeError, IndexError, KeyError):
            continue
        location_to_location_id_dict[str(location)] = location_id

        partner_locations = []
        try:
            for relation in node['Rels']:
                partner_locations.append(
                    np.asarray(
                        [relation['To'][2], relation['To'][1], relation['To'][0]]
                    ) * voxel_size
                )
        except (TypeError, KeyError):
            partner_locations = []
        location_id_to_partner_locations[int(node_nr)] = partner_locations

        # collect optional properties, which are not always given
        props = {}
        if 'conf' in node['Prop']:
            props['conf'] = float(node['Prop']['conf'])
        if 'agent' in node['Prop']:
            props['agent'] = str(node['Prop']['agent'])
        if 'flagged' in node['Prop']:
            str_value_flagged = str(node['Prop']['flagged'])
            props['flagged'] = bool(distutils.util.strtobool(str_value_flagged))
        if 'multi' in node['Prop']:
            str_value_multi = str(node['Prop']['multi'])
            props['multi'] = bool(distutils.util.strtobool(str_value_multi))

        # create a synapse point with the information collected so far
        # (partner_ids are not completed yet)
        if kind == 'PreSyn':
            syn_point = Node(location=location, location_id=location_id,
                             synapse_id=syn_id, partner_ids=[], props=props)
            presyn_points_dict[int(node_nr)] = deepcopy(syn_point)
        elif kind == 'PostSyn':
            syn_point = Node(location=location, location_id=location_id,
                             synapse_id=syn_id, partner_ids=[], props=props)
            postsyn_points_dict[int(node_nr)] = deepcopy(syn_point)

    # add partner ids
    last_node_nr = len(syn_file_json) - 1
    for current_syn_point_id in location_id_to_partner_locations.keys():
        all_partner_ids = []
        for partner_loc in location_id_to_partner_locations[current_syn_point_id]:
            if str(partner_loc) in location_to_location_id_dict:
                all_partner_ids.append(
                    int(location_to_location_id_dict[str(partner_loc)])
                )
            else:
                # partner location was never seen; assign it a fresh id
                last_node_nr = last_node_nr + 1
                all_partner_ids.append(int(last_node_nr))

        if current_syn_point_id in presyn_points_dict:
            presyn_points_dict[current_syn_point_id].partner_ids = all_partner_ids
        elif current_syn_point_id in postsyn_points_dict:
            postsyn_points_dict[current_syn_point_id].partner_ids = all_partner_ids
        else:
            raise Exception("current syn_point id not found in any dictionary")

    return presyn_points_dict, postsyn_points_dict
def process(self, batch, request: BatchRequest):
    outputs = Batch()
    voxel_size = batch[self.intensities].spec.voxel_size
    roi = batch[self.intensities].spec.roi
    offset = batch[self.intensities].spec.roi.get_begin()
    spatial_dims = len(voxel_size)

    intensities = batch[self.intensities].data
    intensities = intensities.reshape((-1,) + intensities.shape[-spatial_dims:])[0]
    maxima = batch[self.mask].data
    maxima = maxima.reshape((-1,) + maxima.shape[-spatial_dims:])[0]
    logger.warning(f"{self.mask} has {maxima.sum()} maxima")

    if maxima.sum() < 2:
        minimax_edges = []
        if self.dense_mst is not None:
            dense_minimax_edges = []
    else:
        if self.dense_mst is not None:
            dense_minimax_edges, minimax_edges = maximin.maximin_tree_query_plus_decimated(
                intensities.astype(np.float64),
                maxima.astype(np.uint8),
                threshold=self.threshold,
            )
        else:
            minimax_edges = maximin.maximin_tree_query(
                intensities.astype(np.float64),
                maxima.astype(np.uint8),
                decimate=self.decimate,
                threshold=self.threshold,
            )

    maximin_id = itertools.count(start=0)

    nodes = set()
    edges = []
    ids = {}
    for a, b, cost in minimax_edges:
        a_id = ids.setdefault(a, next(maximin_id))
        b_id = ids.setdefault(b, next(maximin_id))
        a_loc = np.array(a) * voxel_size + offset
        b_loc = np.array(b) * voxel_size + offset
        assert roi.contains(a_loc), f"Roi {roi} does not contain {a_loc}"
        assert roi.contains(b_loc), f"Roi {roi} does not contain {b_loc}"
        nodes.add(Node(a_id, location=a_loc))
        nodes.add(Node(b_id, location=b_loc))
        edges.append(Edge(a_id, b_id, attrs={self.distance_attr: 1 - cost}))

    graph_spec = request[self.mst]
    graph_spec.directed = False
    outputs[self.mst] = Graph(nodes, edges, graph_spec)

    if self.dense_mst is not None:
        maximin_id = itertools.count(start=0)
        nodes = set()
        edges = []
        ids = {}
        for a, b, cost in dense_minimax_edges:
            a_id = ids.setdefault(a, next(maximin_id))
            b_id = ids.setdefault(b, next(maximin_id))
            a_loc = np.array(a) * voxel_size + offset
            b_loc = np.array(b) * voxel_size + offset
            assert roi.contains(a_loc), f"Roi {roi} does not contain {a_loc}"
            assert roi.contains(b_loc), f"Roi {roi} does not contain {b_loc}"
            nodes.add(Node(a_id, location=a_loc))
            nodes.add(Node(b_id, location=b_loc))
            edges.append(Edge(a_id, b_id, attrs={self.distance_attr: 1 - cost}))

        graph_spec = request[self.dense_mst]
        graph_spec.directed = False
        outputs[self.dense_mst] = Graph(nodes, edges, graph_spec)

    return outputs
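# Sketch of the id-assignment pattern used above: maximin returns voxel-index
# tuples, which are mapped to small integer node ids on first sight. Note that
# setdefault evaluates next(counter) even on a hit, so ids are unique but may
# skip values; the example voxels are made up.
import itertools

ids = {}
counter = itertools.count(start=0)
for voxel in [(0, 1, 2), (3, 4, 5), (0, 1, 2)]:
    node_id = ids.setdefault(voxel, next(counter))
# ids == {(0, 1, 2): 0, (3, 4, 5): 1}; the repeated voxel reuses id 0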
def _get_points(self, point_filter):
    filtered = self.data[point_filter]
    ids = np.arange(len(self.data))[point_filter]
    return [Node(id=i, location=p) for i, p in zip(ids, filtered)]
def process(self, batch, request: BatchRequest):
    outputs = Batch()
    voxel_size = batch[self.embeddings].spec.voxel_size
    offset = batch[self.embeddings].spec.roi.get_begin()
    embeddings = batch[self.embeddings].data
    candidates = batch[self.candidates].to_nx_graph()
    _, depth, height, width = embeddings.shape

    coordinates = np.meshgrid(
        np.arange(
            0, (depth - 0.5) * self.coordinate_scale[0], self.coordinate_scale[0]
        ),
        np.arange(
            0, (height - 0.5) * self.coordinate_scale[1], self.coordinate_scale[1]
        ),
        np.arange(
            0, (width - 0.5) * self.coordinate_scale[2], self.coordinate_scale[2]
        ),
        indexing="ij",
    )
    for i in range(len(coordinates)):
        coordinates[i] = coordinates[i].astype(np.float32)

    embedding = np.concatenate([embeddings, coordinates], 0)
    embedding = np.transpose(embedding, axes=[1, 2, 3, 0])
    embedding = embedding.reshape(depth * width * height, -1)

    nodes = set()
    edges = []
    for i, component in enumerate(nx.connected_components(candidates)):
        candidates_array = np.zeros((depth, height, width), dtype=bool)
        locs_to_ids = {}
        for node in component:
            attrs = candidates.nodes[node]
            location = attrs["location"]
            voxel_location = tuple(
                int(x) for x in ((location - offset) // voxel_size)
            )
            locs_to_ids[voxel_location] = node
            candidates_array[voxel_location] = True
        candidates_array = candidates_array.reshape(-1)
        component_embedding = embedding[candidates_array, :]
        logger.info(
            f"processing component {i} with {len(component)} candidates"
        )
        component_emst = mlp.emst(component_embedding)["output"]

        for u, v, distance in component_emst:
            u = int(u)
            pos_u = component_embedding[u][-3:] / self.coordinate_scale * voxel_size
            u_index = locs_to_ids[
                tuple(int(np.round(x)) for x in (pos_u / voxel_size))
            ]
            v = int(v)
            pos_v = component_embedding[v][-3:] / self.coordinate_scale * voxel_size
            v_index = locs_to_ids[
                tuple(int(np.round(x)) for x in (pos_v / voxel_size))
            ]
            nodes.add(Node(u_index, location=pos_u + offset))
            nodes.add(Node(v_index, location=pos_v + offset))
            edges.append(
                Edge(u_index, v_index, attrs={self.distance_attr: distance})
            )

    graph_spec = request[self.mst]
    graph_spec.directed = False
    logger.info(
        f"candidates has {candidates.number_of_nodes()} nodes and "
        f"{candidates.number_of_edges()} edges and "
        f"{len(list(nx.connected_components(candidates)))} components"
    )
    outputs[self.mst] = Graph(nodes, edges, graph_spec)
    output_graph = outputs[self.mst].to_nx_graph()
    logger.info(
        f"output_graph has {output_graph.number_of_nodes()} nodes and "
        f"{output_graph.number_of_edges()} edges and "
        f"{len(list(nx.connected_components(output_graph)))} components"
    )
    logger.debug(
        f"OUTPUTS CONTAINS MST WITH {len(list(outputs[self.mst].nodes))} NODES"
    )
    return outputs
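# Sketch of the world <-> voxel round trip behind the locs_to_ids lookup
# above: a location is snapped to its voxel index relative to the ROI offset,
# and later recovered (up to rounding) from the EMST output coordinates.
# The offset, voxel size, and location are made-up example values.
import numpy as np

offset = np.array([0, 0, 0])
voxel_size = np.array([40, 4, 4])
location = np.array([80, 12, 8])
voxel_index = tuple(int(x) for x in ((location - offset) // voxel_size))  # (2, 3, 2)
recovered = tuple(int(np.round(x)) for x in (location / voxel_size))      # (2, 3, 2)
assert voxel_index == recovered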