def process_brick(box, roi_brick_s5, labels):
    """
    Count voxels per (body, roi) pair within a single brick.

    Note:
        ``scale`` and ``input_service`` are not parameters of this
        function; they are read from the enclosing scope.

    Args:
        box:
            A box at scale-0.  It is divided by ``2**scale`` before
            the segmentation is fetched.
        roi_brick_s5:
            A volume of roi voxels at scale-5, taken from the region
            that corresponds to the box.  It is upsampled by
            ``2**(5 - scale)`` to align with the data for the box.
        labels:
            A set of labels to include in the results.
            Other labels will be ignored.

    Returns:
        DataFrame with columns ['body', 'roi_id', 'voxels'],
        one row per (body, roi_id) combination found in the brick.
        Rows for body 0 are omitted.
    """
    # Bring both the box and the ROI voxels to the working scale.
    box = box // (2**scale)
    roi_brick = upsample(roi_brick_s5, 2**(5 - scale))
    assert (roi_brick.shape == box_shape(box)).all(), \
        f"{roi_brick.shape} does not match box {box.tolist()}"

    # Fetch the segmentation, then erase (in-place) every label
    # that is not one of our bodies of interest.
    seg_brick = np.asarray(input_service.get_subvolume(box, scale), order='C')
    apply_mask_for_labels(seg_brick, labels, inplace=True)

    # One row per voxel, then tally the voxels in each (body, roi) pair.
    voxel_table = pd.DataFrame({
        'body': seg_brick.reshape(-1),
        'roi_id': roi_brick.reshape(-1)
    })
    pair_sizes = voxel_table.groupby(['body', 'roi_id']).size()
    pair_sizes = pair_sizes.rename('voxels').reset_index()
    return pair_sizes.query('body != 0')
def test_apply_mask_for_labels():
    """
    Check apply_mask_for_labels() in both its default mode
    (result is returned) and its in-place mode (input is overwritten).
    """
    keep_labels = {2, 5, 9}
    expected = [[0, 2, 0],
                [0, 5, 0]]

    volume = np.asarray([[0, 2, 3],
                         [4, 5, 0]])

    # Default mode: inspect the returned volume.
    masked_volume = apply_mask_for_labels(volume, keep_labels)
    assert (masked_volume == expected).all()

    # In-place mode: inspect the input volume itself.
    apply_mask_for_labels(volume, keep_labels, inplace=True)
    assert (volume == expected).all()
def test_copysegmentation_from_dvid_to_dvid_input_mask(
        setup_dvid_segmentation_input, disable_auto_retry):
    """
    Run the copysegmentation workflow with both 'input-mask-labels'
    and 'add-offset-to-ids' configured, against an expected volume in
    which only the selected labels survive (with the offset applied).

    The values returned by _run_to_dvid() are not inspected here;
    presumably that helper performs the comparison itself -- TODO confirm.
    """
    (template_dir, config, volume, dvid_address,
     repo_uuid, _output_segmentation_name) = setup_dvid_segmentation_input

    # Use a dedicated output instance so that we get a fresh output.
    output_segmentation_name = 'copyseg-with-input-mask-from-dvid'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    # The offset is applied to the input volume AND the mask labels.
    offset = 2000
    config["copysegmentation"]["add-offset-to-ids"] = offset

    # Pick some labels that don't extend throughout the whole volume.
    selected_labels = pd.unique(volume[150, 64:128, 64:128].reshape(-1))
    assert 0 not in selected_labels

    # Bounding box of all voxels that carry one of the selected labels.
    selected_mask = mask_for_labels(volume, selected_labels)
    selected_coords = np.array(selected_mask.nonzero()).transpose()
    box_start = selected_coords.min(axis=0)
    box_stop = 1 + selected_coords.max(axis=0)
    selected_box = np.array([box_start, box_stop])

    # The configured bounding-box has its axes reversed relative to the volume.
    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]
    subvol_box = box_intersection(input_box, selected_box)

    selected_subvol = extract_subvol(volume, subvol_box).copy()
    selected_subvol = apply_mask_for_labels(selected_subvol, selected_labels)

    config["copysegmentation"]["input-mask-labels"] = selected_labels.tolist()

    # Apply the offset to the surviving voxels only; leave 0 alone.
    selected_subvol = np.where(selected_subvol, selected_subvol + offset, 0)

    expected_vol = np.zeros(volume.shape, np.uint64)
    overwrite_subvol(expected_vol, subvol_box, selected_subvol)

    setup = (template_dir, config, expected_vol,
             dvid_address, repo_uuid, output_segmentation_name)
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
def process_brick(box, roi_brick_s5, labels):
    """
    Count voxels per (body, roi) pair within a single brick.

    Note:
        ``scale`` and ``input_service`` are not parameters of this
        function; they are read from the enclosing scope.

    Args:
        box:
            The box to fetch from ``input_service``.  Its shape must
            match the upsampled roi brick.
        roi_brick_s5:
            A volume of roi voxels at scale-5.  It is upsampled by
            ``2**(5 - scale)`` before being compared with the box.
        labels:
            The labels to include in the results.
            All other labels are erased before counting.

    Returns:
        DataFrame with columns ['body', 'roi_id', 'voxels'],
        one row per (body, roi_id) combination found in the brick.
        Rows for body 0 are omitted.
    """
    roi_brick = upsample(roi_brick_s5, 2**(5 - scale))
    assert (roi_brick.shape == box_shape(box)).all()

    # Fetch the segmentation, then erase (in-place) every label
    # that is not one of our bodies of interest.
    # Note: Service is already configured at the right scale.
    seg_brick = np.asarray(input_service.get_subvolume(box), order='C')
    apply_mask_for_labels(seg_brick, labels, inplace=True)

    # One row per voxel, then tally the voxels in each (body, roi) pair.
    voxel_table = pd.DataFrame({
        'body': seg_brick.reshape(-1),
        'roi_id': roi_brick.reshape(-1)
    })
    pair_sizes = voxel_table.groupby(['body', 'roi_id']).size()
    pair_sizes = pair_sizes.rename('voxels').reset_index()
    return pair_sizes.query('body != 0')
def brick_cc(brick):
    """
    Label the connected components of a single brick.

    Note:
        ``subset_labels`` is not a parameter; it is read from the
        enclosing scope.  If it is truthy, only those labels are
        retained before the components are computed.

    Args:
        brick:
            The Brick to process.  Its volume is read, and then
            ``brick.compress()`` is called on it.

    Returns:
        (cc_brick, cc_overlaps, cc_max, orig_max), where:

        cc_brick:
            A new Brick with the same boxes, location_id and compression
            setting as the input, holding the component volume.
        cc_overlaps:
            DataFrame with columns ['orig', 'cc'] listing each distinct
            non-zero pairing of original label and component label.
        cc_max:
            The largest component label listed in cc_overlaps (0 if none).
        orig_max:
            The max of the original volume, taken BEFORE any subset
            masking, so the caller knows the first available label
            when the final results are written.
    """
    # Grab the volume before compressing the input brick.
    orig_vol = brick.volume
    brick.compress()

    orig_max = orig_vol.max()

    if subset_labels:
        orig_vol = apply_mask_for_labels(orig_vol, subset_labels)

    if orig_vol.any():
        cc_vol = skm.label(orig_vol, background=0, connectivity=1)
        assert cc_vol.dtype == np.int64
        cc_vol = cc_vol.view(np.uint64)

        # Voxels that were 0 in the input stay 0 in the output.
        cc_vol[orig_vol == 0] = np.uint64(0)

        # Record which original label each component came from.
        voxel_pairs = pd.DataFrame({'orig': orig_vol.reshape(-1),
                                    'cc': cc_vol.reshape(-1)})
        voxel_pairs = voxel_pairs.query('orig != 0 and cc != 0')
        cc_overlaps = voxel_pairs.drop_duplicates()
        assert (cc_overlaps.dtypes == np.uint64).all()

        cc_max = cc_overlaps['cc'].max() if len(cc_overlaps) > 0 else np.uint64(0)
    else:
        # Fast path for all-zero bricks: nothing to label.
        cc_vol = orig_vol
        cc_overlaps = pd.DataFrame({'orig': [], 'cc': []}, dtype=np.uint64)
        cc_max = np.uint64(0)

    cc_brick = Brick(brick.logical_box,
                     brick.physical_box,
                     cc_vol,
                     location_id=brick.location_id,
                     compression=brick.compression)

    return cc_brick, cc_overlaps, cc_max, orig_max
def _fetch_body_mito_seg(mito_src, body_mask, mask_box, scale, valid_mito_mapper, logger):
    """
    Return the mito segmentation for only those mitos which
    overlap with the given body mask (not elsewhere).

    A mito is kept only if more than half of its voxels (within the
    fetched subvolume) fall on the "core" of the body mask, i.e. where
    ``body_mask == 2``.  All other mito voxels are set to 0.

    Args:
        mito_src:
            VolumeService to obtain mito segmentation
        body_mask:
            Volume with labels 1+2 as described in _fetch_body_mask()
        mask_box:
            The box to fetch from ``mito_src``.
        scale:
            The scale at which to fetch from ``mito_src``.
        valid_mito_mapper:
            LabelMapper that keeps only valid mitos when its
            apply_with_default() method is called.
            If falsy, no validity filtering is performed.
        logger:
            Passed to the Timer that wraps the fetch.

    Returns:
        The mito segmentation volume, with every mito that does not
        predominantly lie within the core body mask erased.
    """
    with Timer("Fetching mito segmentation", logger):
        assert _have_flyemflows and isinstance(mito_src, VolumeService)
        mito_seg = mito_src.get_subvolume(mask_box, scale)

    # Discard invalid mitos first, then carry on.  We must NOT return
    # here: the mapper knows nothing about the body mask, so the
    # overlap-based filtering below still has to be applied.
    if valid_mito_mapper:
        mito_seg = valid_mito_mapper.apply_with_default(mito_seg)

    core_body_mask = (body_mask == 2)
    body_mito_seg = np.where(core_body_mask, mito_seg, 0)

    # Due to downsampling discrepancies between the mito seg and neuron seg,
    # mito from neighboring neurons may slightly overlap this neuron.
    # Keep only mitos which have more of their voxels in the body mask than not.
    #
    # FIXME:
    #   This heuristic fails at the volume edge, where we might see just
    #   part of the mito.
    #   Need to overwrite small mitos on the volume edge with FACE_MARKER
    #   to indicate that they can't be trusted, and if such a mito is
    #   the "winning" mito, then we need to try a different search config.
    body_mito_sizes = pd.Series(body_mito_seg.ravel()).value_counts()

    # The masked copy is only needed for the counts; release it early.
    del body_mito_seg

    mito_sizes = pd.Series(mito_seg.ravel()).value_counts()

    # Give both tables the same index, so mitos that never touch
    # the core body mask get an overlap of 0 instead of being absent.
    mito_sizes, body_mito_sizes = mito_sizes.align(body_mito_sizes, fill_value=0)
    core_mitos = {*mito_sizes[(body_mito_sizes > mito_sizes / 2)].index} - {0}

    core_mito_seg = apply_mask_for_labels(mito_seg, core_mitos, inplace=True)
    return core_mito_seg
def select_hulls_for_mito_bodies(mito_body_ct, mito_bodies_mask, mito_binary, body_seg, hull_masks, seed_bodies, box, scale, viewer=None, res0=8, progress=False):
    """
    For each mito body, choose the "hull body" whose hull(s) overlap it most,
    and record that choice in a new 'hull_body' column of ``mito_body_ct``.

    Args:
        mito_body_ct:
            DataFrame, indexed such that it can be joined against mito body IDs
            (the selections are merged onto it by index).
            NOTE: In the returned table, every pre-existing column is cast to
            float32 -- except on the early-exit path where no hull overlaps
            any mito body, in which case the columns are left as they were.
        mito_bodies_mask:
            Mask volume of voxels that belong to mito bodies.
            (assumes same shape as body_seg -- TODO confirm)
        mito_binary:
            Mask volume combined (via ``&``) with mito_bodies_mask to select
            the voxels whose body IDs are counted as overlaps.
        body_seg:
            Label volume of body IDs.
        hull_masks:
            dict of ``{hull_cc: (mask_box, mask)}``.
            WARNING: The masks are modified IN-PLACE if a hull has to be
            trimmed because it overlaps non-mito bodies other than its seed body.
        seed_bodies:
            Lookup (indexed by hull_cc) giving the non-mito body
            from which each hull was generated.
        box, scale:
            Only used when updating the viewer layers.
        viewer:
            If truthy, two segmentation layers ('altered-bodies' and
            'final-hull-seg') are sent to it via update_seg_layer().
        res0:
            Not used in this function.
        progress:
            If True, show progress while iterating over the hulls.

    Returns:
        ``mito_body_ct`` with a 'hull_body' column (uint64).
        Mito bodies for which no hull was selected are given hull_body 0.
    """
    # Body IDs on mito voxels of mito bodies; 0 everywhere else.
    mito_bodies_mito_seg = np.where(mito_bodies_mask & mito_binary, body_seg, 0)

    # Body IDs for everything that is NOT a mito body; 0 on mito bodies.
    nonmito_body_seg = np.where(mito_bodies_mask, 0, body_seg)

    hull_cc_overlap_stats = []
    for hull_cc, (mask_box, mask) in tqdm_proxy(hull_masks.items(), disable=not progress):
        mbms = mito_bodies_mito_seg[box_to_slicing(*mask_box)]
        masked_hull_cc_bodies = np.where(mask, mbms, 0)

        # Faster to check for any non-zero values at all before trying to count them.
        # This early check saves a lot of time in practice.
        if not masked_hull_cc_bodies.any():
            continue

        # This hull was generated from a particular seed body (non-mito body).
        # If it accidentally overlaps with any other non-mito bodies,
        # then delete those voxels from the hull.
        # If that causes the hull to become split apart into multiple connected components,
        # then keep only the component(s) which overlap the seed body.
        seed_body = seed_bodies[hull_cc]
        nmbs = nonmito_body_seg[box_to_slicing(*mask_box)]
        other_bodies = set(pd.unique(nmbs[mask])) - {0, seed_body}
        if other_bodies:
            # Keep only the voxels on mito bodies or on the
            # particular non-mito body for this hull (the "seed body").
            # Note: This writes into the array stored in hull_masks.
            mbm = mito_bodies_mask[box_to_slicing(*mask_box)]
            mask[:] &= (mbm | (nmbs == seed_body))

            # NOTE(review): 'mask' is rebound here; presumably taggedView()
            # shares memory with the original array, so the in-place write
            # below still lands in hull_masks -- TODO confirm.
            mask = vigra.taggedView(mask, 'zyx')
            mask_cc = vigra.analysis.labelMultiArrayWithBackground(
                mask.view(np.uint8))
            if mask_cc.max() > 1:
                # The trimmed hull fell apart into several pieces.
                # Keep only the pieces which touch the seed body.
                mask_ct = contingency_table(mask_cc, nmbs).reset_index()
                keep_ccs = mask_ct['left'].loc[(mask_ct['left'] != 0) &
                                               (mask_ct['right'] == seed_body)]
                mask[:] = mask_for_labels(mask_cc, keep_ccs)

        # NOTE(review): masked_hull_cc_bodies was computed BEFORE the hull was
        # trimmed above, so 'overlap' reflects the untrimmed hull, whereas
        # 'hull_size' reflects the trimmed hull.
        # The row for mito_body == 0 (if any) is dropped after concatenation.
        mito_bodies, counts = np.unique(masked_hull_cc_bodies,
                                        return_counts=True)
        overlaps = pd.DataFrame({
            'mito_body': mito_bodies,
            'overlap': counts,
            'hull_cc': hull_cc,
            'hull_size': mask.sum(),
            'hull_body': seed_body
        })
        hull_cc_overlap_stats.append(overlaps)

    if len(hull_cc_overlap_stats) == 0:
        # No hull overlapped any mito body at all.
        logger.warning("Could not find any matches for any mito bodies!")
        mito_body_ct['hull_body'] = np.uint64(0)
        return mito_body_ct

    hull_cc_overlap_stats = pd.concat(hull_cc_overlap_stats, ignore_index=True)
    hull_cc_overlap_stats = hull_cc_overlap_stats.query(
        'mito_body != 0').copy()

    # Aggregate the stats for each body and the hull bodies it overlaps with,
    # Select the hull_body with the most overlap, or in the case of ties, the hull body that is largest overall.
    # (Ties are probably more common in the event that two hulls completely encompass a small mito body.)
    hull_body_overlap_stats = hull_cc_overlap_stats.groupby(
        ['mito_body', 'hull_body'])[['overlap', 'hull_size']].sum()

    # Sorted in descending order, so the first row for
    # each mito_body is the best candidate for that body.
    hull_body_overlap_stats = hull_body_overlap_stats.sort_values(
        ['mito_body', 'overlap', 'hull_size'], ascending=False)
    hull_body_overlap_stats = hull_body_overlap_stats.reset_index()

    # drop_duplicates() keeps the first (i.e. best) row per mito_body.
    mito_hull_selections = (hull_body_overlap_stats.drop_duplicates(
        'mito_body').set_index('mito_body')['hull_body'])

    # Left-merge: mito bodies without a selection get NaN, replaced with 0.
    mito_body_ct = mito_body_ct.merge(mito_hull_selections,
                                      'left',
                                      left_index=True,
                                      right_index=True)
    mito_body_ct['hull_body'] = mito_body_ct['hull_body'].fillna(0)

    # Every column becomes float32, except for hull_body.
    dtypes = {col: np.float32 for col in mito_body_ct.columns}
    dtypes['hull_body'] = np.uint64
    mito_body_ct = mito_body_ct.astype(dtypes)

    if viewer:
        # Show the bodies as they would appear after each
        # selected mito body is remapped to its hull body.
        assert mito_hull_selections.index.dtype == mito_hull_selections.values.dtype == np.uint64
        mito_hull_mapper = LabelMapper(mito_hull_selections.index.values,
                                       mito_hull_selections.values)
        remapped_body_seg = mito_hull_mapper.apply(body_seg, True)
        remapped_body_seg = apply_mask_for_labels(remapped_body_seg,
                                                  mito_hull_selections.values)
        update_seg_layer(viewer, 'altered-bodies', remapped_body_seg, scale, box)

        # Show the final hull masks (after erasure of non-target bodies)
        assert sorted(hull_masks.keys()) == [*range(1, 1 + len(hull_masks))]

        # Paint hulls in order of increasing size, so that wherever
        # two hulls overlap, the one painted later (the larger) wins.
        hull_cc_overlap_stats = hull_cc_overlap_stats.sort_values('hull_size')
        hull_seg = np.zeros_like(remapped_body_seg)
        for row in hull_cc_overlap_stats.itertuples():
            mask_box, mask = hull_masks[row.hull_cc]
            view = hull_seg[box_to_slicing(*mask_box)]
            view[:] = np.where(mask, row.hull_body, view)
        update_seg_layer(viewer, 'final-hull-seg', hull_seg, scale, box)

    return mito_body_ct
def _find_closest_approaches(volume, closest_scale, subset_groups):
    """
    Find intra-group "edges" (label pairs) between objects in the given
    volume which are close to one another, but don't actually touch.

    Only pairs whose two labels belong to the same group but to DIFFERENT
    connected components of that group (``group_cc``) are examined.
    Labels that share a ``group_cc`` are already known to be connected
    (via the "direct adjacencies" step of this workflow), so looking for an
    edge between them would be wasted effort.  The consequence is that
    some close-but-not-touching pairs within a group are never reported,
    merely because those labels are already joined via some other path.

    How candidate pairs are chosen, per group:
      - Groups with 5 or fewer distinct labels: every pair of labels.
      - Larger groups: the gaps between the group's objects are filled
        via watershed, and only those labels which become adjacent in the
        filled volume are paired.
    In either case, pairs within the same group_cc are then discarded.

    Args:
        volume:
            3D label volume, np.uint32
        closest_scale:
            Passed through to approximate_closest_approach().
            If closest_scale > 0, then the "closest" points will be computed
            after downsampling the mask for each object at the given scale.
            (The returned point is still guaranteed to fall within the object
            at scale 0, but it may be a pixel or two away from the true
            point of closest approach.)
        subset_groups:
            DataFrame with columns [label, group, group_cc].
            Each group of labels is considered independently.

    Returns:
        None if there is nothing to report.  Otherwise, a DataFrame with columns:
        [label_a, label_b, za, ya, xa, zb, yb, xb, distance, edge_area]

        Note:
            ``edge_area`` will be 0 for all rows, since none of the body
            pairs physically touch in the volume (a precondition for the input).

    Raises:
        RuntimeError:
            If any examined pair turns out to have distance <= 1.0.
            The offending rows are saved to disk first.
    """
    assert volume.ndim == 3
    assert volume.dtype == np.uint32

    subset_groups = subset_groups[['label', 'group', 'group_cc']]

    # A group needs at least two labels to be worth processing.
    # If even the largest group has just one label, we're done.
    if subset_groups['group'].value_counts().max() == 1:
        return None

    # Edges are only sought from one CC to another, so groups
    # consisting of a single CC have nothing to offer, either.
    # (_kept_groups is referenced by name within the query string.)
    cc_counts = subset_groups.groupby('group')['group_cc'].agg('nunique')
    _kept_groups = cc_counts[cc_counts >= 2].index
    subset_groups = subset_groups.query('group in @_kept_groups')
    if len(subset_groups) == 0:
        return None

    def fill_gaps(seed_vol):
        # The watershed input need not be super-precise, and vigra's
        # watersheds() function operates MUCH faster on uint8 data.
        dt = vigra.filters.distanceTransform(seed_vol)
        dt = (255 * dt / dt.max()).astype(np.uint8)

        # watersheds() prints a bunch of useless messages to stdout; hide them.
        with stdout_redirected():
            ws, _max_label = vigra.analysis.watersheds(dt, seeds=seed_vol, method='Turbo')
        return ws

    candidate_pairs = []
    for _group_id, group_df in subset_groups.groupby('group'):
        group_labels = pd.unique(group_df['label'])
        num_labels = len(group_labels)
        if num_labels == 1:
            continue

        if num_labels <= 5:
            # Few enough labels that we can afford to try every pair.
            candidate_pairs.extend(combinations(sorted(group_labels), 2))
            continue

        # Too many labels for all-pairs: Figure out which labels are
        # near each other by filling the gaps between the objects and
        # then computing direct adjacencies in the filled volume.
        masked_vol = apply_mask_for_labels(volume, group_df['label'])
        filled_vol = fill_gaps(masked_vol)
        edges_df = compute_dense_rag_table(filled_vol)
        adjacent_pairs = edges_df[['label_a', 'label_b']].drop_duplicates()
        candidate_pairs.extend(adjacent_pairs.values.tolist())

    subset_edges = pd.DataFrame(candidate_pairs,
                                columns=['label_a', 'label_b'],
                                dtype=np.uint64)

    # Look up the group and group_cc on both sides of every pair...
    subset_edges = subset_edges.merge(subset_groups, 'left',
                                      left_on='label_a', right_on='label')
    subset_edges = subset_edges.drop('label', axis=1)
    subset_edges = subset_edges.merge(subset_groups, 'left',
                                      left_on='label_b', right_on='label',
                                      suffixes=['_a', '_b'])
    subset_edges = subset_edges.drop('label', axis=1)

    # ...and keep only the pairs which bridge two CCs of the same group.
    subset_edges = subset_edges.query(
        '(group_a == group_b) and (group_cc_a != group_cc_b)')
    subset_edges = subset_edges[['label_a', 'label_b']].drop_duplicates()

    result_rows = []
    for (label_a, label_b) in subset_edges.values:
        coord_a, coord_b, distance = approximate_closest_approach(
            volume, label_a, label_b, closest_scale)

        # FIXME: Why is this check necessary? Both label_a and label_b should be present...
        if np.isinf(distance):
            continue
        result_rows.append((label_a, label_b, *coord_a, *coord_b, distance))

    if not result_rows:
        return None

    result_columns = ['label_a', 'label_b',
                      'za', 'ya', 'xa',
                      'zb', 'yb', 'xb',
                      'distance']
    df = pd.DataFrame(result_rows, columns=result_columns)

    # These objects don't touch, so their edge area is 0.
    # (Don't call this function for objects that do touch)
    df['edge_area'] = np.int32(0)

    touching_df = df.query('distance <= 1.0')
    if len(touching_df) > 0:
        path = 'unexpected-touching-objects-remapped.npy'
        np.save(path, touching_df)
        msg = f"I didn't expect you to call this function with objects that physically touch! See {path}"
        raise RuntimeError(msg)

    final_types = {**EDGE_TABLE_TYPES, 'label_a': np.uint32, 'label_b': np.uint32}
    return df.astype(final_types)