def remap_cc_to_final(orig_brick, cc_brick, wrapped_brick_mapping_df):
    """
    Relabel a connected-components brick according to the final label mapping.

    Given an original brick and its corresponding CC brick (same logical and
    physical boxes), apply the mapping in wrapped_brick_mapping_df to the CC
    volume, falling back to the original segmentation wherever the mapping
    produced zero.  Returns a new Brick carrying the relabeled volume.
    """
    # Sanity check: both bricks must describe the same region of space.
    assert (orig_brick.logical_box == cc_brick.logical_box).all()
    assert (orig_brick.physical_box == cc_brick.physical_box).all()

    # A float here means NaN, the sentinel for "no mapping for this brick"
    # (i.e. nothing to relabel -- just keep the original volume).
    if isinstance(wrapped_brick_mapping_df, float):
        assert np.isnan(wrapped_brick_mapping_df)
        result_vol = orig_brick.volume
        orig_brick.compress()
    else:
        # Restrict the mapping table to the CC labels actually present here.
        cc_labels = pd.unique(cc_brick.volume.reshape(-1))  # @UnusedVariable
        pairs = wrapped_brick_mapping_df.df.query('cc in @cc_labels')[['cc', 'final_cc']].values
        remap = LabelMapper(*pairs.transpose())

        # Relabel the CC volume; unmapped voxels become 0,
        # then get refilled from the original segmentation.
        result_vol = remap.apply_with_default(cc_brick.volume, 0)
        result_vol = np.where(result_vol, result_vol, orig_brick.volume)

        orig_brick.compress()
        cc_brick.compress()

    return Brick(orig_brick.logical_box,
                 orig_brick.physical_box,
                 result_vol,
                 location_id=orig_brick.location_id,
                 compression=orig_brick.compression)
def find_edges_in_brick(brick, closest_scale=None, subset_groups=None, subset_requirement=2):
    """
    Find all pairs of adjacent labels in the given brick,
    and find the central-most point along the edge between them.

    (Edges to/from label 0 are discarded.)

    If closest_scale is not None, then non-adjacent pairs will be
    considered, according to a particular heuristic to decide which
    pairs to consider.

    Args:
        brick:
            A Brick to analyze
        closest_scale:
            If None, then consider direct (touching) adjacencies only.
            If not-None, then non-direct "adjacencies" (i.e. close-but-not-touching)
            are found. In that case `closest_scale` should be an integer >=0
            indicating the scale at which the analysis will be performed.
            Higher scales are faster, but less precise.
            See ``neuclease.util.approximate_closest_approach`` for more information.
        subset_groups:
            A DataFrame with columns [label, group].  Only the given labels
            will be analyzed for adjacencies.  Furthermore, edges (pairs) will
            only be returned if both labels in the edge are from the same group.
            If None (the default), no labels are analyzed and None is returned.
        subset_requirement:
            Whether or not both labels in each edge must be in subset_groups,
            or only one in each edge. (Currently, subset_requirement must be 2.)

    Returns:
        If the brick contains no edges at all (other than edges to label 0), return None.
        Otherwise, returns pd.DataFrame with columns:
            [label_a, label_b, forwardness, z, y, x, axis, edge_area, distance]. # fixme
        where label_a < label_b,
        'axis' indicates which axis the edge crosses at the chosen coordinate,
        (z,y,x) is always given as the coordinate to the left/above/front of the edge
        (depending on the axis).
        If 'forwardness' is True, then the given coordinate falls on label_a and
        label_b is one voxel "after" it (to the right/below/behind the coordinate).
        Otherwise, the coordinate falls on label_b, and label_a is "after".
        And 'edge_area' is the total count of the voxels touching both labels.
    """
    # Avoid a mutable default argument.  (The old default of [] was a list,
    # which would have crashed on .query() anyway.)  An absent subset means
    # "analyze nothing", which falls out naturally as an empty DataFrame.
    if subset_groups is None:
        subset_groups = pd.DataFrame({'label': [], 'group': []})

    # Profiling indicates that query('... in ...') spends
    # most of its time in np.unique, believe it or not.
    # After looking at the implementation, I think it might help a
    # little if we sort the array first.
    brick_labels = np.sort(pd.unique(brick.volume.reshape(-1)))
    if (len(brick_labels) == 1) or (len(brick_labels) == 2 and (0 in brick_labels)):
        return None # brick is solid -- no possible edges

    # Drop labels that aren't even present
    subset_groups = subset_groups.query('label in @brick_labels').copy()

    # Drop groups that don't have enough members (usually 2) in this brick.
    group_counts = subset_groups['group'].value_counts()
    _kept_groups = group_counts.loc[(group_counts >= subset_requirement)].index
    subset_groups = subset_groups.query('group in @_kept_groups').copy()

    if len(subset_groups) == 0:
        return None # No possible edges to find in this brick.

    # Construct a mapper that includes only the labels we'll keep.
    # (Other labels will be mapped to 0).
    # Also, the mapper converts to uint32 (required by _find_and_select_central_edges,
    # but also just good for RAM reasons).
    kept_labels = np.sort(np.unique(subset_groups['label'].values))
    remapped_kept_labels = np.arange(1, len(kept_labels) + 1, dtype=np.uint32)
    mapper = LabelMapper(kept_labels, remapped_kept_labels)
    reverse_mapper = LabelMapper(remapped_kept_labels, kept_labels)

    # Construct RAG -- finds all edges in the volume, on a per-pixel basis.
    remapped_volume = mapper.apply_with_default(brick.volume, 0)
    brick.compress()
    remapped_subset_groups = subset_groups.copy()
    remapped_subset_groups['label'] = mapper.apply(subset_groups['label'].values)

    try:
        if closest_scale is None:
            best_edges_df = _find_and_select_central_edges(
                remapped_volume, remapped_subset_groups, subset_requirement)
        else:
            best_edges_df = _find_closest_approaches(
                remapped_volume, closest_scale, remapped_subset_groups)
    except BaseException:
        # Dump the problematic volume for offline debugging, then re-raise.
        # (BaseException so the dump happens even on interrupt/shutdown,
        # matching the previous bare-except behavior.)
        brick_name = f"{brick.logical_box[:,::-1].tolist()}"
        np.save(f'problematic-remapped-brick-{brick_name}.npy', remapped_volume)
        logger.error(f"Error in brick (XYZ): {brick_name}")  # This will appear in the worker log.
        raise

    if best_edges_df is None:
        return None

    # Translate coordinates to global space
    best_edges_df.loc[:, ['za', 'ya', 'xa']] += brick.physical_box[0]
    best_edges_df.loc[:, ['zb', 'yb', 'xb']] += brick.physical_box[0]

    # Restore to original label set
    best_edges_df['label_a'] = reverse_mapper.apply(best_edges_df['label_a'].values)
    best_edges_df['label_b'] = reverse_mapper.apply(best_edges_df['label_b'].values)

    # Normalize so that label_a < label_b in every row.
    swap_df_cols(best_edges_df, None, best_edges_df.eval('label_a > label_b'), ['a', 'b'])
    return best_edges_df