def _contingency_table(box):
    """
    Build the left/right overlap table for a single box, along with
    the per-label voxel totals of the labels that survive filtering.

    The names ``left_service``, ``right_service``, ``left_subset_labels``
    and ``min_overlap`` are not parameters; they are expected to be
    provided by the surrounding scope.

    Returns:
        (table, left_sizes, right_sizes), all DataFrames.
        ``table`` has columns 'left', 'right' and 'voxel_count'.
        The size tables are computed from the *unfiltered* overlaps,
        but list only the labels still present in ``table``.
    """
    overlaps = contingency_table(
        left_service.get_subvolume(box),
        right_service.get_subvolume(box),
    )
    overlaps = overlaps.sort_index().reset_index()

    # Totals must be taken now, while every overlap row is still present.
    left_totals = overlaps.groupby('left')['voxel_count'].sum()
    right_totals = overlaps.groupby('right')['voxel_count'].sum()

    if len(left_subset_labels) > 0:
        # A row is retained when either:
        #   1. its left label belongs to the requested subset, or
        #   2. its right label also overlaps some left label from (1).
        keep_left = left_totals.index.intersection(left_subset_labels)
        on_kept_left = overlaps['left'].isin(keep_left)
        keep_right = overlaps.loc[on_kept_left, 'right'].unique()
        on_kept_right = overlaps['right'].isin(keep_right)
        overlaps = overlaps.loc[on_kept_left | on_kept_right]

    if min_overlap > 1:
        overlaps = overlaps.loc[overlaps['voxel_count'] >= min_overlap]

    surviving_left = overlaps['left'].unique()
    surviving_right = overlaps['right'].unique()
    return (
        overlaps,
        left_totals.loc[surviving_left].reset_index(),
        right_totals.loc[surviving_right].reset_index(),
    )
def test_contingencytable(setup_hdf5_inputs):
    """
    Run the workflow from the template directory and check that the
    table it saved as ``contingency_table.npy`` equals the table
    computed directly from the two input volumes.
    """
    template_dir, _config, left_vol, right_vol = setup_hdf5_inputs

    # Reference result, computed in-process.
    expected_table = contingency_table(left_vol, right_vol).reset_index()

    execution_dir, _workflow = launch_flow(template_dir, 1)

    saved_records = np.load(f"{execution_dir}/contingency_table.npy")
    output_table = pd.DataFrame(saved_records)

    elementwise_match = (output_table == expected_table)
    assert elementwise_match.all().all()
def test_contingency_table_simple():
    """
    Check contingency_table() on a tiny hand-written pair of label
    arrays: the result must be a Series whose index lists the
    (left, right) pairs in sorted order and whose values are the
    number of positions at which each pair occurs.
    """
    left = np.array([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4]])
    right = np.array([[0, 0, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 0]])

    expected_pairs = [(0, 0), (0, 5),
                      (1, 5), (1, 6),
                      (2, 6), (2, 7),
                      (3, 7), (3, 8),
                      (4, 0)]
    expected_counts = [2, 1, 2, 1, 2, 1, 2, 1, 1]

    table = contingency_table(left, right)
    assert isinstance(table, pd.Series)

    observed_pairs = np.array(table.index.values.tolist())
    assert (observed_pairs == expected_pairs).all()
    assert (table == expected_counts).all()
def test_contingency_table_random():
    """
    Compare contingency_table() against a brute-force count on random
    label images: for every possible (left, right) value pair, the
    table must hold exactly one row with the right count, or no row
    at all when the two labels never coincide.
    """
    a = np.random.randint(5, 10, size=(20, 20), dtype=np.uint32)
    b = np.random.randint(10, 15, size=(20, 20), dtype=np.uint32)

    # The frame does not depend on the pair being checked, so build it once.
    table_df = pd.DataFrame(contingency_table(a, b))

    for val_a, val_b in product(range(5, 10), range(10, 15)):
        expected_overlap = ((a == val_a) & (b == val_b)).sum()
        rows = table_df.query('left == @val_a and right == @val_b')

        if expected_overlap == 0:
            # Non-overlapping pairs must be absent, not listed with a zero count.
            assert len(rows) == 0
            continue

        assert len(rows) == 1
        assert rows['voxel_count'].iloc[0] == expected_overlap
def test_contingencytable(setup_hdf5_inputs):
    """
    Run the workflow and compare its three pickled outputs (the
    contingency table and the left/right size tables) against values
    computed directly from the input volumes.

    TODO: Test the filtering options (left-subset-labels, min-overlap-size)
    """
    template_dir, _config, left_vol, right_vol = setup_hdf5_inputs

    # Reference results, computed in-process.
    expected_table = contingency_table(left_vol, right_vol).sort_index().reset_index()
    expected_left_sizes = expected_table.groupby('left')['voxel_count'].sum()
    expected_right_sizes = expected_table.groupby('right')['voxel_count'].sum()

    execution_dir, _workflow = launch_flow(template_dir, 1)

    # Load the outputs in a fixed order: table, left sizes, right sizes.
    output_paths = (
        f"{execution_dir}/contingency_table.pkl",
        f"{execution_dir}/left_sizes.pkl",
        f"{execution_dir}/right_sizes.pkl",
    )
    loaded = []
    for path in output_paths:
        with open(path, "rb") as f:
            loaded.append(pickle.load(f))
    output_table, left_sizes, right_sizes = loaded

    assert (output_table == expected_table).all().all()
    assert (left_sizes == expected_left_sizes).all().all()
    assert (right_sizes == expected_right_sizes).all().all()
def mitos_in_neighborhood(mito_roi_source, neighborhood_origin_xyz, neighborhood_id, mito_res_scale_diff):
    """
    Find the non-trivial mito objects that overlap the given
    "neighborhood object" and return a table of their IDs and sizes.

    Procedure:

    1. Download the neighborhood segmentation around the origin and
       turn it into a mask for ``neighborhood_id``.
    2. Erode that mask by 1 px (see the note above this function
       for the reason).
    3. Fetch the mito segmentation for the same box and blank out
       everything outside the eroded mask.
    4. Fetch (from dvid) the size of every mito object found.
    5. Discard mitos below the minimum size that is actually used
       in our published mito analyses.
    6. Purely as extra information, group the surviving mitos into
       connected components.
    7. Return the mito IDs, sizes, and CC info as a DataFrame.

    Args:
        mito_roi_source:
            String of the form '<protocol>://<server>/<uuid>/<instance>'
            naming the neighborhood segmentation.
        neighborhood_origin_xyz:
            Center of the neighborhood, in X,Y,Z order.
        neighborhood_id:
            Label of the neighborhood object within that segmentation.
        mito_res_scale_diff:
            Number of scale levels between the mito segmentation and
            the neighborhood segmentation; it is subtracted from
            ANALYSIS_SCALE when fetching mito voxels, and mito sizes
            are multiplied by (2**mito_res_scale_diff)**3.

    Returns:
        DataFrame indexed by 'mito', holding the mito sizes
        left-merged with the columns 'cc' and 'cc_size'.
    """
    # The neighborhood segmentation source
    protocol, url = mito_roi_source.split('://')[-2:]
    server, uuid, instance = url.split('/')
    server = f'{protocol}://{server}'

    downsample_factor = 2**ANALYSIS_SCALE
    center_zyx = np.array(neighborhood_origin_xyz[::-1])

    # The box is aligned to the analysis scale first, and only then scaled.
    box = round_box([center_zyx - RADIUS, 1 + center_zyx + RADIUS], downsample_factor)
    box //= downsample_factor

    neighborhood_seg = fetch_labelmap_voxels(server, uuid, instance, box, scale=ANALYSIS_SCALE)
    neighborhood_mask = (neighborhood_seg == neighborhood_id)

    # Removing the inner edge is equivalent to a 1-px erosion.
    neighborhood_mask ^= binary_edge_mask(neighborhood_mask, 'inner')

    mito_scale = ANALYSIS_SCALE - mito_res_scale_diff
    mito_seg = fetch_labelmap_voxels(*MITO_SEG, box, supervoxels=True, scale=mito_scale)
    assert neighborhood_mask.shape == mito_seg.shape
    mito_seg = np.where(neighborhood_mask, mito_seg, 0)

    # The mito segmentation still contains scraps and slivers that were
    # excluded from the "real" mito set, so they are dropped by size below.
    mito_ids = set(pd.unique(mito_seg.ravel())) - {0}
    mito_sizes = fetch_sizes(*MITO_SEG, [*mito_ids], supervoxels=True)
    mito_sizes = mito_sizes.rename_axis('mito')
    mito_sizes *= (2**mito_res_scale_diff)**3

    # Main result: the mito IDs (and sizes) that pass the size threshold.
    mito_sizes = mito_sizes.loc[mito_sizes >= MIN_MITO_SIZE]

    # Extra info: connected components among the surviving mitos,
    # computed on the tightest box that contains them.
    kept_mask = mask_for_labels(mito_seg, mito_sizes.index)
    kept_box = compute_nonzero_box(kept_mask)
    kept_mask = extract_subvol(kept_mask, kept_box)
    mito_seg = extract_subvol(mito_seg, kept_box)
    mito_cc = label(kept_mask, connectivity=1)

    cc_table = (
        contingency_table(mito_seg, mito_cc)
        .reset_index()
        .rename(columns={'left': 'mito', 'right': 'cc', 'voxel_count': 'cc_size'})
        .set_index('mito')
    )
    return pd.DataFrame(mito_sizes).merge(cc_table, 'left', left_index=True, right_index=True)
def _contingency_table(box):
    """
    Fetch the left and right subvolumes for ``box`` and return their
    contingency table as a flat DataFrame (index levels moved into
    columns).

    ``left_service`` and ``right_service`` are not parameters; they are
    expected to be provided by the surrounding scope.
    """
    # The left volume is fetched before the right one.
    volumes = (
        left_service.get_subvolume(box),
        right_service.get_subvolume(box),
    )
    return contingency_table(*volumes).reset_index()
def select_hulls_for_mito_bodies(mito_body_ct, mito_bodies_mask, mito_binary, body_seg, hull_masks, seed_bodies,
                                 box, scale, viewer=None, res0=8, progress=False):
    """
    For each mito body, choose the hull body that overlaps it the most,
    and record that choice in a new 'hull_body' column of mito_body_ct.

    Args:
        mito_body_ct:
            DataFrame indexed by mito body ID. It is left-merged (on its
            index) with the selected hull bodies. On the early-exit path
            the passed-in table itself is modified (a 'hull_body' column
            of zeros is assigned to it).
        mito_bodies_mask:
            Mask of the voxels that belong to mito bodies.
            # assumes a boolean array with the same shape as body_seg — TODO confirm
        mito_binary:
            Mask of mito voxels, combined with mito_bodies_mask via '&'.
        body_seg:
            Body label volume.
        hull_masks:
            Mapping of hull_cc -> (mask_box, mask). The mask arrays are
            edited in place here when a hull touches non-mito bodies
            other than its seed body.
        seed_bodies:
            Lookup (indexed by hull_cc) of the body each hull was
            generated from.
        box, scale:
            Only forwarded to update_seg_layer() for the viewer.
        viewer:
            If truthy, the layers 'altered-bodies' and 'final-hull-seg'
            are sent to it via update_seg_layer().
        res0:
            Not used in this function.
        progress:
            If True, tqdm_proxy is not disabled for the loop over hulls.

    Returns:
        mito_body_ct with a 'hull_body' column (np.uint64; 0 where no hull
        was selected). Except on the early-exit path, every other column
        is cast to np.float32.
    """
    # Mito-body labels restricted to voxels that are actually mito,
    # and the body labels of everything that is NOT a mito body.
    mito_bodies_mito_seg = np.where(mito_bodies_mask & mito_binary, body_seg, 0)
    nonmito_body_seg = np.where(mito_bodies_mask, 0, body_seg)

    hull_cc_overlap_stats = []
    for hull_cc, (mask_box, mask) in tqdm_proxy(hull_masks.items(), disable=not progress):
        mbms = mito_bodies_mito_seg[box_to_slicing(*mask_box)]
        masked_hull_cc_bodies = np.where(mask, mbms, 0)

        # Faster to check for any non-zero values at all before trying to count them.
        # This early check saves a lot of time in practice.
        if not masked_hull_cc_bodies.any():
            continue

        # This hull was generated from a particular seed body (non-mito body).
        # If it accidentally overlaps with any other non-mito bodies,
        # then delete those voxels from the hull.
        # If that causes the hull to become split apart into multiple connected components,
        # then keep only the component(s) which overlap the seed body.
        seed_body = seed_bodies[hull_cc]
        nmbs = nonmito_body_seg[box_to_slicing(*mask_box)]
        other_bodies = set(pd.unique(nmbs[mask])) - {0, seed_body}
        if other_bodies:
            # Keep only the voxels on mito bodies or on the
            # particular non-mito body for this hull (the "seed body").
            mbm = mito_bodies_mask[box_to_slicing(*mask_box)]
            # In-place edit: this changes the array stored in hull_masks.
            mask[:] &= (mbm | (nmbs == seed_body))

            # presumably taggedView shares memory with the original array,
            # so the in-place write below still reaches hull_masks — TODO confirm
            mask = vigra.taggedView(mask, 'zyx')
            mask_cc = vigra.analysis.labelMultiArrayWithBackground(mask.view(np.uint8))
            if mask_cc.max() > 1:
                # More than one component: keep those that touch the seed body.
                mask_ct = contingency_table(mask_cc, nmbs).reset_index()
                keep_ccs = mask_ct['left'].loc[(mask_ct['left'] != 0) & (mask_ct['right'] == seed_body)]
                mask[:] = mask_for_labels(mask_cc, keep_ccs)

        # NOTE(review): masked_hull_cc_bodies was computed BEFORE the mask was
        # trimmed above, so 'overlap' reflects the untrimmed hull while
        # 'hull_size' reflects the trimmed one — confirm this is intended.
        mito_bodies, counts = np.unique(masked_hull_cc_bodies, return_counts=True)
        overlaps = pd.DataFrame({
            'mito_body': mito_bodies,
            'overlap': counts,
            'hull_cc': hull_cc,
            'hull_size': mask.sum(),
            'hull_body': seed_body
        })
        hull_cc_overlap_stats.append(overlaps)

    if len(hull_cc_overlap_stats) == 0:
        logger.warning("Could not find any matches for any mito bodies!")
        # Note: this assigns into the caller's DataFrame.
        mito_body_ct['hull_body'] = np.uint64(0)
        return mito_body_ct

    hull_cc_overlap_stats = pd.concat(hull_cc_overlap_stats, ignore_index=True)
    # Label 0 is the background of masked_hull_cc_bodies, not a mito body.
    hull_cc_overlap_stats = hull_cc_overlap_stats.query('mito_body != 0').copy()

    # Aggregate the stats for each body and the hull bodies it overlaps with,
    # Select the hull_body with the most overlap, or in the case of ties, the hull body that is largest overall.
    # (Ties are probably more common in the event that two hulls completely encompass a small mito body.)
    hull_body_overlap_stats = hull_cc_overlap_stats.groupby(['mito_body', 'hull_body'])[['overlap', 'hull_size']].sum()
    hull_body_overlap_stats = hull_body_overlap_stats.sort_values(['mito_body', 'overlap', 'hull_size'], ascending=False)
    hull_body_overlap_stats = hull_body_overlap_stats.reset_index()

    # After the descending sort, the first row per mito_body is the winner.
    mito_hull_selections = (hull_body_overlap_stats.drop_duplicates('mito_body').set_index('mito_body')['hull_body'])
    mito_body_ct = mito_body_ct.merge(mito_hull_selections, 'left', left_index=True, right_index=True)

    # Mito bodies without any hull get hull_body == 0.
    mito_body_ct['hull_body'] = mito_body_ct['hull_body'].fillna(0)

    dtypes = {col: np.float32 for col in mito_body_ct.columns}
    dtypes['hull_body'] = np.uint64
    mito_body_ct = mito_body_ct.astype(dtypes)

    if viewer:
        assert mito_hull_selections.index.dtype == mito_hull_selections.values.dtype == np.uint64

        # Show body_seg with each mito body relabeled as its selected hull body.
        mito_hull_mapper = LabelMapper(mito_hull_selections.index.values, mito_hull_selections.values)
        remapped_body_seg = mito_hull_mapper.apply(body_seg, True)
        remapped_body_seg = apply_mask_for_labels(remapped_body_seg, mito_hull_selections.values)
        update_seg_layer(viewer, 'altered-bodies', remapped_body_seg, scale, box)

        # Show the final hull masks (after erasure of non-target bodies)
        assert sorted(hull_masks.keys()) == [*range(1, 1 + len(hull_masks))]

        # Hulls are painted in ascending size order, so where hulls
        # overlap, the larger hull is written last.
        hull_cc_overlap_stats = hull_cc_overlap_stats.sort_values('hull_size')
        hull_seg = np.zeros_like(remapped_body_seg)
        for row in hull_cc_overlap_stats.itertuples():
            mask_box, mask = hull_masks[row.hull_cc]
            view = hull_seg[box_to_slicing(*mask_box)]
            view[:] = np.where(mask, row.hull_body, view)
        update_seg_layer(viewer, 'final-hull-seg', hull_seg, scale, box)

    return mito_body_ct
def identify_mito_bodies(body_seg, mito_binary, box, scale, halo, body_seg_dvid_src=None, viewer=None, res0=8,
                         resource_mgr_client=None):
    """
    Identify the bodies in body_seg that are mostly made of mito voxels,
    judged both by volume and by the mito fraction of their edge voxels.

    Args:
        body_seg:
            Body label volume.
        mito_binary:
            Mito classification volume of the same shape.
            # presumably valued 0 (non-mito) / 1 (mito), per the column renames below — TODO confirm
        box:
            Bounding box of body_seg.
            # assumes a (2,3) numpy array [start, stop] — TODO confirm
        scale:
            Scale level of the volumes. Voxel counts are multiplied by
            (2**scale)**3 and edge counts by (2**scale)**2.
        halo:
            Width of the border stripped from each side of the box to
            obtain the "central" box.
        body_seg_dvid_src:
            Optional arguments for fetch_sizes(), unpacked positionally.
            When given, global body sizes are fetched for the candidate
            bodies; otherwise the local size is used as 'body_size'.
        viewer:
            Forwarded to update_mask_layer().
        res0:
            Forwarded to the final update_mask_layer() call only.
        resource_mgr_client:
            If given, fetch_sizes() is called inside its access_context().

    Returns:
        (mito_bodies, mito_bodies_mask, filtered_ct), or
        (None, None, None) if the volume lacks either mito or non-mito
        voxels, or if no body passes the filters.
    """
    # Identify segments that are mostly mito
    ct = contingency_table(body_seg, mito_binary).reset_index().rename(columns={
        'left': 'body',
        'right': 'is_mito'
    })
    # One row per body, one column per mito class.
    ct = ct.pivot(index='body', columns='is_mito', values='voxel_count').fillna(0).rename(columns={
        0: 'non_mito',
        1: 'mito'
    })
    if 'mito' not in ct or 'non_mito' not in ct:
        # Nothing to do if there aren't any mito voxels
        return None, None, None

    # Scale the voxel counts by the volume of one voxel at this scale.
    ct[['mito', 'non_mito']] *= ((2**scale)**3)
    ct['body_size_local'] = ct.eval('mito+non_mito')
    ct['mito_frac_local'] = ct.eval('mito/body_size_local')
    ct = ct.sort_values('mito_frac_local', ascending=False)

    # Also compute the halo vs. non-halo sizes of every body.
    central_box = (box - box[0]) + [[halo, halo, halo], [-halo, -halo, -halo]]
    central_body_seg = body_seg[box_to_slicing(*central_box)]

    # NOTE(review): these are raw voxel counts at the current scale, whereas
    # body_size_local above was multiplied by (2**scale)**3. For scale != 0,
    # 'halo_size' below therefore mixes units — confirm whether that is intended.
    central_sizes = (pd.Series(central_body_seg.ravel('K')).value_counts().rename('body_size_central').rename_axis('body'))

    central_mask = np.ones(central_box[1] - central_box[0], bool)
    update_mask_layer(viewer, 'central-box', central_mask, scale, central_box + box[0])

    # Bodies absent from the central box get body_size_central == 0.
    ct = ct.merge(central_sizes, 'left', on='body').fillna(0)
    ct['halo_size'] = ct.eval('body_size_local - body_size_central')
    ct = ct.query('body != 0')

    # Immediately drop bodies that reside only in the halo
    ct = ct.query('body_size_central > 0').copy()

    # For the bodies that MIGHT pass the mito threshold (based on their local size)
    # fetch their global size, if a dvid source was provided.
    # If not, we'll just use the local size, which is less accurate but
    # faster since we've already got it.
    if body_seg_dvid_src is None:
        ct['body_size'] = ct['body_size_local']
    else:
        # NOTE(review): the preselection compares the local VOLUME fraction
        # against MITO_EDGE_FRAC — confirm this is the intended threshold.
        local_mito_bodies = ct.query('mito_frac_local >= @MITO_EDGE_FRAC').index
        if resource_mgr_client is None:
            body_sizes = fetch_sizes(*body_seg_dvid_src, local_mito_bodies).rename('body_size')
        else:
            with resource_mgr_client.access_context(body_seg_dvid_src[0], True, 1, 1):
                body_sizes = fetch_sizes(*body_seg_dvid_src, local_mito_bodies).rename('body_size')

        # Left merge: bodies that were not preselected have no fetched size.
        ct = ct.merge(body_sizes, 'left', on='body')

    # Due to downsampling effects, bodies can be larger at scale-1 than at scale-0, especially for tiny volumes.
    ct['mito_frac_global_vol'] = np.minimum(ct.eval('mito/body_size'), 1.0)

    # Calculate the proportion of mito edge pixels
    body_edges = np.where(edge_mask(body_seg, 'both'), body_seg, np.uint64(0))
    edge_ct = contingency_table(body_edges, mito_binary).reset_index().rename(columns={
        'left': 'body',
        'right': 'is_mito'
    })
    edge_ct = edge_ct.pivot(index='body', columns='is_mito', values='voxel_count').fillna(0).rename(columns={
        0: 'non_mito',
        1: 'mito'
    })

    # Surface area scales with square of resolution, not cube
    edge_ct[['mito', 'non_mito']] *= ((2**scale)**2)
    edge_ct['body_size_local'] = edge_ct.eval('mito+non_mito')
    edge_ct['mito_frac_local'] = edge_ct.eval('mito/body_size_local')
    edge_ct = edge_ct.sort_values('mito_frac_local', ascending=False)
    edge_ct = edge_ct.query('body != 0')

    # Columns present in both tables get the '_vol' / '_edge' suffixes;
    # columns unique to ct (e.g. 'body_size') keep their names.
    full_ct = ct.merge(edge_ct, 'inner', on='body', suffixes=['_vol', '_edge'])

    # A body qualifies if it is small enough, mostly mito by (global) volume,
    # and sufficiently mito along its edges.
    q = ("body_size < @MAX_MITO_FRAGMENT_VOL"
         " and mito_frac_global_vol >= @MITO_VOL_FRAC"
         " and mito_frac_local_edge >= @MITO_EDGE_FRAC")
    filtered_ct = full_ct.query(q)

    mito_bodies = filtered_ct.index
    mito_bodies_mask = mask_for_labels(body_seg, mito_bodies)
    # The mask layer is updated even when no bodies qualified.
    update_mask_layer(viewer, 'mito-bodies-mask', mito_bodies_mask, scale, box, res0)

    if len(filtered_ct) == 0:
        return None, None, None

    return mito_bodies, mito_bodies_mask, filtered_ct.copy()