Exemple #1
0
        def process_brick(box, roi_brick_s5, labels):
            """
            Tabulate voxel counts for every (body, roi) pair within one brick.

            The segmentation for the brick is fetched from ``input_service``
            at the ``scale`` of the enclosing scope, everything other than
            the requested labels is erased, and the surviving voxels are
            counted against the upsampled ROI volume.

            Args:
                box:
                    A box at scale-0.  It is divided by ``2**scale``
                    before it is used to fetch the segmentation.
                roi_brick_s5:
                    A volume of roi voxels at scale-5, taken
                    from the region that corresponds to the box.
                    It is upsampled by ``2**(5 - scale)`` so that it
                    aligns with the segmentation data for the box.
                labels:
                    A set of labels to include in the results.
                    Other labels will be ignored.

            Returns:
                DataFrame with columns ``['body', 'roi_id', 'voxels']``.
                Rows for body 0 are excluded.
            """
            box = box // (2**scale)
            roi_brick = upsample(roi_brick_s5, 2**(5 - scale))
            assert (roi_brick.shape == box_shape(box)).all(), \
                f"{roi_brick.shape} does not match box {box.tolist()}"

            # Fetch the segmentation, then zero out every voxel
            # that doesn't belong to one of the requested labels.
            seg = np.asarray(input_service.get_subvolume(box, scale), order='C')
            apply_mask_for_labels(seg, labels, inplace=True)

            voxel_table = pd.DataFrame({
                'body': seg.reshape(-1),
                'roi_id': roi_brick.reshape(-1),
            })

            # One row per (body, roi_id) pair, with the pair's voxel count.
            pair_sizes = voxel_table.groupby(['body', 'roi_id']).size()
            stats = pair_sizes.rename('voxels').reset_index()
            return stats.query('body != 0')
def test_apply_mask_for_labels():
    """
    Check apply_mask_for_labels() in both its default mode
    (result is returned) and with inplace=True (input is overwritten).
    """
    labels = {2, 5, 9}
    expected = [[0, 2, 0], [0, 5, 0]]
    volume = np.asarray([[0, 2, 3], [4, 5, 0]])

    # Default mode: inspect the returned volume.
    masked_volume = apply_mask_for_labels(volume, labels)
    assert (masked_volume == expected).all()

    # In-place mode: inspect the input volume itself.
    apply_mask_for_labels(volume, labels, inplace=True)
    assert (volume == expected).all()
def test_copysegmentation_from_dvid_to_dvid_input_mask(
        setup_dvid_segmentation_input, disable_auto_retry):
    """
    Configure a dvid-to-dvid copy with both 'input-mask-labels' and
    'add-offset-to-ids' set, build the volume we expect it to produce,
    and hand everything to _run_to_dvid().
    """
    (template_dir, config, volume, dvid_address, repo_uuid,
     _output_segmentation_name) = setup_dvid_segmentation_input

    # Write to a dedicated instance to make sure we get a fresh output.
    output_segmentation_name = 'copyseg-with-input-mask-from-dvid'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    # Add an offset, which is added to both the input volume AND the mask labels
    offset = 2000
    config["copysegmentation"]["add-offset-to-ids"] = offset

    # Select some labels that don't extend throughout the whole volume
    selected_labels = pd.unique(volume[150, 64:128, 64:128].reshape(-1))
    assert 0 not in selected_labels
    config["copysegmentation"]["input-mask-labels"] = selected_labels.tolist()

    # Bounding box of every voxel that belongs to a selected label.
    label_mask = mask_for_labels(volume, selected_labels)
    selected_coords = np.array(label_mask.nonzero()).transpose()
    box_start = selected_coords.min(axis=0)
    box_stop = 1 + selected_coords.max(axis=0)
    selected_box = np.array([box_start, box_stop])

    # The configured bounding-box is reversed along its last axis
    # before being intersected with the selected box.
    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]
    subvol_box = box_intersection(input_box, selected_box)

    # Expected result: the selected labels (plus offset) within
    # the intersected box, and zeros everywhere else.
    selected_subvol = extract_subvol(volume, subvol_box).copy()
    selected_subvol = apply_mask_for_labels(selected_subvol, selected_labels)
    selected_subvol = np.where(selected_subvol, selected_subvol + offset, 0)

    expected_vol = np.zeros(volume.shape, np.uint64)
    overwrite_subvol(expected_vol, subvol_box, selected_subvol)

    setup = (template_dir, config, expected_vol,
             dvid_address, repo_uuid, output_segmentation_name)
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
Exemple #4
0
        def process_brick(box, roi_brick_s5, labels):
            """
            Tabulate voxel counts for every (body, roi) pair within one brick.

            Args:
                box:
                    The box to fetch from ``input_service``.
                    It is used as-is (no rescaling is applied here).
                roi_brick_s5:
                    A volume of roi voxels at scale-5.  It is upsampled by
                    ``2**(5 - scale)`` (``scale`` comes from the enclosing
                    scope) and must then match the shape of ``box``.
                labels:
                    The labels to include in the results.
                    All other labels are erased before counting.

            Returns:
                DataFrame with columns ``['body', 'roi_id', 'voxels']``.
                Rows for body 0 are excluded.
            """
            roi_brick = upsample(roi_brick_s5, 2**(5 - scale))
            assert (roi_brick.shape == box_shape(box)).all()

            # Fetch the segmentation, then zero out every voxel
            # that doesn't belong to one of the requested labels.
            # Note: Service is already configured at the right scale.
            seg = np.asarray(input_service.get_subvolume(box), order='C')
            apply_mask_for_labels(seg, labels, inplace=True)

            voxel_table = pd.DataFrame({
                'body': seg.reshape(-1),
                'roi_id': roi_brick.reshape(-1),
            })

            # One row per (body, roi_id) pair, with the pair's voxel count.
            pair_sizes = voxel_table.groupby(['body', 'roi_id']).size()
            stats = pair_sizes.rename('voxels').reset_index()
            return stats.query('body != 0')
        def brick_cc(brick):
            """
            Run connected components on the volume of a single brick.

            If ``subset_labels`` (from the enclosing scope) is non-empty,
            only those labels are considered; everything else is treated
            as background.

            Returns:
                Tuple ``(cc_brick, cc_overlaps, cc_max, orig_max)``:

                cc_brick:
                    A new Brick with the same boxes, location_id and
                    compression as the input, holding the CC volume.
                cc_overlaps:
                    DataFrame with columns ``['orig', 'cc']`` listing each
                    distinct (original label, component label) pairing,
                    excluding zeros.
                cc_max:
                    The largest component label, or ``np.uint64(0)``
                    if there are no components.
                orig_max:
                    The max of the brick's volume, taken before
                    any subset mask was applied.
            """
            # Grab the volume before compressing the input brick.
            orig_vol = brick.volume
            brick.compress()

            # Track the original max so we know what the first
            # available label is when we write the final results.
            orig_max = orig_vol.max()

            if subset_labels:
                orig_vol = apply_mask_for_labels(orig_vol, subset_labels)

            if orig_vol.any():
                cc_vol = skm.label(orig_vol, background=0, connectivity=1)
                assert cc_vol.dtype == np.int64
                cc_vol = cc_vol.view(np.uint64)

                # Leave 0-pixels alone.
                cc_vol[orig_vol == 0] = np.uint64(0)

                # Keep track of which original values each cc corresponds to.
                cc_overlaps = pd.DataFrame({
                    'orig': orig_vol.reshape(-1),
                    'cc': cc_vol.reshape(-1),
                })
                cc_overlaps = cc_overlaps.query('orig != 0 and cc != 0')
                cc_overlaps = cc_overlaps.drop_duplicates()
                assert (cc_overlaps.dtypes == np.uint64).all()

                cc_max = cc_overlaps['cc'].max() if len(cc_overlaps) > 0 else np.uint64(0)
            else:
                # Fast path for all-zero bricks
                cc_vol = orig_vol
                cc_overlaps = pd.DataFrame({'orig': [], 'cc': []}, dtype=np.uint64)
                cc_max = np.uint64(0)

            cc_brick = Brick(
                brick.logical_box,
                brick.physical_box,
                cc_vol,
                location_id=brick.location_id,
                compression=brick.compression,
            )
            return cc_brick, cc_overlaps, cc_max, orig_max
def _fetch_body_mito_seg(mito_src, body_mask, mask_box, scale, valid_mito_mapper, logger):
    """
    Fetch the mito segmentation for ``mask_box`` and reduce it
    to the mitos that are relevant for the given body.

    If ``valid_mito_mapper`` is provided (truthy), the fetched segmentation
    is simply passed through ``valid_mito_mapper.apply_with_default()`` and
    returned; the body mask is not consulted in that case.

    Otherwise, only those mitos are kept which have more than half of
    their voxels (as seen in the fetched volume) inside the core of the
    body mask, i.e. where ``body_mask == 2``.

    Args:
        mito_src:
            VolumeService to obtain mito segmentation
        body_mask:
            Volume with labels 1+2 as described in _fetch_body_mask()
        mask_box:
            The box to fetch, passed to ``mito_src.get_subvolume()``.
        scale:
            The scale to fetch, passed to ``mito_src.get_subvolume()``.
        valid_mito_mapper:
            LabelMapper that keeps only valid mitos when its
            apply_with_default() method is called.
        logger:
            Logger given to the Timer which times the fetch.

    Returns:
        The filtered mito segmentation.  When no mapper is given, this is
        the return value of ``apply_mask_for_labels(..., inplace=True)``
        on the fetched volume.
    """
    with Timer("Fetching mito segmentation", logger):
        assert _have_flyemflows and isinstance(mito_src, VolumeService)
        mito_seg = mito_src.get_subvolume(mask_box, scale)

    if valid_mito_mapper:
        return valid_mito_mapper.apply_with_default(mito_seg)

    # Due to downsampling discrepancies between the mito seg and neuron seg,
    # mito from neighboring neurons may slightly overlap this neuron.
    # Keep only mitos which have more of their voxels in the body mask than not.
    #
    # FIXME:
    #   This heuristic fails at the volume edge, where we might see just
    #   part of the mito.
    #   Need to overwrite small mitos on the volume edge with FACE_MARKER
    #   to indicate that they can't be trusted, and if such a mito is
    #   the "winning" mito, then we need to try a different search config.
    in_body_seg = np.where(body_mask == 2, mito_seg, 0)
    inside_sizes = pd.Series(in_body_seg.ravel()).value_counts()
    del in_body_seg

    total_sizes = pd.Series(mito_seg.ravel()).value_counts()

    # Give both tables the same index; mitos absent from one get a count of 0.
    total_sizes, inside_sizes = total_sizes.align(inside_sizes, fill_value=0)

    mostly_inside = inside_sizes > total_sizes / 2
    core_mitos = set(total_sizes[mostly_inside].index)
    core_mitos.discard(0)

    return apply_mask_for_labels(mito_seg, core_mitos, inplace=True)
Exemple #7
0
def select_hulls_for_mito_bodies(mito_body_ct,
                                 mito_bodies_mask,
                                 mito_binary,
                                 body_seg,
                                 hull_masks,
                                 seed_bodies,
                                 box,
                                 scale,
                                 viewer=None,
                                 res0=8,
                                 progress=False):
    """
    For each mito body, select the "hull body" whose hull(s) overlap it most,
    and record the selection in a ``hull_body`` column of ``mito_body_ct``.

    Each hull mask is compared against the voxels of ``body_seg`` that lie
    within both ``mito_bodies_mask`` and ``mito_binary``.  Overlap counts are
    summed per (mito_body, hull_body) pair, and the pair with the largest
    overlap wins (ties are broken by the larger summed hull size).

    Args:
        mito_body_ct:
            DataFrame whose index is joined against mito body IDs.
            NOTE(review): presumably indexed by mito body -- confirm with caller.
        mito_bodies_mask:
            Mask volume, combined with ``mito_binary`` via ``&`` and used
            to split ``body_seg`` into mito-body and non-mito-body voxels.
        mito_binary:
            Mask volume, combined with ``mito_bodies_mask`` via ``&``.
        body_seg:
            Label volume of body IDs.
        hull_masks:
            Mapping of ``hull_cc -> (mask_box, mask)``.
            The mask arrays may be MODIFIED IN PLACE by this function
            (see the comments in the loop below).
            If ``viewer`` is given, the keys must be exactly 1..len(hull_masks).
        seed_bodies:
            Indexable by ``hull_cc``; gives the body each hull was generated from.
        box, scale:
            Only used when updating the viewer layers.
        viewer:
            Optional.  If given, the 'altered-bodies' and 'final-hull-seg'
            layers are updated via update_seg_layer().
        res0:
            Not used within this function.
        progress:
            If True, the progress bar from tqdm_proxy() is enabled.

    Returns:
        ``mito_body_ct`` with an added ``hull_body`` column, where 0 means no
        hull was selected.

        If no hull overlapped any mito-body voxels at all, the input table is
        modified in place (``hull_body`` set to uint64 0) and returned as-is.
        Otherwise a merged table is returned in which ``hull_body`` is uint64
        and every other column has been cast to float32.
    """

    mito_bodies_mito_seg = np.where(mito_bodies_mask & mito_binary, body_seg,
                                    0)
    nonmito_body_seg = np.where(mito_bodies_mask, 0, body_seg)

    hull_cc_overlap_stats = []
    for hull_cc, (mask_box, mask) in tqdm_proxy(hull_masks.items(),
                                                disable=not progress):
        mbms = mito_bodies_mito_seg[box_to_slicing(*mask_box)]
        masked_hull_cc_bodies = np.where(mask, mbms, 0)
        # Faster to check for any non-zero values at all before trying to count them.
        # This early check saves a lot of time in practice.
        if not masked_hull_cc_bodies.any():
            continue

        # This hull was generated from a particular seed body (non-mito body).
        # If it accidentally overlaps with any other non-mito bodies,
        # then delete those voxels from the hull.
        # If that causes the hull to become split apart into multiple connected components,
        # then keep only the component(s) which overlap the seed body.
        seed_body = seed_bodies[hull_cc]
        nmbs = nonmito_body_seg[box_to_slicing(*mask_box)]
        other_bodies = set(pd.unique(nmbs[mask])) - {0, seed_body}
        if other_bodies:
            # Keep only the voxels on mito bodies or on the
            # particular non-mito body for this hull (the "seed body").
            # Note: This writes into the mask array stored in hull_masks.
            mbm = mito_bodies_mask[box_to_slicing(*mask_box)]
            mask[:] &= (mbm | (nmbs == seed_body))
            # NOTE(review): presumably taggedView() shares memory with the original
            # mask, so the assignment below also lands in hull_masks -- confirm.
            mask = vigra.taggedView(mask, 'zyx')
            mask_cc = vigra.analysis.labelMultiArrayWithBackground(
                mask.view(np.uint8))
            if mask_cc.max() > 1:
                # The hull was split: keep only the components that touch the seed body.
                mask_ct = contingency_table(mask_cc, nmbs).reset_index()
                keep_ccs = mask_ct['left'].loc[(mask_ct['left'] != 0) &
                                               (mask_ct['right'] == seed_body)]
                mask[:] = mask_for_labels(mask_cc, keep_ccs)

        # NOTE(review): masked_hull_cc_bodies was computed BEFORE the mask was
        # trimmed above, so these overlap counts reflect the untrimmed hull,
        # whereas 'hull_size' below reflects the trimmed one.
        # Confirm that this is intended.
        mito_bodies, counts = np.unique(masked_hull_cc_bodies,
                                        return_counts=True)
        overlaps = pd.DataFrame({
            'mito_body': mito_bodies,
            'overlap': counts,
            'hull_cc': hull_cc,
            'hull_size': mask.sum(),
            'hull_body': seed_body
        })
        hull_cc_overlap_stats.append(overlaps)

    if len(hull_cc_overlap_stats) == 0:
        # Note: This path modifies the caller's table in place
        # and skips the dtype conversion performed further below.
        logger.warning("Could not find any matches for any mito bodies!")
        mito_body_ct['hull_body'] = np.uint64(0)
        return mito_body_ct

    # Combine the per-hull tables and drop the rows for background (0).
    hull_cc_overlap_stats = pd.concat(hull_cc_overlap_stats, ignore_index=True)
    hull_cc_overlap_stats = hull_cc_overlap_stats.query(
        'mito_body != 0').copy()

    # Aggregate the stats for each body and the hull bodies it overlaps with,
    # Select the hull_body with the most overlap, or in the case of ties, the hull body that is largest overall.
    # (Ties are probably more common in the event that two hulls completely encompass a small mito body.)
    hull_body_overlap_stats = hull_cc_overlap_stats.groupby(
        ['mito_body', 'hull_body'])[['overlap', 'hull_size']].sum()
    hull_body_overlap_stats = hull_body_overlap_stats.sort_values(
        ['mito_body', 'overlap', 'hull_size'], ascending=False)
    hull_body_overlap_stats = hull_body_overlap_stats.reset_index()

    # After the descending sort, the first row for each mito_body is the winner.
    mito_hull_selections = (hull_body_overlap_stats.drop_duplicates(
        'mito_body').set_index('mito_body')['hull_body'])
    mito_body_ct = mito_body_ct.merge(mito_hull_selections,
                                      'left',
                                      left_index=True,
                                      right_index=True)
    # Rows without a selected hull come out of the left-merge as NaN; mark them with 0.
    mito_body_ct['hull_body'] = mito_body_ct['hull_body'].fillna(0)

    # Every column becomes float32, except hull_body, which holds body IDs.
    dtypes = {col: np.float32 for col in mito_body_ct.columns}
    dtypes['hull_body'] = np.uint64
    mito_body_ct = mito_body_ct.astype(dtypes)

    if viewer:
        # Show the body segmentation with each mito body relabeled as its selected
        # hull body, restricted to the selected hull bodies.
        assert mito_hull_selections.index.dtype == mito_hull_selections.values.dtype == np.uint64
        mito_hull_mapper = LabelMapper(mito_hull_selections.index.values,
                                       mito_hull_selections.values)
        remapped_body_seg = mito_hull_mapper.apply(body_seg, True)
        remapped_body_seg = apply_mask_for_labels(remapped_body_seg,
                                                  mito_hull_selections.values)
        update_seg_layer(viewer, 'altered-bodies', remapped_body_seg, scale,
                         box)

        # Show the final hull masks (after erasure of non-target bodies)
        assert sorted(hull_masks.keys()) == [*range(1, 1 + len(hull_masks))]
        # Hulls are painted in order of ascending hull_size,
        # so where hulls overlap, the larger hull is written last.
        hull_cc_overlap_stats = hull_cc_overlap_stats.sort_values('hull_size')
        hull_seg = np.zeros_like(remapped_body_seg)
        for row in hull_cc_overlap_stats.itertuples():
            mask_box, mask = hull_masks[row.hull_cc]
            view = hull_seg[box_to_slicing(*mask_box)]
            view[:] = np.where(mask, row.hull_body, view)
        update_seg_layer(viewer, 'final-hull-seg', hull_seg, scale, box)

    return mito_body_ct
def _find_closest_approaches(volume, closest_scale, subset_groups):
    """
    Find "edges" (label pairs) between objects of the same group which
    come close to one another in the given volume without actually touching.

    Note:
        ``subset_groups`` must provide a 'group_cc' column.  It identifies
        the nodes for which an edge was already found during the
        "direct adjacencies" step of this workflow.  With it, we can skip
        edges that were already found, as well as edges that aren't needed
        to connect a node with the rest of its group.

        Only pairs of nodes that span two different components
        of the same group are examined.

        For example, if a particular group has three components
        [1,2,3], [4,5], [6], then closest approaches are sought
        only for the following pairs of nodes:
        [1,4], [1,5], [1,6], [2,4], [2,5], [2,6], [4,6], [5,6]
        (Pairs [1,2], [1,3], [2,3], [4,5] are not examined,
        since they are known to belong to the same component already.)

        This saves computation, at the cost of omitting some
        close-but-not-touching edges between nodes of the same group,
        merely because some other path of direct adjacencies
        already connects those two nodes.

    Args:
        volume:
            3D label volume, np.uint32

        closest_scale:
            If closest_scale > 0, then the "closest" points will be computed
            after downsampling the mask for each object at the given scale.
            (The returned point is still guaranteed to fall within the
            object at scale 0, but it may be a pixel or two away from the
            true point of closest approach.)

        subset_groups:
            DataFrame with columns [label, group, group_cc].
            Each grouped subset of labels is considered independently.
            Furthermore, no edges are sought within the same group_cc,
            as explained above.

    Returns:
        DataFrame with columns:
            [label_a, label_b, za, ya, xa, zb, yb, xb, distance, edge_area]
        or None if there is nothing to examine or no edges were found.

        Note:
            ``edge_area`` will be 0 for all rows, since none of the body
            pairs physically touch in the volume (a precondition for the
            input).

    Raises:
        RuntimeError:
            If any examined pair turns out to have ``distance <= 1.0``.
            The offending rows are saved to disk first.
    """
    assert volume.ndim == 3
    assert volume.dtype == np.uint32

    subset_groups = subset_groups[['label', 'group', 'group_cc']]

    # A group needs at least two labels to produce an edge.
    # If no group contains at least two labels, we're done.
    if subset_groups['group'].value_counts().max() == 1:
        return None

    # We only find edges from one CC to another.
    # (We don't bother looking for edges within a pre-established CC)
    # Therefore, groups consisting of a single CC are dropped.
    cc_counts = subset_groups.groupby('group')['group_cc'].nunique()
    _kept_groups = cc_counts[cc_counts >= 2].index
    subset_groups = subset_groups.query('group in @_kept_groups')

    if len(subset_groups) == 0:
        return None

    def fill_gaps(seed_vol):
        # For the watershed below, the distance transform input need not be super-precise,
        # and vigra's watersheds() function operates MUCH faster on uint8 data.
        dt = vigra.filters.distanceTransform(seed_vol)
        dt = (255 * dt / dt.max()).astype(np.uint8)

        # The watersheds function annoyingly prints a bunch of useless messages to stdout,
        # so hide that stuff using this context manager.
        with stdout_redirected():
            ws, _max_label = vigra.analysis.watersheds(
                dt, seeds=seed_vol, method='Turbo')
        return ws

    # Collect the candidate label pairs, one group at a time.
    candidate_pairs = []
    for _group_id, group_df in subset_groups.groupby('group'):
        group_labels = pd.unique(group_df['label'])
        num_labels = len(group_labels)

        if num_labels == 1:
            continue

        if num_labels <= 5:
            # Small group: just consider all pairs.
            candidate_pairs.extend(combinations(sorted(group_labels), 2))
            continue

        # Rather than computing pairwise distances between all labels,
        # Figure out which labels are close to each other by filling the
        # gaps in the image and computing direct adjacencies.
        masked_vol = apply_mask_for_labels(volume, group_df['label'])
        filled_vol = fill_gaps(masked_vol)
        rag_df = compute_dense_rag_table(filled_vol)
        unique_pairs = rag_df[['label_a', 'label_b']].drop_duplicates()
        candidate_pairs.extend(unique_pairs.values.tolist())

    edges = pd.DataFrame(candidate_pairs,
                         columns=['label_a', 'label_b'],
                         dtype=np.uint64)

    # Attach the group and group_cc of each endpoint.
    edges = edges.merge(subset_groups,
                        'left',
                        left_on='label_a',
                        right_on='label')
    edges = edges.drop('label', axis=1)

    edges = edges.merge(subset_groups,
                        'left',
                        left_on='label_b',
                        right_on='label',
                        suffixes=['_a', '_b'])
    edges = edges.drop('label', axis=1)

    # Keep only the pairs that bridge two different CCs of the same group.
    edges = edges.query(
        '(group_a == group_b) and (group_cc_a != group_cc_b)')
    edges = edges[['label_a', 'label_b']].drop_duplicates()

    result_rows = []
    for label_a, label_b in edges.values:
        coord_a, coord_b, distance = approximate_closest_approach(
            volume, label_a, label_b, closest_scale)

        # FIXME: Why is this check necessary? Both label_a and label_b should be present...
        if np.isinf(distance):
            continue

        result_rows.append((label_a, label_b, *coord_a, *coord_b, distance))

    if not result_rows:
        return None

    result_columns = [
        'label_a', 'label_b',
        'za', 'ya', 'xa',
        'zb', 'yb', 'xb',
        'distance',
    ]
    df = pd.DataFrame(result_rows, columns=result_columns)

    # These objects don't touch, so their edge area is 0.
    # (Don't call this function for objects that do touch)
    df['edge_area'] = np.int32(0)

    touching_df = df.query('distance <= 1.0')
    if len(touching_df) > 0:
        path = 'unexpected-touching-objects-remapped.npy'
        np.save(path, touching_df)
        msg = f"I didn't expect you to call this function with objects that physically touch! See {path}"
        raise RuntimeError(msg)

    final_types = {
        **EDGE_TABLE_TYPES,
        'label_a': np.uint32,
        'label_b': np.uint32,
    }
    return df.astype(final_types)