Python contingency_tableの例、neuclease.util.contingency_table Pythonの例

コード例 #1

0

ファイルを表示

ファイル: contingencytable.py プロジェクト: janelia-flyem/flyemflows

        def _contingency_table(box):
            """
            Build the contingency table for one box, plus per-label sizes.

            Both services are asked for the subvolume covering ``box``.
            The resulting table is sorted, flattened to columns, and then
            optionally filtered by ``left_subset_labels`` and
            ``min_overlap`` (both taken from the enclosing scope).

            Returns:
                ``(table, left_sizes, right_sizes)``. The size tables hold
                only labels that survive filtering, but their voxel totals
                are computed from the unfiltered table.
            """
            table = contingency_table(left_service.get_subvolume(box),
                                      right_service.get_subvolume(box))
            table = table.sort_index().reset_index()

            # Totals must come from the complete table, so take them
            # before any rows are dropped below.
            voxels_by_left = table.groupby('left')['voxel_count'].sum()
            voxels_by_right = table.groupby('right')['voxel_count'].sum()

            if len(left_subset_labels) > 0:
                # A row survives when either:
                #   1. its left label is one of the requested subset labels
                #   2. its right label also overlaps one of the left labels
                #      selected by rule 1.
                # keep_left / keep_right are referenced by name (via '@')
                # from within the query strings below.
                keep_left = voxels_by_left.index.intersection(
                    left_subset_labels)  # noqa
                subset_rows = table.query('left in @keep_left')
                keep_right = subset_rows['right'].unique()  # noqa
                table = table.query(
                    'left in @keep_left or right in @keep_right')

            if min_overlap > 1:
                table = table.query('voxel_count >= @min_overlap')

            # Restrict the size tables to the labels still in the table.
            left_sizes = voxels_by_left.loc[table['left'].unique()]
            right_sizes = voxels_by_right.loc[table['right'].unique()]

            return table, left_sizes.reset_index(), right_sizes.reset_index()

コード例 #2

0

ファイルを表示

def test_contingencytable(setup_hdf5_inputs):
    """
    Run the workflow once and check that the table it saves as
    ``contingency_table.npy`` equals the table computed directly
    from the two input volumes.
    """
    template_dir, _config, left_vol, right_vol = setup_hdf5_inputs

    # Reference result, computed in-process from the fixture volumes.
    expected_table = contingency_table(left_vol, right_vol)
    expected_table = expected_table.reset_index()

    execution_dir, _workflow = launch_flow(template_dir, 1)

    saved_records = np.load(f"{execution_dir}/contingency_table.npy")
    output_table = pd.DataFrame(saved_records)

    cells_match = (output_table == expected_table)
    assert cells_match.all().all()

コード例 #3

0

ファイルを表示

ファイル: test_segmentation.py プロジェクト: stuarteberg/pydvid

def test_contingency_table_simple():
    """
    Check contingency_table() on a tiny hand-written pair of label arrays:
    the result must be a Series whose index lists the (left, right) pairs
    and whose values are the overlap counts.
    """
    left = np.array([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4]])
    right = np.array([[0, 0, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 0]])

    expected_pairs = [(0, 0), (0, 5), (1, 5),
                      (1, 6), (2, 6), (2, 7),
                      (3, 7), (3, 8),
                      (4, 0)]
    expected_counts = [2, 1, 2, 1, 2, 1, 2, 1, 1]

    table = contingency_table(left, right)
    assert isinstance(table, pd.Series)

    observed_pairs = np.array(table.index.values.tolist())
    assert (observed_pairs == expected_pairs).all()
    assert (table == expected_counts).all()

コード例 #4

0

ファイルを表示

ファイル: test_segmentation.py プロジェクト: stuarteberg/pydvid

def test_contingency_table_random():
    """
    Compare contingency_table() against a brute-force overlap count
    for every possible (left, right) label pair in two random arrays.
    """
    a = np.random.randint(5, 10, size=(20, 20), dtype=np.uint32)
    b = np.random.randint(10, 15, size=(20, 20), dtype=np.uint32)
    table = contingency_table(a, b)

    # The frame does not depend on the pair being checked, so build it once.
    frame = pd.DataFrame(table)

    # val_a / val_b are referenced by name (via '@') in the query string.
    for val_a, val_b in product(range(5, 10), range(10, 15)):
        expected_overlap = np.logical_and(a == val_a, b == val_b).sum()
        rows = frame.query('left == @val_a and right == @val_b')

        if expected_overlap != 0:
            # Overlapping pairs appear exactly once, with the right count.
            assert len(rows) == 1
            assert rows['voxel_count'].iloc[0] == expected_overlap
        else:
            # Pairs that never overlap must not be listed at all.
            assert len(rows) == 0

コード例 #5

0

ファイルを表示

ファイル: test_contingencytable.py プロジェクト: janelia-flyem/flyemflows

def test_contingencytable(setup_hdf5_inputs):
    """
    Run the workflow once and compare its three pickled outputs
    (contingency table, left sizes, right sizes) against values
    computed directly from the input volumes.

    TODO: Test the filtering options (left-subset-labels, min-overlap-size)
    """
    template_dir, _config, left_vol, right_vol = setup_hdf5_inputs

    # Reference results, computed in-process from the fixture volumes.
    expected_table = contingency_table(left_vol, right_vol)
    expected_table = expected_table.sort_index().reset_index()
    voxels = expected_table.groupby('left')['voxel_count']
    expected_left_sizes = voxels.sum()
    voxels = expected_table.groupby('right')['voxel_count']
    expected_right_sizes = voxels.sum()

    execution_dir, _workflow = launch_flow(template_dir, 1)

    # These pickles were just written by the workflow under test.
    with open(f"{execution_dir}/contingency_table.pkl", "rb") as table_file:
        output_table = pickle.load(table_file)
    with open(f"{execution_dir}/left_sizes.pkl", "rb") as left_file:
        left_sizes = pickle.load(left_file)
    with open(f"{execution_dir}/right_sizes.pkl", "rb") as right_file:
        right_sizes = pickle.load(right_file)

    table_matches = (output_table == expected_table)
    left_matches = (left_sizes == expected_left_sizes)
    right_matches = (right_sizes == expected_right_sizes)

    assert table_matches.all().all()
    assert left_matches.all().all()
    assert right_matches.all().all()

コード例 #6

0

ファイルを表示

ファイル: evaluate_mito_count_results.py プロジェクト: janelia-flyem/neuclease

def mitos_in_neighborhood(mito_roi_source, neighborhood_origin_xyz,
                          neighborhood_id, mito_res_scale_diff):
    """
    Determine how many non-trivial mito objects overlap with the given "neighborhood object",
    and return a table of their IDs and sizes.

    1. Download the neighborhood mask for the given neighborhood_id.
    2. Erode the neighborhood mask by 1 px (see note in the comment above).
    3. Fetch the mito segmentation for the voxels within the neighborhood.
    4. Fetch (from dvid) the sizes of each mito object.
    5. Filter out the mitos that are smaller than the minimum size that is
       actually used in our published mito analyses.
    6. Just for additional info, determine how many connected components
       are formed by the mito objects.
    7. Return the mito IDs, sizes, and CC info as a DataFrame.

    Args:
        mito_roi_source:
            String of the form ``<protocol>://<server>/<uuid>/<instance>``
            naming the neighborhood segmentation. Only the last two
            '://'-separated pieces are used.
        neighborhood_origin_xyz:
            Center of the neighborhood in XYZ order. It is reversed to ZYX
            here and expanded by the module-level RADIUS in every direction.
        neighborhood_id:
            Label in the neighborhood segmentation that defines the mask.
        mito_res_scale_diff:
            Subtracted from ANALYSIS_SCALE when fetching the mito
            segmentation. The fetched sizes are also multiplied by
            ``(2**mito_res_scale_diff)**3``.

    Returns:
        DataFrame indexed by 'mito', holding the size of each retained mito
        object left-merged with the contingency-table columns 'cc' and
        'cc_size'.
    """
    # The neighborhood segmentation source
    protocol, url = mito_roi_source.split('://')[-2:]
    server, uuid, instance = url.split('/')
    server = f'{protocol}://{server}'

    # The box is built in ZYX order; the '1 +' on the upper corner
    # presumably makes it an exclusive bound -- TODO confirm.
    origin_zyx = np.array(neighborhood_origin_xyz[::-1])
    box = [origin_zyx - RADIUS, 1 + origin_zyx + RADIUS]

    # Align box to the analysis scale before scaling it.
    box = round_box(box, (2**ANALYSIS_SCALE))

    # Scale box
    # (in-place floor division: assumes round_box() returned an integer
    # array rather than a plain list -- TODO confirm)
    box //= (2**ANALYSIS_SCALE)

    neighborhood_seg = fetch_labelmap_voxels(server,
                                             uuid,
                                             instance,
                                             box,
                                             scale=ANALYSIS_SCALE)
    neighborhood_mask = (neighborhood_seg == neighborhood_id)

    # This is equivalent to a 1-px erosion
    # See note above for why we do this.
    # (XOR-ing away the inner edge voxels leaves only the interior.)
    neighborhood_mask ^= binary_edge_mask(neighborhood_mask, 'inner')

    # The same box is used at a different scale here; the shape assertion
    # below guards against the two volumes disagreeing.
    mito_seg = fetch_labelmap_voxels(*MITO_SEG,
                                     box,
                                     supervoxels=True,
                                     scale=ANALYSIS_SCALE -
                                     mito_res_scale_diff)
    assert neighborhood_mask.shape == mito_seg.shape
    # Zero out every mito voxel that lies outside the eroded neighborhood.
    mito_seg = np.where(neighborhood_mask, mito_seg, 0)

    # The mito segmentation includes little scraps and slivers
    # that were filtered out of the "real" mito set.
    # Filter those scraps out of our results here.
    mito_ids = set(pd.unique(mito_seg.ravel())) - {0}
    mito_sizes = fetch_sizes(*MITO_SEG, [*mito_ids], supervoxels=True)
    mito_sizes = mito_sizes.rename_axis('mito')
    # Rescale voxel counts by the cube of the resolution difference.
    mito_sizes *= (2**mito_res_scale_diff)**3

    # This is our main result: mito IDs (and their sizes)
    mito_sizes = mito_sizes.loc[mito_sizes >= MIN_MITO_SIZE]

    # Just for extra info, group the mitos we found into connected components.
    # Both volumes are cropped to the bounding box of the retained mitos
    # before labeling.
    mito_mask = mask_for_labels(mito_seg, mito_sizes.index)
    mito_box = compute_nonzero_box(mito_mask)
    mito_mask = extract_subvol(mito_mask, mito_box)
    mito_seg = extract_subvol(mito_seg, mito_box)
    mito_cc = label(mito_mask, connectivity=1)
    # Pair each mito ID with the component(s) it falls in, and the
    # number of voxels in each such overlap.
    ct = contingency_table(mito_seg, mito_cc).reset_index()
    ct = ct.rename(columns={
        'left': 'mito',
        'right': 'cc',
        'voxel_count': 'cc_size'
    })
    ct = ct.set_index('mito')
    # Left merge on the index, so only mitos that passed the size
    # filter appear in the result.
    mito_sizes = pd.DataFrame(mito_sizes).merge(ct,
                                                'left',
                                                left_index=True,
                                                right_index=True)
    return mito_sizes

コード例 #7

0

ファイルを表示

ファイル: contingencytable.py プロジェクト: davidackerman/flyemflows

 def _contingency_table(box):
     """
     Fetch the subvolume for ``box`` from both services and return
     their contingency table with its index reset.
     """
     return contingency_table(
         left_service.get_subvolume(box),
         right_service.get_subvolume(box),
     ).reset_index()

コード例 #8

0

ファイルを表示

def select_hulls_for_mito_bodies(mito_body_ct,
                                 mito_bodies_mask,
                                 mito_binary,
                                 body_seg,
                                 hull_masks,
                                 seed_bodies,
                                 box,
                                 scale,
                                 viewer=None,
                                 res0=8,
                                 progress=False):
    """
    Assign a "hull body" to each mito body, based on which hull
    overlaps it the most.

    For every hull, count the voxels of each mito body that fall inside
    the hull mask. The counts are then summed per (mito_body, hull_body)
    pair, and each mito body is assigned the hull body with the largest
    overlap, with ties broken by the larger total hull size.

    Args:
        mito_body_ct:
            DataFrame with one row per mito body (its index is matched
            against mito body IDs). A 'hull_body' column is added to it.
        mito_bodies_mask:
            Boolean volume marking the voxels that belong to mito bodies.
        mito_binary:
            Boolean volume marking mito voxels.
        body_seg:
            Label volume of body IDs.
        hull_masks:
            Dict of ``{hull_cc: (mask_box, mask)}``.
            NOTE: the mask arrays are written to in place by this function.
        seed_bodies:
            Indexed by hull_cc; gives the (non-mito) body each hull
            was generated from.
        box, scale:
            Only forwarded to update_seg_layer() when a viewer is given.
        viewer:
            If truthy, debug layers are pushed to it.
        res0:
            Not used anywhere in this function body.
        progress:
            If True, show a progress bar while looping over the hulls.

    Returns:
        mito_body_ct with an added 'hull_body' column (0 where no hull
        was matched). On the normal path all columns are cast to
        float32, except 'hull_body', which is cast to uint64. On the
        early "no matches" path the input frame is modified in place
        and returned without the dtype cast.
    """

    # Body IDs restricted to voxels that are both on a mito body and mito.
    mito_bodies_mito_seg = np.where(mito_bodies_mask & mito_binary, body_seg,
                                    0)
    # Body IDs of everything that is NOT a mito body.
    nonmito_body_seg = np.where(mito_bodies_mask, 0, body_seg)

    hull_cc_overlap_stats = []
    for hull_cc, (mask_box, mask) in tqdm_proxy(hull_masks.items(),
                                                disable=not progress):
        mbms = mito_bodies_mito_seg[box_to_slicing(*mask_box)]
        masked_hull_cc_bodies = np.where(mask, mbms, 0)
        # Faster to check for any non-zero values at all before trying to count them.
        # This early check saves a lot of time in practice.
        if not masked_hull_cc_bodies.any():
            continue

        # This hull was generated from a particular seed body (non-mito body).
        # If it accidentally overlaps with any other non-mito bodies,
        # then delete those voxels from the hull.
        # If that causes the hull to become split apart into multiple connected components,
        # then keep only the component(s) which overlap the seed body.
        seed_body = seed_bodies[hull_cc]
        nmbs = nonmito_body_seg[box_to_slicing(*mask_box)]
        other_bodies = set(pd.unique(nmbs[mask])) - {0, seed_body}
        if other_bodies:
            # Keep only the voxels on mito bodies or on the
            # particular non-mito body for this hull (the "seed body").
            mbm = mito_bodies_mask[box_to_slicing(*mask_box)]
            # In-place update: this alters the array stored in hull_masks.
            mask[:] &= (mbm | (nmbs == seed_body))
            # presumably taggedView() returns a view that shares memory with
            # the original mask, so later writes still reach hull_masks
            # -- TODO confirm
            mask = vigra.taggedView(mask, 'zyx')
            mask_cc = vigra.analysis.labelMultiArrayWithBackground(
                mask.view(np.uint8))
            if mask_cc.max() > 1:
                # More than one component remains: keep only those that
                # touch the seed body.
                mask_ct = contingency_table(mask_cc, nmbs).reset_index()
                keep_ccs = mask_ct['left'].loc[(mask_ct['left'] != 0) &
                                               (mask_ct['right'] == seed_body)]
                mask[:] = mask_for_labels(mask_cc, keep_ccs)

        # NOTE(review): masked_hull_cc_bodies was computed BEFORE the mask
        # was trimmed above, so 'overlap' reflects the untrimmed hull while
        # 'hull_size' reflects the trimmed one -- confirm this is intended.
        mito_bodies, counts = np.unique(masked_hull_cc_bodies,
                                        return_counts=True)
        overlaps = pd.DataFrame({
            'mito_body': mito_bodies,
            'overlap': counts,
            'hull_cc': hull_cc,
            'hull_size': mask.sum(),
            'hull_body': seed_body
        })
        hull_cc_overlap_stats.append(overlaps)

    if len(hull_cc_overlap_stats) == 0:
        logger.warning("Could not find any matches for any mito bodies!")
        # This assigns into the caller's DataFrame directly.
        mito_body_ct['hull_body'] = np.uint64(0)
        return mito_body_ct

    hull_cc_overlap_stats = pd.concat(hull_cc_overlap_stats, ignore_index=True)
    # Drop the background rows (label 0) produced by np.unique().
    hull_cc_overlap_stats = hull_cc_overlap_stats.query(
        'mito_body != 0').copy()

    # Aggregate the stats for each body and the hull bodies it overlaps with,
    # Select the hull_body with the most overlap, or in the case of ties, the hull body that is largest overall.
    # (Ties are probably more common in the event that two hulls completely encompass a small mito body.)
    hull_body_overlap_stats = hull_cc_overlap_stats.groupby(
        ['mito_body', 'hull_body'])[['overlap', 'hull_size']].sum()
    hull_body_overlap_stats = hull_body_overlap_stats.sort_values(
        ['mito_body', 'overlap', 'hull_size'], ascending=False)
    hull_body_overlap_stats = hull_body_overlap_stats.reset_index()

    # After the descending sort, the first row for each mito_body is its
    # best hull, so drop_duplicates() (which keeps the first) selects it.
    mito_hull_selections = (hull_body_overlap_stats.drop_duplicates(
        'mito_body').set_index('mito_body')['hull_body'])
    mito_body_ct = mito_body_ct.merge(mito_hull_selections,
                                      'left',
                                      left_index=True,
                                      right_index=True)
    # Mito bodies without any hull overlap get hull_body 0.
    mito_body_ct['hull_body'] = mito_body_ct['hull_body'].fillna(0)

    dtypes = {col: np.float32 for col in mito_body_ct.columns}
    dtypes['hull_body'] = np.uint64
    mito_body_ct = mito_body_ct.astype(dtypes)

    if viewer:
        # Debug visualization only; none of this affects the return value.
        assert mito_hull_selections.index.dtype == mito_hull_selections.values.dtype == np.uint64
        mito_hull_mapper = LabelMapper(mito_hull_selections.index.values,
                                       mito_hull_selections.values)
        remapped_body_seg = mito_hull_mapper.apply(body_seg, True)
        remapped_body_seg = apply_mask_for_labels(remapped_body_seg,
                                                  mito_hull_selections.values)
        update_seg_layer(viewer, 'altered-bodies', remapped_body_seg, scale,
                         box)

        # Show the final hull masks (after erasure of non-target bodies)
        assert sorted(hull_masks.keys()) == [*range(1, 1 + len(hull_masks))]
        # Sorted ascending by size, so larger hulls are painted last and
        # overwrite smaller ones where they overlap.
        hull_cc_overlap_stats = hull_cc_overlap_stats.sort_values('hull_size')
        hull_seg = np.zeros_like(remapped_body_seg)
        for row in hull_cc_overlap_stats.itertuples():
            mask_box, mask = hull_masks[row.hull_cc]
            view = hull_seg[box_to_slicing(*mask_box)]
            view[:] = np.where(mask, row.hull_body, view)
        update_seg_layer(viewer, 'final-hull-seg', hull_seg, scale, box)

    return mito_body_ct

コード例 #9

0

ファイルを表示

def identify_mito_bodies(body_seg,
                         mito_binary,
                         box,
                         scale,
                         halo,
                         body_seg_dvid_src=None,
                         viewer=None,
                         res0=8,
                         resource_mgr_client=None):
    """
    Find the bodies in body_seg that consist mostly of mito voxels,
    judged both by volume and by edge (surface) voxels.

    A body is selected when it satisfies all three conditions in the
    final query: its size is below MAX_MITO_FRAGMENT_VOL, its mito
    volume fraction is at least MITO_VOL_FRAC, and its mito edge
    fraction is at least MITO_EDGE_FRAC. Bodies found only in the halo
    region are discarded beforehand.

    Args:
        body_seg:
            Label volume of body IDs.
        mito_binary:
            Volume whose values 0 / 1 mark non-mito / mito voxels.
        box:
            Bounding box of body_seg; used to locate the central
            (non-halo) region and for the viewer layers.
        scale:
            Voxel counts are multiplied by ``(2**scale)**3`` for volumes
            and by ``(2**scale)**2`` for edge counts.
        halo:
            Width trimmed from every side of the box to obtain the
            central region.
        body_seg_dvid_src:
            If given, it is unpacked as the leading arguments to
            fetch_sizes() to obtain global body sizes. If None, the
            local size is used instead.
        viewer:
            Passed through to update_mask_layer().
        res0:
            Passed to the second update_mask_layer() call only.
        resource_mgr_client:
            If given, fetch_sizes() is called inside its access_context().

    Returns:
        ``(mito_bodies, mito_bodies_mask, filtered_ct)``, or
        ``(None, None, None)`` if the volume lacks either mito or
        non-mito voxels, or if no body passes the filters.
    """
    # Identify segments that are mostly mito
    ct = contingency_table(body_seg,
                           mito_binary).reset_index().rename(columns={
                               'left': 'body',
                               'right': 'is_mito'
                           })
    # One row per body, with separate 'non_mito' and 'mito' voxel counts.
    ct = ct.pivot(index='body', columns='is_mito',
                  values='voxel_count').fillna(0).rename(columns={
                      0: 'non_mito',
                      1: 'mito'
                  })
    if 'mito' not in ct or 'non_mito' not in ct:
        # Nothing to do if there aren't any mito voxels
        # (this also returns early when there are no non-mito voxels)
        return None, None, None
    # Rescale voxel counts by the cube of the downsampling factor.
    ct[['mito', 'non_mito']] *= ((2**scale)**3)

    ct['body_size_local'] = ct.eval('mito+non_mito')
    ct['mito_frac_local'] = ct.eval('mito/body_size_local')
    ct = ct.sort_values('mito_frac_local', ascending=False)

    # Also compute the halo vs. non-halo sizes of every body.
    # central_box is expressed relative to body_seg's own origin.
    central_box = (box - box[0]) + [[halo, halo, halo], [-halo, -halo, -halo]]
    central_body_seg = body_seg[box_to_slicing(*central_box)]
    central_sizes = (pd.Series(central_body_seg.ravel(
        'K')).value_counts().rename('body_size_central').rename_axis('body'))

    central_mask = np.ones(central_box[1] - central_box[0], bool)
    # NOTE(review): res0 is not passed here, unlike the
    # 'mito-bodies-mask' call further down -- confirm this is intended.
    update_mask_layer(viewer, 'central-box', central_mask, scale,
                      central_box + box[0])

    ct = ct.merge(central_sizes, 'left', on='body').fillna(0)
    # NOTE(review): body_size_local was rescaled by (2**scale)**3 above,
    # but body_size_central is a raw voxel count -- confirm the units agree.
    ct['halo_size'] = ct.eval('body_size_local - body_size_central')
    ct = ct.query('body != 0')

    # Immediately drop bodies that reside only in the halo
    ct = ct.query('body_size_central > 0').copy()

    # For the bodies that MIGHT pass the mito threshold (based on their local size)
    # fetch their global size, if a dvid source was provided.
    # If not, we'll just use the local size, which is less accurate but
    # faster since we've already got it.
    if body_seg_dvid_src is None:
        ct['body_size'] = ct['body_size_local']
    else:
        local_mito_bodies = ct.query(
            'mito_frac_local >= @MITO_EDGE_FRAC').index

        if resource_mgr_client is None:
            body_sizes = fetch_sizes(*body_seg_dvid_src,
                                     local_mito_bodies).rename('body_size')
        else:
            with resource_mgr_client.access_context(body_seg_dvid_src[0], True,
                                                    1, 1):
                body_sizes = fetch_sizes(*body_seg_dvid_src,
                                         local_mito_bodies).rename('body_size')

        # Left merge: bodies that were not fetched end up with a missing
        # body_size, so they presumably fail the size comparison in the
        # final query -- TODO confirm.
        ct = ct.merge(body_sizes, 'left', on='body')

    # Due to downsampling effects, bodies can be larger at scale-1 than at scale-0, especially for tiny volumes.
    ct['mito_frac_global_vol'] = np.minimum(ct.eval('mito/body_size'), 1.0)

    # Calculate the proportion of mito edge pixels
    # (body labels are kept only on edge voxels; all others become 0)
    body_edges = np.where(edge_mask(body_seg, 'both'), body_seg, np.uint64(0))
    edge_ct = contingency_table(
        body_edges, mito_binary).reset_index().rename(columns={
            'left': 'body',
            'right': 'is_mito'
        })
    edge_ct = edge_ct.pivot(index='body',
                            columns='is_mito',
                            values='voxel_count').fillna(0).rename(columns={
                                0: 'non_mito',
                                1: 'mito'
                            })

    # Surface area scales with square of resolution, not cube
    edge_ct[['mito', 'non_mito']] *= ((2**scale)**2)

    edge_ct['body_size_local'] = edge_ct.eval('mito+non_mito')
    edge_ct['mito_frac_local'] = edge_ct.eval('mito/body_size_local')
    edge_ct = edge_ct.sort_values('mito_frac_local', ascending=False)
    edge_ct = edge_ct.query('body != 0')

    # Columns present in both tables get the '_vol' / '_edge' suffixes,
    # which is where 'mito_frac_local_edge' in the query below comes from.
    full_ct = ct.merge(edge_ct, 'inner', on='body', suffixes=['_vol', '_edge'])
    q = ("body_size < @MAX_MITO_FRAGMENT_VOL"
         " and mito_frac_global_vol >= @MITO_VOL_FRAC"
         " and mito_frac_local_edge >= @MITO_EDGE_FRAC")
    filtered_ct = full_ct.query(q)

    mito_bodies = filtered_ct.index
    mito_bodies_mask = mask_for_labels(body_seg, mito_bodies)
    # The layer is updated before the emptiness check below, so it is
    # also updated when no body passed the filters.
    update_mask_layer(viewer, 'mito-bodies-mask', mito_bodies_mask, scale, box,
                      res0)

    if len(filtered_ct) == 0:
        return None, None, None

    return mito_bodies, mito_bodies_mask, filtered_ct.copy()