Example #1
def sample_labels(point_source, stats_df, check_scale, threads, processes):
    from neuclease.dvid import fetch_labels_batched
    from flyemflows.volumes import DvidVolumeService

    if isinstance(point_source, DvidVolumeService):
        return fetch_labels_batched(*point_source.instance_triple,
                                    stats_df[[*'zyx']] // (2**check_scale),
                                    supervoxels=point_source.supervoxels,
                                    scale=check_scale,
                                    batch_size=1000,
                                    threads=threads,
                                    processes=processes)

    import multiprocessing as mp
    import multiprocessing.pool  # ensure the mp.pool submodule is loaded before use
    import dask
    from dask.diagnostics import ProgressBar

    if threads:
        pool = mp.pool.ThreadPool(threads)
    else:
        pool = mp.pool.Pool(processes)

    dask.config.set(scheduler='processes')
    with pool, dask.config.set(pool=pool), ProgressBar():
        centroids = stats_df[[*'zyx']] // (2**check_scale)
        labels = point_source.sample_labels(centroids, scale=check_scale)

    return labels
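
For context, here is a hedged sketch of the direct fetch_labels_batched call that the DVID branch above boils down to. The server, UUID, instance name, and coordinates are placeholders; only the argument pattern is taken from the code above.

# Hypothetical standalone sketch (placeholder server/uuid/instance and coordinates).
import pandas as pd
from neuclease.dvid import fetch_labels_batched

seg = ('emdata4:8900', 'abc123', 'segmentation')
stats_df = pd.DataFrame({'z': [1024, 2048], 'y': [512, 768], 'x': [300, 400]})

check_scale = 1
labels = fetch_labels_batched(*seg,
                              (stats_df[[*'zyx']] // (2**check_scale)).values,
                              supervoxels=False,
                              scale=check_scale,
                              batch_size=1000,
                              threads=4)
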
Example #2
def test_fetch_labels_batched(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation')

    coords = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 0, 4], [0, 0, 4]]

    labels = fetch_labels_batched(*instance_info,
                                  coords,
                                  supervoxels=False,
                                  batch_size=2,
                                  threads=2)
    assert labels.dtype == np.uint64
    assert (labels == 1).all()  # See init_labelmap_nodes() in conftest.py

    labels = fetch_labels_batched(*instance_info,
                                  coords,
                                  supervoxels=True,
                                  batch_size=2,
                                  threads=2)
    assert labels.dtype == np.uint64
    assert (labels == [1, 1, 1, 2, 2, 2]).all()  # See init_labelmap_nodes() in conftest.py

    labels = fetch_labels_batched(*instance_info,
                                  coords,
                                  supervoxels=False,
                                  batch_size=2,
                                  processes=2)
    assert labels.dtype == np.uint64
    assert (labels == 1).all()  # See init_labelmap_nodes() in conftest.py

    labels = fetch_labels_batched(*instance_info,
                                  coords,
                                  supervoxels=True,
                                  batch_size=2,
                                  processes=2)
    assert labels.dtype == np.uint64
    assert (labels == [1, 1, 1, 2, 2, 2]).all()  # See init_labelmap_nodes() in conftest.py
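
Outside of pytest, the same call pattern looks like the sketch below (placeholder server and UUID). With supervoxels=False the returned uint64 array holds mapped body IDs; with supervoxels=True it holds the raw supervoxel IDs under the same [z, y, x] coordinates.

# Hypothetical sketch; requires a reachable DVID labelmap instance.
from neuclease.dvid import fetch_labels_batched

seg = ('emdata4:8900', 'abc123', 'segmentation')
coords_zyx = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3]]

bodies = fetch_labels_batched(*seg, coords_zyx, supervoxels=False, batch_size=2, threads=2)
svs = fetch_labels_batched(*seg, coords_zyx, supervoxels=True, batch_size=2, processes=2)
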
Example #3
def main():
    RESULTS_PKL_PATH = sys.argv[1]
    if len(sys.argv) == 3:
        PROCESSES = int(sys.argv[2])
    else:
        PROCESSES = 4

    # Calculate the difference in resolution between the stored mito segmentation and neuron segmentation.
    # If they differ, it must be by a power of 2.
    mito_res = fetch_info(*MITO_SEG)["Extended"]["VoxelSize"][0]
    assert mito_res % NEIGHBORHOOD_RES == 0
    assert np.log2(mito_res / NEIGHBORHOOD_RES) == int(np.log2(mito_res / NEIGHBORHOOD_RES)), \
        "This script assumes that the mito resolution and neighborhood resolution differ by a power of 2."
    mito_res_scale_diff = int(np.log2(mito_res // NEIGHBORHOOD_RES))

    with open(RESULTS_PKL_PATH, 'rb') as f:
        mc_df = pickle.load(f)

    new_names = {col: col.replace(' ', '_') for col in mc_df.columns}
    new_names['result'] = 'proofreader_count'
    mc_df = mc_df.rename(columns=new_names)

    print("Evaluating mito count results")
    results = compute_parallel(partial(_task_results, mito_res_scale_diff),
                               iter_batches(
                                   mc_df.drop_duplicates('neighborhood_id'),
                                   1),
                               total=len(mc_df),
                               processes=PROCESSES,
                               leave_progress=True,
                               ordered=False)

    cols = [
        'neighborhood_id', 'neighborhood_origin', 'proofreader_count',
        'mito_id_count', 'mito_ids', 'mito_sizes', 'num_ccs', 'mito_cc_ids',
        'mito_cc_sizes', 'ng_link'
    ]

    df = pd.DataFrame(results, columns=cols)

    # Add columns for cell type (from neuprint)
    print("Fetching neuron cell types")
    origins_df = pd.DataFrame(df['neighborhood_origin'].tolist(),
                              columns=[*'xyz'])
    df['body'] = fetch_labels_batched(*NEURON_SEG,
                                      origins_df[[*'zyx']].values,
                                      processes=8)
    neurons_df, _ = fetch_neurons(df['body'].unique())
    neurons_df = neurons_df.rename(columns={
        'bodyId': 'body',
        'type': 'body_type',
        'instance': 'body_instance'
    })
    df = df.merge(neurons_df[['body', 'body_type', 'body_instance']],
                  'left',
                  on='body')
    df['body_type'].fillna("", inplace=True)
    df['body_instance'].fillna("", inplace=True)

    # Append roi column
    print("Determining ROIs")
    determine_point_rois(*NEURON_SEG[:2], NEUPRINT_CLIENT.primary_rois,
                         origins_df)
    df['roi'] = origins_df['roi']

    # Results only
    path = 'mito-seg-counts.pkl'
    print(f"Writing {path}")
    with open(path, 'wb') as f:
        pickle.dump(df, f)

    path = 'mito-seg-counts.tab-delimited.csv'
    print(f"Writing {path}")
    df.to_csv(path, sep='\t', header=True, index=False)

    # Full results (with task info columns)
    df = df.merge(
        mc_df.drop(columns=['neighborhood_origin', 'proofreader_count']),
        'left',
        on='neighborhood_id')

    path = 'full-results-with-mito-seg-counts.pkl'
    print(f"Writing {path}")
    with open(path, 'wb') as f:
        pickle.dump(df, f)

    path = 'full-results-with-mito-seg-counts.tab-delimited.csv'
    print(f"Writing {path}")
    df.to_csv(path, sep='\t', header=True, index=False)

    print("DONE")
Example #4
def fetch_vnc_statuses(server, uuid):
    """
    Fetch all body statuses from the body annotation key-value,
    but also include all soma bodies (regardless of status)
    and bodies that were annotated in the neck.

    Also fetch the number of synapses for each.

    Example:

        .. code-block:: ipython

            In [72]: ann = fetch_vnc_statuses('emdata5.janelia.org:8400', '73f39bea795f48e18feafb033b544ae5')
            [2021-05-06 11:32:14,581] INFO Pre-sorting 15143 coordinates by block index...
            [2021-05-06 11:32:14,587] INFO Pre-sorting 15143 coordinates by block index took 0:00:00.006287
            [2021-05-06 11:32:14,588] INFO Fetching labels from DVID...
            [2021-05-06 11:32:26,116] INFO Fetching labels from DVID took 0:00:11.527091
            [2021-05-06 11:32:31,480] WARNING There are 129 duplicate bodies in the results, due to multi-soma and/or multi-cervical bodies!

            In [73]: ann.columns
            Out[73]:
            Index(['status', 'user', 'naming user', 'instance', 'status user', 'comment',
                'json', 'soma_x', 'soma_y', 'soma_z', 'has_soma', 'neck_x', 'neck_y',
                'neck_z', 'is_cervical'],
                dtype='object')

            In [75]: ann.query('has_soma or is_cervical')[['status', 'status user', 'has_soma', 'is_cervical',
                ...:                                       'soma_x', 'soma_y', 'soma_z', 'neck_x', 'neck_y', 'neck_z']]
            Out[75]:
                                   status  status user has_soma  is_cervical  soma_x  soma_y  soma_z  neck_x  neck_y  neck_z
            body
            10000   Prelim Roughly traced                 False         True       0       0       0   24481   36044   67070
            100000            Soma Anchor                  True        False   22959   20811    7254       0       0       0
            100002            Soma Anchor                  True        False   28216   35641   61443       0       0       0
            10002   Prelim Roughly traced                 False         True       0       0       0   23217   35252   67070
            100031  Prelim Roughly traced      smithc     False         True       0       0       0   23263   38354   67070
            ...                       ...         ...       ...          ...     ...     ...     ...     ...     ...     ...
            97550         Cervical Anchor                 False         True       0       0       0   23341   38451   67070
            99837   Prelim Roughly traced       cookm     False         True       0       0       0   22665   38397   67070
            0                                              True        False   14912   31188   19347       0       0       0
            0                                              True        False   23125   16634   12777       0       0       0
            167778                                         True        False   22324    6881   16642       0       0       0

            [17188 rows x 10 columns]

    """
    soma_df = fetch_sphere_annotations(server, uuid, 'soma-bookmarks',
                                       'segmentation')
    soma_df = soma_df[['body', *'xyz']]
    soma_df['has_soma'] = True
    soma_df = soma_df.rename(columns={k: f'soma_{k}' for k in 'xyz'})

    neck_df = fetch_all_elements(server, uuid, 'neck-points', format='pandas')
    neck_df = neck_df[[*'xyz']]
    neck_df['body'] = fetch_labels_batched(server,
                                           uuid,
                                           'segmentation',
                                           neck_df[[*'zyx']].values,
                                           processes=4,
                                           batch_size=1000)
    neck_df = neck_df.rename(columns={k: f'neck_{k}' for k in 'xyz'})
    neck_df['is_cervical'] = True

    ann_df = fetch_body_annotations(server, uuid, 'segmentation_annotations')
    ann_df = ann_df.reset_index()
    ann_df = ann_df.merge(soma_df, 'outer', on='body')
    ann_df = ann_df.merge(neck_df, 'outer', on='body')

    ann_df['has_soma'].fillna(False, inplace=True)
    ann_df['is_cervical'].fillna(False, inplace=True)

    for c in ann_df.columns:
        if c[-2:] in ('_x', '_y', '_z'):
            ann_df[c] = ann_df[c].fillna(0).astype(int)

    for c in ('status', 'user', 'naming user', 'instance', 'status user',
              'comment'):
        ann_df[c].fillna("", inplace=True)

    dupes = ann_df['body'].duplicated().sum()
    if dupes:
        logger.warning(
            f"There are {dupes} duplicate bodies in the results, due to multi-soma and/or multi-cervical bodies!"
        )

    del ann_df['body ID']

    ann_df = ann_df.set_index('body')

    bodies = ann_df.index.drop_duplicates().values
    ann_df['tbars'] = fetch_counts(server,
                                   uuid,
                                   'synapses_labelsz',
                                   bodies,
                                   'PreSyn',
                                   format='pandas')
    ann_df['psds'] = fetch_counts(server,
                                  uuid,
                                  'synapses_labelsz',
                                  bodies,
                                  'PostSyn',
                                  format='pandas')
    ann_df['synapses'] = ann_df.eval('tbars + psds')

    return ann_df
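
The soma/neck bookkeeping above reduces to an outer-merge-and-fill pattern. Here is a toy, self-contained version of just that pattern, with made-up bodies and coordinates:

import pandas as pd

# Toy stand-ins for the annotation, soma, and neck tables.
ann_df = pd.DataFrame({'body': [1, 2, 3], 'status': ['Anchor', 'Orphan', 'Anchor']})
soma_df = pd.DataFrame({'body': [2], 'soma_x': [100], 'soma_y': [200], 'soma_z': [300], 'has_soma': [True]})
neck_df = pd.DataFrame({'body': [3], 'neck_x': [10], 'neck_y': [20], 'neck_z': [30], 'is_cervical': [True]})

# Outer merges keep bodies that appear in any of the three tables.
ann_df = ann_df.merge(soma_df, 'outer', on='body').merge(neck_df, 'outer', on='body')

# Fill the gaps left by the outer merges.
ann_df['has_soma'] = ann_df['has_soma'].fillna(False)
ann_df['is_cervical'] = ann_df['is_cervical'].fillna(False)
for c in ann_df.columns:
    if c[-2:] in ('_x', '_y', '_z'):
        ann_df[c] = ann_df[c].fillna(0).astype(int)
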
Example #5
def correct_centroids(config, stats_df, check_scale=0, verify=False, threads=0, processes=8):
    import numpy as np
    import pandas as pd

    from neuclease.util import tqdm_proxy, compute_parallel, Timer
    from neuclease.dvid import fetch_labels_batched
    from flyemflows.volumes import VolumeService, DvidVolumeService

    with Timer("Pre-sorting points by block", logger):
        stats_df['bz'] = stats_df['by'] = stats_df['bx'] = np.int32(0)
        stats_df[['bz', 'by', 'bx']] = stats_df[[*'zyx']] // 64
        stats_df.sort_values(['bz', 'by', 'bx'], inplace=True)
        stats_df.drop(columns=['bz', 'by', 'bx'], inplace=True)

    sparsevol_source = VolumeService.create_from_config(config['mito-sparsevol-source'])
    if config['mito-point-source'] is None:
        point_source = sparsevol_source
    else:
        point_source = VolumeService.create_from_config(config['mito-point-source'])

    if isinstance(point_source, DvidVolumeService):
        stats_df['centroid_label'] = fetch_labels_batched(*point_source.instance_triple,
                                                          stats_df[[*'zyx']] // (2**check_scale),
                                                          supervoxels=point_source.supervoxels,
                                                          scale=check_scale,
                                                          batch_size=1000,
                                                          threads=threads,
                                                          processes=processes)
    else:
        import multiprocessing as mp
        import multiprocessing.pool  # ensure the mp.pool submodule is loaded before use
        import dask
        from dask.diagnostics import ProgressBar

        if threads:
            pool = mp.pool.ThreadPool(threads)
        else:
            pool = mp.pool.Pool(processes)

        dask.config.set(scheduler='processes')
        with pool, dask.config.set(pool=pool), ProgressBar():
            centroids = stats_df[[*'zyx']] // (2**check_scale)
            stats_df['centroid_label'] = point_source.sample_labels( centroids, scale=check_scale )

    mismatched_mitos = stats_df.query('centroid_label != mito_id').index

    logger.info(f"Correcting {len(mismatched_mitos)} mismatched mito centroids")
    _find_mito = partial(find_mito, *sparsevol_source.instance_triple)
    mitos_and_coords = compute_parallel(_find_mito, mismatched_mitos, ordered=False, threads=threads, processes=processes)
    corrected_df = pd.DataFrame(mitos_and_coords, columns=['mito_id', *'zyx']).set_index('mito_id')
    stats_df.loc[corrected_df.index, [*'zyx']] = corrected_df[[*'zyx']]
    stats_df.loc[corrected_df.index, 'centroid_type'] = 'adjusted'

    # Sanity check: they should all be correct now!
    if verify:
        new_centroids = stats_df.loc[mismatched_mitos, [*'zyx']].values
        new_labels = fetch_labels_batched(*sparsevol_source.instance_triple,
                                          new_centroids,
                                          supervoxels=True,
                                          threads=threads,
                                          processes=processes)

        if (new_labels != mismatched_mitos).any():
            logger.error("Some mitos remained mismstached!")

    return stats_df
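
The block pre-sorting step at the top of correct_centroids() is worth calling out: sorting the points by their 64-px DVID block index groups nearby points together, so the batched label fetches touch fewer blocks. A toy, self-contained sketch of just that step:

import numpy as np
import pandas as pd

# Toy coordinates (z, y, x).
stats_df = pd.DataFrame({'z': [700, 10, 65], 'y': [0, 0, 0], 'x': [5, 130, 64]})

# Compute each point's 64-px block index, sort by it, then discard the helper columns.
blocks = stats_df[[*'zyx']].values // 64
stats_df['bz'], stats_df['by'], stats_df['bx'] = blocks.transpose()
stats_df = stats_df.sort_values(['bz', 'by', 'bx']).drop(columns=['bz', 'by', 'bx'])
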
Example #6
def update_localized_edges(server, uuid, seg_instance, edges_df, processes=16):
    """
    Use the coordinates in the edge table to update the label_a/label_b
    columns (by fetching the labels from dvid at the given UUID).
    
    Then, since the labels MAY have changed, re-compute the central-most
    edges (for "direct" adjacencies) and closest-approaching edges (for
    nearby "adjacencies").  This takes a few minutes.
    """
    ref_seg = (server, uuid, seg_instance)

    # Update to latest node
    with Timer(f"Updating body labels for uuid {uuid[:4]}", logger):
        edges_df['label_a'] = fetch_labels_batched(*ref_seg,
                                                   edges_df[['za', 'ya',
                                                             'xa']].values,
                                                   processes=processes)
        edges_df['label_b'] = fetch_labels_batched(*ref_seg,
                                                   edges_df[['zb', 'yb',
                                                             'xb']].values,
                                                   processes=processes)
    swap_df_cols(edges_df, None, edges_df.eval('label_a > label_b'),
                 ('a', 'b'))

    # Discard already-merged edges
    edges_df = edges_df.query('label_a != label_b')

    # Now that we've relabeled some points, there may be duplicate edges in the table.
    # De-duplicate them by choosing the best ones.
    # (This takes a while)

    with Timer("Re-selecting central-most direct edges", logger):
        direct_edges_df = edges_df.loc[edges_df['distance'] == 1.0].copy()

        # If we really want to choose the *best* edge, we should do a proper centroid calculation.
        # But that takes a long time, and there aren't likely to be all that many cases where it makes a difference.
        #direct_edges_df = select_central_edges(direct_edges_df)

        # Instead, just drop duplicates in arbitrary order.
        direct_edges_df.drop_duplicates(['group', 'label_a', 'label_b'],
                                        inplace=True)

    with Timer("Re-selecting closest-approaching nearby edges", logger):
        # This doesn't take as long, partly because there are
        # usually fewer nearby edges than direct edges.
        nearby_edges_df = edges_df.loc[edges_df['distance'] >= 1.0]
        nearby_edges_df = select_closest_edges(nearby_edges_df)

    # FIXME: Should we also update the group_cc?
    # If any splits have occurred, I guess the group_cc is no longer a single component.
    # But when we analyze it for 'fragments', the results will be correct.
    # append_group_ccs(...)

    # Combine (direct first)
    edges_df = pd.concat((direct_edges_df, nearby_edges_df))

    # After updating, it's technically possible that a nearby
    # edge now has the same labels as a direct edge.
    # Drop duplicates so we keep only the direct edge.
    edges_df = edges_df.drop_duplicates(['group', 'label_a', 'label_b'],
                                        keep='first')

    return edges_df
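
For readers without neuclease's swap_df_cols() at hand, here is a plain-pandas sketch of the same canonicalization on a toy table: force label_a <= label_b on each row, then drop duplicate (group, label_a, label_b) edges. This is a substitute illustration, not the project's helper; the real swap_df_cols call presumably swaps every column pair sharing the 'a'/'b' suffixes (coordinates included), not just the labels.

import pandas as pd

edges_df = pd.DataFrame({'group':    [1, 1],
                         'label_a':  [20, 10],
                         'label_b':  [10, 20],
                         'distance': [1.0, 1.0]})

# Swap the a/b labels on rows where label_a > label_b, so every edge has one orientation.
flip = edges_df['label_a'] > edges_df['label_b']
edges_df.loc[flip, ['label_a', 'label_b']] = edges_df.loc[flip, ['label_b', 'label_a']].values

# The two rows are now identical edges; keep one.
edges_df = edges_df.drop_duplicates(['group', 'label_a', 'label_b'])
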
Example #7
def extract_assignment_fragments(server,
                                 uuid,
                                 syn_instance,
                                 edge_table,
                                 boi_rois=None,
                                 min_tbars_in_roi=2,
                                 min_psds_in_roi=10,
                                 fragment_rois=None,
                                 processes=16,
                                 *,
                                 request_processes=None,
                                 synapse_table=None,
                                 boi_table=None,
                                 seg_instance=None,
                                 update_edges=False):
    """
    Using the edge table emitted from the FindAdjacencies workflow,
    emit a table of "fragments" (sets of bodies) which connect two
    "bodies of interest" (BOIs, described below).
    
    The emitted fragments can be used to generate
    focused assignments and/or merge review assignments.
    
    Essentially, we construct an adjacency graph from the edge table,
    and then search for any paths that can connect two BOIs:
    
        BOI - b - b - b - ... - b - BOI
    
    The path from one BOI to another is called a "fragment".
    
    If the path contains only the two BOIs and no other bodies, then 
    the two BOIs are directly adjacent, with no intervening bodies:
    
        BOI - BOI

    In those cases, it is possible to create a "focused proofreading" task
    from the body pair.  In all other cases, you can create a "merge review"
    task for the fragment.  See the following functions:
    
        generate_mergereview_assignments_from_df()
        neuclease.focused.assignments.generate_focused_assignments()
    
    Exactly which bodies are considered "bodies of interest" is determined
    by the presence of T-bars and PSDs within the specified ROIs (boi_rois,
     if provided), thresholded by the given criteria.  If no boi_rois are
    specified, then all T-bars and PSDs in the given bodies are counted.
    
    Additionally, the final fragment set can be filtered to exclude
    fragments that travel outside of a given list of ROIs.

    See the explanation of the edge_table parameter for an explanation of
    the FindAdjacencies output.

    Tip:
        To visualize the adjacency graph for a subset of rows in either
        the input edge table or the output tables, see display_graph(), below.

    Args:
        server, uuid, syn_instance:
            DVID synapse (annotation) instance
    
        edge_table:
            A DataFrame as explained below, or a filepath to a
            .npy file that can be loaded into one.
        
            The FindAdjacencies workflow finds the sites at which
            preselected bodies are adjacent to one another.
            
            The user provides a list of body "groups" which are analyzed independently.
            In addition to "direct" adjacencies between touching bodies (distance=1.0),
            the workflow can be configured to also search for near-adjacencies,
            in which bodies come close to each other without physically touching (distance > 1.0).
            Each adjacency is referred to as an edge, and the results are emitted as
            an "edge table" with the following columns:
        
                [label_a, label_b, za, ya, xa, zb, yb, xb, distance, group, group_cc]
        
            with the following definitions:
            
                label_a, label_b:
                    Body IDs (assuming the FindAdjacencies workflow was executed on a body input source)
    
                 za, ya, xa, zb, yb, xb:
                    Coordinates that fall within the body on each side of the edge.
    
                distance:
                    The euclidean distance between the two coordinates.
                    For "direct" adjacencies, distance is always 1.0.
                    For "nearby" adjacencies, distance is always > 1.0.
    
                group:
                    The original body groups the user selected for adjacency analysis.
                    The exact group ID values are arbitrary (not necessarily consecutive),
                    and were provided by the user that ran the FindAdjacencies workflow.
                    Note that one body may exist in more than one group.
                
                group_cc:
                    An independent subgraph is constructed for each group (from the group's 'edges').
                    A connected components analysis is then performed on each subgraph,
                    and a unique ID is assigned to each CC.
    
                    Although the connected components are computed on each group in isolation,
                    the assigned group_cc values are unique across all of the groups in the table.
    
                    The group_cc values are otherwise arbitrary. (That is, they aren't necessarily
                    consecutive, or related to the other CC IDs in their group.)
                    For example, group 123 might be found to contain two connected components,
                    labeled group_cc=53412 and group_cc=82344

        boi_rois:
            Optional.  List of ROI instance names.
            If provided, only T-bars and PSDs that fall within the given list of ROIs will be
            counted when determining which bodies are considered BOIs.  Otherwise, all synapses
            in the volume are considered.
        
        min_tbars_in_roi, min_psds_in_roi:
            The criteria for determining what counts as a BOI.
            As indicated in the argument names, only synapse points WITHIN the ROI(s)
            will be counted towards these requirements. 
        
        fragment_rois:
            Optional.  Any fragments that extend outside of the given list of ROIs
            will be discarded from the result, even though they contained BOIs
            that matched the BOI criteria.
        
        processes:
            Various steps in this function can be parallelized.
            This specifies how much parallelism to use.

        request_processes:
            By default, requests to DVID are also made in parallel,
            with parallelism set by the 'processes' argument.
            But if you would like to reduce (or increase) the processes used when
            fetching from dvid (e.g. to reduce burden on the dvid server),
            specify a separate parallelism level via request_processes.
        
        synapse_table:
            Optional.  If you already fetched the synapses from DVID
            (via fetch_synapses_in_batches() or fetch_roi_synapses()),
            you can provide it here (or a file path to a stored .npy file),
            in which case this function will not need to fetch the synapses from DVID.
            (Not needed at all if you're providing your own boi_table.)
        
        boi_table:
            Optional.
            Normally this function computes the boi_table directly from the synapse points,
            but if you already have it handy, you can pass it in here.
            It will still be filtered according to min_tbars_in_roi and min_psds_in_roi,
            so the BOIs used will be accurate as long as the table contains all of the BOIs
            you might be interested in, or more.
        
        seg_instance:
            By default, the BOIs in this table will be extracted from the segmentation
            instance that is associated with the given synapse annotation instance.
            But if you would like to use a different segmentation instance, provide it here.
        
        update_edges:
            If True, re-fetch the body label under each coordinate in the table,
            and re-select the "best" (most central) edge for body pairs with multiple edges.
            This takes a while to run. It's only necessary if your edge table is likely to
            be out-of-date with respect to the given UUID.
    
    Returns:
        (focused_fragments_df, mr_fragments_df, mr_endpoint_df, boi_table), where:
        
        focused_fragments_df:
            A DataFrame consisting of rows suitable for "focused proofreading",
            i.e. every row (edge) is a single-edge fragment.
        
        mr_fragments_df:
            A DataFrame consisting of edges that belong to fragments with more
            than one edge, meaning they are not suitable for "focused proofreading"
            and are instead suitable for "merge review".
            The fragment IDs are the (group_cc, cc_task) columns.
            Edges with the same fragment ID should be grouped together into the
            same merge review task.
        
        mr_endpoint_df:
            A DataFrame containing only the 'endpoint' bodies of the MR fragments,
            one pair per row.
            The columns in which the bodies are found (a vs b) will not be the same
            as they appear in mr_fragments_df, but the group_cc and cc_task columns
            will correspond to the appropriate rows in the full DataFrame.
            This 'endpoint' dataframe does not contain enough information to create
            merge review tasks (it lacks information about the intermediate bodies
            that connect the two endpoints) but it is more convenient to analyze
            when computing certain statistics to describe the types of merge review
            tasks that were found.
        
        boi_table:
            A DataFrame containing the BOIs (based on the criteria given above)
            that were used to select fragments, indexed by body, with
            columns ['PreSyn', 'PostSyn'].
            (See ``neuclease.dvid.annotation.determine_bodies_of_interest()``.)
            Note that the returned fragments do not necessarily cover
            every BOI in this list.
    """
    if isinstance(boi_rois, str):
        boi_rois = [boi_rois]

    if isinstance(fragment_rois, str):
        fragment_rois = [fragment_rois]

    request_processes = request_processes or processes

    if seg_instance is None:
        syn_info = fetch_instance_info(server, uuid, syn_instance)
        seg_instance = syn_info["Base"]["Syncs"][0]

    ref_seg = (server, uuid, seg_instance)

    # Load edges (if necessary), pre-filter, normalize
    edges_df = load_edges(edge_table)

    if update_edges:
        # Update the table for consistency with the given UUID,
        # and re-post-process it to find the correct "central" and "closest" edges,
        # (in case some groups were merged).
        edges_df = update_localized_edges(*ref_seg, edges_df,
                                          request_processes)

    # Technically, you could provide 0 for either of these,
    # but that's probably a mistake on your part.
    # (Unless you specifically appended some 0-synapse bodies to your
    # synapse table, and expect those to be considered BOIs.)
    assert min_tbars_in_roi >= 1 and min_psds_in_roi >= 1

    if boi_table is not None:
        boi_table = boi_table.query(
            'PreSyn >= @min_tbars_in_roi or PostSyn >= @min_psds_in_roi')
    else:
        assert not boi_rois, \
            "You can't specify boi_rois if you're providing your own boi_table"

        # Fetch synapse labels and determine the set of BOIs
        boi_table = determine_bodies_of_interest(server,
                                                 uuid,
                                                 syn_instance,
                                                 boi_rois,
                                                 min_tbars_in_roi,
                                                 min_psds_in_roi,
                                                 request_processes,
                                                 synapse_table=synapse_table)

    assert boi_table.index.name == 'body'
    assert set(boi_table.columns) == {'PreSyn', 'PostSyn'}

    bois = set(boi_table.index)

    # We're trying to connect BOIs to each other.
    # Therefore, we're not interested in groups of bodies
    # that don't contain at least 2 BOIs.
    edges_df = filter_groups_for_min_boi_count(edges_df, bois, ['group_cc'], 2)

    # Find the paths ('fragments', a.k.a. 'tasks') that connect BOIs within each group.
    fragment_edges_df = compute_fragment_edges(edges_df, bois, processes)

    if fragment_rois is not None:
        # Drop fragments that extend outside of the specified ROIs.
        fragment_edges_df = filter_fragments_for_roi(server, uuid,
                                                     fragment_rois,
                                                     fragment_edges_df)

    # If a group itself contained multiple CCs, it's possible that the BOIs were separated
    # into separate tasks, meaning that each individual task no longer satisfies the 2-BOI requirement.
    # Refilter.
    fragment_edges_df = filter_groups_for_min_boi_count(
        fragment_edges_df, bois, ['group_cc', 'cc_task'], 2)

    # Fetch the supervoxel IDs for each edge.
    with Timer("Sampling supervoxel IDs", logger):
        points_a = fragment_edges_df[['za', 'ya', 'xa']].values
        points_b = fragment_edges_df[['zb', 'yb', 'xb']].values
        fragment_edges_df['sv_a'] = fetch_labels_batched(
            *ref_seg, points_a, True, processes=request_processes)
        fragment_edges_df['sv_b'] = fetch_labels_batched(
            *ref_seg, points_b, True, processes=request_processes)

    # Divide into 'focused' and 'merge review' fragments,
    # i.e. single-edge fragments and multi-edge fragments
    focused_fragments_df = (
        fragment_edges_df.groupby(['group_cc', 'cc_task']).filter(
            lambda task_df: len(task_df) == 1)  # exactly one edge
        .copy())

    mr_fragments_df = (
        fragment_edges_df.groupby(['group_cc', 'cc_task']).filter(
            lambda task_df: len(task_df) > 1)  # multiple edges
        .copy())

    num_focused_fragments = len(focused_fragments_df)
    num_mr_fragments = len(
        mr_fragments_df.drop_duplicates(['group_cc', 'cc_task']))
    fragment_bodies = pd.unique(fragment_edges_df[['label_a', 'label_b'
                                                   ]].values.reshape(-1))
    num_fragment_bois = len(
        set(fragment_bodies).intersection(set(boi_table.index)))

    logger.info(f"Emitting {num_focused_fragments} focused fragments and "
                f"{num_mr_fragments} merge-review fragments, "
                f"covering {num_fragment_bois} BOIs out of {len(boi_table)}.")

    with Timer("Merging synapse counts onto results", logger):
        focused_fragments_df = focused_fragments_df.merge(boi_table,
                                                          'left',
                                                          left_on='label_a',
                                                          right_index=True)
        focused_fragments_df = focused_fragments_df.merge(boi_table,
                                                          'left',
                                                          left_on='label_b',
                                                          right_index=True,
                                                          suffixes=('_a',
                                                                    '_b'))

        mr_fragments_df = mr_fragments_df.merge(boi_table,
                                                'left',
                                                left_on='label_a',
                                                right_index=True)
        mr_fragments_df = mr_fragments_df.merge(boi_table,
                                                'left',
                                                left_on='label_b',
                                                right_index=True,
                                                suffixes=('_a', '_b'))

    with Timer("Constructing merge-review 'endpoint' dataframe", logger):
        try:
            mr_endpoint_df = construct_mr_endpoint_df(mr_fragments_df, bois)
        except BaseException as ex:
            logger.error(str(ex))
            logger.error(
                "Failed to construct the merge-review 'endpoint' dataframe.  Returning None."
            )
            mr_endpoint_df = None

    return focused_fragments_df, mr_fragments_df, mr_endpoint_df, boi_table
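
A hedged usage sketch with placeholder connection details and a hypothetical edge-table path; the four returned tables are the ones described in the docstring above.

# Placeholders throughout; requires a live DVID server and a FindAdjacencies edge table.
focused_df, mr_df, mr_endpoint_df, boi_table = extract_assignment_fragments(
    'emdata4:8900', 'abc123', 'synapses',
    'findadjacencies-output.npy',     # hypothetical path to the FindAdjacencies edge table
    boi_rois=['EB'],
    min_tbars_in_roi=2,
    min_psds_in_roi=10,
    processes=16,
    update_edges=True)

bois = set(boi_table.index)           # bodies of interest used for fragment selection
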
Example #8
#!/usr/bin/env python3

# Use python 3.6 or greater
# python map_csv_to_segmentation.py emdata4:8900 6f2cb segmentation clean-synapses-6f2cb-sub3-roi-added.csv synIDs_synapses-6f2cb-rois-bodyIDs.csv

import os
import sys
import logging
import requests
import numpy as np
import pandas as pd
from neuclease.dvid import fetch_labels_batched

dvid_server = sys.argv[1]
dvid_uuid = sys.argv[2]
segmentation_inst = sys.argv[3]
infile = sys.argv[4]
outfile = sys.argv[5]

master_seg = (dvid_server, dvid_uuid, segmentation_inst)

df = pd.read_csv(infile)

labels = fetch_labels_batched(*master_seg,
                              df[['z', 'y', 'x']].values,
                              threads=32)

df['body'] = labels

df.to_csv(outfile, index=False)
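
The same lookup wrapped as a small function, for callers that want it from other code rather than the command line. This is a hedged sketch: the default connection tuple simply echoes the usage comment at the top of the script, and the batch size is an arbitrary choice.

import pandas as pd
from neuclease.dvid import fetch_labels_batched

def append_bodies(in_csv, out_csv, seg=('emdata4:8900', '6f2cb', 'segmentation')):
    """Read a CSV with x/y/z columns, append a 'body' column from DVID, and write it out."""
    df = pd.read_csv(in_csv)
    df['body'] = fetch_labels_batched(*seg,
                                      df[['z', 'y', 'x']].values,
                                      batch_size=10_000,
                                      threads=32)
    df.to_csv(out_csv, index=False)
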
Example #9
def fetch_roi_synapses(server,
                       uuid,
                       synapses_instance,
                       rois,
                       fetch_labels=False,
                       return_partners=False,
                       processes=16):
    """
    Fetch the coordinates and (optionally) body labels for 
    all synapses that fall within the given ROIs.
    
    Args:
    
        server:
            DVID server, e.g. 'emdata4:8900'
        
        uuid:
            DVID uuid, e.g. 'abc9'
        
        synapses_instance:
            DVID synapses instance name, e.g. 'synapses'
        
        rois:
            A single DVID ROI instance name or a list of them, e.g. 'EB' or ['EB', 'FB']
        
        fetch_labels:
            If True, also fetch the supervoxel and body label underneath each synapse,
            returned in columns 'sv' and 'body'.
            
        return_partners:
            If True, also return the partners table.

        processes:
            How many parallel processes to use when fetching synapses and supervoxel labels.
    
    Returns:
        pandas DataFrame with columns:
        ``['z', 'y', 'x', 'kind', 'conf']`` and ``['sv', 'body']`` (if ``fetch_labels=True``)
        If return_partners is True, also return the partners table.

    Example:
        df = fetch_roi_synapses('emdata4:8900', '3c281', 'synapses', ['PB(L5)', 'PB(L7)'], True, processes=8)
    """
    # Late imports to avoid circular imports in dvid/__init__
    from neuclease.dvid import fetch_combined_roi_volume, determine_point_rois, fetch_labels_batched, fetch_mapping, fetch_mappings

    assert rois, "No rois provided, result would be empty. Is that what you meant?"

    if isinstance(rois, str):
        rois = [rois]

    # Determine name of the segmentation instance that's
    # associated with the given synapses instance.
    syn_info = fetch_instance_info(server, uuid, synapses_instance)
    seg_instance = syn_info["Base"]["Syncs"][0]

    logger.info(f"Fetching mask for ROIs: {rois}")
    # Fetch the ROI as a low-res array (scale 5, i.e. 32-px resolution)
    roi_vol_s5, roi_box_s5, overlapping_pairs = fetch_combined_roi_volume(
        server, uuid, rois)

    if len(overlapping_pairs) > 0:
        logger.warning(
            "Some ROIs overlapped and are thus not completely represented in the output:\n"
            f"{overlapping_pairs}")

    # Convert to full-res box
    roi_box = (2**5) * roi_box_s5

    # fetch_synapses_in_batches() requires a box that is 64-px-aligned
    roi_box = round_box(roi_box, 64, 'out')

    logger.info("Fetching synapse points")
    # points_df is a DataFrame with columns for [z,y,x]
    points_df, partners_df = fetch_synapses_in_batches(server,
                                                       uuid,
                                                       synapses_instance,
                                                       roi_box,
                                                       processes=processes)

    # Append a 'roi_name' column to points_df
    logger.info("Labeling ROI for each point")
    determine_point_rois(server, uuid, rois, points_df, roi_vol_s5, roi_box_s5)

    logger.info("Discarding points that don't overlap with the roi")
    rois = {*rois}
    points_df = points_df.query('roi in @rois').copy()

    columns = ['z', 'y', 'x', 'kind', 'conf', 'roi_label', 'roi']

    if fetch_labels:
        logger.info("Fetching supervoxel under each point")
        svs = fetch_labels_batched(server,
                                   uuid,
                                   seg_instance,
                                   points_df[['z', 'y', 'x']].values,
                                   supervoxels=True,
                                   processes=processes)

        with Timer("Mapping supervoxels to bodies", logger):
            # Arbitrary heuristic for whether to do the
            # body-lookups on DVID or on the client.
            if len(svs) < 100_000:
                bodies = fetch_mapping(server, uuid, seg_instance, svs)
            else:
                mapping = fetch_mappings(server, uuid, seg_instance)
                mapper = LabelMapper(mapping.index.values, mapping.values)
                bodies = mapper.apply(svs, True)

        points_df['sv'] = svs
        points_df['body'] = bodies
        columns += ['body', 'sv']

    if return_partners:
        # Filter
        #partners_df = partners_df.query('post_id in @points_df.index and pre_id in @points_df.index').copy()

        # Faster filter (via merge)
        partners_df = partners_df.merge(points_df[[]],
                                        'inner',
                                        left_on='pre_id',
                                        right_index=True)
        partners_df = partners_df.merge(points_df[[]],
                                        'inner',
                                        left_on='post_id',
                                        right_index=True)
        return points_df[columns], partners_df
    else:
        return points_df[columns]
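
The merge-based partner filtering at the end is a neat pure-pandas trick that can be shown in isolation: an inner merge against points_df[[]] (no columns, just the index) keeps only partner rows whose pre_id and post_id both refer to retained points. Toy data below:

import pandas as pd

points_df = pd.DataFrame({'z': [1, 2, 3]}, index=[10, 11, 12])   # index = retained point IDs
partners_df = pd.DataFrame({'pre_id':  [10, 10, 99],
                            'post_id': [11, 99, 12]})

partners_df = partners_df.merge(points_df[[]], 'inner', left_on='pre_id', right_index=True)
partners_df = partners_df.merge(points_df[[]], 'inner', left_on='post_id', right_index=True)
# Only the (pre_id=10, post_id=11) row survives.
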
Example #10
def determine_bodies_of_interest(server,
                                 uuid,
                                 synapses_instance,
                                 rois=None,
                                 min_tbars=2,
                                 min_psds=10,
                                 processes=16,
                                 *,
                                 synapse_table=None,
                                 seg_instance=None):
    """
    Determine which bodies fit the given criteria
    for minimum synapse counts WITHIN the given ROIs.
    
    Note that the min_tbars and min_psds criteria are OR'd together.
    A body need only match at least one of the criteria to be considered "of interest".

    This function is just a convenience wrapper around calling
    fetch_roi_synapses(), fetch_labels_batched(), and body_synapse_counts().

    Note:
        If your synapse table is already loaded and already has a 'body' column,
        and you aren't providing any rois to filter with, then this function is
        merely equivalent to calling body_synapse_counts() and filtering it
        for tbar/psd requirements.

    Args:
        server:
            dvid server
        
        uuid:
            dvid uuid
        
        synapses_instance:
            synapses annotation instance name, e.g. 'synapses'
            If you are providing a pre-loaded synapse_table and overriding seg_instance,
            you can set synapses_instance=None.
        
        rois:
            A list of ROI instance names.  If provided, ONLY synapses
            within these ROIs will be counted when determining bodies of interest.
            If not provided, all synapses in the volume will be counted.

        min_tbars:
            All bodies with at least this many t-bars (PreSyn annotations) will be "of interest".
    
        min_psds:
            All bodies with at least this many PSDs (PostSyn annotations) will be "of interest".
        
        processes:
            How many parallel processes to use when fetching synapses and body labels.
        
        synapse_table:
            If you have a pre-loaded synapse table (or a path to one stored as .npy or .csv),
            you may provide it here, in which case the synapse points won't be fetched from DVID.
            Furthermore, if the table already contains a 'body' column, then it is presumed to be
            accurate and body labels will not be fetched from DVID.
    
        seg_instance:
            If you want to override the segmentation instance name to use
            (rather than inspecting the synapse instance syncs), provide it here.
    
    Returns:
        pandas DataFrame, as returned by body_synapse_counts().
        That is, DataFrame with columns: ['PreSyn', 'PostSyn'], indexed by 'body',
        where only bodies of interest are included in the table.
    """
    from neuclease.dvid import fetch_labels_batched, fetch_combined_roi_volume, determine_point_rois

    # Download synapses if necessary
    if synapse_table is None:
        with Timer("Fetching synapse points", logger):
            if rois is None:
                # Fetch all synapses in the volume
                points_df, _partners_df = fetch_synapses_in_batches(
                    server, uuid, synapses_instance, processes=processes)
            else:
                # Fetch only the synapses within the given ROIs
                points_df = fetch_roi_synapses(server,
                                               uuid,
                                               synapses_instance,
                                               rois,
                                               False,
                                               processes=processes)
    else:
        # User provided a pre-loaded synapse table (or a path to one)
        if isinstance(synapse_table, str):
            with Timer(f"Loading synapse table {synapse_table}", logger):
                _, ext = os.path.splitext(synapse_table)
                assert ext in ('.csv', '.npy')
                if ext == '.csv':
                    synapse_table = load_synapses_csv(synapse_table)
                elif ext == '.npy':
                    synapse_table = load_synapses_npy(synapse_table)

        assert isinstance(synapse_table, pd.DataFrame)
        assert not ({'z', 'y', 'x', 'kind'} - {*synapse_table.columns}), \
            "Synapse table does not contain all expected columns"

        points_df = synapse_table
        if rois:
            roi_vol_s5, roi_box_s5, _ = fetch_combined_roi_volume(
                server, uuid, rois)
            determine_point_rois(server, uuid, rois, points_df, roi_vol_s5,
                                 roi_box_s5)
            points_df = points_df.query('roi_label != 0')

    if 'body' in points_df:
        logger.info("Using user-provided body labels")
    else:
        with Timer("Fetching synapse body labels", logger):
            if seg_instance is None:
                syn_info = fetch_instance_info(server, uuid, synapses_instance)
                seg_instance = syn_info["Base"]["Syncs"][0]

            points_df['body'] = fetch_labels_batched(server,
                                                     uuid,
                                                     seg_instance,
                                                     points_df[['z', 'y',
                                                                'x']].values,
                                                     processes=processes)

    with Timer("Aggregating body-wise synapse counts"):
        body_synapses_df = body_synapse_counts(points_df)

    body_synapses_df = body_synapses_df.query(
        'PreSyn >= @min_tbars or PostSyn >= @min_psds')
    return body_synapses_df
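
Finally, a hedged usage sketch with placeholder connection details. Recall that the thresholds are OR'd: a body qualifies if it has enough T-bars or enough PSDs within the given ROIs.

# Placeholder server/uuid/instance; requires a live DVID server.
boi_table = determine_bodies_of_interest('emdata4:8900', 'abc123', 'synapses',
                                         rois=['EB', 'FB'],
                                         min_tbars=2,
                                         min_psds=10,
                                         processes=16)

bois = set(boi_table.index)   # body IDs meeting either criterion
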