def _process_block(seg_src, mito_cc_src, mito_class_src, body_id, scale, block_coord):
    import numpy as np
    import pandas as pd

    from neuclease.util import ndindex_array
    from neuclease.dvid import fetch_labelmap_voxels

    block_box = np.array((block_coord, block_coord+64))
    block_seg = fetch_labelmap_voxels(*seg_src, block_box, scale)
    mito_labels = fetch_labelmap_voxels(*mito_cc_src, block_box, scale)
    mito_classes = fetch_labelmap_voxels(*mito_class_src, block_box, scale)

    body_mask = (block_seg == body_id)
    mito_mask = (mito_labels != 0) & (mito_classes != EMPTY_MITO)
    mask = (body_mask & mito_mask)
    if not mask.any():
        # No mito voxels of interest in this block
        return None

    unraveled_df = pd.DataFrame({'mito_id': mito_labels.reshape(-1),
                                 'mito_class': mito_classes.reshape(-1)})

    # pivot_table() doesn't work without a data column to aggregate
    unraveled_df['voxels'] = 1

    # Add coordinate columns to compute centroids.
    # (Assign each axis directly so the columns keep the int32 coordinate dtype.)
    raster_coords = ndindex_array(64, 64, 64, dtype=np.int32)
    raster_coords += block_coord
    unraveled_df['z'] = raster_coords[:, 0]
    unraveled_df['y'] = raster_coords[:, 1]
    unraveled_df['x'] = raster_coords[:, 2]

    # Drop non-body voxels and non-mito-voxels
    unraveled_df = unraveled_df.iloc[mask.reshape(-1)]

    block_table = (unraveled_df[['mito_id', 'mito_class', 'voxels']]
                    .pivot_table(index='mito_id',  # noqa
                                 columns='mito_class',
                                 values='voxels',
                                 aggfunc='sum',
                                 fill_value=0))

    block_table.columns = [f"class_{c}" for c in block_table.columns]
    block_table['total_size'] = block_table.sum(axis=1).astype(np.int32)

    # Compute block centroid for each mito
    # FIXME: I think precision is lost here because I'm using the wrong dtype.
    mito_points = unraveled_df.groupby('mito_id')[['z', 'y', 'x']].mean().astype(np.float32)
    block_table = block_table.merge(mito_points, 'left', left_index=True, right_index=True)
    return block_table
def test_masksegmentation_resume(setup_dvid_segmentation_input,
                                 disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5, _input_segmentation_name, output_segmentation_name = setup_dvid_segmentation_input

    brick_shape = config["input"]["geometry"]["message-block-shape"]
    batch_size = config["masksegmentation"]["batch-size"]

    # This is the total bricks in the volume, not necessarily
    # the total *processed* bricks, but it's close enough.
    total_bricks = np.ceil(np.prod(np.array(volume.shape) /
                                   brick_shape)).astype(int)
    total_batches = int(np.ceil(total_bricks / batch_size))

    # Skip over half of the original bricks.
    config["masksegmentation"]["resume-at"] = {
        "scale": 0,
        "batch-index": 1 + (total_batches // 2)
    }

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    masked_vol = extract_subvol(volume.copy(), input_box_zyx)
    masked_vol[roi_mask] = 0

    output_box_xyz = np.array(
        final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    #np.save('/tmp/original.npy', volume)
    #np.save('/tmp/output.npy', output_vol)

    # First part was untouched
    assert (output_vol[:128] == volume[:128]).all()

    # Last part was touched somewhere
    assert (output_vol[128:] != volume[128:]).any()
def process_point(seg_src, seg_dst, point, radius, src_body, dst_body):
    """
    Generate a neighborhood segment around a particular point.
    Upload the voxels for the segment and the corresponding mesh.
    """
    r = radius
    src_box = np.asarray((point - r, point + r + 1))
    src_vol = fetch_labelmap_voxels(*seg_src, src_box)

    if src_body is None:
        src_body = src_vol[r, r, r]

    if dst_body is None:
        # Generate a neighborhood segment ID from the coordinate.
        # Divide by 4 to ensure the coordinates fit within 2^53.
        # (The segment ID will not retain the full resolution of
        # the coordinate, but that's usually OK for our purposes.)
        dst_body = encode_point_to_uint64(point // 4, 17)
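        # For example (assuming the 17 is the bit width reserved per coordinate):
        # a point such as (33000, 21000, 17000) becomes (8250, 5250, 4250) after
        # the //4, each value fits in 17 bits, and 3 * 17 = 51 bits keeps the
        # encoded ID below 2**53.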

    mask = (src_vol == src_body) & sphere_mask(r)

    dst_box = round_box(src_box, 64, 'out')
    dst_vol = fetch_labelmap_voxels(*seg_dst, dst_box)

    dst_view = dst_vol[b2s(*(src_box - dst_box[0]))]
    dst_view[mask] = dst_body

    post_labelmap_voxels(*seg_dst, dst_box[0], dst_vol, downres=True)

    # Mesh needs to be written in nm, hence 8x
    mesh = Mesh.from_binary_vol(mask, 8 * src_box, smoothing_rounds=2)
    mesh.simplify(0.05, in_memory=True)
    post_key(*seg_dst[:2], f'{seg_dst[2]}_meshes', f'{dst_body}.ngmesh',
             mesh.serialize(fmt='ngmesh'))

    centroid = src_box[0] + mask_centroid(mask, True)
    top_z = mask.sum(axis=(1, 2)).nonzero()[0][0]
    top_coords = np.transpose(mask[top_z].nonzero())
    top_point = src_box[0] + (top_z, *top_coords[len(top_coords) // 2])

    return point, centroid, top_point, src_body, dst_body, mask.sum()
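# A minimal usage sketch for process_point(), with hypothetical server, uuid,
# and instance names. seg_src/seg_dst are (server, uuid, instance) tuples, and
# passing None for src_body/dst_body lets the function choose them itself.
def _example_process_point():
    import numpy as np
    seg_src = ('http://emdata:8900', 'abc123', 'segmentation')
    seg_dst = ('http://emdata:8900', 'abc123', 'neighborhood-masks')
    point_zyx = np.array([21312, 19807, 15040])
    results = process_point(seg_src, seg_dst, point_zyx, radius=200,
                            src_body=None, dst_body=None)
    point, centroid, top_point, src_body, dst_body, voxel_count = results
    print(f"Wrote neighborhood segment {dst_body} with {voxel_count} voxels")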
def execute(bbox: Bbox):
    print('bounding box: ', bbox)
    box = [tuple(bbox.minpt), tuple(bbox.maxpt)]
    subvol = dvid.fetch_labelmap_voxels(server,
                                        uuid,
                                        instance,
                                        box,
                                        scale=0,
                                        supervoxels=supervoxels)
    # print('cutout volume: \n', subvol)
    chunk = Chunk(subvol, voxel_offset=bbox.minpt)
    return [chunk]
def test_fetch_labelmap_voxels(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation')

    # Test raw supervoxels
    voxels = fetch_labelmap_voxels(*instance_info,
                                   [(0, 0, 0), supervoxel_vol.shape],
                                   supervoxels=True)
    assert (voxels == supervoxel_vol).all()

    # Test mapped bodies
    voxels = fetch_labelmap_voxels(*instance_info,
                                   [(0, 0, 0), supervoxel_vol.shape],
                                   supervoxels=False)
    assert (voxels == 1).all()

    # Test uninflated mode
    voxels_proxy = fetch_labelmap_voxels(*instance_info,
                                         [(0, 0, 0), supervoxel_vol.shape],
                                         supervoxels=True,
                                         format='lazy-array')
    assert len(voxels_proxy.content) < supervoxel_vol.nbytes, \
        "Fetched data was apparently not compressed"
    assert (voxels_proxy() == supervoxel_vol).all()
def test_post_labelmap_voxels(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo,
                                     'segmentation-scratch')

    # Write some random data and read it back.
    vol = np.random.randint(10, size=(128, 128, 128), dtype=np.uint64)
    offset = (64, 64, 64)

    post_labelmap_voxels(dvid_server, dvid_repo, 'segmentation-scratch',
                         offset, vol, 0)
    complete_voxels = fetch_labelmap_voxels(*instance_info, [(0, 0, 0),
                                                             (256, 256, 256)],
                                            supervoxels=True)

    assert (complete_voxels[64:192, 64:192, 64:192] == vol).all()
def test_post_raw(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = (dvid_server, dvid_repo, 'segmentation-scratch')

    # Write some random data and read it back.
    data = np.random.randint(10, size=(64, 64, 64 * 3), dtype=np.uint64)
    offset_zyx = (0, 64, 0)

    post_raw(*instance_info, offset_zyx, data)
    complete_voxels = fetch_labelmap_voxels(*instance_info, [(0, 0, 0),
                                                             (128, 128, 192)],
                                            supervoxels=True)

    assert (complete_voxels[0:64, 64:128, 0:64] == data[:, :, 0:64]).all()
    assert (complete_voxels[0:64, 64:128, 64:128] == data[:, :, 64:128]).all()
    assert (complete_voxels[0:64, 64:128, 128:192] == data[:, :,
                                                           128:192]).all()
def test_post_labelmap_blocks(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo,
                                     'segmentation-scratch')

    # Write some random data and read it back.
    blocks = np.random.randint(10, size=(3, 64, 64, 64), dtype=np.uint64)
    corners_zyx = [[0, 0, 0], [0, 64, 0], [0, 0, 64]]

    post_labelmap_blocks(dvid_server, dvid_repo, 'segmentation-scratch',
                         corners_zyx, blocks, 0)
    complete_voxels = fetch_labelmap_voxels(*instance_info, [(0, 0, 0),
                                                             (128, 128, 128)],
                                            supervoxels=True)

    assert (complete_voxels[0:64, 0:64, 0:64] == blocks[0]).all()
    assert (complete_voxels[0:64, 64:128, 0:64] == blocks[1]).all()
    assert (complete_voxels[0:64, 0:64, 64:128] == blocks[2]).all()
def test_labelmapcopy_partial(setup_dvid_segmentation_input,
                              disable_auto_retry):
    template_dir, config, expected_vols, partial_vol, dvid_address, repo_uuid, _output_segmentation_name, partial_output_segmentation_name = setup_dvid_segmentation_input

    config = copy.deepcopy(config)
    config["output"]["dvid"][
        "segmentation-name"] = partial_output_segmentation_name

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    output_box_xyz = np.array(
        final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]

    max_scale = final_config['labelmapcopy']['max-scale']
    for scale in range(1 + max_scale):
        scaled_box = output_box_zyx // (2**scale)
        output_vol = fetch_labelmap_voxels(dvid_address,
                                           repo_uuid,
                                           partial_output_segmentation_name,
                                           scaled_box,
                                           scale=scale)
        assert (output_vol == expected_vols[scale]).all(), \
            f"Written vol does not match expected for scale {scale}"

    # Any labels NOT in the partial vol had to be written.
    written_labels = pd.unique(
        expected_vols[0][expected_vols[0] != partial_vol])
    assert len(written_labels) > 0, \
        "This test data was chosen poorly -- there's no difference between the partial and full labels!"

    svs = pd.read_csv(f'{execution_dir}/recorded-labels.csv')['sv']
    assert set(svs) == set(written_labels)
def test_labelmapcopy(setup_dvid_segmentation_input, disable_auto_retry):
    template_dir, _config, expected_vols, partial_vol, dvid_address, repo_uuid, output_segmentation_name, _partial_output_segmentation_name = setup_dvid_segmentation_input

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    output_box_xyz = np.array(
        final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]

    max_scale = final_config['labelmapcopy']['max-scale']
    for scale in range(1 + max_scale):
        scaled_box = output_box_zyx // (2**scale)
        output_vol = fetch_labelmap_voxels(dvid_address,
                                           repo_uuid,
                                           output_segmentation_name,
                                           scaled_box,
                                           scale=scale)
        assert (output_vol == expected_vols[scale]).all(), \
            f"Written vol does not match expected for scale {scale}"

    svs = pd.read_csv(f'{execution_dir}/recorded-labels.csv')['sv']
    assert set(svs) == set(np.unique(expected_vols[0].reshape(-1)))
def download(bounding_box_zyx, output_path):
    shape = bounding_box_zyx[1] - bounding_box_zyx[0]

    with h5py.File(output_path, 'w') as f:
        gray_dset = f.create_dataset('grayscale',
                                     shape=shape,
                                     dtype=np.uint8,
                                     chunks=True)
        seg_dset = f.create_dataset('segmentation',
                                    shape=shape,
                                    dtype=np.uint64,
                                    chunks=True,
                                    compression='gzip')

        print("Downloading grayscale...")
        block_shape = (256, 256, 256)
        block_boxes = boxes_from_grid(bounding_box_zyx,
                                      block_shape,
                                      clipped=True)
        for block_box in tqdm(block_boxes):
            relative_box = block_box - bounding_box_zyx[0]
            block_gray = fetch_raw(*GRAYSCALE, block_box)
            overwrite_subvol(gray_dset, relative_box, block_gray)

        print("")
        print("Downloading segmentation...")
        block_boxes = boxes_from_grid(bounding_box_zyx,
                                      block_shape,
                                      clipped=True)
        for block_box in tqdm(block_boxes):
            relative_box = block_box - bounding_box_zyx[0]
            block_seg = fetch_labelmap_voxels(*SEGMENTATION, block_box)
            overwrite_subvol(seg_dset, relative_box, block_seg)

    print("")
    print("DONE")
def test_connectedcomponents_dvid_subset_labels(setup_connectedcomponents_dvid,
                                                disable_auto_retry):
    template_dir, _config, input_vol, dvid_address, repo_uuid, output_segmentation_name = setup_connectedcomponents_dvid

    execution_dir, workflow = launch_flow(template_dir, 1)
    _final_config = workflow.config

    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       [(0, 0, 0), input_vol.shape],
                                       supervoxels=True)
    assert output_vol.shape == input_vol.shape

    final_labels = pd.unique(output_vol.reshape(-1))

    # Never change label 0
    assert 0 in final_labels
    assert ((input_vol == 0) == (output_vol == 0)).all()

    # Single-component objects
    assert 2 in final_labels
    assert 4 in final_labels

    assert ((input_vol == 2) == (output_vol == 2)).all()
    assert ((input_vol == 4) == (output_vol == 4)).all()

    # Omitted from analysis; left unsplit
    assert 3 in final_labels
    assert ((input_vol == 3) == (output_vol == 3)).all()

    # Split objects
    assert 1 not in final_labels

    for corner in map(np.array, ndrange((0, 0, 0), (1, 8, 8), (1, 4, 4))):
        box = (corner, corner + 4)
        input_block = extract_subvol(input_vol, box)
        output_block = extract_subvol(output_vol, box)

        for orig_label in [1]:
            if orig_label in input_block:
                positions = (input_block == orig_label)

                assert (input_block[positions] != output_block[positions]).all(), \
                    f"original label {orig_label} was not split!"

                assert (output_block[positions] > input_vol.max()).all(), \
                    f"original label {orig_label} was not split!"

                # This block-based assertion is not generally true for all possible input,
                # but our test data blocks are set up so that this is a valid check.
                # (No block happens to contain more than one final CC that came from the same original label.)
                assert (output_block[positions] == output_block[positions][0]).all(), \
                    f"original label {orig_label} ended up over-segmentated"

    #
    # Check CSV output
    #
    df = pd.read_csv(f'{execution_dir}/relabeled-objects.csv')

    assert len(df.query('orig_label == 0')) == 0
    assert len(df.query('orig_label == 1')) == 3
    assert len(df.query('orig_label == 2')) == 0
    assert len(df.query('orig_label == 3')) == 0  # 3 was not touched.
    assert len(df.query('orig_label == 4')) == 0

    assert not df['final_label'].duplicated().any()
    assert (df['final_label'] > input_vol.max()).all()

    #
    # Check block stats
    #
    with h5py.File(f'{execution_dir}/block-statistics.h5', 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    for row in stats_df.itertuples():
        corner = np.array((row.z, row.y, row.x))
        block_box = np.array([corner, corner + 64])
        block = extract_subvol(output_vol, block_box)
        assert (block == row.segment_id).sum() == row.count
def test_copysegmentation_from_hdf5_to_dvid_multiscale(
        setup_hdf5_segmentation_input, disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, _ = setup_hdf5_segmentation_input

    # Modify the config from above to compute pyramid scales,
    # and choose a bounding box that is aligned with the bricks even at scale 2
    # (just for easier testing).
    box_zyx = [[0, 0, 0], [256, 256, 256]]
    config["input"]["geometry"]["bounding-box"] = box_zyx
    config["copysegmentation"]["pyramid-depth"] = 2

    # Change the segmentation name so it doesn't conflict with earlier tests
    output_segmentation_name = 'segmentation-output-from-hdf5-multiscale'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    _execution_dir, _workflow = launch_flow(template_dir, 1)

    box_zyx = np.array(box_zyx)

    scale_0_vol = volume[box_to_slicing(*box_zyx)]
    scale_1_vol = downsample_labels(scale_0_vol, 2, True)
    scale_2_vol = downsample_labels(scale_1_vol, 2, True)

    output_0_vol = fetch_labelmap_voxels(dvid_address,
                                         repo_uuid,
                                         output_segmentation_name,
                                         box_zyx // 1,
                                         scale=0)
    output_1_vol = fetch_labelmap_voxels(dvid_address,
                                         repo_uuid,
                                         output_segmentation_name,
                                         box_zyx // 2,
                                         scale=1)
    output_2_vol = fetch_labelmap_voxels(dvid_address,
                                         repo_uuid,
                                         output_segmentation_name,
                                         box_zyx // 4,
                                         scale=2)

    #     np.save('/tmp/expected-0.npy', scale_0_vol)
    #     np.save('/tmp/expected-1.npy', scale_1_vol)
    #     np.save('/tmp/expected-2.npy', scale_2_vol)
    #
    #     np.save('/tmp/output-0.npy', output_0_vol)
    #     np.save('/tmp/output-1.npy', output_1_vol)
    #     np.save('/tmp/output-2.npy', output_2_vol)
    #
    #     np.save('/tmp/diff-0.npy', (output_0_vol != scale_0_vol))
    #     np.save('/tmp/diff-1.npy', (output_1_vol != scale_1_vol))
    #     np.save('/tmp/diff-2.npy', (output_2_vol != scale_2_vol))

    assert (output_0_vol == scale_0_vol).all(), \
        "Scale 0: Written vol does not match expected"
    assert (output_1_vol == scale_1_vol).all(), \
        "Scale 1: Written vol does not match expected"
    assert (output_2_vol == scale_2_vol).all(), \
        "Scale 2: Written vol does not match expected"
def mitos_in_neighborhood(mito_roi_source, neighborhood_origin_xyz,
                          neighborhood_id, mito_res_scale_diff):
    """
    Determine how many non-trivial mito objects overlap with the given "neighborhood object",
    and return a table of their IDs and sizes.

    1. Download the neighborhood mask for the given neighborhood_id.
    2. Erode the neighborhood mask by 1 px (see note in the comment above).
    3. Fetch the mito segmentation for the voxels within the neighborhood.
    4. Fetch (from dvid) the sizes of each mito object.
    5. Filter out the mitos that are smaller than the minimum size that is
       actually used in our published mito analyses.
    6. Just for additional info, determine how many connected components
       are formed by the mito objects.
    7. Return the mito IDs, sizes, and CC info as a DataFrame.
    """
    # The neighborhood segmentation source
    protocol, url = mito_roi_source.split('://')[-2:]
    server, uuid, instance = url.split('/')
    server = f'{protocol}://{server}'

    origin_zyx = np.array(neighborhood_origin_xyz[::-1])
    box = [origin_zyx - RADIUS, 1 + origin_zyx + RADIUS]

    # Align box to the analysis scale before scaling it.
    box = round_box(box, (2**ANALYSIS_SCALE))

    # Scale box
    box //= (2**ANALYSIS_SCALE)

    neighborhood_seg = fetch_labelmap_voxels(server,
                                             uuid,
                                             instance,
                                             box,
                                             scale=ANALYSIS_SCALE)
    neighborhood_mask = (neighborhood_seg == neighborhood_id)

    # This is equivalent to a 1-px erosion
    # See note above for why we do this.
    neighborhood_mask ^= binary_edge_mask(neighborhood_mask, 'inner')

    mito_seg = fetch_labelmap_voxels(*MITO_SEG,
                                     box,
                                     supervoxels=True,
                                     scale=ANALYSIS_SCALE -
                                     mito_res_scale_diff)
    assert neighborhood_mask.shape == mito_seg.shape
    mito_seg = np.where(neighborhood_mask, mito_seg, 0)

    # The mito segmentation includes little scraps and slivers
    # that were filtered out of the "real" mito set.
    # Filter those scraps out of our results here.
    mito_ids = set(pd.unique(mito_seg.ravel())) - {0}
    mito_sizes = fetch_sizes(*MITO_SEG, [*mito_ids], supervoxels=True)
    mito_sizes = mito_sizes.rename_axis('mito')
    mito_sizes *= (2**mito_res_scale_diff)**3

    # This is our main result: mito IDs (and their sizes)
    mito_sizes = mito_sizes.loc[mito_sizes >= MIN_MITO_SIZE]

    # Just for extra info, group the mitos we found into connected components.
    mito_mask = mask_for_labels(mito_seg, mito_sizes.index)
    mito_box = compute_nonzero_box(mito_mask)
    mito_mask = extract_subvol(mito_mask, mito_box)
    mito_seg = extract_subvol(mito_seg, mito_box)
    mito_cc = label(mito_mask, connectivity=1)
    ct = contingency_table(mito_seg, mito_cc).reset_index()
    ct = ct.rename(columns={
        'left': 'mito',
        'right': 'cc',
        'voxel_count': 'cc_size'
    })
    ct = ct.set_index('mito')
    mito_sizes = pd.DataFrame(mito_sizes).merge(ct,
                                                'left',
                                                left_index=True,
                                                right_index=True)
    return mito_sizes
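# A hypothetical call to mitos_in_neighborhood(). The source URL, origin, and
# neighborhood ID are placeholders; RADIUS, ANALYSIS_SCALE, MIN_MITO_SIZE, and
# MITO_SEG are assumed to be module-level constants, as referenced above.
def _example_mitos_in_neighborhood():
    mito_roi_source = 'http://emdata:8900/abc123/neighborhood-masks'
    neighborhood_origin_xyz = (15040, 19807, 21312)
    neighborhood_id = 1003041375
    mito_table = mitos_in_neighborhood(mito_roi_source,
                                       neighborhood_origin_xyz,
                                       neighborhood_id,
                                       mito_res_scale_diff=1)
    print(mito_table.head())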
def test_copysegmentation_from_brainmaps_to_dvid(setup_dvid_repo):
    """
    Fetch a tiny subvolume from a Brainmaps source.
    To run this test, you must have valid application credentials loaded in your bash environment.

    For example:
        
        export GOOGLE_APPLICATION_CREDENTIALS=/Users/bergs/dvid-em-28a78d822e11.json
        PYTHONPATH=. pytest -s --tb=native --pyargs tests.workflows.test_copysegmentation -k copysegmentation_from_brainmaps_to_dvid
    """
    dvid_address, repo_uuid = setup_dvid_repo
    output_segmentation_name = 'segmentation-output-from-brainmaps'

    box_start = np.array([8000, 23296, 12800])
    box_xyz = np.array([box_start, box_start + 256])
    box_zyx = box_xyz[:, ::-1]

    config_text = textwrap.dedent(f"""\
        workflow-name: copysegmentation
        cluster-type: {CLUSTER_TYPE}
         
        input:
          brainmaps:
            project: '274750196357'
            dataset: hemibrain
            volume-id: base20180227_8nm_watershed_fixed
            change-stack-id: ''

            # Uh-oh, apparently this change stack is no longer available in BrainMaps??
            #change-stack-id: ffn_agglo_20180312_32_16_8_freeze10

          geometry:
            bounding-box: {box_xyz.tolist()}
            message-block-shape: [6400, 64, 64]
            block-width: 64
            available-scales: [0,1,2]

        output:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {output_segmentation_name}
            supervoxels: true
            disable-indexing: true
            create-if-necessary: true
           
          geometry: {{}} # Auto-set from input
 
        copysegmentation:
          pyramid-depth: 2
          slab-depth: 128
          download-pre-downsampled: true
    """)

    template_dir = tempfile.mkdtemp(suffix="copysegmentation-from-brainmaps")
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    yaml = YAML()
    with StringIO(config_text) as f:
        config = yaml.load(f)

    _execution_dir, _workflow = launch_flow(template_dir, 1)

    # Fetch the data via a simpler method, and verify that it matches what we stored in DVID.
    from flyemflows.volumes.brainmaps_volume import BrainMapsVolume
    bmv = BrainMapsVolume.from_flyem_source_info(config['input']['brainmaps'])

    for scale in (0, 1, 2):
        expected_vol = bmv.get_subvolume(box_zyx // 2**scale, scale=scale)

        assert expected_vol.any(), \
            f"Something is wrong with this test: The brainmaps volume at scale {scale} is all zeros!"

        output_vol = fetch_labelmap_voxels(dvid_address,
                                           repo_uuid,
                                           output_segmentation_name,
                                           box_zyx // 2**scale,
                                           scale=scale)
        assert (output_vol == expected_vol).all()
def test_masksegmentation_basic(setup_dvid_segmentation_input, invert_mask,
                                roi_dilation, disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5, input_segmentation_name, output_segmentation_name = setup_dvid_segmentation_input

    if invert_mask:
        roi_mask_s5 = ~roi_mask_s5

    config["masksegmentation"]["invert-mask"] = invert_mask
    config["masksegmentation"]["dilate-roi"] = roi_dilation

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    expected_vol = extract_subvol(volume.copy(), input_box_zyx)
    expected_vol[roi_mask] = 0

    output_box_xyz = np.array(
        final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    # Create a copy of the volume that contains only the voxels we removed
    erased_vol = volume.copy()
    erased_vol[~roi_mask] = 0

    if EXPORT_DEBUG_FILES:
        original_vol = fetch_labelmap_voxels(dvid_address,
                                             repo_uuid,
                                             input_segmentation_name,
                                             output_box_zyx,
                                             scale=0,
                                             supervoxels=True)
        original_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                   repo_uuid,
                                                   input_segmentation_name,
                                                   output_box_zyx,
                                                   scale=0)
        output_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                 repo_uuid,
                                                 output_segmentation_name,
                                                 output_box_zyx,
                                                 scale=0)
        np.save('/tmp/original-svs.npy', original_vol)
        np.save('/tmp/original-agglo.npy', original_agglo_vol)
        np.save('/tmp/output.npy', output_vol)
        np.save('/tmp/output-agglo.npy', output_agglo_vol)
        np.save('/tmp/expected.npy', expected_vol)
        np.save('/tmp/erased.npy', erased_vol)

        shutil.copyfile(f'{execution_dir}/roi-mask.h5', '/tmp/roi-mask.h5')
        if roi_dilation:
            shutil.copyfile(f'{execution_dir}/dilated-roi-mask.h5',
                            '/tmp/dilated-roi-mask.h5')
        if invert_mask:
            shutil.copyfile(f'{execution_dir}/segmentation-mask.h5',
                            '/tmp/segmentation-mask.h5')
        shutil.copyfile(f'{execution_dir}/final-mask.h5', '/tmp/final-mask.h5')

    if roi_dilation > 0:
        # FIXME: We don't yet verify voxel-accuracy of ROI dilation.
        return

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"

    scaled_expected_vol = expected_vol
    for scale in range(1, 1 + MAX_SCALE):
        scaled_expected_vol = downsample(scaled_expected_vol, 2,
                                         'labels-numba')
        scaled_output_vol = fetch_labelmap_voxels(dvid_address,
                                                  repo_uuid,
                                                  output_segmentation_name,
                                                  output_box_zyx // 2**scale,
                                                  scale=scale,
                                                  supervoxels=True)

        if EXPORT_DEBUG_FILES:
            np.save(f'/tmp/expected-{scale}.npy', scaled_expected_vol)
            np.save(f'/tmp/output-{scale}.npy', scaled_output_vol)

        if scale <= 5:
            assert (scaled_output_vol == scaled_expected_vol).all(), \
                f"Written vol does not match expected at scale {scale}"
        else:
            # For scale 6 and 7, some blocks are not even changed,
            # but that means we would be comparing DVID's label
            # downsampling method to our method ('labels-numba').
            # The two don't necessarily give identical results in the case of 'ties',
            # so we'll just verify that the nonzero voxels match, at least.
            assert ((scaled_output_vol == 0) == (scaled_expected_vol == 0)).all(), \
                f"Written vol does not match expected at scale {scale}"

    block_stats_path = f'{execution_dir}/erased-block-statistics.h5'
    with h5py.File(block_stats_path, 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    #
    # Check the exported block statistics
    #
    stats_cols = [*BLOCK_STATS_DTYPES.keys()]
    assert stats_df.columns.tolist() == stats_cols
    stats_df = stats_df.sort_values(stats_cols).reset_index()

    expected_stats_df = block_stats_for_volume((64, 64, 64), erased_vol,
                                               input_box_zyx)
    expected_stats_df = expected_stats_df.sort_values(stats_cols).reset_index()

    assert len(stats_df) == len(expected_stats_df)
    assert (stats_df == expected_stats_df).all().all()

    #
    # Try updating the labelindexes
    #
    src_info = (dvid_address, repo_uuid, input_segmentation_name)
    dest_info = (dvid_address, repo_uuid, output_segmentation_name)
    with switch_cwd(execution_dir):
        erase_from_labelindexes(src_info,
                                dest_info,
                                block_stats_path,
                                batch_size=10,
                                threads=4)

    # Verify deleted supervoxels
    assert os.path.exists(f'{execution_dir}/deleted-supervoxels.csv')
    deleted_svs = set(
        pd.read_csv(f'{execution_dir}/deleted-supervoxels.csv')['sv'])

    orig_svs = {*pd.unique(volume.reshape(-1))} - {0}
    remaining_svs = {*pd.unique(expected_vol.reshape(-1))} - {0}
    expected_deleted_svs = orig_svs - remaining_svs
    assert deleted_svs == expected_deleted_svs

    # Verify remaining sizes
    expected_sv_counts = (pd.Series(
        expected_vol.reshape(-1),
        name='sv').value_counts().drop(0).sort_index().rename('count'))

    index_dfs = []
    for body in np.unique(fetch_mapping(*dest_info, remaining_svs)):
        index_df = fetch_labelindex(*dest_info, body, format='pandas').blocks
        index_dfs.append(index_df)

    sv_counts = (pd.concat(index_dfs, ignore_index=True)[[
        'sv', 'count'
    ]].groupby('sv')['count'].sum().sort_index())
    assert set(sv_counts.index.values) == set(expected_sv_counts.index.values)
    assert (sv_counts == expected_sv_counts).all(), \
        pd.DataFrame({'stored_count': sv_counts, 'expected_count': expected_sv_counts}).query('stored_count != expected_count')

    # Verify mapping
    # Deleted supervoxels exist in the mapping, but they map to 0.
    assert (fetch_mapping(*dest_info, [*deleted_svs]) == 0).all()

    # Remaining supervoxels still map to their original bodies
    assert (fetch_mapping(*dest_info, [*remaining_svs]) == fetch_mapping(
        *src_info, [*remaining_svs])).all()
def setup_dvid_segmentation_input(setup_dvid_repo, random_segmentation):
    dvid_address, repo_uuid = setup_dvid_repo

    input_segmentation_name = 'labelmapcopy-segmentation-input'
    output_segmentation_name = 'labelmapcopy-segmentation-output'

    partial_output_segmentation_name = 'labelmapcopy-segmentation-partial-output'

    max_scale = 3
    already_exists = False

    try:
        create_labelmap_instance(dvid_address,
                                 repo_uuid,
                                 input_segmentation_name,
                                 max_scale=max_scale)
        create_labelmap_instance(dvid_address,
                                 repo_uuid,
                                 partial_output_segmentation_name,
                                 max_scale=max_scale)
    except HTTPError as ex:
        if ex.response is None or 'already exists' not in ex.response.content.decode('utf-8'):
            raise
        already_exists = True

    expected_vols = {}
    for scale in range(1 + max_scale):
        if scale == 0:
            scaled_vol = random_segmentation
        else:
            scaled_vol = downsample(scaled_vol, 2, 'labels-numba')
        expected_vols[scale] = scaled_vol

        if not already_exists:
            scaled_box = round_box([(0, 0, 0), scaled_vol.shape], 64, 'out')
            aligned_vol = np.zeros(scaled_box[1], np.uint64)
            overwrite_subvol(aligned_vol, [(0, 0, 0), scaled_vol.shape],
                             scaled_vol)
            post_labelmap_voxels(dvid_address,
                                 repo_uuid,
                                 input_segmentation_name, (0, 0, 0),
                                 aligned_vol,
                                 scale=scale)

    if not already_exists:
        # Create a 'partial' output volume that is the same (bitwise) as the input except for some blocks.
        scaled_box = np.array([(0, 0, 0), random_segmentation.shape])
        scaled_box[1, -1] = 192
        for scale in range(1 + max_scale):
            scaled_box = round_box(scaled_box // (2**scale), 64, 'out')
            raw_blocks = fetch_labelmap_voxels(dvid_address,
                                               repo_uuid,
                                               input_segmentation_name,
                                               scaled_box,
                                               scale,
                                               supervoxels=True,
                                               format='raw-response')
            post_labelmap_blocks(dvid_address,
                                 repo_uuid,
                                 partial_output_segmentation_name, [(0, 0, 0)],
                                 raw_blocks,
                                 scale,
                                 is_raw=True)

        block = np.random.randint(1_000_000,
                                  1_000_010,
                                  size=(64, 64, 64),
                                  dtype=np.uint64)
        post_labelmap_voxels(dvid_address,
                             repo_uuid,
                             partial_output_segmentation_name, (0, 128, 64),
                             block,
                             0,
                             downres=True)

    partial_vol = fetch_labelmap_voxels(dvid_address,
                                        repo_uuid,
                                        partial_output_segmentation_name,
                                        [(0, 0, 0), random_segmentation.shape],
                                        supervoxels=True)

    template_dir = tempfile.mkdtemp(suffix="labelmapcopy-template")

    config_text = textwrap.dedent(f"""\
        workflow-name: labelmapcopy
        cluster-type: {CLUSTER_TYPE}
         
        input:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {input_segmentation_name}
            supervoxels: true
           
          geometry:
            message-block-shape: [512,64,64]
            available-scales: [0,1,2,3]
 
        output:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {output_segmentation_name}
            supervoxels: true
            disable-indexing: true
            create-if-necessary: true
        
        labelmapcopy:
          slab-shape: [512,128,64]
          dont-overwrite-identical-blocks: true
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    yaml = YAML()
    with StringIO(config_text) as f:
        config = yaml.load(f)

    return template_dir, config, expected_vols, partial_vol, dvid_address, repo_uuid, output_segmentation_name, partial_output_segmentation_name
def neuron_mito_stats(seg_src, mito_cc_src, mito_class_src, body_id, scale=0, min_size=0, search_radius=50, processes=1):
    from functools import partial
    import numpy as np
    import pandas as pd

    from neuclease.util import compute_parallel
    from neuclease.dvid import fetch_sparsevol_coarse, resolve_ref, fetch_labels, fetch_labelmap_voxels

    seg_src[1] = resolve_ref(*seg_src[:2])
    mito_cc_src[1] = resolve_ref(*mito_cc_src[:2])
    mito_class_src[1] = resolve_ref(*mito_class_src[:2])

    # Fetch block coords; re-scale for the analysis scale
    block_coords = (2**6) * fetch_sparsevol_coarse(*seg_src, body_id)
    bc_df = pd.DataFrame(block_coords, columns=[*'zyx'])
    bc_df[[*'zyx']] //= 2**scale
    block_coords = bc_df.drop_duplicates().values

    #
    # Blockwise stats
    #
    block_fn = partial(_process_block, seg_src, mito_cc_src, mito_class_src, body_id, scale)
    block_tables = compute_parallel(block_fn, block_coords, processes=processes)
    block_tables = [*filter(lambda t: t is not None, block_tables)]
    #
    # Combine stats
    #
    full_table = pd.concat(block_tables, sort=True).fillna(0)
    class_cols = [*filter(lambda c: c.startswith('class'), full_table.columns)]
    full_table = full_table.astype({c: np.int32 for c in class_cols})

    # Weight each block centroid by the block's voxel count before taking the mean
    full_table[[*'zyx']] *= full_table[['total_size']].values
    stats_df = full_table.groupby('mito_id').sum()
    stats_df[[*'zyx']] /= stats_df[['total_size']].values

    # Drop tiny mitos
    stats_df = stats_df.query("total_size >= @min_size").copy()

    # Assume all centroids are 'exact' by default (overwritten below if necessary)
    stats_df['centroid_type'] = 'exact'

    # Include a column for 'body' even though it's the same on every row,
    # just as a convenience for concatenating these results with the results
    # from other bodies if desired.
    stats_df['body'] = body_id

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    stats_df = stats_df[['body', *'xyz', 'total_size', *class_cols, 'centroid_type']]

    #
    # Check for centroids that fall outside of the mito,
    # and adjust them if necessary.
    #
    centroid_mitos = fetch_labels(*mito_cc_src, stats_df[[*'zyx']].values, scale=scale)
    mismatches = stats_df.index[(stats_df.index != centroid_mitos)]

    if len(mismatches) == 0:
        return stats_df

    logger.warning("Some mitochondria centroids do not lie within the mitochondria itself. "
                   "Searching for pseudo-centroids.")

    # construct field of distances from the central voxel
    sr = search_radius
    cz, cy, cx = np.ogrid[-sr:sr+1, -sr:sr+1, -sr:sr+1]
    distances = np.sqrt(cz**2 + cy**2 + cx**2)

    pseudo_centroids = []
    error_mito_ids = []
    for row in stats_df.loc[mismatches].itertuples():
        mito_id = row.Index
        centroid = np.array((row.z, row.y, row.x))
        box = (centroid - sr, 1 + centroid + sr)
        mito_mask = (mito_id == fetch_labelmap_voxels(*mito_cc_src, box, scale))

        if not mito_mask.any():
            pseudo_centroids.append((row.z, row.y, row.x))
            error_mito_ids.append(mito_id)
            continue

        # Find minimum distance
        masked_distances = np.where(mito_mask, distances, np.inf)
        new_centroid = np.unravel_index(np.argmin(masked_distances), masked_distances.shape)
        new_centroid = np.array(new_centroid) + centroid - sr
        pseudo_centroids.append(new_centroid)

    stats_df.loc[mismatches, ['z', 'y', 'x']] = np.array(pseudo_centroids, dtype=np.int32)
    stats_df.loc[mismatches, 'centroid_type'] = 'adjusted'
    stats_df.loc[error_mito_ids, 'centroid_type'] = 'error'

    if error_mito_ids:
        logger.warning("Some mitochondria pseudo-centroids could not be found.")

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    return stats_df
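# A sketch of calling neuron_mito_stats() with hypothetical DVID sources.
# Each source is given as a mutable [server, uuid, instance] list, since the
# function overwrites the uuid in-place via resolve_ref(); the body ID and
# output path are placeholders.
def _example_neuron_mito_stats():
    seg_src = ['http://emdata:8900', 'abc123', 'segmentation']
    mito_cc_src = ['http://emdata:8900', 'abc123', 'mito-objects']
    mito_class_src = ['http://emdata:8900', 'abc123', 'mito-classes']
    stats_df = neuron_mito_stats(seg_src, mito_cc_src, mito_class_src,
                                 body_id=1071121755, scale=1,
                                 min_size=10_000, processes=8)
    stats_df.to_csv('mito-stats-1071121755.csv', index=True, header=True)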
    def get_subvolume(self, box_zyx, scale=0):
        req_bytes = self._dtype_nbytes * np.prod(box_zyx[1] - box_zyx[0])

        instance_name = self._instance_name
        if self._instance_type.endswith('blk') and scale > 0:
            # Grayscale multi-scale is achieved via multiple instances
            instance_name = f"{instance_name}_{scale}"
            scale = 0
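            # e.g. a scale-2 request for a 'uint8blk' instance named 'grayscale'
            # is served by the separate instance 'grayscale_2', fetched at scale 0.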

        try:
            if self._instance_type in ('labelarray', 'labelmap'):
                # Obtain permission from the resource manager while fetching the compressed data,
                # but release the resource token before inflating the data.
                with self._resource_manager_client.access_context(
                        self._server, True, 1, req_bytes):
                    aligned_box = round_box(box_zyx, 64, 'out')
                    if 8 * np.prod(aligned_box[1] - aligned_box[0]) < 2**31:
                        vol_proxy = fetch_labelmap_voxels(
                            self._server,
                            self._uuid,
                            instance_name,
                            box_zyx,
                            scale,
                            self._throttle,
                            supervoxels=self.supervoxels,
                            format='lazy-array')
                    else:
                        # Requested subvolume is too large to download in one request.
                        # Download it in chunks, with a somewhat arbitrary chunkshape
                        chunk_shape = (64, 128, 10240)
                        vol_proxy = fetch_labelmap_voxels_chunkwise(
                            self._server,
                            self._uuid,
                            instance_name,
                            box_zyx,
                            scale,
                            self._throttle,
                            supervoxels=self.supervoxels,
                            format='lazy-array',
                            chunk_shape=chunk_shape)
                # Inflate after releasing resource token
                return vol_proxy()
            else:
                with self._resource_manager_client.access_context(
                        self._server, True, 1, req_bytes):
                    return fetch_raw(self._server, self._uuid, instance_name,
                                     box_zyx, self._throttle)

        except Exception as ex:
            # In cluster scenarios, a chained 'raise ... from ex' traceback
            # doesn't get fully transmitted to the driver,
            # so we simply append this extra info to the current exception
            # rather than using exception chaining.
            # Also log it now so it at least appears in the worker log.
            # See: https://github.com/dask/dask/issues/4384
            import traceback, io
            sio = io.StringIO()
            traceback.print_exc(file=sio)
            logger.log(logging.ERROR, sio.getvalue())

            host = socket.gethostname()
            msg = f"Host {host}: Failed to fetch subvolume: box_zyx = {box_zyx.tolist()}"

            ex.args += (msg, )
            raise
def _fetch_svs(master_seg, box):
    vol = fetch_labelmap_voxels(*master_seg, box, supervoxels=True)
    return set(pd.unique(vol.reshape(-1)))
def autogen_points(input_seg,
                   count,
                   roi,
                   body,
                   tbars,
                   use_skeleton,
                   random_seed=None,
                   minimum_distance=0):
    """
    Generate a list of points within the input segmentation, based on the given criteria.
    See the main help text below for details.
    """
    if tbars and not body:
        sys.exit(
            "If you want to auto-generate tbar points, please specify a body.")

    if not tbars and not count:
        sys.exit(
            "You must supply a --count unless you are generating all tbars of a body."
        )

    if use_skeleton:
        if not body:
            sys.exit(
                "You must supply a body ID if you want to use a skeleton.")
        if tbars:
            sys.exit(
                "You can't select both tbar points and skeleton points.  Pick one or the other."
            )
        if not count and minimum_distance > 0:
            sys.exit(
                "You must supply a --count if you want skeleton point samples to respect the minimum distance."
            )
        if not count and not roi and minimum_distance == 0:
            logger.warning(
                "You are using all nodes of a skeleton without any ROI filter! Is that what you meant?"
            )

    rng = default_rng(random_seed)

    if tbars:
        logger.info(f"Fetching synapses for body {body}")
        syn_df = fetch_annotation_label(*input_seg[:2],
                                        'synapses',
                                        body,
                                        format='pandas')
        tbars = syn_df.query('kind == "PreSyn"')[[*'zyx']]

        if roi:
            logger.info(f"Filtering tbars for roi {roi}")
            determine_point_rois(*input_seg[:2], [roi], tbars)
            tbars = tbars.query('roi == @roi')[[*'zyx']]

        if minimum_distance:
            logger.info(
                f"Pruning close points from {len(tbars)} total tbar points")
            tbars = prune_close_pairs(tbars, minimum_distance, rng)
            logger.info(f"After pruning, {len(tbars)} tbars remain.")

        if count:
            count = min(count, len(tbars))
            logger.info(f"Sampling {count} tbars")
            choices = rng.choice(tbars.index, size=count, replace=False)
            tbars = tbars.loc[choices]

        logger.info(f"Returning {len(tbars)} tbar points")
        return tbars

    elif use_skeleton:
        assert body
        logger.info(f"Fetching skeleton for body {body}")
        skeleton_instance = f'{input_seg[2]}_skeletons'
        swc = fetch_key(*input_seg[:2], skeleton_instance, f'{body}_swc')
        skeleton_df = swc_to_dataframe(swc)
        skeleton_df['x'] = skeleton_df['x'].astype(int)
        skeleton_df['y'] = skeleton_df['y'].astype(int)
        skeleton_df['z'] = skeleton_df['z'].astype(int)

        if roi:
            logger.info(f"Filtering skeleton for roi {roi}")
            determine_point_rois(*input_seg[:2], [roi], skeleton_df)
            skeleton_df = skeleton_df.query('roi == @roi')[[*'zyx']]

        if minimum_distance:
            assert count
            # Distance-pruning is very expensive on a huge number of close points.
            # If skeleton is large, first reduce the workload by pre-selecting a
            # random sample of skeleton points, and prune more from there.
            if len(skeleton_df) > 10_000:
                # FIXME: random_state can't use rng until I upgrade to pandas 1.0
                skeleton_df = skeleton_df.sample(min(4 * count,
                                                     len(skeleton_df)),
                                                 random_state=None)
            logger.info(
                f"Pruning close points from {len(skeleton_df)} skeleton points"
            )
            prune_close_pairs(skeleton_df, minimum_distance, rng)
            logger.info(
                f"After pruning, {len(skeleton_df)} skeleton points remain.")

        if count:
            count = min(count, len(skeleton_df))
            logger.info(f"Sampling {count} skeleton points")
            choices = rng.choice(skeleton_df.index, size=count, replace=False)
            skeleton_df = skeleton_df.loc[choices]

        logger.info(f"Returning {len(skeleton_df)} skeleton points")
        return skeleton_df

    elif body:
        assert count
        if roi:
            # TODO: intersect the ranges with the ROI.
            raise NotImplementedError(
                "Sorry, I haven't yet implemented support for "
                "body+roi filtering.  Pick one or the other, "
                "or ask Stuart to fix this.")

        logger.info(f"Fetching sparsevol for body {body}")
        ranges = fetch_sparsevol(*input_seg, body, format='ranges')
        logger.info("Sampling from sparsevol")

        if minimum_distance > 0:
            # Sample 4x extra so we still have enough after pruning.
            points = sample_points_from_ranges(ranges, 4 * count, rng)
        else:
            points = sample_points_from_ranges(ranges, count, rng)

        points = pd.DataFrame(points, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} body points")
            prune_close_pairs(points, minimum_distance, rng)
            logger.info(f"After pruning, {len(points)} body points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} body points")
        return points

    elif roi:
        assert count
        logger.info(f"Fetching roi {roi}")
        roi_ranges = fetch_roi_roi(*input_seg[:2], roi, format='ranges')
        logger.info("Sampling from ranges")

        if minimum_distance > 0:
            # Sample 4x extra so we can prune some out if necessary.
            points_s5 = sample_points_from_ranges(roi_ranges, 4 * count, rng)
        else:
            points_s5 = sample_points_from_ranges(roi_ranges, count, rng)

        corners_s0 = points_s5 * (2**5)
        points_s0 = rng.integers(corners_s0, corners_s0 + (2**5))
        points = pd.DataFrame(points_s0, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} roi points")
            prune_close_pairs(points, minimum_distance, rng)
            logger.info(
                f"After pruning, {len(points)} roi points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} roi points")
        return points
    else:
        # No body or roi specified, just sample from the whole non-zero segmentation area
        assert count
        logger.info("Sampling random points from entire input segmentation")
        logger.info("Fetching low-res input volume")
        box_s6 = round_box(fetch_volume_box(*input_seg), 2**6, 'out') // 2**6
        seg_s6 = fetch_labelmap_voxels(*input_seg, box_s6, scale=6)
        mask_s6 = seg_s6.astype(bool)
        logger.info("Encoding segmentation as ranges")
        seg_ranges = runlength_encode_mask_to_ranges(mask_s6, box_s6)

        logger.info("Sampling from ranges")

        if minimum_distance > 0:
            # Sample 4x extra so we can prune some out if necessary.
            points_s6 = sample_points_from_ranges(seg_ranges, 4 * count, rng)
        else:
            points_s6 = sample_points_from_ranges(seg_ranges, count, rng)

        corners_s0 = points_s6 * (2**6)
        points_s0 = rng.integers(corners_s0, corners_s0 + (2**6))

        points = pd.DataFrame(points_s0, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(
                f"Pruning close points from {len(points)} segmentation points")
            prune_close_pairs(points, minimum_distance, rng)
            logger.info(
                f"After pruning, {len(points)} segmentation points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} segmentation points")
        return points
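# A hypothetical call to autogen_points(), e.g. sampling 100 skeleton points
# from one body, pruned to be at least 500 voxels apart. input_seg is a
# (server, uuid, instance) tuple; the body ID is a placeholder.
def _example_autogen_points():
    input_seg = ('http://emdata:8900', 'abc123', 'segmentation')
    points_df = autogen_points(input_seg,
                               count=100,
                               roi=None,
                               body=1071121755,
                               tbars=False,
                               use_skeleton=True,
                               random_seed=0,
                               minimum_distance=500)
    print(points_df[[*'zyx']].head())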
        def copy_box(box, scale):
            assert not record_only or scale == 0
            box = round_box(box, 64, 'out')
            box_shape = (box[1] - box[0])

            # Read input blocks
            with mgr_client.access_context(input_service.server, True, 1,
                                           np.prod(box_shape)):
                input_raw_blocks = fetch_labelmap_voxels(
                    *input_service.instance_triple,
                    box,
                    scale,
                    False,
                    input_service.supervoxels,
                    format='raw-response')

            # If we're just recording, parse and return
            if scale == 0 and record_only:
                _input_spans, input_labels = parse_labelarray_data(
                    input_raw_blocks, extract_labels=True)
                return list(set(chain(*input_labels.values())))

            # If not checking the output, just copy input to output
            if not check_existing:
                with mgr_client.access_context(output_service.server, False, 1,
                                               np.prod(box_shape)):
                    post_labelmap_blocks(*output_service.instance_triple,
                                         None,
                                         input_raw_blocks,
                                         scale,
                                         output_service.enable_downres,
                                         output_service.disable_indexing,
                                         False,
                                         is_raw=True)

                if scale == 0 and record_labels:
                    _input_spans, input_labels = parse_labelarray_data(
                        input_raw_blocks, extract_labels=True)
                    return list(set(chain(*input_labels.values())))
                return []

            # Read from output
            with mgr_client.access_context(output_service.server, True, 1,
                                           np.prod(box_shape)):
                output_raw_blocks = fetch_labelmap_voxels(
                    *output_service.instance_triple,
                    box,
                    scale,
                    False,
                    output_service.supervoxels,
                    format='raw-response')

            # If no differences, no need to parse
            if (input_raw_blocks == output_raw_blocks):
                return []

            input_spans = parse_labelarray_data(input_raw_blocks,
                                                extract_labels=False)
            output_spans = parse_labelarray_data(output_raw_blocks,
                                                 extract_labels=False)

            # Compare block IDs
            input_ids = set(input_spans.keys())
            output_ids = set(output_spans.keys())

            missing_from_output = input_ids - output_ids
            missing_from_input = output_ids - input_ids
            common_ids = input_ids & output_ids

            for block_id in missing_from_input:
                # FIXME: We should pass this in the result so it can be logged in the client, not the worker.
                logger.error(
                    f"Not overwriting block-id: {block_id}.  It doesn't exist in the input."
                )

            # Filter the input blocks so only the new/different ones remain
            filtered_input_list = []
            for block_id in missing_from_output:
                start, stop = input_spans[block_id]
                filtered_input_list.append(
                    (block_id, input_raw_blocks[start:stop]))

            filtered_output_list = []
            for block_id in common_ids:
                in_start, in_stop = input_spans[block_id]
                out_start, out_stop = output_spans[block_id]

                in_buf = input_raw_blocks[in_start:in_stop]
                out_buf = output_raw_blocks[out_start:out_stop]

                if in_buf != out_buf:
                    filtered_input_list.append((block_id, in_buf))
                    filtered_output_list.append((block_id, out_buf))

            # Sort filtered blocks so they appear in the same order in which we received them.
            filtered_input_list = sorted(
                filtered_input_list, key=lambda k_v: input_spans[k_v[0]][0])

            # Post them
            filtered_input_buf = b''.join(
                [buf for (_, buf) in filtered_input_list])
            with mgr_client.access_context(output_service.server, False, 1,
                                           np.prod(box_shape)):
                post_labelmap_blocks(*output_service.instance_triple,
                                     None,
                                     filtered_input_buf,
                                     scale,
                                     output_service.enable_downres,
                                     output_service.disable_indexing,
                                     False,
                                     is_raw=True)

            if scale == 0 and record_labels:
                filtered_output_buf = b''.join(
                    [buf for (_, buf) in filtered_output_list])

                _, filtered_input_labels = parse_labelarray_data(
                    filtered_input_buf, extract_labels=True)
                _, filtered_output_labels = parse_labelarray_data(
                    filtered_output_buf, extract_labels=True)

                input_set = set(chain(*filtered_input_labels.values()))
                output_set = set(chain(*filtered_output_labels.values()))
                return list(input_set - output_set)

            return []