def _process_block(seg_src, mito_cc_src, mito_class_src, body_id, scale, block_coord):
    """
    Tabulate the mitochondria voxels of one body within a single 64x64x64 block.

    Fetches the body segmentation, the mito connected-component labels and the
    mito class labels for the block starting at ``block_coord`` (at the given
    ``scale``), then keeps only voxels that belong to ``body_id`` AND carry a
    non-zero mito label whose class is not ``EMPTY_MITO``.

    Args:
        seg_src, mito_cc_src, mito_class_src:
            Argument sequences unpacked into ``fetch_labelmap_voxels()``
            ahead of the box and scale.
        body_id:
            Label to select within the block fetched from ``seg_src``.
        scale:
            Scale at which all three volumes are fetched.
        block_coord:
            Starting corner (z, y, x) of the block; must support ``+ 64``
            element-wise (e.g. a numpy array).

    Returns:
        None if the block has no voxels of interest.  Otherwise a DataFrame
        indexed by ``mito_id`` with one ``class_<N>`` voxel-count column per
        mito class seen in the block, a ``total_size`` column (int32), and
        float64 ``z``, ``y``, ``x`` columns holding the mean coordinate of the
        mito's voxels within this block.
    """
    import numpy as np
    import pandas as pd
    from neuclease.util import ndindex_array
    from neuclease.dvid import fetch_labelmap_voxels

    block_box = np.array((block_coord, block_coord+64))
    block_seg = fetch_labelmap_voxels(*seg_src, block_box, scale)
    mito_labels = fetch_labelmap_voxels(*mito_cc_src, block_box, scale)
    mito_classes = fetch_labelmap_voxels(*mito_class_src, block_box, scale)

    body_mask = (block_seg == body_id)
    mito_mask = (mito_labels != 0) & (mito_classes != EMPTY_MITO)
    mask = (body_mask & mito_mask)
    if not mask.any():
        # No mito voxels of interest in this block
        return None

    unraveled_df = pd.DataFrame({'mito_id': mito_labels.reshape(-1),
                                 'mito_class': mito_classes.reshape(-1)})

    # pivot_table() doesn't work without a data column to aggregate
    unraveled_df['voxels'] = 1

    # Add coordinate columns to compute centroids
    raster_coords = ndindex_array(*(64, 64, 64), dtype=np.int32)
    raster_coords += block_coord

    # The columns are created first and then overwritten as a group with
    # the raster coordinates.
    unraveled_df['z'] = np.int8(0)
    unraveled_df['y'] = np.int8(0)
    unraveled_df['x'] = np.int8(0)
    unraveled_df[['z', 'y', 'x']] = raster_coords

    # Drop non-body voxels and non-mito-voxels
    unraveled_df = unraveled_df.iloc[mask.reshape(-1)]

    block_table = (unraveled_df[['mito_id', 'mito_class', 'voxels']]
                   .pivot_table(index='mito_id',  # noqa
                                columns='mito_class',
                                values='voxels',
                                aggfunc='sum',
                                fill_value=0))

    block_table.columns = [f"class_{c}" for c in block_table.columns]
    block_table['total_size'] = block_table.sum(axis=1).astype(np.int32)

    # Compute block centroid for each mito.
    # Keep the means in float64: float32 has only a 24-bit mantissa, which
    # is not enough once these coordinates are multiplied by voxel counts
    # for size-weighted averaging.
    mito_points = unraveled_df.groupby('mito_id')[['z', 'y', 'x']].mean().astype(np.float64)
    block_table = block_table.merge(mito_points, 'left', left_index=True, right_index=True)
    return block_table
def test_masksegmentation_resume(setup_dvid_segmentation_input, disable_auto_retry):
    """
    Run the workflow with 'resume-at' pointing past the halfway batch, then
    check that the first 128 slices of the output are unchanged while the
    remainder differs from the original somewhere.
    """
    (template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5,
     _input_segmentation_name, output_segmentation_name) = setup_dvid_segmentation_input

    brick_shape = config["input"]["geometry"]["message-block-shape"]
    batch_size = config["masksegmentation"]["batch-size"]

    # This counts all bricks in the volume, which is not necessarily the
    # number of *processed* bricks, but it's close enough.
    bricks_per_axis = np.array(volume.shape) / brick_shape
    total_bricks = np.ceil(np.prod(bricks_per_axis)).astype(int)
    total_batches = int(np.ceil(total_bricks / batch_size))

    # Skip over half of the original bricks.
    resume_settings = {
        "scale": 0,
        "batch-index": 1 + (total_batches // 2)
    }
    config["masksegmentation"]["resume-at"] = resume_settings

    # Re-dump the modified config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_zyx = np.array(final_config['input']['geometry']['bounding-box'])[:, ::-1]

    roi_mask = extract_subvol(upsample(roi_mask_s5, 2**5), input_box_zyx)

    masked_vol = extract_subvol(volume.copy(), input_box_zyx)
    masked_vol[roi_mask] = 0

    output_box_zyx = np.array(final_config['output']['geometry']['bounding-box'])[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    # First part was untouched
    untouched = (output_vol[:128] == volume[:128])
    assert untouched.all()

    # Last part was touched somewhere
    modified = (output_vol[128:] != volume[128:])
    assert modified.any()
def process_point(seg_src, seg_dst, point, radius, src_body, dst_body):
    """
    Generate a neighborhood segment around a particular point.
    Upload the voxels for the segment and the corresponding mesh.

    The neighborhood is the part of ``src_body`` that falls within a sphere
    of the given ``radius`` centered on ``point``.  If ``src_body`` is None,
    the label found at the center voxel is used.  If ``dst_body`` is None,
    an ID is derived from the point's coordinates.

    Returns:
        (point, centroid, top_point, src_body, dst_body, voxel_count)
    """
    r = radius
    src_box = np.asarray((point - r, point + r + 1))
    src_vol = fetch_labelmap_voxels(*seg_src, src_box)

    if src_body is None:
        # Default to whatever label sits at the center of the neighborhood.
        src_body = src_vol[r, r, r]

    if dst_body is None:
        # Generate a neighborhood segment ID from the coordinate.
        # Divide by 4 to ensure the coordinates fit within 2^53.
        # (The segment ID will not retain the full resolution of
        # the coordinate, but that's usually OK for our purposes.)
        dst_body = encode_point_to_uint64(point // 4, 17)

    in_body = (src_vol == src_body)
    mask = in_body & sphere_mask(r)

    # Read the 64-aligned destination region, paint the neighborhood
    # into it through a view, and write the whole region back.
    dst_box = round_box(src_box, 64, 'out')
    dst_vol = fetch_labelmap_voxels(*seg_dst, dst_box)

    relative_box = src_box - dst_box[0]
    dst_view = dst_vol[b2s(*relative_box)]
    dst_view[mask] = dst_body

    post_labelmap_voxels(*seg_dst, dst_box[0], dst_vol, downres=True)

    # Mesh needs to be written in nm, hence 8x
    mesh = Mesh.from_binary_vol(mask, 8 * src_box, smoothing_rounds=2)
    mesh.simplify(0.05, in_memory=True)
    mesh_bytes = mesh.serialize(fmt='ngmesh')
    post_key(*seg_dst[:2], f'{seg_dst[2]}_meshes', f'{dst_body}.ngmesh', mesh_bytes)

    centroid = src_box[0] + mask_centroid(mask, True)

    # First z-slice containing any mask voxel, and the middle entry
    # of that slice's list of mask coordinates.
    slice_counts = mask.sum(axis=(1, 2))
    top_z = slice_counts.nonzero()[0][0]
    top_coords = np.transpose(mask[top_z].nonzero())
    middle_yx = top_coords[len(top_coords) // 2]
    top_point = src_box[0] + (top_z, *middle_yx)

    return point, centroid, top_point, src_body, dst_body, mask.sum()
def execute(bbox: Bbox):
    """
    Fetch the scale-0 labelmap voxels inside ``bbox`` and wrap them in a Chunk.

    Relies on ``server``, ``uuid``, ``instance`` and ``supervoxels`` being
    available from the surrounding scope.

    Returns:
        A single-element list containing the Chunk, whose voxel offset is
        the bounding box's minimum point.
    """
    print('bounding box: ', bbox)
    corners = [tuple(bbox.minpt), tuple(bbox.maxpt)]
    labels = dvid.fetch_labelmap_voxels(server,
                                        uuid,
                                        instance,
                                        corners,
                                        scale=0,
                                        supervoxels=supervoxels)
    return [Chunk(labels, voxel_offset=bbox.minpt)]
def test_fetch_labelmap_voxels(labelmap_setup):
    """
    Fetch the whole test volume as supervoxels, as mapped bodies, and as a
    lazily-inflated proxy, and check each result.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation')
    full_box = [(0, 0, 0), supervoxel_vol.shape]

    # Test raw supervoxels
    sv_voxels = fetch_labelmap_voxels(*instance_info, full_box, supervoxels=True)
    assert (sv_voxels == supervoxel_vol).all()

    # Test mapped bodies
    body_voxels = fetch_labelmap_voxels(*instance_info, full_box, supervoxels=False)
    assert (body_voxels == 1).all()

    # Test uninflated mode
    voxels_proxy = fetch_labelmap_voxels(*instance_info,
                                         full_box,
                                         supervoxels=True,
                                         format='lazy-array')
    assert len(voxels_proxy.content) < supervoxel_vol.nbytes, \
        "Fetched data was apparently not compressed"
    assert (voxels_proxy() == supervoxel_vol).all()
def test_post_labelmap_voxels(labelmap_setup):
    """
    Post a random 128^3 volume at offset (64, 64, 64) and verify that the
    same voxels are read back from that region.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation-scratch')

    # Write some random data and read it back.
    vol = np.random.randint(10, size=(128, 128, 128), dtype=np.uint64)
    offset = (64, 64, 64)
    post_labelmap_voxels(dvid_server, dvid_repo, 'segmentation-scratch', offset, vol, 0)

    read_box = [(0, 0, 0), (256, 256, 256)]
    complete_voxels = fetch_labelmap_voxels(*instance_info, read_box, supervoxels=True)

    written_region = complete_voxels[64:192, 64:192, 64:192]
    assert (written_region == vol).all()
def test_post_raw(labelmap_setup):
    """
    Post three blocks' worth of random data (side by side along X) with
    post_raw(), then verify each 64-wide piece reads back unchanged.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = (dvid_server, dvid_repo, 'segmentation-scratch')

    # Write some random data and read it back.
    data = np.random.randint(10, size=(64, 64, 64 * 3), dtype=np.uint64)
    offset_zyx = (0, 64, 0)
    post_raw(*instance_info, offset_zyx, data)

    complete_voxels = fetch_labelmap_voxels(*instance_info,
                                            [(0, 0, 0), (128, 128, 192)],
                                            supervoxels=True)

    # Compare one 64-voxel-wide slab at a time.
    for x_start in (0, 64, 128):
        x_stop = x_start + 64
        stored = complete_voxels[0:64, 64:128, x_start:x_stop]
        assert (stored == data[:, :, x_start:x_stop]).all()
def test_post_labelmap_blocks(labelmap_setup):
    """
    Post three random 64^3 blocks at distinct corners and verify that each
    one reads back unchanged from its location.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation-scratch')

    # Write some random data and read it back.
    blocks = np.random.randint(10, size=(3, 64, 64, 64), dtype=np.uint64)
    corners_zyx = [[0, 0, 0], [0, 64, 0], [0, 0, 64]]
    post_labelmap_blocks(dvid_server, dvid_repo, 'segmentation-scratch', corners_zyx, blocks, 0)

    complete_voxels = fetch_labelmap_voxels(*instance_info,
                                            [(0, 0, 0), (128, 128, 128)],
                                            supervoxels=True)

    for (z, y, x), block in zip(corners_zyx, blocks):
        stored = complete_voxels[z:z + 64, y:y + 64, x:x + 64]
        assert (stored == block).all()
def test_labelmapcopy_partial(setup_dvid_segmentation_input, disable_auto_retry):
    """
    Run labelmapcopy against the pre-populated 'partial' output instance.
    Verify every scale of the output, and verify that the recorded labels
    are exactly those that differ between the expected and partial volumes.
    """
    (template_dir, config, expected_vols, partial_vol, dvid_address, repo_uuid,
     _output_segmentation_name, partial_output_segmentation_name) = setup_dvid_segmentation_input

    # Work on a private copy so the fixture's config isn't modified.
    config = copy.deepcopy(config)
    config["output"]["dvid"]["segmentation-name"] = partial_output_segmentation_name

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    output_box_zyx = np.array(final_config['output']['geometry']['bounding-box'])[:, ::-1]
    max_scale = final_config['labelmapcopy']['max-scale']

    for scale in range(1 + max_scale):
        scaled_box = output_box_zyx // (2**scale)
        output_vol = fetch_labelmap_voxels(dvid_address,
                                           repo_uuid,
                                           partial_output_segmentation_name,
                                           scaled_box,
                                           scale=scale)
        assert (output_vol == expected_vols[scale]).all(), \
            f"Written vol does not match expected for scale {scale}"

    # Any labels NOT in the partial vol had to be written.
    full_vol = expected_vols[0]
    written_labels = pd.unique(full_vol[full_vol != partial_vol])
    assert len(written_labels) > 0, \
        "This test data was chosen poorly -- there's no difference between the partial and full labels!"

    svs = pd.read_csv(f'{execution_dir}/recorded-labels.csv')['sv']
    assert set(svs) == set(written_labels)
def test_labelmapcopy(setup_dvid_segmentation_input, disable_auto_retry):
    """
    Run labelmapcopy into a fresh output instance.  Verify every scale of
    the output, and verify that the recorded labels are exactly the labels
    present in the scale-0 expected volume.
    """
    (template_dir, _config, expected_vols, partial_vol, dvid_address, repo_uuid,
     output_segmentation_name, _partial_output_segmentation_name) = setup_dvid_segmentation_input

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    output_box_zyx = np.array(final_config['output']['geometry']['bounding-box'])[:, ::-1]
    max_scale = final_config['labelmapcopy']['max-scale']

    for scale in range(1 + max_scale):
        scaled_box = output_box_zyx // (2**scale)
        output_vol = fetch_labelmap_voxels(dvid_address,
                                           repo_uuid,
                                           output_segmentation_name,
                                           scaled_box,
                                           scale=scale)
        assert (output_vol == expected_vols[scale]).all(), \
            f"Written vol does not match expected for scale {scale}"

    svs = pd.read_csv(f'{execution_dir}/recorded-labels.csv')['sv']
    expected_labels = np.unique(expected_vols[0].reshape(-1))
    assert set(svs) == set(expected_labels)
def download(bounding_box_zyx, output_path):
    """
    Download grayscale and segmentation data for a bounding box into an HDF5 file.

    The file at ``output_path`` is (re)created with two datasets, 'grayscale'
    (uint8) and 'segmentation' (uint64, gzip-compressed), each shaped like
    the bounding box.  Data is fetched in blocks of up to 256^3 voxels and
    written at the block's position relative to the box's first corner.

    Args:
        bounding_box_zyx:
            Pair of corners [start, stop]; the corners must support
            element-wise subtraction (e.g. a numpy array).
        output_path:
            Path of the HDF5 file to write.
    """
    origin = bounding_box_zyx[0]
    shape = bounding_box_zyx[1] - origin
    block_shape = (256, 256, 256)

    with h5py.File(output_path, 'w') as f:
        gray_dset = f.create_dataset('grayscale',
                                     shape=shape,
                                     dtype=np.uint8,
                                     chunks=True)
        seg_dset = f.create_dataset('segmentation',
                                    shape=shape,
                                    dtype=np.uint64,
                                    chunks=True,
                                    compression='gzip')

        print("Downloading grayscale...")
        gray_boxes = boxes_from_grid(bounding_box_zyx, block_shape, clipped=True)
        for block_box in tqdm(gray_boxes):
            relative_box = block_box - origin
            overwrite_subvol(gray_dset, relative_box, fetch_raw(*GRAYSCALE, block_box))
        print("")

        print("Downloading segmentation...")
        # The block list is rebuilt rather than reused from the grayscale pass.
        seg_boxes = boxes_from_grid(bounding_box_zyx, block_shape, clipped=True)
        for block_box in tqdm(seg_boxes):
            relative_box = block_box - origin
            overwrite_subvol(seg_dset, relative_box, fetch_labelmap_voxels(*SEGMENTATION, block_box))
        print("")

        print("DONE")
def test_connectedcomponents_dvid_subset_labels(setup_connectedcomponents_dvid, disable_auto_retry):
    """
    Run the connected components workflow on a subset of labels and verify
    the relabeled output volume, the relabeled-objects CSV, and the
    exported block statistics.
    """
    (template_dir, _config, input_vol, dvid_address,
     repo_uuid, output_segmentation_name) = setup_connectedcomponents_dvid

    execution_dir, workflow = launch_flow(template_dir, 1)
    _final_config = workflow.config

    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       [(0, 0, 0), input_vol.shape],
                                       supervoxels=True)
    assert output_vol.shape == input_vol.shape

    final_labels = pd.unique(output_vol.reshape(-1))

    # Never change label 0
    assert 0 in final_labels
    assert ((input_vol == 0) == (output_vol == 0)).all()

    # Single-component objects
    assert 2 in final_labels
    assert 4 in final_labels
    assert ((input_vol == 2) == (output_vol == 2)).all()
    assert ((input_vol == 4) == (output_vol == 4)).all()

    # Omitted from analysis; left unsplit
    assert 3 in final_labels
    assert ((input_vol == 3) == (output_vol == 3)).all()

    # Split objects
    assert 1 not in final_labels

    max_input_label = None
    for corner in map(np.array, ndrange((0, 0, 0), (1, 8, 8), (1, 4, 4))):
        box = (corner, corner + 4)
        input_block = extract_subvol(input_vol, box)
        output_block = extract_subvol(output_vol, box)

        for orig_label in [1]:
            if orig_label not in input_block:
                continue

            positions = (input_block == orig_label)
            relabeled = output_block[positions]

            assert (input_block[positions] != relabeled).all(), \
                f"original label {orig_label} was not split!"
            assert (relabeled > input_vol.max()).all(), \
                f"original label {orig_label} was not split!"

            # This block-based assertion is not generally true for all possible input,
            # but our test data blocks are set up so that this is a valid check.
            # (No block happens to contain more than one final CC that came
            # from the same original label.)
            assert (relabeled == relabeled[0]).all(), \
                f"original label {orig_label} ended up over-segmentated"

    #
    # Check CSV output
    #
    df = pd.read_csv(f'{execution_dir}/relabeled-objects.csv')

    # Only label 1 was split (into 3 pieces); in particular, 3 was not touched.
    expected_row_counts = ((0, 0), (1, 3), (2, 0), (3, 0), (4, 0))
    for orig_label, row_count in expected_row_counts:
        assert len(df.query(f'orig_label == {orig_label}')) == row_count

    assert not df['final_label'].duplicated().any()
    assert (df['final_label'] > input_vol.max()).all()

    #
    # Check block stats
    #
    with h5py.File(f'{execution_dir}/block-statistics.h5', 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    for row in stats_df.itertuples():
        corner = np.array((row.z, row.y, row.x))
        block = extract_subvol(output_vol, np.array([corner, corner + 64]))
        assert (block == row.segment_id).sum() == row.count
def test_copysegmentation_from_hdf5_to_dvid_multiscale(
        setup_hdf5_segmentation_input, disable_auto_retry):
    """
    Copy an HDF5 segmentation into DVID with a 2-level pyramid, and verify
    scales 0, 1 and 2 against locally downsampled copies of the input.
    """
    template_dir, config, volume, dvid_address, repo_uuid, _ = setup_hdf5_segmentation_input

    # Modify the config from above to compute pyramid scales,
    # and choose a bounding box that is aligned with the bricks even at scale 2
    # (just for easier testing).
    box_zyx = [[0, 0, 0], [256, 256, 256]]
    config["input"]["geometry"]["bounding-box"] = box_zyx
    config["copysegmentation"]["pyramid-depth"] = 2

    # Change the segmentation name so it doesn't conflict with earlier tests
    output_segmentation_name = 'segmentation-output-from-hdf5-multiscale'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    _execution_dir, _workflow = launch_flow(template_dir, 1)

    box_zyx = np.array(box_zyx)

    # Build the expected pyramid: each level is downsampled from the previous one.
    expected_vols = [volume[box_to_slicing(*box_zyx)]]
    for _ in (1, 2):
        expected_vols.append(downsample_labels(expected_vols[-1], 2, True))

    # Fetch every scale first, then compare.
    output_vols = [
        fetch_labelmap_voxels(dvid_address,
                              repo_uuid,
                              output_segmentation_name,
                              box_zyx // (2**scale),
                              scale=scale)
        for scale in (0, 1, 2)
    ]

    assert (output_vols[0] == expected_vols[0]).all(), \
        "Scale 0: Written vol does not match expected"
    assert (output_vols[1] == expected_vols[1]).all(), \
        "Scale 1: Written vol does not match expected"
    assert (output_vols[2] == expected_vols[2]).all(), \
        "Scale 2: Written vol does not match expected"
def mitos_in_neighborhood(mito_roi_source, neighborhood_origin_xyz, neighborhood_id, mito_res_scale_diff):
    """
    Determine how many non-trivial mito objects overlap with the given
    "neighborhood object", and return a table of their IDs and sizes.

    1. Download the neighborhood mask for the given neighborhood_id.
    2. Erode the neighborhood mask by 1 px (see note in the comment above).
    3. Fetch the mito segmentation for the voxels within the neighborhood.
    4. Fetch (from dvid) the sizes of each mito object.
    5. Filter out the mitos that are smaller than the minimum size
       that is actually used in our published mito analyses.
    6. Just for additional info, determine how many connected components
       are formed by the mito objects.
    7. Return the mito IDs, sizes, and CC info as a DataFrame.
    """
    # The neighborhood segmentation source is given as <protocol>://<server>/<uuid>/<instance>
    protocol, url = mito_roi_source.split('://')[-2:]
    host, uuid, instance = url.split('/')
    server = f'{protocol}://{host}'

    origin_zyx = np.array(neighborhood_origin_xyz[::-1])
    box = [origin_zyx - RADIUS, 1 + origin_zyx + RADIUS]

    # Align box to the analysis scale before scaling it.
    scale_factor = (2**ANALYSIS_SCALE)
    box = round_box(box, scale_factor)

    # Scale box
    box //= scale_factor

    neighborhood_seg = fetch_labelmap_voxels(server, uuid, instance, box, scale=ANALYSIS_SCALE)
    neighborhood_mask = (neighborhood_seg == neighborhood_id)

    # This is equivalent to a 1-px erosion
    # See note above for why we do this.
    neighborhood_mask ^= binary_edge_mask(neighborhood_mask, 'inner')

    mito_scale = ANALYSIS_SCALE - mito_res_scale_diff
    mito_seg = fetch_labelmap_voxels(*MITO_SEG, box, supervoxels=True, scale=mito_scale)
    assert neighborhood_mask.shape == mito_seg.shape

    # Zero out everything outside the (eroded) neighborhood.
    mito_seg = np.where(neighborhood_mask, mito_seg, 0)

    # The mito segmentation includes little scraps and slivers
    # that were filtered out of the "real" mito set.
    # Filter those scraps out of our results here.
    mito_ids = set(pd.unique(mito_seg.ravel())) - {0}
    mito_sizes = fetch_sizes(*MITO_SEG, [*mito_ids], supervoxels=True)
    mito_sizes = mito_sizes.rename_axis('mito')
    mito_sizes *= (2**mito_res_scale_diff)**3

    # This is our main result: mito IDs (and their sizes)
    big_enough = (mito_sizes >= MIN_MITO_SIZE)
    mito_sizes = mito_sizes.loc[big_enough]

    # Just for extra info, group the mitos we found into connected components.
    mito_mask = mask_for_labels(mito_seg, mito_sizes.index)
    mito_box = compute_nonzero_box(mito_mask)
    mito_mask = extract_subvol(mito_mask, mito_box)
    mito_seg = extract_subvol(mito_seg, mito_box)
    mito_cc = label(mito_mask, connectivity=1)

    column_names = {
        'left': 'mito',
        'right': 'cc',
        'voxel_count': 'cc_size'
    }
    ct = contingency_table(mito_seg, mito_cc).reset_index()
    ct = ct.rename(columns=column_names).set_index('mito')

    mito_sizes = pd.DataFrame(mito_sizes).merge(ct, 'left', left_index=True, right_index=True)
    return mito_sizes
def test_copysegmentation_from_brainmaps_to_dvid(setup_dvid_repo):
    """
    Fetch a tiny subvolume from a Brainmaps source.

    To run this test, you must have valid application credentials loaded in your bash environment,
    For example:

        export GOOGLE_APPLICATION_CREDENTIALS=/Users/bergs/dvid-em-28a78d822e11.json
        PYTHONPATH=. pytest -s --tb=native --pyargs tests.workflows.test_copysegmentation -k copysegmentation_from_brainmaps_to_dvid
    """
    dvid_address, repo_uuid = setup_dvid_repo
    output_segmentation_name = 'segmentation-output-from-brainmaps'

    box_start = np.array([8000, 23296, 12800])
    box_xyz = np.array([box_start, box_start + 256])
    box_zyx = box_xyz[:, ::-1]

    config_text = textwrap.dedent(f"""\
        workflow-name: copysegmentation
        cluster-type: {CLUSTER_TYPE}

        input:
          brainmaps:
            project: '274750196357'
            dataset: hemibrain
            volume-id: base20180227_8nm_watershed_fixed
            change-stack-id: ''
            # Uh-oh, apparently this change stack is no longer available in BrainMaps??
            #change-stack-id: ffn_agglo_20180312_32_16_8_freeze10

          geometry:
            bounding-box: {box_xyz.tolist()}
            message-block-shape: [6400, 64, 64]
            block-width: 64
            available-scales: [0,1,2]

        output:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {output_segmentation_name}
            supervoxels: true
            disable-indexing: true
            create-if-necessary: true

          geometry: {{}} # Auto-set from input

        copysegmentation:
          pyramid-depth: 2
          slab-depth: 128
          download-pre-downsampled: true
        """)

    template_dir = tempfile.mkdtemp(suffix="copysegmentation-from-brainmaps")
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    # Parse the same text so the test can read back the input settings.
    yaml = YAML()
    with StringIO(config_text) as f:
        config = yaml.load(f)

    _execution_dir, _workflow = launch_flow(template_dir, 1)

    # Fetch the data via a simpler method, and verify that it matches what we stored in DVID.
    from flyemflows.volumes.brainmaps_volume import BrainMapsVolume
    bmv = BrainMapsVolume.from_flyem_source_info(config['input']['brainmaps'])

    for scale in (0, 1, 2):
        scaled_box = box_zyx // 2**scale

        expected_vol = bmv.get_subvolume(scaled_box, scale=scale)
        assert expected_vol.any(), \
            f"Something is wrong with this test: The brainmaps volume at scale {scale} is all zeros!"

        output_vol = fetch_labelmap_voxels(dvid_address,
                                           repo_uuid,
                                           output_segmentation_name,
                                           scaled_box,
                                           scale=scale)
        assert (output_vol == expected_vol).all()
def test_masksegmentation_basic(setup_dvid_segmentation_input, invert_mask, roi_dilation, disable_auto_retry):
    """
    Run the masksegmentation workflow and verify its results.

    Checks, in order:
      - the scale-0 output equals the input with the ROI voxels zeroed;
      - the downsampled scales match (exactly up to scale 5; only the
        zero/non-zero pattern beyond that);
      - the exported erased-block statistics match those computed locally;
      - after erase_from_labelindexes(), the deleted supervoxels, the
        remaining per-supervoxel counts, and the mappings are all correct.

    When ``roi_dilation`` is non-zero, the test stops before any of the
    voxel comparisons (see the FIXME below).
    """
    (template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5,
     input_segmentation_name, output_segmentation_name) = setup_dvid_segmentation_input

    if invert_mask:
        roi_mask_s5 = ~roi_mask_s5

    config["masksegmentation"]["invert-mask"] = invert_mask
    config["masksegmentation"]["dilate-roi"] = roi_dilation

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    expected_vol = extract_subvol(volume.copy(), input_box_zyx)
    expected_vol[roi_mask] = 0

    output_box_xyz = np.array(final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    # Create a copy of the volume that contains only the voxels we removed
    erased_vol = volume.copy()
    erased_vol[~roi_mask] = 0

    if EXPORT_DEBUG_FILES:
        original_vol = fetch_labelmap_voxels(dvid_address,
                                             repo_uuid,
                                             input_segmentation_name,
                                             output_box_zyx,
                                             scale=0,
                                             supervoxels=True)
        original_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                   repo_uuid,
                                                   input_segmentation_name,
                                                   output_box_zyx,
                                                   scale=0)
        output_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                 repo_uuid,
                                                 output_segmentation_name,
                                                 output_box_zyx,
                                                 scale=0)

        np.save('/tmp/original-svs.npy', original_vol)
        np.save('/tmp/original-agglo.npy', original_agglo_vol)
        np.save('/tmp/output.npy', output_vol)
        np.save('/tmp/output-agglo.npy', output_agglo_vol)
        np.save('/tmp/expected.npy', expected_vol)
        np.save('/tmp/erased.npy', erased_vol)

        shutil.copyfile(f'{execution_dir}/roi-mask.h5', '/tmp/roi-mask.h5')
        if roi_dilation:
            shutil.copyfile(f'{execution_dir}/dilated-roi-mask.h5', '/tmp/dilated-roi-mask.h5')
        if invert_mask:
            shutil.copyfile(f'{execution_dir}/segmentation-mask.h5', '/tmp/segmentation-mask.h5')
        shutil.copyfile(f'{execution_dir}/final-mask.h5', '/tmp/final-mask.h5')

    if roi_dilation > 0:
        # FIXME: We don't yet verify voxel-accuracy of ROI dilation.
        return

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"

    scaled_expected_vol = expected_vol
    for scale in range(1, 1 + MAX_SCALE):
        scaled_expected_vol = downsample(scaled_expected_vol, 2, 'labels-numba')
        scaled_output_vol = fetch_labelmap_voxels(dvid_address,
                                                  repo_uuid,
                                                  output_segmentation_name,
                                                  output_box_zyx // 2**scale,
                                                  scale=scale,
                                                  supervoxels=True)

        if EXPORT_DEBUG_FILES:
            np.save(f'/tmp/expected-{scale}.npy', scaled_expected_vol)
            np.save(f'/tmp/output-{scale}.npy', scaled_output_vol)

        if scale <= 5:
            assert (scaled_output_vol == scaled_expected_vol).all(), \
                f"Written vol does not match expected at scale {scale}"
        else:
            # For scale 6 and 7, some blocks are not even changed,
            # but that means we would be comparing DVID's label
            # downsampling method to our method ('labels-numba').
            # The two don't necessarily give identical results in the case of 'ties',
            # so we'll just verify that the nonzero voxels match, at least.
            assert ((scaled_output_vol == 0) == (scaled_expected_vol == 0)).all(), \
                f"Written vol does not match expected at scale {scale}"

    block_stats_path = f'{execution_dir}/erased-block-statistics.h5'
    with h5py.File(block_stats_path, 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    #
    # Check the exported block statistics
    #
    stats_cols = [*BLOCK_STATS_DTYPES.keys()]
    assert stats_df.columns.tolist() == stats_cols

    # drop=True: the pre-sort row positions must not become a data column,
    # otherwise the comparison below would also depend on the (irrelevant)
    # order in which the two tables were originally produced.
    stats_df = stats_df.sort_values(stats_cols).reset_index(drop=True)

    expected_stats_df = block_stats_for_volume((64, 64, 64), erased_vol, input_box_zyx)
    expected_stats_df = expected_stats_df.sort_values(stats_cols).reset_index(drop=True)

    assert len(stats_df) == len(expected_stats_df)
    assert (stats_df == expected_stats_df).all().all()

    #
    # Try updating the labelindexes
    #
    src_info = (dvid_address, repo_uuid, input_segmentation_name)
    dest_info = (dvid_address, repo_uuid, output_segmentation_name)
    with switch_cwd(execution_dir):
        erase_from_labelindexes(src_info, dest_info, block_stats_path, batch_size=10, threads=4)

    # Verify deleted supervoxels
    assert os.path.exists(f'{execution_dir}/deleted-supervoxels.csv')
    deleted_svs = set(pd.read_csv(f'{execution_dir}/deleted-supervoxels.csv')['sv'])

    orig_svs = {*pd.unique(volume.reshape(-1))} - {0}
    remaining_svs = {*pd.unique(expected_vol.reshape(-1))} - {0}
    expected_deleted_svs = orig_svs - remaining_svs
    assert deleted_svs == expected_deleted_svs

    # Verify remaining sizes
    expected_sv_counts = (pd.Series(expected_vol.reshape(-1), name='sv')
                          .value_counts()
                          .drop(0)
                          .sort_index()
                          .rename('count'))

    index_dfs = []
    for body in np.unique(fetch_mapping(*dest_info, remaining_svs)):
        index_df = fetch_labelindex(*dest_info, body, format='pandas').blocks
        index_dfs.append(index_df)

    sv_counts = (pd.concat(index_dfs, ignore_index=True)[['sv', 'count']]
                 .groupby('sv')['count']
                 .sum()
                 .sort_index())

    assert set(sv_counts.index.values) == set(expected_sv_counts.index.values)
    assert (sv_counts == expected_sv_counts).all(), \
        pd.DataFrame({'stored_count': sv_counts,
                      'expected_count': expected_sv_counts}).query('stored_count != expected_count')

    # Verify mapping
    # Deleted supervoxels exist in the mapping, but they map to 0.
    assert (fetch_mapping(*dest_info, [*deleted_svs]) == 0).all()

    # Remaining supervoxels still map to their original bodies
    assert (fetch_mapping(*dest_info, [*remaining_svs])
            == fetch_mapping(*src_info, [*remaining_svs])).all()
def setup_dvid_segmentation_input(setup_dvid_repo, random_segmentation):
    """
    Prepare DVID instances and a workflow template for the labelmapcopy tests.

    - Creates the input and 'partial output' labelmap instances.  If DVID
      reports that they already exist, the uploads below are skipped.
    - Builds the expected volume at every scale by repeatedly downsampling
      ``random_segmentation``, and uploads each scale to the input instance.
    - Fills the partial output instance with raw blocks copied from the
      input, then overwrites one 64^3 block at (0, 128, 64) with labels
      that do not come from the input.
    - Writes a labelmapcopy ``workflow.yaml`` into a new temporary directory.

    Returns:
        (template_dir, config, expected_vols, partial_vol, dvid_address,
         repo_uuid, output_segmentation_name, partial_output_segmentation_name)
        where ``expected_vols`` maps scale -> volume and ``partial_vol`` is
        the scale-0 content of the partial output instance.
    """
    dvid_address, repo_uuid = setup_dvid_repo

    input_segmentation_name = 'labelmapcopy-segmentation-input'
    output_segmentation_name = 'labelmapcopy-segmentation-output'
    partial_output_segmentation_name = 'labelmapcopy-segmentation-partial-output'

    max_scale = 3
    already_exists = False

    try:
        create_labelmap_instance(dvid_address, repo_uuid, input_segmentation_name, max_scale=max_scale)
        create_labelmap_instance(dvid_address, repo_uuid, partial_output_segmentation_name, max_scale=max_scale)
    except HTTPError as ex:
        if ex.response is not None and 'already exists' in ex.response.content.decode('utf-8'):
            already_exists = True
        else:
            # Any other failure means the instances are not usable;
            # don't carry on as if they had just been created.
            raise

    expected_vols = {}
    for scale in range(1 + max_scale):
        if scale == 0:
            scaled_vol = random_segmentation
        else:
            scaled_vol = downsample(scaled_vol, 2, 'labels-numba')
        expected_vols[scale] = scaled_vol

        if not already_exists:
            # Pad out to 64-aligned extents before posting.
            scaled_box = round_box([(0, 0, 0), scaled_vol.shape], 64, 'out')
            aligned_vol = np.zeros(scaled_box[1], np.uint64)
            overwrite_subvol(aligned_vol, [(0, 0, 0), scaled_vol.shape], scaled_vol)
            post_labelmap_voxels(dvid_address, repo_uuid, input_segmentation_name,
                                 (0, 0, 0), aligned_vol, scale=scale)

    if not already_exists:
        # Create a 'partial' output volume that is the same (bitwise) as the input except for some blocks.
        base_box = np.array([(0, 0, 0), random_segmentation.shape])
        base_box[1, -1] = 192

        for scale in range(1 + max_scale):
            # Always derive the box from the scale-0 box, so the division
            # is applied exactly once per scale.
            scaled_box = round_box(base_box // (2**scale), 64, 'out')
            raw_blocks = fetch_labelmap_voxels(dvid_address,
                                               repo_uuid,
                                               input_segmentation_name,
                                               scaled_box,
                                               scale,
                                               supervoxels=True,
                                               format='raw-response')
            post_labelmap_blocks(dvid_address, repo_uuid, partial_output_segmentation_name,
                                 [(0, 0, 0)], raw_blocks, scale, is_raw=True)

        # Overwrite one block with labels drawn from a range of its own.
        block = np.random.randint(1_000_000, 1_000_010, size=(64, 64, 64), dtype=np.uint64)
        post_labelmap_voxels(dvid_address, repo_uuid, partial_output_segmentation_name,
                             (0, 128, 64), block, 0, downres=True)

    partial_vol = fetch_labelmap_voxels(dvid_address,
                                        repo_uuid,
                                        partial_output_segmentation_name,
                                        [(0, 0, 0), random_segmentation.shape],
                                        supervoxels=True)

    template_dir = tempfile.mkdtemp(suffix="labelmapcopy-template")

    config_text = textwrap.dedent(f"""\
        workflow-name: labelmapcopy
        cluster-type: {CLUSTER_TYPE}

        input:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {input_segmentation_name}
            supervoxels: true

          geometry:
            message-block-shape: [512,64,64]
            available-scales: [0,1,2,3]

        output:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {output_segmentation_name}
            supervoxels: true
            disable-indexing: true
            create-if-necessary: true

        labelmapcopy:
          slab-shape: [512,128,64]
          dont-overwrite-identical-blocks: true
        """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    yaml = YAML()
    with StringIO(config_text) as f:
        config = yaml.load(f)

    return (template_dir, config, expected_vols, partial_vol, dvid_address, repo_uuid,
            output_segmentation_name, partial_output_segmentation_name)
def neuron_mito_stats(seg_src, mito_cc_src, mito_class_src, body_id, scale=0, min_size=0, search_radius=50, processes=1):
    """
    Compute per-mitochondrion statistics (voxel counts per class, total size,
    and a centroid) for the mitochondria that overlap a single body.

    The body's coarse sparsevol determines which 64^3 blocks are visited.
    Each block is processed independently (see ``_process_block()``), and the
    blockwise tables are then combined.  Block centroids are combined as a
    mean weighted by each block's voxel count.

    If a centroid does not fall on a voxel of its own mitochondrion, the
    nearest voxel of that mitochondrion within ``search_radius`` is used
    instead ('adjusted').  If no such voxel is found, the original centroid
    is kept and marked as 'error'.

    Args:
        seg_src, mito_cc_src, mito_class_src:
            Sequences whose first two elements are passed to ``resolve_ref()``
            and which are unpacked as the leading arguments of the DVID fetch
            functions -- presumably ``(server, uuid, instance)``.
            The caller's sequences are not modified.
        body_id:
            The body to analyze.
        scale:
            The scale at which voxels are fetched and at which
            coordinates/sizes are reported.
        min_size:
            Mitochondria with fewer total voxels than this are dropped.
        search_radius:
            Half-width of the cube searched when looking for a pseudo-centroid.
        processes:
            Passed to ``compute_parallel()``.

    Returns:
        DataFrame indexed by ``mito_id``, with columns
        ``['body', 'x', 'y', 'z', 'total_size', *class_cols, 'centroid_type']``.

    Raises:
        ValueError:
            From ``pd.concat()`` if no visited block contains any
            matching mitochondria voxels.
    """
    from functools import partial
    import numpy as np
    import pandas as pd
    from neuclease.util import compute_parallel
    from neuclease.dvid import fetch_sparsevol_coarse, resolve_ref, fetch_labels, fetch_labelmap_voxels

    # Work on copies: we overwrite the uuid below, and we must not
    # modify the caller's sequences (which might be tuples anyway).
    seg_src = [*seg_src]
    mito_cc_src = [*mito_cc_src]
    mito_class_src = [*mito_class_src]

    seg_src[1] = resolve_ref(*seg_src[:2])
    mito_cc_src[1] = resolve_ref(*mito_cc_src[:2])
    mito_class_src[1] = resolve_ref(*mito_class_src[:2])

    # Fetch block coords; re-scale for the analysis scale
    block_coords = (2**6) * fetch_sparsevol_coarse(*seg_src, body_id)
    bc_df = pd.DataFrame(block_coords, columns=[*'zyx'])
    bc_df[[*'zyx']] //= 2**scale

    # After rescaling, the corners are multiples of (64 / 2**scale), not 64.
    # Since each block is fetched as a 64^3 box, unaligned corners would yield
    # overlapping boxes and therefore double-counted voxels.
    # Snap to the 64-voxel grid so drop_duplicates() leaves disjoint blocks.
    bc_df[[*'zyx']] = (bc_df[[*'zyx']] // 64) * 64
    block_coords = bc_df.drop_duplicates().values

    #
    # Blockwise stats
    #
    block_fn = partial(_process_block, seg_src, mito_cc_src, mito_class_src, body_id, scale)
    block_tables = compute_parallel(block_fn, block_coords, processes=processes)

    # Blocks without any voxels of interest yield None.
    block_tables = [t for t in block_tables if t is not None]

    #
    # Combine stats
    #
    full_table = pd.concat(block_tables, sort=True).fillna(0)
    class_cols = [c for c in full_table.columns if c.startswith('class')]
    full_table = full_table.astype({c: np.int32 for c in class_cols})

    # Weight each block centroid by the block's voxel count before taking the mean
    full_table[[*'zyx']] *= full_table[['total_size']].values
    stats_df = full_table.groupby('mito_id').sum()
    stats_df[[*'zyx']] /= stats_df[['total_size']].values

    # Drop tiny mitos
    stats_df = stats_df.query("total_size >= @min_size").copy()

    # Assume all centroids are 'exact' by default (overwritten below if necessary)
    stats_df['centroid_type'] = 'exact'

    # Include a column for 'body' even though it's the same on every row,
    # just as a convenience for concatenating these results with the results
    # from other bodies if desired.
    stats_df['body'] = body_id

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    stats_df = stats_df[['body', *'xyz', 'total_size', *class_cols, 'centroid_type']]

    #
    # Check for centroids that fall outside of the mito,
    # and adjust them if necessary.
    #
    centroid_mitos = fetch_labels(*mito_cc_src, stats_df[[*'zyx']].values, scale=scale)
    mismatches = stats_df.index[(stats_df.index != centroid_mitos)]

    if len(mismatches) == 0:
        return stats_df

    logger.warning("Some mitochondria centroids do not lie within the mitochondria itself. "
                   "Searching for pseudo-centroids.")

    # construct field of distances from the central voxel
    sr = search_radius
    cz, cy, cx = np.ogrid[-sr:sr+1, -sr:sr+1, -sr:sr+1]
    distances = np.sqrt(cz**2 + cy**2 + cx**2)

    pseudo_centroids = []
    error_mito_ids = []
    for row in stats_df.loc[mismatches].itertuples():
        mito_id = row.Index
        centroid = np.array((row.z, row.y, row.x))
        box = (centroid - sr, 1 + centroid + sr)
        mito_mask = (mito_id == fetch_labelmap_voxels(*mito_cc_src, box, scale))

        if not mito_mask.any():
            # Nothing within reach: keep the original centroid, but flag it.
            pseudo_centroids.append((row.z, row.y, row.x))
            error_mito_ids.append(mito_id)
            continue

        # Find minimum distance
        masked_distances = np.where(mito_mask, distances, np.inf)
        new_centroid = np.unravel_index(np.argmin(masked_distances), masked_distances.shape)

        # Convert from box-local to global coordinates
        new_centroid = np.array(new_centroid) + centroid - sr
        pseudo_centroids.append(new_centroid)

    stats_df.loc[mismatches, ['z', 'y', 'x']] = np.array(pseudo_centroids, dtype=np.int32)
    stats_df.loc[mismatches, 'centroid_type'] = 'adjusted'
    stats_df.loc[error_mito_ids, 'centroid_type'] = 'error'

    if error_mito_ids:
        logger.warning("Some mitochondria pseudo-centroids could not be found.")

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    return stats_df
def get_subvolume(self, box_zyx, scale=0):
    """
    Fetch the voxels of ``box_zyx`` at the given scale from DVID.

    Label instances ('labelarray', 'labelmap') are fetched as a lazy proxy
    while holding a resource manager token, and the proxy is evaluated
    (inflated) only after the token has been released.  Requests whose
    64-aligned box would reach 2**31 bytes (at 8 bytes per voxel) are
    fetched chunkwise.  All other instance types are fetched with
    ``fetch_raw()``.

    For instance types ending in 'blk', scales above 0 are read from a
    separate instance named ``<instance>_<scale>``.

    On failure, the traceback is logged, a message naming the host and
    the requested box is appended to the exception's args, and the
    original exception is re-raised.
    """
    box_shape = box_zyx[1] - box_zyx[0]
    req_bytes = self._dtype_nbytes * np.prod(box_shape)

    instance_name = self._instance_name
    if self._instance_type.endswith('blk') and scale > 0:
        # Grayscale multi-scale is achieved via multiple instances
        instance_name = f"{instance_name}_{scale}"
        scale = 0

    try:
        if self._instance_type not in ('labelarray', 'labelmap'):
            with self._resource_manager_client.access_context(self._server, True, 1, req_bytes):
                return fetch_raw(self._server, self._uuid, instance_name, box_zyx, self._throttle)

        # Hold the resource token only while downloading the compressed data...
        with self._resource_manager_client.access_context(self._server, True, 1, req_bytes):
            fetch_args = (self._server, self._uuid, instance_name, box_zyx, scale, self._throttle)
            fetch_kwargs = {'supervoxels': self.supervoxels, 'format': 'lazy-array'}

            aligned_box = round_box(box_zyx, 64, 'out')
            aligned_voxels = np.prod(aligned_box[1] - aligned_box[0])

            if 8 * aligned_voxels < 2**31:
                vol_proxy = fetch_labelmap_voxels(*fetch_args, **fetch_kwargs)
            else:
                # Too large for a single request.
                # Fetch it in chunks; the chunk shape is somewhat arbitrary.
                vol_proxy = fetch_labelmap_voxels_chunkwise(*fetch_args, **fetch_kwargs,
                                                            chunk_shape=(64, 128, 10240))

        # ...and inflate only once the token has been released.
        return vol_proxy()

    except Exception as ex:
        # Exception chaining ('raise ... from ex') isn't fully transmitted
        # to the driver in cluster scenarios, so the extra info is appended
        # to the exception that's already in flight instead.
        # It's logged here too, so it shows up in the worker log at least.
        # See: https://github.com/dask/dask/issues/4384
        import traceback
        logger.error(traceback.format_exc())

        host = socket.gethostname()
        msg = f"Host {host}: Failed to fetch subvolume: box_zyx = {box_zyx.tolist()}"
        ex.args += (msg,)
        raise
def _fetch_svs(master_seg, box):
    """
    Return the set of unique supervoxel IDs found within ``box``.

    ``master_seg`` is unpacked as the leading arguments of
    ``fetch_labelmap_voxels()``.
    """
    sv_vol = fetch_labelmap_voxels(*master_seg, box, supervoxels=True)
    flat_svs = sv_vol.reshape(-1)
    unique_svs = pd.unique(flat_svs)
    return set(unique_svs)
def autogen_points(input_seg, count, roi, body, tbars, use_skeleton, random_seed=None, minimum_distance=0):
    """
    Generate a list of points within the input segmentation, based on the given criteria.
    See the main help text below for details.

    Exactly one of the following strategies is used, in this order of precedence:

        1. ``tbars``:        the 'PreSyn' synapse points of ``body``
        2. ``use_skeleton``: the skeleton nodes of ``body``
        3. ``body``:         random voxels sampled from the body's sparsevol
        4. ``roi``:          random voxels sampled from the ROI (defined at scale 5)
        5. otherwise:        random voxels sampled from the non-zero portion of
                             the whole segmentation (inspected at scale 6)

    Args:
        input_seg:
            Sequence unpacked as the leading arguments of the DVID fetch functions.
            The first two elements are used on their own for annotation/ROI/key
            requests, and element [2] is used to name the skeleton instance --
            presumably (server, uuid, instance).
        count:
            Number of points to return.  Required unless ``tbars`` is given.
        roi:
            Optional ROI name used to filter (tbars, skeleton) or to define
            the sampling region.  Not supported in combination with plain
            body sampling.
        body:
            Body ID.  Required for ``tbars`` and ``use_skeleton``.
        tbars:
            If truthy, return tbar points.
        use_skeleton:
            If truthy, return skeleton node points.
        random_seed:
            Seed passed to ``default_rng()``.
        minimum_distance:
            If non-zero, points are passed through ``prune_close_pairs()``
            with this distance.

    Returns:
        DataFrame of points.  The tbar branch, the random-sampling branches, and
        the ROI-filtered skeleton branch return only the columns ['z', 'y', 'x'].
        The unfiltered skeleton branch returns every column produced by
        ``swc_to_dataframe()``.

    Raises:
        SystemExit: (via ``sys.exit()``) if the arguments are inconsistent.
        NotImplementedError: if both ``body`` and ``roi`` are given for plain body sampling.
    """
    #
    # Validate the argument combination
    #
    if tbars and not body:
        sys.exit("If you want to auto-generate tbar points, please specify a body.")

    if not tbars and not count:
        sys.exit("You must supply a --count unless you are generating all tbars of a body.")

    if use_skeleton:
        if not body:
            sys.exit("You must supply a body ID if you want to use a skeleton.")
        if tbars:
            sys.exit("You can't select both tbar points and skeleton points. Pick one or the other.")

        # NOTE(review): At this point tbars is falsy, and the --count check above
        #               has already exited if count was also falsy.  So both of the
        #               'not count' checks below look unreachable -- confirm.
        if not count and minimum_distance > 0:
            sys.exit("You must supply a --count if you want skeleton point samples to respect the minimum distance.")
        if not count and not roi and minimum_distance == 0:
            logger.warning("You are using all nodes of a skeleton without any ROI filter! Is that what you meant?")

    # A single generator is shared by all sampling/pruning steps below,
    # so the order of those steps determines the result for a given seed.
    rng = default_rng(random_seed)

    if tbars:
        logger.info(f"Fetching synapses for body {body}")
        syn_df = fetch_annotation_label(*input_seg[:2], 'synapses', body, format='pandas')
        tbars = syn_df.query('kind == "PreSyn"')[[*'zyx']]

        if roi:
            logger.info(f"Filtering tbars for roi {roi}")
            # The return value is discarded, and a 'roi' column is queried right after,
            # so this presumably adds that column in-place -- TODO confirm.
            determine_point_rois(*input_seg[:2], [roi], tbars)
            tbars = tbars.query('roi == @roi')[[*'zyx']]

        if minimum_distance:
            logger.info(f"Pruning close points from {len(tbars)} total tbar points")
            # NOTE(review): This is the only branch that uses the return value of
            #               prune_close_pairs(); all others rely on the input
            #               being modified in place.  Confirm which is correct.
            tbars = prune_close_pairs(tbars, minimum_distance, rng)
            logger.info(f"After pruning, {len(tbars)} tbars remain.")

        if count:
            # Can't sample more points than we have.
            count = min(count, len(tbars))
            logger.info(f"Sampling {count} tbars")
            choices = rng.choice(tbars.index, size=count, replace=False)
            tbars = tbars.loc[choices]

        logger.info(f"Returning {len(tbars)} tbar points")
        return tbars

    elif use_skeleton:
        assert body
        logger.info(f"Fetching skeleton for body {body}")
        skeleton_instance = f'{input_seg[2]}_skeletons'
        swc = fetch_key(*input_seg[:2], skeleton_instance, f'{body}_swc')
        skeleton_df = swc_to_dataframe(swc)

        # Convert the node coordinates to integers.
        skeleton_df['x'] = skeleton_df['x'].astype(int)
        skeleton_df['y'] = skeleton_df['y'].astype(int)
        skeleton_df['z'] = skeleton_df['z'].astype(int)

        if roi:
            logger.info(f"Filtering skeleton for roi {roi}")
            determine_point_rois(*input_seg[:2], [roi], skeleton_df)
            skeleton_df = skeleton_df.query('roi == @roi')[[*'zyx']]

        if minimum_distance:
            assert count
            # Distance-pruning is very expensive on a huge number of close points.
            # If skeleton is large, first reduce the workload by pre-selecting a
            # random sample of skeleton points, and prune more from there.
            if len(skeleton_df) > 10_000:
                # FIXME: random_state can't use rng until I upgrade to pandas 1.0
                skeleton_df = skeleton_df.sample(min(4 * count, len(skeleton_df)), random_state=None)

            logger.info(f"Pruning close points from {len(skeleton_df)} skeleton points")
            # NOTE(review): return value ignored (unlike the tbar branch above).
            prune_close_pairs(skeleton_df, minimum_distance, rng)
            logger.info(f"After pruning, {len(skeleton_df)} skeleton points remain.")

        if count:
            count = min(count, len(skeleton_df))
            logger.info(f"Sampling {count} skeleton points")
            choices = rng.choice(skeleton_df.index, size=count, replace=False)
            skeleton_df = skeleton_df.loc[choices]

        logger.info(f"Returning {len(skeleton_df)} skeleton points")
        return skeleton_df

    elif body:
        assert count
        if roi:
            # TODO: intersect the ranges with the ROI.
            raise NotImplementedError("Sorry, I haven't yet implemented support for "
                                      "body+roi filtering. Pick one or the other, "
                                      "or ask Stuart to fix this.")

        logger.info(f"Fetching sparsevol for body {body}")
        ranges = fetch_sparsevol(*input_seg, body, format='ranges')
        logger.info("Sampling from sparsevol")

        if minimum_distance > 0:
            # Sample 4x extra so we still have enough after pruning.
            points = sample_points_from_ranges(ranges, 4 * count, rng)
        else:
            points = sample_points_from_ranges(ranges, count, rng)

        points = pd.DataFrame(points, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} body points")
            # NOTE(review): return value ignored (unlike the tbar branch above).
            prune_close_pairs(points, minimum_distance, rng)
            logger.info(f"After pruning, {len(points)} body points remain")

        # Trim the (possibly over-sampled) points back down to the requested count.
        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} body points")
        return points

    elif roi:
        assert count
        logger.info(f"Fetching roi {roi}")
        roi_ranges = fetch_roi_roi(*input_seg[:2], roi, format='ranges')
        logger.info("Sampling from ranges")

        if minimum_distance > 0:
            # Sample 4x extra so we can prune some out if necessary.
            points_s5 = sample_points_from_ranges(roi_ranges, 4 * count, rng)
        else:
            points_s5 = sample_points_from_ranges(roi_ranges, count, rng)

        # Each sampled scale-5 point corresponds to a 32^3 cube at scale 0.
        # Choose a random voxel within each cube.
        corners_s0 = points_s5 * (2**5)
        points_s0 = rng.integers(corners_s0, corners_s0 + (2**5))
        points = pd.DataFrame(points_s0, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} roi points")
            # NOTE(review): return value ignored (unlike the tbar branch above).
            prune_close_pairs(points, minimum_distance, rng)
            logger.info(f"After pruning, points from {len(points)} roi points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} roi points")
        return points

    else:
        # No body or roi specified, just sample from the whole non-zero segmentation area
        assert count
        logger.info("Sampling random points from entire input segmentation")
        logger.info("Fetching low-res input volume")

        # Bounding box of the volume, expressed in scale-6 coordinates.
        box_s6 = round_box(fetch_volume_box(*input_seg), 2**6, 'out') // 2**6
        seg_s6 = fetch_labelmap_voxels(*input_seg, box_s6, scale=6)
        mask_s6 = seg_s6.astype(bool)

        logger.info("Encoding segmentation as ranges")
        seg_ranges = runlength_encode_mask_to_ranges(mask_s6, box_s6)

        logger.info("Sampling from ranges")
        if minimum_distance > 0:
            # Sample 4x extra so we can prune some out if necessary.
            points_s6 = sample_points_from_ranges(seg_ranges, 4 * count, rng)
        else:
            points_s6 = sample_points_from_ranges(seg_ranges, count, rng)

        # Each sampled scale-6 point corresponds to a 64^3 cube at scale 0.
        # Choose a random voxel within each cube.
        corners_s0 = points_s6 * (2**6)
        points_s0 = rng.integers(corners_s0, corners_s0 + (2**6))
        points = pd.DataFrame(points_s0, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} segmentation points")
            # NOTE(review): return value ignored (unlike the tbar branch above).
            prune_close_pairs(points, minimum_distance, rng)
            logger.info(f"After pruning, points from {len(points)} segmentation points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} segmentation points")
        return points
def copy_box(box, scale):
    """
    Copy the raw labelmap blocks within ``box`` (rounded out to a multiple
    of 64) from the input service to the output service, at ``scale``.

    The behavior depends on settings taken from the enclosing scope:

    - ``record_only``: Nothing is written.  The input's labels are returned.
    - ``check_existing`` is false: All input blocks are written as-is.
    - ``check_existing`` is true: The output's blocks are read first, and only
      the input blocks which are missing from the output or which differ
      (bytewise) from the output are written.  Blocks that exist only in the
      output are left alone (and an error is logged for each).

    Returns:
        A list of labels, or ``[]`` unless labels are being recorded at scale 0.
        When everything was copied (or in record-only mode), it's the unique
        labels of the input blocks.  When only the changed blocks were copied,
        it's the labels found in the written input blocks that aren't found in
        the differing output blocks they replace.
    """
    assert not record_only or scale == 0

    def _labels_in(raw_blocks):
        # The set of all labels listed in the blocks of the given raw buffer.
        _spans, block_labels = parse_labelarray_data(raw_blocks, extract_labels=True)
        return set(chain(*block_labels.values()))

    box = round_box(box, 64, 'out')
    num_voxels = np.prod(box[1] - box[0])

    # Read input blocks
    with mgr_client.access_context(input_service.server, True, 1, num_voxels):
        input_raw_blocks = fetch_labelmap_voxels(*input_service.instance_triple, box, scale, False,
                                                 input_service.supervoxels, format='raw-response')

    # Record-only mode: just report the labels we saw.
    if scale == 0 and record_only:
        return list(_labels_in(input_raw_blocks))

    # Unconditional copy: send everything, without inspecting the output first.
    if not check_existing:
        with mgr_client.access_context(output_service.server, False, 1, num_voxels):
            post_labelmap_blocks(*output_service.instance_triple, None, input_raw_blocks, scale,
                                 output_service.enable_downres, output_service.disable_indexing,
                                 False, is_raw=True)

        if scale == 0 and record_labels:
            return list(_labels_in(input_raw_blocks))
        return []

    # Read from output
    with mgr_client.access_context(output_service.server, True, 1, num_voxels):
        output_raw_blocks = fetch_labelmap_voxels(*output_service.instance_triple, box, scale, False,
                                                  output_service.supervoxels, format='raw-response')

    # Identical buffers: nothing to parse, nothing to write.
    if input_raw_blocks == output_raw_blocks:
        return []

    input_spans = parse_labelarray_data(input_raw_blocks, extract_labels=False)
    output_spans = parse_labelarray_data(output_raw_blocks, extract_labels=False)

    # Compare block IDs
    input_ids = set(input_spans.keys())
    output_ids = set(output_spans.keys())

    missing_from_output = input_ids - output_ids
    missing_from_input = output_ids - input_ids
    common_ids = input_ids & output_ids

    for block_id in missing_from_input:
        # FIXME: We should pass this in the result so it can be logged in the client, not the worker.
        logger.error(f"Not overwriting block-id: {block_id}. It doesn't exist in the input.")

    # Collect the input blocks that need to be written:
    # First, those which don't exist in the output at all...
    blocks_to_write = []
    for block_id in missing_from_output:
        begin, end = input_spans[block_id]
        blocks_to_write.append((block_id, input_raw_blocks[begin:end]))

    # ...then those which exist in both, but with different content.
    # (Remember the old content, too, in case we're recording labels.)
    replaced_blocks = []
    for block_id in common_ids:
        in_begin, in_end = input_spans[block_id]
        out_begin, out_end = output_spans[block_id]

        new_bytes = input_raw_blocks[in_begin:in_end]
        old_bytes = output_raw_blocks[out_begin:out_end]

        if new_bytes != old_bytes:
            blocks_to_write.append((block_id, new_bytes))
            replaced_blocks.append((block_id, old_bytes))

    # Restore the order in which the blocks were received.
    blocks_to_write.sort(key=lambda id_and_buf: input_spans[id_and_buf[0]][0])

    # Post them
    filtered_input_buf = b''.join(buf for _, buf in blocks_to_write)
    with mgr_client.access_context(output_service.server, False, 1, num_voxels):
        post_labelmap_blocks(*output_service.instance_triple, None, filtered_input_buf, scale,
                             output_service.enable_downres, output_service.disable_indexing,
                             False, is_raw=True)

    if scale == 0 and record_labels:
        filtered_output_buf = b''.join(buf for _, buf in replaced_blocks)
        written_labels = _labels_in(filtered_input_buf)
        replaced_labels = _labels_in(filtered_output_buf)
        return list(written_labels - replaced_labels)

    return []