def test_dvid_volume_service_branch(setup_dvid_repo, disable_auto_retry):
    """
    A DVID volume config whose 'uuid' is given as 'master' should yield
    a service whose ``uuid`` attribute equals the test repo's uuid.
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-branch'

    # Only the shape of this volume is used (for the bounding box below).
    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    # The config's bounding box lists the shape in reversed axis order.
    bounding_box_stop = list(volume.shape[::-1])

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: master
          grayscale-name: {instance_name}
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {bounding_box_stop}]
    """)

    with StringIO(config_text) as stream:
        volume_config = YAML().load(stream)

    # Precondition: the instance is not in the repo before the service is built.
    existing_instances = fetch_repo_instances(server, uuid)
    assert instance_name not in existing_instances

    service = VolumeService.create_from_config(volume_config)
    assert service.uuid == uuid
def test_no_adapter(setup_hdf5_service):
    """
    When 'rescale-level' is left at its injected default (None), the factory
    should return the bare Hdf5VolumeService.
    """
    _, volume_config, _, _ = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    rescale_level = volume_config["adapters"]["rescale-level"]
    assert rescale_level is None

    service = VolumeService.create_from_config(volume_config)
    assert isinstance(service, Hdf5VolumeService), (
        "Should not create a ScaledVolumeService adapter at all if rescale-level is null"
    )
def parse_workflow_config(template_dir):
    """
    Load the workflow config (workflow.yaml) from ``template_dir`` and
    extract the input segmentation location and the subset-bodies CSV path.

    The following are enforced with assertions:
      - the workflow class is CreateMeshes
      - the input section has a 'dvid' entry
      - createmeshes/subset-bodies is exactly 'bodies-to-update.csv'

    Returns:
        (seg_instance, body_csv), where seg_instance is the tuple
        (server, uuid, instance).  The uuid is copied verbatim from the
        config rather than taken from the constructed service.
    """
    # Imported lazily so that --help stays fast
    from flyemflows.volumes import VolumeService
    from flyemflows.workflow import Workflow, CreateMeshes

    workflow_cls, workflow_config = Workflow.load_workflow_config(template_dir)
    assert workflow_cls == CreateMeshes

    input_config = workflow_config["input"]
    assert 'dvid' in input_config, \
        "This daemon only works with DVID labelmap sources"

    # Server and instance come from the underlying service;
    # the uuid it reports is deliberately discarded.
    service = VolumeService.create_from_config(input_config)
    base_service = service.original_volume_service
    server, _resolved_uuid, instance = base_service.instance_triple

    # The config may name a branch rather than a specific uuid.
    # In that case we keep the config's value, not the pre-resolved uuid.
    uuid = input_config["dvid"]["uuid"]

    body_csv = workflow_config["createmeshes"]["subset-bodies"]
    assert body_csv == "bodies-to-update.csv", (
        "Your config must have a 'subset-bodies' setting, and it must point to "
        "bodies-to-update.csv (which will be overwritten by this daemon)"
    )

    return (server, uuid, instance), body_csv
def test_dvid_volume_service_grayscale(setup_dvid_repo, disable_auto_retry):
    """
    Create a multiscale grayscale instance via a DVID volume service config,
    write a downsampled pyramid through the service, and read a sub-box back
    at every scale.

    Checks performed:
      - the instance does not exist before the service is created
      - afterwards, an instance exists for every scale, with type 'uint8blk'
      - the instance's voxel size matches the creation settings
      - data read back at each scale equals the data that was written
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-grayscale'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          grayscale-name: {instance_name}
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        # Scale 0 lives in the base instance; other scales use a '_{scale}' suffix.
        scale_instance = instance_name if scale == 0 else f"{instance_name}_{scale}"
        assert scale_instance in repo_instances
        assert repo_instances[scale_instance] == 'uint8blk'

        # label downsampling is easier to test with
        vol = downsample(volume, 2**scale, 'label')

        # Pad the downsampled data up to a multiple of 64 along each axis.
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint8)

        # Copy the real data into the padded buffer.
        # (Previously the buffer was copied onto itself, so only zeros were
        # ever written and the read-back check below was vacuous.)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()
def test_available_Scales(setup_hdf5_service):
    """
    An explicit 'available-scales' list in the rescale-level adapter
    settings should be reported by the resulting service.
    """
    _, volume_config, _, _ = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    rescale_settings = {"level": 1, "available-scales": [0, 1, 2]}
    volume_config["adapters"]["rescale-level"] = rescale_settings

    service = VolumeService.create_from_config(volume_config)
    assert service.available_scales == [0, 1, 2]
def test_sample_labels(setup_hdf5_service):
    """
    With rescale-level 1, sample_labels() at random points should return
    the values found at those points in the 2x block-mean downsampled volume.
    """
    _raw, volume_config, full_from_h5, _reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    expected_vol = downsample(full_from_h5, 2, 'block-mean')

    # Ten random coordinates per axis, stacked into rows of one point each.
    per_axis_coords = [np.random.randint(dim, size=(10, )) for dim in expected_vol.shape]
    points = np.transpose(per_axis_coords)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    sampled = scaled_reader.sample_labels(points)
    expected = expected_vol[tuple(points.transpose())]
    assert (sampled == expected).all()
def test_full_volume_downsample_1(setup_hdf5_service):
    """
    With rescale-level 1, the service's geometry should be half that of the
    HDF5 reader, and the full volume should equal the 2x block-mean
    downsampling of the original data.
    """
    _raw, volume_config, full_from_h5, h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    # Geometry is halved; dtype is unchanged.
    expected_box = h5_reader.bounding_box_zyx // 2
    assert (scaled_reader.bounding_box_zyx == expected_box).all()

    expected_message_shape = h5_reader.preferred_message_shape // 2
    assert (scaled_reader.preferred_message_shape == expected_message_shape).all()

    assert scaled_reader.block_width == h5_reader.block_width // 2
    assert scaled_reader.dtype == h5_reader.dtype

    # Voxel data
    full_scaled = scaled_reader.get_subvolume(scaled_reader.bounding_box_zyx)
    expected_vol = downsample(full_from_h5, 2, 'block-mean')
    assert (full_scaled == expected_vol).all()
    assert full_scaled.flags.c_contiguous
        "zarr": {
            "path": "/Users/bergs/data/hemibrain-v1.2.zarr",
            "dataset": "s2",
            "store-type": "NestedDirectoryStore",
            "out-of-bounds-access": "permit-empty"
        },
        "adapters": {
            "rescale-level": -2
        }
    }
    mito_cfg = {
        "zarr": {
            "path": "/Users/bergs/data/hemibrain-v1.2-filtered-mito-cc.zarr",
            "dataset": "s3",
            "store-type": "NestedDirectoryStore",
            "out-of-bounds-access": "permit-empty"
        },
        "adapters": {
            "rescale-level": -3
        }
    }
    seg_svc = VolumeService.create_from_config(seg_cfg)
    mito_svc = VolumeService.create_from_config(mito_cfg)

    valid_mitos = fetch_supervoxels('emdata4:8900', '3159', 'mito-objects', body)

    processed_tbars = measure_tbar_mito_distances(seg_svc, mito_svc, body, tbars=tbars, valid_mitos=valid_mitos)
    cols = ['bodyId', 'type', *'xyz', 'mito-distance', 'crossed-gap', 'mito-id', 'mito-x', 'mito-y', 'mito-z', 'search-radius', 'download-scale', 'analysis-scale', 'focal-x']
    print(processed_tbars[cols])
    processed_tbars.to_csv('/tmp/tbar-test-results.csv')
Beispiel #9
0
def correct_centroids(config,
                      stats_df,
                      check_scale=0,
                      verify=False,
                      threads=0,
                      processes=8):
    """
    Find rows whose sampled 'centroid_label' differs from 'mito_id' and
    replace their z/y/x coordinates.

    Replacement coordinates come from ``find_mito_from_sparsevol`` when a
    sparsevol source is configured, otherwise from ``find_mito_from_seg``.
    Corrected rows get centroid_type 'adjusted'; all others get 'exact'.

    Args:
        config:
            Mapping with 'mito-sparsevol-source' and 'mito-point-source'
            entries (volume service configs).  At least one must be given.
            If both are given, the point source is used for label sampling.
        stats_df:
            DataFrame with 'z', 'y', 'x' columns; presumably indexed by
            'mito_id' (TODO confirm against caller).
            Modified IN PLACE: rows are re-sorted and columns are added.
        check_scale:
            Passed to ``sample_labels`` and ``find_mito_from_seg``.
        verify:
            If True, re-sample the corrected rows and log an error if
            any of them still don't match.
        threads, processes:
            Passed through to the parallel helpers.

    Returns:
        stats_df (the same object that was passed in)
    """
    import numpy as np
    import pandas as pd

    from neuclease.util import compute_parallel, Timer
    from flyemflows.volumes import VolumeService

    block_cols = ['bz', 'by', 'bx']
    with Timer("Pre-sorting points by block", logger):
        # Temporary columns (coordinate // 64), used only as the sort key.
        for col in block_cols:
            stats_df[col] = np.int32(0)
        stats_df[block_cols] = stats_df[[*'zyx']] // 64
        stats_df.sort_values(block_cols, inplace=True)
        stats_df.drop(columns=block_cols, inplace=True)

    # The sparsevol source (if any) is also the default point source...
    sparsevol_source = None
    point_source = None
    sparsevol_cfg = config['mito-sparsevol-source']
    if sparsevol_cfg is not None:
        sparsevol_source = VolumeService.create_from_config(sparsevol_cfg)
        point_source = sparsevol_source

    # ...unless a dedicated point source is configured.
    point_cfg = config['mito-point-source']
    if point_cfg:
        point_source = VolumeService.create_from_config(point_cfg)

    assert point_source or sparsevol_source, \
        "You must provide either a point-source or sparsevol-source."

    centroid_labels = sample_labels(point_source, stats_df,
                                    check_scale, threads, processes)
    stats_df['centroid_label'] = centroid_labels

    mismatched_mitos = stats_df.query('centroid_label != mito_id')
    logger.info(
        f"Correcting {len(mismatched_mitos)} mismatched mito centroids")

    parallel_opts = dict(ordered=False, threads=threads, processes=processes)
    if sparsevol_source:
        finder = partial(find_mito_from_sparsevol,
                         *sparsevol_source.instance_triple)
        mitos_and_coords = compute_parallel(finder,
                                            mismatched_mitos.index,
                                            **parallel_opts)
    else:
        finder = partial(find_mito_from_seg, point_source, check_scale)
        id_and_coords = mismatched_mitos.reset_index()[['mito_id', *'zyx']]
        mismatched_rows = id_and_coords.astype(np.int64).values
        mitos_and_coords = compute_parallel(finder,
                                            mismatched_rows,
                                            starmap=True,
                                            **parallel_opts)

    corrected_df = pd.DataFrame(mitos_and_coords, columns=['mito_id', *'zyx'])
    corrected_df = corrected_df.set_index('mito_id')
    stats_df.loc[corrected_df.index, [*'zyx']] = corrected_df[[*'zyx']]

    stats_df['centroid_type'] = 'exact'
    stats_df.loc[corrected_df.index, 'centroid_type'] = 'adjusted'

    # Sanity check: they should all be correct now!
    if verify:
        corrected_rows = stats_df.loc[mismatched_mitos.index]
        new_labels = sample_labels(point_source, corrected_rows,
                                   check_scale, threads, processes)
        if (new_labels != mismatched_mitos.index).any():
            logger.error("Some mitos remained mismstached!")

    return stats_df
Beispiel #10
0
def correct_centroids(config, stats_df, check_scale=0, verify=False, threads=0, processes=8):
    """
    Replace the coordinates of any row whose centroid does not fall on
    a voxel labeled with that row's own 'mito_id'.

    The label under each row's (z, y, x) point is sampled from the point
    source at ``check_scale`` and stored in a 'centroid_label' column.
    Rows whose label differs from 'mito_id' are handed to ``find_mito``
    (together with the sparsevol source's instance triple). The coordinates
    it returns overwrite the originals, and those rows are marked with
    centroid_type 'adjusted'.

    Args:
        config:
            Mapping with 'mito-sparsevol-source' and 'mito-point-source'
            entries (volume service configs).  If the point source is None,
            the sparsevol source is used for label sampling, too.
        stats_df:
            DataFrame with 'z', 'y', 'x' columns; presumably indexed by
            'mito_id' (TODO confirm against caller).
            Modified IN PLACE: rows are re-sorted and columns are added.
        check_scale:
            Scale at which labels are sampled.  Coordinates are divided
            by 2**check_scale before sampling.
        verify:
            If True, re-fetch the labels under the corrected coordinates
            and log an error if any still don't match.
        threads, processes:
            Passed through to the parallel helpers.  For a non-DVID point
            source, a thread pool is used if ``threads`` is nonzero,
            otherwise a process pool.

    Returns:
        stats_df (the same object that was passed in)
    """
    import numpy as np
    import pandas as pd

    from neuclease.util import compute_parallel, Timer
    from neuclease.dvid import fetch_labels_batched
    from flyemflows.volumes import VolumeService, DvidVolumeService

    with Timer("Pre-sorting points by block", logger):
        # Temporary columns (coordinate // 64), used only as the sort key.
        stats_df['bz'] = stats_df['by'] = stats_df['bx'] = np.int32(0)
        stats_df[['bz', 'by', 'bx']] = stats_df[[*'zyx']] // 64
        stats_df.sort_values(['bz', 'by', 'bx'], inplace=True)
        stats_df.drop(columns=['bz', 'by', 'bx'], inplace=True)

    sparsevol_source = VolumeService.create_from_config(config['mito-sparsevol-source'])
    if config['mito-point-source'] is None:
        point_source = sparsevol_source
    else:
        point_source = VolumeService.create_from_config(config['mito-point-source'])

    if isinstance(point_source, DvidVolumeService):
        stats_df['centroid_label'] = fetch_labels_batched(*point_source.instance_triple,
                                                          stats_df[[*'zyx']] // (2**check_scale),
                                                          supervoxels=point_source.supervoxels,
                                                          scale=check_scale,
                                                          batch_size=1000,
                                                          threads=threads,
                                                          processes=processes)
    else:
        # Import the pool classes from the submodule explicitly:
        # 'import multiprocessing' alone does not guarantee that
        # 'multiprocessing.pool' is available as an attribute.
        from multiprocessing.pool import Pool, ThreadPool
        import dask
        from dask.diagnostics import ProgressBar

        pool = ThreadPool(threads) if threads else Pool(processes)

        # NOTE(review): this scheduler setting is not scoped to the 'with'
        # block below, so it remains in effect after this function returns.
        dask.config.set(scheduler='processes')
        with pool, dask.config.set(pool=pool), ProgressBar():
            centroids = stats_df[[*'zyx']] // (2**check_scale)
            stats_df['centroid_label'] = point_source.sample_labels( centroids, scale=check_scale )

    mismatched_mitos = stats_df.query('centroid_label != mito_id').index

    logger.info(f"Correcting {len(mismatched_mitos)} mismatched mito centroids")
    _find_mito = partial(find_mito, *sparsevol_source.instance_triple)
    mitos_and_coords = compute_parallel(_find_mito, mismatched_mitos, ordered=False, threads=threads, processes=processes)
    corrected_df = pd.DataFrame(mitos_and_coords, columns=['mito_id', *'zyx']).set_index('mito_id')
    stats_df.loc[corrected_df.index, [*'zyx']] = corrected_df[[*'zyx']]
    stats_df.loc[corrected_df.index, 'centroid_type'] = 'adjusted'

    # Sanity check: they should all be correct now!
    if verify:
        new_centroids = stats_df.loc[mismatched_mitos, [*'zyx']].values
        new_labels = fetch_labels_batched(*sparsevol_source.instance_triple,
                                          new_centroids,
                                          supervoxels=True,
                                          threads=threads,
                                          processes=processes)

        if (new_labels != mismatched_mitos).any():
            logger.error("Some mitos remained mismstached!")

    return stats_df
def test_dvid_volume_service_labelmap(setup_dvid_repo, random_segmentation,
                                      disable_auto_retry):
    """
    Create a multiscale labelmap instance via a DVID volume service config,
    then exercise the service's write/read, sparse brick coordinate, and
    label sampling functions.

    Checks performed:
      - the instance does not exist before the service is created
      - afterwards it exists with type 'labelmap' and the configured voxel size
      - data read back at each scale equals the data that was written
      - sparse_brick_coords_for_labels() matches the brick coordinates
        computed directly from the volume (64px bricks)
      - sample_labels() returns the volume's values at random points
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-labelmap'

    volume = random_segmentation[:256, :192, :128]
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          segmentation-name: {instance_name}
          supervoxels: true
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
          message-block-shape: [64,64,64]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    assert instance_name in repo_instances
    assert repo_instances[instance_name] == 'labelmap'

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    #
    # Write a pyramid, padding each scale up to a multiple of 64 per axis
    #
    scaled_volumes = {}
    for scale in range(max_scale + 1):
        vol = downsample(volume, 2**scale, 'label')
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint64)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    #
    # Read a sub-box back at every scale
    #
    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

    #
    # Check sparse coords function
    #
    labels = list({*pd.unique(volume.reshape(-1))} - {0})
    brick_coords_df = service.sparse_brick_coords_for_labels(labels)

    assert brick_coords_df.columns.tolist() == ['z', 'y', 'x', 'label']
    assert set(brick_coords_df['label'].values) == set(labels), \
        "Some labels were missing from the sparse brick coords!"

    def ndi(shape):
        # One row per index tuple of an array with the given shape.
        return np.indices(shape).reshape(len(shape), -1).transpose()

    # Expected result: for every voxel, the corner of its 64px brick plus
    # its label, de-duplicated, with label 0 excluded.
    expected_df = pd.DataFrame(ndi(volume.shape), columns=[*'zyx'])
    expected_df['label'] = volume.reshape(-1)
    for axis in 'zyx':
        expected_df[axis] //= 64
    expected_df = expected_df.drop_duplicates()
    for axis in 'zyx':
        expected_df[axis] *= 64

    # Chained (not in-place) so that we never modify a query() result in place.
    sort_cols = ['z', 'y', 'x', 'label']
    expected_df = (expected_df.query('label != 0')
                              .sort_values(sort_cols)
                              .reset_index(drop=True))
    brick_coords_df = brick_coords_df.sort_values(sort_cols).reset_index(drop=True)

    assert expected_df.shape == brick_coords_df.shape
    assert (brick_coords_df == expected_df).all().all()

    #
    # Check sample_labels()
    #
    # Sample across the whole volume.
    # (Previously this used the shape of 'vol', a leftover loop variable
    # holding the last subvolume read above, which restricted the points
    # to a small corner of the volume.)
    points = [np.random.randint(d, size=(10, )) for d in volume.shape]
    points = np.transpose(points)
    labels = service.sample_labels(points)
    assert (labels == volume[(*points.transpose(), )]).all()
Beispiel #12
0
def convert_grayscale(config_path, client=None):
    """
    Copy a grayscale volume from one storage format to another.

    This is a simple example showing how to:
     - create an input service (agnostic to data format)
     - read it into a distributed array (BrickWall)
     - realign it to the output's grid
     - write the realigned data (agnostic to format)

    The input is accessed according to its preferred access pattern,
    and the output is written according to its preferred access pattern
    (e.g. entire slices if reading from a PNG stack, or blocks if using N5.)

    The output's bounding box is overwritten with the input's bounding box.

    Args:
        config_path:
            Path to a config file with 'input' and 'output' sections,
            each following GrayscaleVolumeSchema.
        client:
            Passed to BrickWall.from_volume_service().

    Caveats:

     - This does not implement a Workflow subclass
       (though there isn't much more to it).

     - For simplicity, this code assumes that the entire volume can be loaded
       into your cluster's RAM.  For large volumes, that won't work.
       A more robust solution would split the input volume into large
       "slabs" and process them each in turn.

    Example:

        # Set up some input data (in Python)
        from flyemflows.util.n5 import export_to_multiscale_n5
        volume = np.random.randint(255, size=(500,500,500), dtype=np.uint8)
        export_to_multiscale_n5(volume, '/tmp/test-vol.n5')

        # Write the following config to /tmp/test-config.yaml:
        input:
          n5:
            path: /tmp/test-vol.n5
            dataset: 's0'

        output:
          slice-files:
            slice-path-format: '/tmp/test-slices/z{:04}.png'

        # Run this script:
        python convert_grayscale.py /tmp/test-config.yaml

    """
    # Config file schema: two volume descriptions
    schema = {
        "properties": {
            "input": GrayscaleVolumeSchema,
            "output": GrayscaleVolumeSchema
        }
    }

    # Load config (injects defaults for missing values)
    config = load_config(config_path, schema)
    input_config = config["input"]
    output_config = config["output"]

    # Input service and input 'bricks'
    input_svc = VolumeService.create_from_config(input_config)
    input_wall = BrickWall.from_volume_service(input_svc, client=client)

    # The output covers the same bounding box as the input
    output_config["geometry"]["bounding-box"] = input_config["geometry"]["bounding-box"]

    # Output service; redistribute the data onto the output's preferred grid
    output_svc = VolumeService.create_from_config(output_config)
    output_wall = input_wall.realign_to_new_grid(Grid(output_svc.preferred_message_shape))

    # Write the data: one task per output brick
    # (e.g. output slices, if exporting to PNGs)
    def write_brick(brick):
        output_svc.write_subvolume(brick.volume, brick.physical_box[0])

    output_wall.bricks.map(write_brick).compute()

    print("DONE exporting")
def test_copygrayscale_from_hdf5_to_n5(disable_auto_retry):
    """
    Run the copygrayscale workflow from an HDF5 source to an N5 destination
    and compare the output against the input at scales 0 and 1.
    """
    template_dir = tempfile.mkdtemp(suffix="copygrayscale-hdf5-to-n5-template")

    SHAPE = (250, 240, 230)

    # Create volume, write to HDF5
    volume = np.random.randint(10, size=SHAPE, dtype=np.uint8)
    volume_path = f"{template_dir}/volume.h5"
    with h5py.File(volume_path, 'w') as f:
        f['volume'] = volume

    config_text = textwrap.dedent(f"""\
        workflow-name: copygrayscale
        cluster-type: {CLUSTER_TYPE}
        
        input:
          hdf5:
            path: {volume_path}
            dataset: volume
          
          geometry:
            message-block-shape: [64,64,256]
            bounding-box: [[0,0,0], {[*SHAPE[::-1]]}]

          adapters:
            # Enable multi-scale, since otherwise
            # Hdf5VolumeService doesn't support it out-of-the box
            rescale-level: 0

        output:
          n5:
            path: output.n5
            dataset: s0
            create-if-necessary: true
            creation-settings:
              dtype: uint8
              # max-scale: 2 # Should be set automatically from max-pyramid-scale

          geometry:
            message-block-shape: [256,128,128]
            available-scales: [0,1,2]
        
        copygrayscale:
          max-pyramid-scale: 2
          slab-depth: 128
          fill-blocks: false  # N5 is block-based, but does not require (or allow) us to pad the boundary blocks.
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    # Run
    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_service = VolumeService.create_from_config(final_config['input'])
    output_service = VolumeService.create_from_config(final_config['output'])

    # Check results -- must use half-brick-aligned checks to ensure that the downsampling is the same.
    # And don't check scale 2 -- there is too much difference between downsampled by 4 vs downsampled-by-2-twice
    for scale in range(2):
        scaled_box = output_service.bounding_box_zyx // 2**scale
        brick_shape = output_service.preferred_message_shape // 2
        for brick_box in boxes_from_grid(scaled_box, brick_shape,
                                         clipped=True):
            expected_vol = input_service.get_subvolume(brick_box, scale)
            output_vol = output_service.get_subvolume(brick_box, scale)

            # NOTE(review): rtol=2 is a very loose relative tolerance
            # (allowed error is atol + rtol * |expected|) -- confirm that
            # this is intended.
            assert np.allclose(output_vol, expected_vol, rtol=2, atol=5), \
                f"Written vol does not match expected at scale {scale}"