Python downsample Beispiele, flyemflows.util.downsample Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_samplepoints.py Projekt: janelia-flyem/flyemflows

def test_samplepoints_rescale(setup_samplepoints):
    """
    Run the samplepoints workflow with the input volume and the points both
    rescaled to level 2, then validate the exported table.

    Checks performed:
      - output rows carry the input coordinates in (z, y, x) sorted order,
      - each output label equals the 4x label-downsampled volume sampled
        at ``coord // 4``,
      - the 'extra' column is passed through unchanged.
    """
    template_dir, config, volume, points_df = setup_samplepoints

    # Deep copy: nested sections are modified below, and a shallow copy would
    # share those nested mappings with the config object owned by the fixture.
    config = copy.deepcopy(config)
    config["input"]["adapters"]["rescale-level"] = 2
    config["samplepoints"]["rescale-points-to-level"] = 2

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)

    final_config = workflow.config
    output_df = pd.read_csv(
        f'{execution_dir}/{final_config["samplepoints"]["output-table"]}')

    # Should appear in sorted order.
    # Sort a copy so the fixture's DataFrame is left untouched.
    sorted_points_df = points_df.sort_values(['z', 'y', 'x'])
    sorted_coords = sorted_points_df[['z', 'y', 'x']].values

    assert len(output_df) == len(sorted_points_df), \
        "Output table does not have one row per input point"
    assert (sorted_coords == output_df[['z', 'y', 'x']].values).all()

    # Level 2 corresponds to a downsampling factor of 2**2 along each axis.
    downsampled_vol = downsample(volume, 2**2, 'labels')
    labels = downsampled_vol[(*(sorted_coords // 2**2).transpose(), )]
    assert (labels == output_df['label'].values).all()

    # 'extra' columns should be preserved, even
    # though they weren't used in the computation.
    input_extra = sorted_points_df['extra'].values
    output_extra = output_df.sort_values(['z', 'y', 'x'])['extra'].values
    assert (output_extra == input_extra
            ).all(), "Extra column was not correctly preserved"

Beispiel #2

0

Datei anzeigen

Datei: test_dvid_volume_service.py Projekt: janelia-flyem/flyemflows

def test_dvid_volume_service_grayscale(setup_dvid_repo, disable_auto_retry):
    """
    Create a grayscale DVID instance through VolumeService, write a random
    volume at every scale, and verify that subvolumes read back unchanged.

    Also checks that the instance (and one ``<name>_<scale>`` instance per
    extra scale) was created as 'uint8blk' and that the configured voxel
    size was stored in the instance info.
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-grayscale'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          grayscale-name: {instance_name}
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    # The instance must not exist before the service is constructed.
    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        if scale == 0:
            assert instance_name in repo_instances
            assert repo_instances[instance_name] == 'uint8blk'
        else:
            assert f"{instance_name}_{scale}" in repo_instances
            assert repo_instances[f"{instance_name}_{scale}"] == 'uint8blk'

        vol = downsample(volume, 2**scale,
                         'label')  # label downsampling is easier to test with

        # Pad the shape up to a multiple of 64 along each axis and place the
        # downsampled data in the corner of the zero-filled buffer.
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint8)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)
        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    # Read back an interior box at every scale and compare with what was written.
    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

Beispiel #3

0

Datei anzeigen

Datei: test_scaled_volume_service.py Projekt: janelia-flyem/flyemflows

def test_sample_labels(setup_hdf5_service):
    """
    sample_labels() on a service configured with rescale-level 1 must return
    the same values as indexing a 2x 'block-mean' downsampled copy of the
    full volume at the same points.
    """
    _raw_volume, volume_config, full_from_h5, _h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    reference_vol = downsample(full_from_h5, 2, 'block-mean')

    # Ten random coordinates per axis, arranged as one row per point.
    per_axis_coords = [
        np.random.randint(extent, size=(10, ))
        for extent in reference_vol.shape
    ]
    points = np.transpose(per_axis_coords)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    sampled = scaled_reader.sample_labels(points)
    expected = reference_vol[tuple(points.transpose())]
    assert (sampled == expected).all()

Beispiel #4

0

Datei anzeigen

Datei: test_scaled_volume_service.py Projekt: janelia-flyem/flyemflows

def test_subvolume_downsample_1(setup_hdf5_service):
    """
    A subvolume fetched through ScaledVolumeService at scale 1 must match a
    2x 'block-mean' downsample of the corresponding full-resolution region,
    have the requested shape, and be C-contiguous.
    """
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    down_box = np.array([[13, 15, 20], [20, 40, 41]])

    # The same region expressed in full-resolution coordinates.
    full_res_box = down_box * 2
    full_res_subvol = full_from_h5[box_to_slicing(*full_res_box)]
    down_subvol_from_h5 = downsample(full_res_subvol, 2, 'block-mean')

    # Scale 1
    scaled_reader = ScaledVolumeService(h5_reader, 1)
    subvol_scaled = scaled_reader.get_subvolume(down_box)

    requested_shape = down_box[1] - down_box[0]
    assert (subvol_scaled.shape == requested_shape).all()
    assert down_subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {down_subvol_from_h5.shape}"
    assert (subvol_scaled == down_subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous

Beispiel #5

0

Datei anzeigen

Datei: test_scaled_volume_service.py Projekt: janelia-flyem/flyemflows

def test_full_volume_downsample_1(setup_hdf5_service):
    """
    With rescale-level 1, the service's geometry attributes must be half of
    the underlying reader's, the dtype unchanged, and the full scaled volume
    equal to a 2x 'block-mean' downsample of the full-resolution data.
    """
    _raw_volume, volume_config, full_from_h5, h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    halved_bbox = h5_reader.bounding_box_zyx // 2
    assert (scaled_reader.bounding_box_zyx == halved_bbox).all()

    halved_message_shape = h5_reader.preferred_message_shape // 2
    assert (scaled_reader.preferred_message_shape == halved_message_shape).all()

    assert scaled_reader.block_width == h5_reader.block_width // 2
    assert scaled_reader.dtype == h5_reader.dtype

    full_scaled = scaled_reader.get_subvolume(scaled_reader.bounding_box_zyx)
    expected_full = downsample(full_from_h5, 2, 'block-mean')
    assert (full_scaled == expected_full).all()
    assert full_scaled.flags.c_contiguous

Beispiel #6

0

Datei anzeigen

Datei: test_masksegmentation.py Projekt: janelia-flyem/flyemflows

def test_masksegmentation_basic(setup_dvid_segmentation_input, invert_mask,
                                roi_dilation, disable_auto_retry):
    """
    Run the masksegmentation workflow and verify everything it produces.

    Verified, in order:
      - the scale-0 output segmentation equals the input with the ROI zeroed,
      - each downsampled scale matches a 'labels-numba' pyramid of the
        expected volume (only the zero/nonzero pattern for scales above 5),
      - the exported 'erased-block-statistics.h5' table,
      - label indexes and mappings after erase_from_labelindexes().

    Args:
        invert_mask: Written to the 'invert-mask' setting; when true the
            expected ROI mask is inverted as well.
        roi_dilation: Written to the 'dilate-roi' setting. When positive,
            the test returns before any voxel-level comparison (see FIXME).
    """
    (template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5,
     input_segmentation_name,
     output_segmentation_name) = setup_dvid_segmentation_input

    if invert_mask:
        roi_mask_s5 = ~roi_mask_s5

    config["masksegmentation"]["invert-mask"] = invert_mask
    config["masksegmentation"]["dilate-roi"] = roi_dilation

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    # Config boxes are stored in xyz order; reverse the columns to get zyx.
    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    # The ROI mask is provided at scale 5; bring it up to full resolution.
    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    expected_vol = extract_subvol(volume.copy(), input_box_zyx)
    expected_vol[roi_mask] = 0

    output_box_xyz = np.array(
        final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address,
                                       repo_uuid,
                                       output_segmentation_name,
                                       output_box_zyx,
                                       scale=0,
                                       supervoxels=True)

    # Create a copy of the volume that contains only the voxels we removed
    erased_vol = volume.copy()
    erased_vol[~roi_mask] = 0

    if EXPORT_DEBUG_FILES:
        original_vol = fetch_labelmap_voxels(dvid_address,
                                             repo_uuid,
                                             input_segmentation_name,
                                             output_box_zyx,
                                             scale=0,
                                             supervoxels=True)
        original_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                   repo_uuid,
                                                   input_segmentation_name,
                                                   output_box_zyx,
                                                   scale=0)
        output_agglo_vol = fetch_labelmap_voxels(dvid_address,
                                                 repo_uuid,
                                                 output_segmentation_name,
                                                 output_box_zyx,
                                                 scale=0)
        np.save('/tmp/original-svs.npy', original_vol)
        np.save('/tmp/original-agglo.npy', original_agglo_vol)
        np.save('/tmp/output.npy', output_vol)
        np.save('/tmp/output-agglo.npy', output_agglo_vol)
        np.save('/tmp/expected.npy', expected_vol)
        np.save('/tmp/erased.npy', erased_vol)

        shutil.copyfile(f'{execution_dir}/roi-mask.h5', '/tmp/roi-mask.h5')
        if roi_dilation:
            shutil.copyfile(f'{execution_dir}/dilated-roi-mask.h5',
                            '/tmp/dilated-roi-mask.h5')
        if invert_mask:
            shutil.copyfile(f'{execution_dir}/segmentation-mask.h5',
                            '/tmp/segmentation-mask.h5')
        shutil.copyfile(f'{execution_dir}/final-mask.h5', '/tmp/final-mask.h5')

    if roi_dilation > 0:
        # FIXME: We don't yet verify voxel-accuracy of ROI dilation.
        return

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"

    # Build the expected pyramid incrementally: each scale is a 2x
    # downsample of the previous scale's expected volume.
    scaled_expected_vol = expected_vol
    for scale in range(1, 1 + MAX_SCALE):
        scaled_expected_vol = downsample(scaled_expected_vol, 2,
                                         'labels-numba')
        scaled_output_vol = fetch_labelmap_voxels(dvid_address,
                                                  repo_uuid,
                                                  output_segmentation_name,
                                                  output_box_zyx // 2**scale,
                                                  scale=scale,
                                                  supervoxels=True)

        if EXPORT_DEBUG_FILES:
            np.save(f'/tmp/expected-{scale}.npy', scaled_expected_vol)
            np.save(f'/tmp/output-{scale}.npy', scaled_output_vol)

        if scale <= 5:
            assert (scaled_output_vol == scaled_expected_vol).all(), \
                f"Written vol does not match expected at scale {scale}"
        else:
            # For scale 6 and 7, some blocks are not even changed,
            # but that means we would be comparing DVID's label
            # downsampling method to our method ('labels-numba').
            # The two don't necessarily give identical results in the case of 'ties',
            # so we'll just verify that the nonzero voxels match, at least.
            assert ((scaled_output_vol == 0) == (scaled_expected_vol == 0)).all(), \
                f"Written vol does not match expected at scale {scale}"

    block_stats_path = f'{execution_dir}/erased-block-statistics.h5'
    with h5py.File(block_stats_path, 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    #
    # Check the exported block statistics
    #
    stats_cols = [*BLOCK_STATS_DTYPES.keys()]
    assert stats_df.columns.tolist() == stats_cols

    # drop=True: discard the pre-sort row index instead of turning it into
    # an 'index' column, so only the statistics columns are compared below.
    stats_df = stats_df.sort_values(stats_cols).reset_index(drop=True)

    expected_stats_df = block_stats_for_volume((64, 64, 64), erased_vol,
                                               input_box_zyx)
    expected_stats_df = (
        expected_stats_df.sort_values(stats_cols).reset_index(drop=True))

    assert len(stats_df) == len(expected_stats_df)
    assert (stats_df == expected_stats_df).all().all()

    #
    # Try updating the labelindexes
    #
    src_info = (dvid_address, repo_uuid, input_segmentation_name)
    dest_info = (dvid_address, repo_uuid, output_segmentation_name)
    with switch_cwd(execution_dir):
        erase_from_labelindexes(src_info,
                                dest_info,
                                block_stats_path,
                                batch_size=10,
                                threads=4)

    # Verify deleted supervoxels
    assert os.path.exists(f'{execution_dir}/deleted-supervoxels.csv')
    deleted_svs = set(
        pd.read_csv(f'{execution_dir}/deleted-supervoxels.csv')['sv'])

    # A supervoxel counts as deleted when it no longer appears anywhere
    # in the expected (masked) volume.
    orig_svs = {*pd.unique(volume.reshape(-1))} - {0}
    remaining_svs = {*pd.unique(expected_vol.reshape(-1))} - {0}
    expected_deleted_svs = orig_svs - remaining_svs
    assert deleted_svs == expected_deleted_svs

    # Verify remaining sizes
    expected_sv_counts = (pd.Series(
        expected_vol.reshape(-1),
        name='sv').value_counts().drop(0).sort_index().rename('count'))

    index_dfs = []
    for body in np.unique(fetch_mapping(*dest_info, remaining_svs)):
        index_df = fetch_labelindex(*dest_info, body, format='pandas').blocks
        index_dfs.append(index_df)

    sv_counts = (pd.concat(index_dfs, ignore_index=True)[[
        'sv', 'count'
    ]].groupby('sv')['count'].sum().sort_index())
    assert set(sv_counts.index.values) == set(expected_sv_counts.index.values)
    assert (sv_counts == expected_sv_counts).all(), \
        pd.DataFrame({'stored_count': sv_counts, 'expected_count': expected_sv_counts}).query('stored_count != expected_count')

    # Verify mapping
    # Deleted supervoxels exist in the mapping, but they map to 0.
    assert (fetch_mapping(*dest_info, [*deleted_svs]) == 0).all()

    # Remaining supervoxels still map to their original bodies
    assert (fetch_mapping(*dest_info, [*remaining_svs]) == fetch_mapping(
        *src_info, [*remaining_svs])).all()

Beispiel #7

0

Datei anzeigen

Datei: test_labelmapcopy.py Projekt: janelia-flyem/flyemflows

def setup_dvid_segmentation_input(setup_dvid_repo, random_segmentation):
    """
    Prepare DVID labelmap instances and a workflow template directory for
    the labelmapcopy tests.

    An input instance is filled with ``random_segmentation`` at scales 0-3
    (each scale a 2x 'labels-numba' downsample of the previous one). A
    'partial output' instance receives a raw copy of part of the input and
    then has one 64^3 block overwritten with different labels. If the
    instances already exist, the upload steps are skipped.

    Returns:
        Tuple of (template_dir, config, expected_vols, partial_vol,
        dvid_address, repo_uuid, output_segmentation_name,
        partial_output_segmentation_name), where ``expected_vols`` maps
        scale -> expected volume and ``partial_vol`` is the scale-0 content
        of the partial output instance.

    Raises:
        HTTPError: If instance creation fails for any reason other than the
            instance already existing.
    """
    dvid_address, repo_uuid = setup_dvid_repo

    input_segmentation_name = 'labelmapcopy-segmentation-input'
    output_segmentation_name = 'labelmapcopy-segmentation-output'

    partial_output_segmentation_name = 'labelmapcopy-segmentation-partial-output'

    max_scale = 3
    already_exists = False

    try:
        create_labelmap_instance(dvid_address,
                                 repo_uuid,
                                 input_segmentation_name,
                                 max_scale=max_scale)
        create_labelmap_instance(dvid_address,
                                 repo_uuid,
                                 partial_output_segmentation_name,
                                 max_scale=max_scale)
    except HTTPError as ex:
        if ex.response is not None and 'already exists' in ex.response.content.decode(
                'utf-8'):
            already_exists = True
        else:
            # Any other failure means the instances are unusable; surface it
            # here rather than failing later with a less direct error.
            raise

    expected_vols = {}
    for scale in range(1 + max_scale):
        if scale == 0:
            scaled_vol = random_segmentation
        else:
            # Downsample the previous scale's volume by another factor of 2.
            scaled_vol = downsample(scaled_vol, 2, 'labels-numba')
        expected_vols[scale] = scaled_vol

        if not already_exists:
            # Pad to a multiple of 64 per axis before posting.
            scaled_box = round_box([(0, 0, 0), scaled_vol.shape], 64, 'out')
            aligned_vol = np.zeros(scaled_box[1], np.uint64)
            overwrite_subvol(aligned_vol, [(0, 0, 0), scaled_vol.shape],
                             scaled_vol)
            post_labelmap_voxels(dvid_address,
                                 repo_uuid,
                                 input_segmentation_name, (0, 0, 0),
                                 aligned_vol,
                                 scale=scale)

    if not already_exists:
        # Create a 'partial' output volume that is the same (bitwise) as the input except for some blocks.
        full_box = np.array([(0, 0, 0), random_segmentation.shape])
        full_box[1, -1] = 192
        for scale in range(1 + max_scale):
            # Always scale from the full-resolution box. Re-dividing the
            # previous iteration's box would compound the reduction.
            scaled_box = round_box(full_box // (2**scale), 64, 'out')
            raw_blocks = fetch_labelmap_voxels(dvid_address,
                                               repo_uuid,
                                               input_segmentation_name,
                                               scaled_box,
                                               scale,
                                               supervoxels=True,
                                               format='raw-response')
            post_labelmap_blocks(dvid_address,
                                 repo_uuid,
                                 partial_output_segmentation_name, [(0, 0, 0)],
                                 raw_blocks,
                                 scale,
                                 is_raw=True)

        # Overwrite one block with labels in [1_000_000, 1_000_010) so the
        # partial output differs from the input there.
        block = np.random.randint(1_000_000,
                                  1_000_010,
                                  size=(64, 64, 64),
                                  dtype=np.uint64)
        post_labelmap_voxels(dvid_address,
                             repo_uuid,
                             partial_output_segmentation_name, (0, 128, 64),
                             block,
                             0,
                             downres=True)

    partial_vol = fetch_labelmap_voxels(dvid_address,
                                        repo_uuid,
                                        partial_output_segmentation_name,
                                        [(0, 0, 0), random_segmentation.shape],
                                        supervoxels=True)

    template_dir = tempfile.mkdtemp(suffix="labelmapcopy-template")

    config_text = textwrap.dedent(f"""\
        workflow-name: labelmapcopy
        cluster-type: {CLUSTER_TYPE}
         
        input:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {input_segmentation_name}
            supervoxels: true
           
          geometry:
            message-block-shape: [512,64,64]
            available-scales: [0,1,2,3]
 
        output:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {output_segmentation_name}
            supervoxels: true
            disable-indexing: true
            create-if-necessary: true
        
        labelmapcopy:
          slab-shape: [512,128,64]
          dont-overwrite-identical-blocks: true
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    yaml = YAML()
    with StringIO(config_text) as f:
        config = yaml.load(f)

    return template_dir, config, expected_vols, partial_vol, dvid_address, repo_uuid, output_segmentation_name, partial_output_segmentation_name

Beispiel #8

0

Datei anzeigen

Datei: test_dvid_volume_service.py Projekt: janelia-flyem/flyemflows

def test_dvid_volume_service_labelmap(setup_dvid_repo, random_segmentation,
                                      disable_auto_retry):
    """
    Create a labelmap DVID instance through VolumeService, write a
    segmentation at every scale, and verify three service features:

      - get_subvolume() returns what was written, at each scale,
      - sparse_brick_coords_for_labels() lists every 64^3 brick that
        contains each nonzero label,
      - sample_labels() returns the voxel values at random points.
    """
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-labelmap'

    volume = random_segmentation[:256, :192, :128]
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          segmentation-name: {instance_name}
          supervoxels: true
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
          message-block-shape: [64,64,64]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    # The instance must not exist before the service is constructed.
    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    assert instance_name in repo_instances
    assert repo_instances[instance_name] == 'labelmap'

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        vol = downsample(volume, 2**scale, 'label')

        # Pad the shape up to a multiple of 64 along each axis and place the
        # downsampled data in the corner of the zero-filled buffer.
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint64)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

    #
    # Check sparse coords function
    #
    labels = list({*pd.unique(volume.reshape(-1))} - {0})
    brick_coords_df = service.sparse_brick_coords_for_labels(labels)

    assert brick_coords_df.columns.tolist() == ['z', 'y', 'x', 'label']
    assert set(brick_coords_df['label'].values) == set(labels), \
        "Some labels were missing from the sparse brick coords!"

    def ndi(shape):
        # All index tuples of an array with the given shape, one per row.
        return np.indices(shape).reshape(len(shape), -1).transpose()

    expected_df = pd.DataFrame(ndi(volume.shape), columns=[*'zyx'])

    # Reduce every voxel coordinate to the corner of its 64^3 brick, then
    # keep one row per distinct (brick, label) pair.
    expected_df['label'] = volume.reshape(-1)
    expected_df['z'] //= 64
    expected_df['y'] //= 64
    expected_df['x'] //= 64
    expected_df = expected_df.drop_duplicates()
    expected_df['z'] *= 64
    expected_df['y'] *= 64
    expected_df['x'] *= 64

    expected_df = expected_df.query('label != 0')

    expected_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)
    brick_coords_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)

    expected_df.reset_index(drop=True, inplace=True)
    brick_coords_df.reset_index(drop=True, inplace=True)

    assert expected_df.shape == brick_coords_df.shape
    assert (brick_coords_df == expected_df).all().all()

    #
    # Check sample_labels()
    #
    # Draw the points from the full test volume. 'vol' at this point is just
    # the last subvolume fetched in the loop above, so using its shape would
    # confine sampling to a small corner.
    points = [np.random.randint(d, size=(10, )) for d in volume.shape]
    points = np.transpose(points)
    labels = service.sample_labels(points)
    assert (labels == volume[(*points.transpose(), )]).all()