Example #1
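Writes an in-memory array to the Precomputed Chunks format on a local filesystem, one tile at a time, then reads it back and checks the round trip.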
def test_writing_to_precomputed_chunks():
    tmp_path = create_tmp_dir(prefix="test_writing_to_precomputed_chunks")
    datasource = ArrayDataSource(data=data, tile_shape=Shape5D(x=10, y=10))
    scale = PrecomputedChunksScale.from_datasource(
        datasource=datasource,
        key=PurePosixPath("my_test_data"),
        encoding=RawEncoder())
    sink_path = PurePosixPath("mytest.precomputed")
    filesystem = OsFs(tmp_path.as_posix())

    datasink = PrecomputedChunksScaleSink(
        filesystem=filesystem,
        info_dir=sink_path,
        scale=scale,
        dtype=datasource.dtype,
        num_channels=datasource.shape.c,
    )
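    # create() lays down the info file and scale directory; failures are returned as values, not raised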
    creation_result = datasink.create()
    if isinstance(creation_result, Exception):
        raise creation_result

    for tile in datasource.roi.get_datasource_tiles():
        datasink.write(tile.retrieve())

    precomp_datasource = PrecomputedChunksDataSource(
        path=sink_path, filesystem=filesystem, resolution=scale.resolution)
    reloaded_data = precomp_datasource.retrieve()
    assert reloaded_data == data
Example #2
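The same round trip as Example #1, but building the PrecomputedChunksInfo explicitly and taking the scale sink from PrecomputedChunksSink.create().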
def test_writing_to_precomputed_chunks(tmp_path: Path, data: Array5D):
    datasource = ArrayDataSource.from_array5d(data,
                                              tile_shape=Shape5D(x=10, y=10))
    scale = PrecomputedChunksScale.from_datasource(datasource=datasource,
                                                   key=Path("my_test_data"),
                                                   encoding=RawEncoder())
    info = PrecomputedChunksInfo(
        data_type=datasource.dtype,
        type_="image",
        num_channels=datasource.shape.c,
        scales=tuple([scale]),
    )
    sink_path = Path("mytest.precomputed")
    filesystem = OsFs(tmp_path.as_posix())

    datasink = PrecomputedChunksSink.create(
        filesystem=filesystem,
        base_path=sink_path,
        info=info,
    ).scale_sinks[0]

    for tile in datasource.roi.get_datasource_tiles():
        datasink.write(tile.retrieve())

    precomp_datasource = PrecomputedChunksDataSource(
        path=sink_path, filesystem=filesystem, resolution=scale.resolution)
    reloaded_data = precomp_datasource.retrieve()
    assert reloaded_data == data
Example #3
@classmethod
def from_json_value(cls, value: JsonValue) -> "PrecomputedChunksScaleSink":
    value_obj = ensureJsonObject(value)
    return PrecomputedChunksScaleSink(
        filesystem=JsonableFilesystem.from_json_value(value_obj.get("filesystem")),
        info_dir=PurePosixPath(ensureJsonString(value_obj.get("info_dir"))),
        scale=PrecomputedChunksScale.from_json_value(value_obj.get("scale")),
        dtype=np.dtype(ensureJsonString(value_obj.get("dtype"))),
        num_channels=ensureJsonInt(value_obj.get("num_channels")))
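Reading off the keys this parser accesses, the serialized payload presumably looks like the sketch below; the concrete values are illustrative, and the nested filesystem and scale objects (whatever JsonableFilesystem and PrecomputedChunksScale serialize to) are elided:

serialized_sink = {
    "filesystem": {},      # nested JsonableFilesystem object, elided
    "info_dir": "mytest.precomputed",
    "scale": {},           # nested PrecomputedChunksScale object, elided
    "dtype": "uint8",
    "num_channels": 1,
}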
Example #4
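Writes through a bucket filesystem, spelling out the PrecomputedChunksScale by hand, and parallelizes the per-tile writes with a ProcessPoolExecutor (the _write_data helper it maps over is a module-level function not shown in the snippet).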
def test_bucket_read_write():
    raw_data_source = get_sample_c_cells_datasource()
    bucket_fs = get_test_output_bucket_fs()

    precomp_path = PurePosixPath("c_cells_1.precomputed")
    sink = PrecomputedChunksScaleSink(
        info_dir=precomp_path,
        filesystem=bucket_fs,
        num_channels=raw_data_source.shape.c,
        scale=PrecomputedChunksScale(
            key=PurePosixPath("exported_data"),
            size=(raw_data_source.shape.x, raw_data_source.shape.y,
                  raw_data_source.shape.z),
            chunk_sizes=tuple([
                (raw_data_source.tile_shape.x, raw_data_source.tile_shape.y,
                 raw_data_source.tile_shape.z)
            ]),
            encoding=RawEncoder(),
            voxel_offset=(raw_data_source.location.x,
                          raw_data_source.location.y,
                          raw_data_source.location.z),
            resolution=raw_data_source.spatial_resolution),
        dtype=raw_data_source.dtype,
    )

    sink_writer = sink.create()
    assert not isinstance(sink_writer, Exception)

    assert bucket_fs.exists(precomp_path.joinpath("info").as_posix())
    assert not bucket_fs.exists(
        precomp_path.joinpath("i_dont_exist").as_posix())

    with ProcessPoolExecutor() as executor:
        _ = list(
            executor.map(partial(_write_data, sink_writer=sink_writer),
                         raw_data_source.roi.get_datasource_tiles()))

    data_proxy_source = PrecomputedChunksDataSource(
        path=precomp_path,
        filesystem=bucket_fs,
        resolution=(raw_data_source.spatial_resolution))

    retrieved_data = data_proxy_source.retrieve()
    assert np.all(
        retrieved_data.raw("yxc") == raw_data_source.retrieve().raw("yxc"))
Example #5
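A factory helper that assembles a single-scale sink under a randomly named path, defaulting to unit resolution and zero offset.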
def create_precomputed_chunks_sink(
        *,
        shape: Shape5D,
        dtype: "np.dtype[Any]",
        chunk_size: Shape5D,
        fs: "JsonableFilesystem | None" = None) -> FsDataSink:
    return PrecomputedChunksScaleSink(
        filesystem=fs or get_test_output_osfs(),
        info_dir=PurePosixPath(f"{uuid.uuid4()}.precomputed"),
        dtype=dtype,
        num_channels=shape.c,
        scale=PrecomputedChunksScale(
            key=PurePosixPath("some_data"),
            size=(shape.x, shape.y, shape.z),
            resolution=(1, 1, 1),
            voxel_offset=(0, 0, 0),
            chunk_sizes=tuple([(chunk_size.x, chunk_size.y, chunk_size.z)]),
            encoding=RawEncoder(),
        ))
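For illustration, the helper might be invoked as below; the shape, dtype, and chunk-size values are ours, and omitting fs falls back to get_test_output_osfs() per the default in the signature:

sink = create_precomputed_chunks_sink(
    shape=Shape5D(x=100, y=100, c=3),
    dtype=np.dtype("uint8"),
    chunk_size=Shape5D(x=10, y=10, c=3),
)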
Example #6
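Again the round trip from Example #1, but with the data translated to x=1000, y=1000 so that the scale's voxel offset is exercised.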
def test_writing_to_offset_precomputed_chunks():
    tmp_path = create_tmp_dir(
        prefix="test_writing_to_offset_precomputed_chunks")
    data_at_1000_1000 = data.translated(
        Point5D(x=1000, y=1000) - data.location)
    datasource = ArrayDataSource(data=data_at_1000_1000,
                                 tile_shape=Shape5D(x=10, y=10))
    scale = PrecomputedChunksScale.from_datasource(
        datasource=datasource,
        key=PurePosixPath("my_test_data"),
        encoding=RawEncoder())
    sink_path = PurePosixPath("mytest.precomputed")
    filesystem = OsFs(tmp_path.as_posix())

    print(f"\n\n will write to '{filesystem.geturl(sink_path.as_posix())}' ")

    datasink = PrecomputedChunksScaleSink(
        filesystem=filesystem,
        info_dir=sink_path,
        scale=scale,
        num_channels=datasource.shape.c,
        dtype=datasource.dtype,
    )
    creation_result = datasink.create()
    if isinstance(creation_result, Exception):
        raise creation_result

    for tile in datasource.roi.get_datasource_tiles():
        datasink.write(tile.retrieve())

    precomp_datasource = PrecomputedChunksDataSource(
        path=sink_path, filesystem=filesystem, resolution=scale.resolution)

    reloaded_data = precomp_datasource.retrieve(
        interval=data_at_1000_1000.interval)
    assert (reloaded_data.raw("xyz") == data.raw("xyz")).all()
Example #7
assert not isinstance(classifier, Exception)


# we will output to neuroglancer's Precomputed Chunks format
# https://github.com/google/neuroglancer/tree/master/src/neuroglancer/datasource/precomputed
output_interval: Interval5D = classifier.get_expected_roi(data_source.roi)
predictions_data_sink = PrecomputedChunksScaleSink(
    filesystem=OsFs("/tmp"),
    dtype=np.dtype("float32"),
    info_dir=PurePosixPath("my_exported_data"),
    num_channels=classifier.num_classes,
    scale=PrecomputedChunksScale(
        key=PurePosixPath("1_1_1"),
        size=(output_interval.shape.x, output_interval.shape.y, output_interval.shape.z),
        resolution=(1,1,1),
        voxel_offset=(output_interval.start.x, output_interval.start.y, output_interval.start.z),
        chunk_sizes=(
            (data_source.tile_shape.x, data_source.tile_shape.y, data_source.tile_shape.z),
        ),
        encoding=RawEncoder()
    )
)
# creates the info file on disk plus the "my_exported_data" dir, making us ready to write
sink_writer = predictions_data_sink.create()
assert not isinstance(sink_writer, Exception)

# predict on independent tiles. You could run this with e.g. concurrent.futures.Executor
for lazy_tile in data_source.roi.get_datasource_tiles():
    predictions: Array5D = classifier(lazy_tile)  # if you need the raw numpy array, call e.g. predictions.raw("yx")
    # predictions.as_uint8().show_channels()
    sink_writer.write(predictions)
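As the comment above suggests, the per-tile loop can also be dispatched through an executor, mirroring the ProcessPoolExecutor pattern of Example #4; a minimal sketch, assuming classifier and sink_writer pickle cleanly (the helper name _predict_and_write is ours):

from concurrent.futures import ProcessPoolExecutor
from functools import partial

def _predict_and_write(lazy_tile, *, classifier, sink_writer):
    # classify one tile and write the prediction straight into the sink
    sink_writer.write(classifier(lazy_tile))

with ProcessPoolExecutor() as executor:
    _ = list(executor.map(
        partial(_predict_and_write, classifier=classifier, sink_writer=sink_writer),
        data_source.roi.get_datasource_tiles()))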