Example #1
def open_file(self):
    f = np.memmap(self._path,
                  dtype=self.dtype,
                  mode='r',
                  shape=self._scan_size + self._detector_size_raw)
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    return f[ds_slice.get()]  # crop off the two extra rows
Example #2
def get_partitions(self):
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    for pslice in ds_slice.subslices(self.partition_shape):
        yield MemoryPartition(
            tileshape=self.tileshape,
            dataset=self,
            dtype=self.dtype,
            partition_slice=pslice,
        )
Example #3
def test_subslices_non_even_division_2():
    top_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=(3, 1, 1, 1),
    )
    assert list(top_slice.subslices(shape=(2, 1, 1, 1))) == [
        Slice(origin=(0, 0, 0, 0), shape=(2, 1, 1, 1)),
        Slice(origin=(2, 0, 0, 0), shape=(1, 1, 1, 1)),
    ]
Example #4
def test_shift_2():
    s1 = Slice(
        origin=(2, 2, 0, 0),
        shape=(1, 1, 2, 2),
    )

    s2 = Slice(origin=(1, 1, 0, 0), shape=(1, 1, 4, 4))

    shifted = s1.shift(s2)
    assert shifted.origin == (1, 1, 0, 0)
Example #5
def test_slice_intersect_0():
    s1 = Slice(
        origin=(0, 0, 0, 0),
        shape=(2, 2, 2, 2),
    )
    s2 = Slice(
        origin=(0, 0, 0, 0),
        shape=(1, 1, 1, 1),
    )
    assert s1.intersection_with(s2) == s2
Example #6
def test_slice_intersect_3():
    s1 = Slice(
        origin=(1, 1, 1, 1),
        shape=(2, 2, 2, 2),
    )
    s2 = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    res = s1.intersection_with(s2)
    assert res == s1
Example #7
def test_get_slice_2():
    slice_ = Slice(
        origin=(1, 1, 1, 1),
        shape=(1, 1, 2, 2),
    )
    data = np.arange(4 * 4 * 4 * 4).reshape(4, 4, 4, 4)
    assert slice_.get(data).shape == slice_.shape
    assert np.all(slice_.get(data) == np.array([[[
        [85, 86],
        [89, 90],
    ]]]))
Example #8
def test_get_slice_1():
    slice_ = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    assert slice_.get() == (
        slice(0, 4),
        slice(0, 4),
        slice(0, 4),
        slice(0, 4),
    )
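A small companion sketch (not from the original examples, assuming the same Slice API as above): the tuple of builtin slice objects returned by get() can also be used directly as a numpy index, as Example #1 does with f[ds_slice.get()].

def test_get_slice_1_indexing_sketch():
    # hypothetical companion to test_get_slice_1
    slice_ = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    data = np.arange(4 * 4 * 4 * 4).reshape(4, 4, 4, 4)
    assert data[slice_.get()].shape == (4, 4, 4, 4)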
Example #9
def test_subslice_from_offset_length_1():
    s1 = Slice(
        origin=(1, 1, 1, 1),
        shape=(2, 2, 2, 2),
    )
    sub1 = s1.subslice_from_offset(offset=0, length=2)
    assert sub1.origin == (1, 1, 1, 1)
    assert sub1.shape == (1, 2, 2, 2)

    sub2 = s1.subslice_from_offset(offset=0, length=4)
    assert sub2.origin == (1, 1, 1, 1)
    assert sub2.shape == (2, 2, 2, 2)
Example #10
def test_subslice_from_offset_length_3():
    s1 = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 2, 2),
    )

    # can also create subslice that is smaller than one row:
    sub3 = s1.subslice_from_offset(offset=0, length=1)
    assert sub3.origin == (0, 0, 0, 0)
    assert sub3.shape == (1, 1, 2, 2)

    sub3 = s1.subslice_from_offset(offset=1, length=1)
    assert sub3.origin == (0, 1, 0, 0)
    assert sub3.shape == (1, 1, 2, 2)
Example #11
def get_partitions(self):
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    partition_shape = Slice.partition_shape(
        datashape=self.shape,
        framesize=self.shape[2] * self.shape[3],
        dtype=self.dtype,
        target_size=256 * 1024 * 1024,
    )
    for pslice in ds_slice.subslices(partition_shape):
        yield BloPartition(
            tileshape=self._tileshape,
            dataset=self,
            dtype=self.dtype,
            partition_slice=pslice,
        )
Example #12
def test_slice_intersect_2():
    s1 = Slice(
        origin=(1, 1, 1, 1),
        shape=(2, 2, 2, 2),
    )
    s2 = Slice(
        origin=(0, 0, 0, 0),
        shape=(1, 1, 1, 1),
    )
    res = s1.intersection_with(s2)
    assert res == Slice(
        origin=(1, 1, 1, 1),
        shape=(0, 0, 0, 0),
    )
    assert res.is_null()
Example #13
def test_for_datatile_1(masks):
    tile = DataTile(
        tile_slice=Slice(origin=(0, 0, 0, 0), shape=(1, 1, 1, 1)),
        data=np.ones((1, 1, 1, 1))
    )
    slice_ = masks.get_masks_for_slice(tile.tile_slice)
    assert slice_.shape == (1, 4)
Example #14
    async def pick_frame(self, dataset_uuid, x, y):
        ds = self.data.get_dataset(dataset_uuid)
        x = int(x)
        y = int(y)
        slice_ = Slice(origin=(y, x, 0, 0),
                       shape=(1, 1, ds.shape[2], ds.shape[3]))
        job = PickFrameJob(dataset=ds, slice_=slice_)

        executor = self.data.get_executor()

        log.info("picking %d/%d from %s", x, y, dataset_uuid)

        futures = []
        for task in job.get_tasks():
            submit_kwargs = {}
            futures.append(executor.client.submit(task, **submit_kwargs))

        full_result = np.zeros(shape=ds.shape[2:])
        async for future, result in dd.as_completed(futures,
                                                    with_results=True):
            for tile in result:
                tile.copy_to_result(full_result)
        log.info("picking done, encoding image (dtype=%s)", full_result.dtype)
        image = await run_blocking(
            _encode_image,
            full_result,
            colormap=cm.gist_earth,
            save_kwargs={'format': 'png'},
        )
        log.info("image encoded, sending response")
        return image.read()
Example #15
def _get_slice(self, slice: Slice):
    real_slice = slice.get()
    result = self._data[real_slice]
    # Defend against #1026 (internal bugs), allow deactivating in
    # optimized builds for performance
    assert result.shape == tuple(slice.shape) + self.extra_shape
    return result
Example #16
    def get_tiles(self):
        """
        yield one tile per underlying data block
        """
        s = self._get_sector()
        scan = self._scan_size

        try:
            all_blocks = s.get_blocks()
            blocks_to_read = (BLOCKS_PER_SECTOR_PER_FRAME * scan[0] * scan[1])
            buf = np.zeros((1, 1) + BLOCK_SHAPE, dtype="uint16")
            for block_idx, block in enumerate(
                    itertools.islice(all_blocks, blocks_to_read)):
                frame_idx = block_idx // BLOCKS_PER_SECTOR_PER_FRAME
                scan_pos_y = frame_idx // scan[1]
                scan_pos_x = frame_idx % scan[1]
                h = block.header
                # TODO: move tile_slice stuff to datablock?
                sector_offset = SECTOR_SIZE[1] * block.sector.idx
                tile_slice = Slice(
                    origin=(scan_pos_y, scan_pos_x, h['pixel_y_start'],
                            sector_offset + h['pixel_x_start']),
                    shape=(1, 1) + BLOCK_SHAPE,
                )
                log.debug("tile_slice=%r", tile_slice)
                block.readinto(buf)
                yield DataTile(data=buf, tile_slice=tile_slice)
        finally:
            s.close()
Example #17
    def _slice_for_partition(self, partition):
        """
        Get a Slice into self._data for `partition`, taking the current ROI into account.

        Because _data is "compressed" if a ROI is set, we can't directly index and must
        calculate a new slice from the ROI.
        """
        if self._roi is None:
            return partition.slice
        else:
            roi = self._roi.reshape((-1, ))
            slice_ = partition.slice
            s_o = slice_.origin[0]
            s_s = slice_.shape[0]
            # We need to find how many 1s there are for all previous partitions, to know
            # the origin; then we count how many 1s there are in our partition
            # to find our shape.
            origin = np.count_nonzero(roi[:s_o])
            shape = np.count_nonzero(roi[s_o:s_o + s_s])
            sig_dims = slice_.shape.sig.dims
            slice_ = Slice(
                origin=(origin, ) + slice_.origin[-sig_dims:],
                shape=Shape((shape, ) + tuple(slice_.shape.sig),
                            sig_dims=sig_dims),
            )
            return slice_
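A small illustrative sketch (hypothetical values, not part of the original code) of the ROI arithmetic described above: the compressed origin is the number of selected positions before the partition, and the compressed shape is the number of selected positions inside it.

# flat ROI over 7 nav positions; the partition covers flat positions 2..4
roi = np.array([1, 0, 1, 1, 0, 1, 1], dtype=bool)
s_o, s_s = 2, 3
assert np.count_nonzero(roi[:s_o]) == 1           # compressed origin
assert np.count_nonzero(roi[s_o:s_o + s_s]) == 2  # compressed shape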
Example #18
def test_for_datatile_with_scan_origin(masks):
    tile = DataTile(
        tile_slice=Slice(origin=(10, 10, 0, 0), shape=(2, 2, 10, 10)),
        data=np.ones((2, 2, 10, 10))
    )
    slice_ = masks.get_masks_for_slice(tile.tile_slice)
    assert slice_.shape == (100, 4)
Example #19
def test_for_datatile_with_frame_origin(masks):
    tile = DataTile(tile_slice=Slice(origin=(10, 10, 10, 10),
                                     shape=(2, 2, 1, 5)),
                    data=np.ones((2, 2, 1, 5)))
    slice_ = masks.get_masks_for_slice(tile.tile_slice)
    print(slice_)
    assert_array_almost_equal(
        slice_,
        np.array([
            1, 0, 1, 10,
            1, 0, 1, 11,
            1, 0, 1, 12,
            1, 0, 1, 13,
            1, 0, 1, 14,
        ]).reshape((5, 4)))
Example #20
def get_partitions(self):
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    partition_shape = Slice.partition_shape(
        datashape=self.shape,
        framesize=self._detector_size[0] * self._detector_size[1],
        dtype=self.dtype,
        target_size=256 * 1024 * 1024,
        min_num_partitions=self._min_num_partitions,
    )
    for pslice in ds_slice.subslices(partition_shape):
        # TODO: where should the tileshape be set? let the user choose for now
        yield RawFilePartition(
            tileshape=self._tileshape,
            dataset=self,
            dtype=self.dtype,
            partition_slice=pslice,
        )
Example #21
def test_write_handle(tmpdir_factory):
    """
    test the common "happy path":
    """
    datadir = tmpdir_factory.mktemp('write_handle_tests')
    full_path = os.path.join(datadir, "f1")
    part_slice = Slice(
        shape=Shape((32, 64, 64), sig_dims=2),
        origin=(16, 0, 0),
    )
    tile_slice = Slice(
        shape=Shape((3, 64, 64), sig_dims=2),
        origin=(19, 0, 0),
    )
    tile_data = np.random.randn(3, 64, 64).astype("float32")
    tile = DataTile(
        tile_data,
        tile_slice=tile_slice,
        scheme_idx=0,
    )

    wh = WriteHandle(full_path, datadir, part_slice, dtype='float32')

    tmp_file_name = ""

    with wh:
        wh.write_tile(tile)
        tmp_file_name = wh._tmp_file.name
        assert os.path.exists(tmp_file_name)

    # check some internals:
    assert wh._dest is None
    assert wh._tmp_file is None

    # the temporary file should no longer exist in case of success
    assert not os.path.exists(tmp_file_name)

    # ... but our dest fname should:
    assert os.path.exists(full_path)
    assert os.path.isfile(full_path)

    # check if data is written correctly:
    read_data = np.fromfile(full_path,
                            dtype="float32").reshape(part_slice.shape)
    assert np.allclose(read_data[3:6, ...], tile_data)
Example #22
    def get_partitions(self):
        """
        we keep it simple: one MIB file == one partition
        """

        ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
        for f in self._files_sorted():
            idx = f.fields['sequence_first_image'] - 1
            length = f.fields['num_images']

            pslice = ds_slice.subslice_from_offset(offset=idx, length=length)

            yield MIBPartition(
                tileshape=self._tileshape,
                dataset=self,
                partfile=f,
                dtype=self.dtype,
                partition_slice=pslice,
            )
Example #23
def get_partitions(self):
    with self.get_h5ds() as h5ds:
        ds_slice = Slice(origin=(0, 0, 0, 0), shape=h5ds.shape)
        partition_shape = Slice.partition_shape(
            datashape=h5ds.shape,
            framesize=h5ds[0][0].size,
            dtype=h5ds.dtype,
            target_size=self.target_size,
            min_num_partitions=self.min_num_partitions,
        )
        dtype = h5ds.dtype
        for pslice in ds_slice.subslices(partition_shape):
            # TODO: where should the tileshape be set? let the user choose for now
            yield H5Partition(
                tileshape=self.tileshape,
                dataset=self,
                dtype=dtype,
                partition_slice=pslice,
            )
Example #24
def get_partitions(self):
    for partition in self._index['partitions']:
        yield BinaryHDFSPartition(
            path=os.path.join(self.dirname, partition['filename']),
            tileshape=self.tileshape,
            dataset=self,
            dtype=self._index['dtype'],
            partition_slice=Slice(origin=partition['origin'],
                                  shape=partition['shape']),
        )
Example #25
def test_subslices_simple():
    top_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    assert list(top_slice.subslices(shape=(2, 2, 4, 4))) == [
        Slice(origin=(0, 0, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(0, 2, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(2, 0, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(2, 2, 0, 0), shape=(2, 2, 4, 4)),
    ]
Example #26
def test_write_handle_exception(tmpdir_factory):
    datadir = tmpdir_factory.mktemp('write_handle_tests')
    full_path = os.path.join(datadir, "f1")
    part_slice = Slice(
        shape=Shape((32, 64, 64), sig_dims=2),
        origin=(16, 0, 0),
    )
    tile_slice = Slice(
        shape=Shape((3, 64, 64), sig_dims=2),
        origin=(19, 0, 0),
    )
    tile_data = np.random.randn(3, 64, 64).astype("float32")
    tile = DataTile(
        tile_data,
        tile_slice=tile_slice,
        scheme_idx=0,
    )

    wh = WriteHandle(full_path, datadir, part_slice, dtype='float32')

    tmp_file_name = ""

    with pytest.raises(Exception):
        with wh:
            wh.write_tile(tile)
            tmp_file_name = wh._tmp_file.name
            assert os.path.exists(tmp_file_name)
            raise Exception("nope")

    # check some internals:
    assert wh._dest is None
    assert wh._tmp_file is None

    # the temporary file should no longer exist in case of exception
    assert not os.path.exists(tmp_file_name)

    # and neither should the full destination path
    assert not os.path.exists(full_path)
Example #27
def make_index(self,
               data,
               dtype,
               min_num_partitions=16,
               target_size=512 * 1024 * 1024):
    """
    create the json-serializable index structure. decides about the
    concrete partitioning, which will later be used to split the input data
    """
    partition_shape = Slice.partition_shape(
        datashape=data.shape,
        framesize=data[0][0].size,
        dtype=dtype,
        min_num_partitions=min_num_partitions,
        target_size=target_size,
    )
    partitions = self.make_partitions(
        data=data,
        partition_shape=partition_shape,
    )
    fname_fmt = "partition-%(idx)08d.raw"
    index = {
        "dtype": str(dtype),
        "mode": "rect",
        "shape": data.shape,
        "partitions": [
            {
                "origin": p['origin'],
                "shape": p['shape'],
                "filename": fname_fmt % {"idx": i},
            }
            for (i, p) in enumerate(partitions)
        ],
    }
    return index
Example #28
def test_copy_to_result():
    # result tile: for three masks, insert all ones into the given position:
    res_tile = ResultTile(
        data=np.ones((
            4,  # xdim*ydim, flattened
            3,  # num masks
        )),
        tile_slice=Slice(origin=(2, 2, 0, 0), shape=(1, 4, 10, 10)),
    )
    result = np.zeros((
        3,   # num masks
        10,  # ydim
        10,  # xdim
    ))
    res_tile.copy_to_result(result)
    res_tile.copy_to_result(result)
    print(result)

    dest_slice = res_tile._get_dest_slice()
    assert dest_slice[0] == Ellipsis
    assert dest_slice[1] == slice(2, 3, None)
    assert dest_slice[2] == slice(2, 6, None)

    assert len(dest_slice) == 3

    # let's see if we can select the right slice:
    assert result[..., 2:3, 2:6].shape == (3, 1, 4)

    # the region selected above should be 2:
    assert np.all(result[..., 2:3, 2:6] == 2)

    # everything else should be 0:
    assert np.all(result[..., 2:3, :2] == 0)
    assert np.all(result[..., 2:3, 6:] == 0)
    assert np.all(result[..., :2, :] == 0)
    assert np.all(result[..., 3:, :] == 0)
Example #29
def encode_roundtrip_quad(encode,
                          bits_per_pixel,
                          input_data=None,
                          dataset_shape=None,
                          tileshape=None,
                          start_at_frame=2,
                          stop_before_frame=6):
    if dataset_shape is None:
        # make some read ranges:
        dataset_shape = (6, 512, 512)
    dataset_shape = Shape(dataset_shape, sig_dims=2)
    if tileshape is None:
        tileshape = (2, 128, 512)
    tiling_scheme = TilingScheme.make_for_shape(
        dataset_shape=dataset_shape,
        tileshape=Shape(tileshape, sig_dims=2),
    )
    sync_offset = 0
    roi = None

    frame_header_bytes = 768

    image_size_bytes = dataset_shape.sig.size * bits_per_pixel // 8

    if bits_per_pixel in (1, 8):
        native_dtype = np.uint8
    elif bits_per_pixel == 16:
        native_dtype = np.uint16

    fields: HeaderDict = {
        'header_size_bytes': frame_header_bytes,
        'dtype': native_dtype,
        'mib_dtype': 'R64',
        'mib_kind': 'r',
        # remove padding from `bits_per_pixel`
        'bits_per_pixel': {1: 1, 8: 6, 16: 12}[bits_per_pixel],
        'image_size': (512, 512),
        'image_size_bytes': image_size_bytes,
        'sequence_first_image': 1,
        'filesize': dataset_shape.nav.size * (image_size_bytes + frame_header_bytes),
        'num_images': dataset_shape.nav.size,
        'num_chips': 4,
        'sensor_layout': (2, 2),
    }

    file = MIBFile(
        path="",
        start_idx=0,
        end_idx=dataset_shape.nav.size,
        native_dtype=native_dtype,
        sig_shape=dataset_shape.sig,
        frame_header=frame_header_bytes,
        file_header=0,
        header=fields,
    )

    fileset = MIBFileSet(files=[file],
                         header=fields,
                         frame_header_bytes=frame_header_bytes)

    backend = MMapBackendImplInMem()

    max_value = (1 << bits_per_pixel) - 1
    if input_data is None:
        data_full = np.random.randint(0, max_value + 1,
                                      tuple(dataset_shape.flatten_nav()))
        # make sure min/max values are indeed hit:
        data_full.reshape((-1, ))[0] = max_value
        data_full.reshape((-1, ))[-1] = 0
        assert np.max(data_full) == max_value
        assert np.min(data_full) == 0
    else:
        data_full = input_data.reshape(dataset_shape.flatten_nav())
    data = data_full[start_at_frame:stop_before_frame]

    # we need headers in between: in contrast to frame-by-frame decoding, the
    # decoder expects contiguous input data, so we can't slice the headers away beforehand:
    encoded_data = encode_quad(encode,
                               data_full,
                               bits_per_pixel,
                               with_headers=True)
    decoded = np.zeros_like(data)

    # that's the "interface" we made up for the in-mem mmap file above:
    file.data = encoded_data

    # wrapping the numba decoder function:
    decoder = MIBDecoder(header=fields)

    outer_slice = Slice(
        origin=(start_at_frame, 0, 0),
        shape=dataset_shape.flatten_nav(),
    )

    read_ranges = fileset.get_read_ranges(
        start_at_frame=start_at_frame,
        stop_before_frame=stop_before_frame,
        dtype=native_dtype,
        tiling_scheme=tiling_scheme,
        sync_offset=sync_offset,
        roi=roi,
    )

    for tile in backend.get_tiles(
            tiling_scheme=tiling_scheme,
            fileset=fileset,
            read_ranges=read_ranges,
            roi=roi,
            native_dtype=np.uint8,
            read_dtype=np.float32,
            decoder=decoder,
            sync_offset=0,
            corrections=None,
    ):
        slice_shifted = tile.tile_slice.shift(outer_slice)
        decoded[slice_shifted.get()] = tile.reshape(tile.tile_slice.shape)

    assert_allclose(data, decoded)
    return data, decoded
Example #30
def disjoint(sl: Slice, slices: Iterable[Slice]):
    return all(sl.intersection_with(s2).is_null() for s2 in slices)
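A minimal usage sketch for disjoint (hypothetical, built only on the Slice behaviour shown in the examples above): a slice is disjoint from a collection exactly when every pairwise intersection is null.

def test_disjoint_sketch():
    scheduled = [
        Slice(origin=(0, 0, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(2, 0, 0, 0), shape=(2, 2, 4, 4)),
    ]
    # no overlap with either scheduled slice:
    assert disjoint(Slice(origin=(0, 2, 0, 0), shape=(2, 2, 4, 4)), scheduled)
    # overlaps the first scheduled slice in the nav dimensions:
    assert not disjoint(Slice(origin=(1, 1, 0, 0), shape=(2, 2, 4, 4)), scheduled)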