Esempio n. 1
0
def decode_chunk_into(chunk, buf, block_size):
    """Decode a compressed_segmentation buffer into ``chunk``, in place.

    The buffer begins with one little-endian 32-bit word per channel, giving
    the position of that channel's data counted in 32-bit words from the
    start of ``buf``. The data of each channel extends up to the offset of
    the next channel (or the end of the buffer for the last channel) and is
    decoded by ``_decode_channel_into``.

    :param chunk: destination array; axis 0 is the channel, and axes 1, 2, 3
                  are paired with ``block_size[2]``, ``block_size[1]`` and
                  ``block_size[0]`` respectively
    :param buf: bytes-like object holding the encoded chunk
    :param block_size: size of an encoding block, in (x, y, z) order
    :returns: ``chunk``
    :raises InvalidFormatError: if the buffer is too short to contain the
                                channel offsets and block headers
    """
    channel_count = chunk.shape[0]
    # Each channel stores one 8-byte header for every block of the grid
    blocks_along_x = ceil_div(chunk.shape[3], block_size[0])
    blocks_along_y = ceil_div(chunk.shape[2], block_size[1])
    blocks_along_z = ceil_div(chunk.shape[1], block_size[2])
    headers_size = 8 * blocks_along_x * blocks_along_y * blocks_along_z

    if len(buf) < channel_count * (4 + headers_size):
        raise InvalidFormatError("compressed_segmentation file too short")

    # Offsets are stored as a number of 32-bit words: convert them to bytes
    starts = [
        4 * word
        for (word,) in struct.iter_unpack("<I", buf[:4 * channel_count])
    ]
    # The data of a channel ends where the next one starts; None makes the
    # slice of the last channel run until the end of the buffer.
    ends = starts[1:] + [None]
    for channel, (start, end) in enumerate(zip(starts, ends)):
        if start + headers_size > len(buf):
            raise InvalidFormatError(
                "compressed_segmentation channel offset is too large "
                "(truncated file?)")
        _decode_channel_into(chunk, channel, buf[start:end], block_size)

    return chunk
def _decode_channel_into(chunk, channel, buf, block_size):
    """Decode the compressed_segmentation data of one channel into ``chunk``.

    ``buf`` starts with one 8-byte header per block, laid out with x varying
    fastest, then y, then z. A header is made of two little-endian 32-bit
    words: the low 24 bits of the first word hold the offset of the lookup
    table and its high 8 bits hold the number of bits per encoded value; the
    second word holds the offset of the encoded values. Both offsets are
    counted in 32-bit words from the start of ``buf``.

    :param chunk: destination array indexed as (channel, z, y, x); modified
                  in place
    :param channel: index of the channel to fill in ``chunk``
    :param buf: bytes-like object holding the data of this channel only
    :param block_size: size of an encoding block, in (x, y, z) order
    :raises InvalidFormatError: if the number of encoding bits is invalid, if
                                the encoded values run past the end of
                                ``buf``, or if a value indexes outside of
                                its lookup table
    """
    # Grid size (number of blocks in the chunk)
    gx = ceil_div(chunk.shape[3], block_size[0])
    gy = ceil_div(chunk.shape[2], block_size[1])
    gz = ceil_div(chunk.shape[1], block_size[2])
    block_num_elem = block_size[0] * block_size[1] * block_size[2]
    # Decoded blocks are indexed (z, y, x), whereas block_size is (x, y, z)
    block_shape = (block_size[2], block_size[1], block_size[0])
    for z, y, x in np.ndindex((gz, gy, gx)):
        # Read the block header
        res = struct.unpack_from("<II", buf, 8 * (x + gx * (y + gy * z)))
        lookup_table_offset = 4 * (res[0] & 0x00FFFFFF)
        bits = res[0] >> 24
        if bits not in (0, 1, 2, 4, 8, 16, 32):
            raise InvalidFormatError(
                "Invalid number of encoding bits for "
                "compressed_segmentation block ({0})".format(bits))
        encoded_values_offset = 4 * res[1]
        # The lookup table holds at most 2**bits entries. It is truncated to
        # what actually fits in the buffer, so that an out-of-range encoded
        # value is reported as an IndexError below.
        lookup_table_past_end = lookup_table_offset + chunk.itemsize * min(
            (2**bits), ((len(buf) - lookup_table_offset) // chunk.itemsize))
        lookup_table = np.frombuffer(
            buf[lookup_table_offset:lookup_table_past_end], dtype=chunk.dtype)
        if bits == 0:
            # Zero bits per value: the whole block holds the single value of
            # the lookup table. The block must be allocated in (z, y, x)
            # order like the other branch, otherwise the padding removal
            # below slices the wrong axes for non-cubic blocks.
            block = np.empty(block_shape, dtype=chunk.dtype)
            try:
                block[...] = lookup_table[0]
            except IndexError as exc:
                raise InvalidFormatError(
                    "Invalid compressed_segmentation data: indexing out of "
                    "the lookup table") from exc
        else:
            values_per_32bit = 32 // bits
            encoded_values_end = encoded_values_offset + 4 * (ceil_div(
                block_num_elem, values_per_32bit))
            if encoded_values_end > len(buf):
                raise InvalidFormatError(
                    "Invalid compressed_segmentation data: file too short, "
                    "insufficient room for encoded values")
            packed_values = np.frombuffer(
                buf[encoded_values_offset:encoded_values_end], dtype="<I")
            encoded_values = _unpack_encoded_values(packed_values, bits,
                                                    block_num_elem)
            # Apply the lookup table
            try:
                decoded_values = lookup_table[encoded_values]
            except IndexError as exc:
                raise InvalidFormatError(
                    "Invalid compressed_segmentation data: indexing out of "
                    "the lookup table") from exc
            block = decoded_values.reshape(block_shape)

        # Remove padding: blocks on the upper border of the chunk may extend
        # past the end of the chunk.
        zmax = min(block_size[2], chunk.shape[1] - z * block_size[2])
        ymax = min(block_size[1], chunk.shape[2] - y * block_size[1])
        xmax = min(block_size[0], chunk.shape[3] - x * block_size[0])
        chunk[channel, z * block_size[2]:(z + 1) * block_size[2],
              y * block_size[1]:(y + 1) * block_size[1],
              x * block_size[0]:(x + 1) *
              block_size[0]] = block[:zmax, :ymax, :xmax]
    def downscale_info(scale_level):
        """Build the scale_info dictionary of the scale at ``scale_level``.

        The result is a deep copy of ``full_scale_info`` whose "resolution",
        "size", "key" and "chunk_sizes" entries are recomputed for the
        requested level.

        :param int scale_level: number of downscaling steps from full scale
        :returns: a new scale_info dictionary
        """
        # An axis is halved at each level beyond its own delay
        axis_factors = [
            2**max(0, scale_level - delay) for delay in axis_level_delays
        ]
        info_for_scale = copy.deepcopy(full_scale_info)
        info_for_scale["resolution"] = [
            full_res * factor
            for full_res, factor in zip(full_scale_info["resolution"],
                                        axis_factors)
        ]
        info_for_scale["size"] = [
            ceil_div(full_size, factor)
            for full_size, factor in zip(full_scale_info["size"],
                                         axis_factors)
        ]
        # The key is the smallest resolution, formatted using key_unit
        info_for_scale["key"] = format_length(
            min(info_for_scale["resolution"]), key_unit)

        # Extra chunk-size exponent given to each axis for as long as it has
        # not caught up with the most delayed axis
        largest_delay = max(axis_level_delays)
        extra_exponents = [
            max(0, largest_delay - delay - scale_level)
            for delay in axis_level_delays
        ]
        extra_total = sum(extra_exponents)

        # Ensure that the smallest chunk size is 1 for extremely anisotropic
        # datasets (i.e. reduce the anisotropy of chunk_size)
        overshoot = extra_total - 3 * target_chunk_exponent
        if overshoot > 0:
            nonzero_count = sum(1 for extra in extra_exponents if extra != 0)
            reduction = ceil_div(overshoot, nonzero_count)
            extra_exponents = [
                max(extra - reduction, 0) for extra in extra_exponents
            ]
            extra_total = sum(extra_exponents)
            assert extra_total <= 3 * target_chunk_exponent

        base_exponent = target_chunk_exponent - (extra_total + 1) // 3
        assert base_exponent >= 0
        info_for_scale["chunk_sizes"] = [[
            2**(base_exponent + extra) for extra in extra_exponents
        ]]

        # The summed exponents of the chunk must stay within 1 of the target
        assert abs(
            sum(int(round(math.log2(edge)))
                for edge in info_for_scale["chunk_sizes"][0])
            - 3 * target_chunk_exponent) <= 1

        return info_for_scale
Esempio n. 4
0
def _encode_channel(chunk_channel, block_size):
    """Encode one channel of a chunk in compressed_segmentation format.

    The returned buffer starts with one 8-byte header per block (x varying
    fastest, then y, then z), followed by the lookup tables and the packed
    encoded values. Identical lookup tables are stored only once and shared
    between blocks.

    :param chunk_channel: 3-D array indexed as (z, y, x)
    :param block_size: size of an encoding block, in (x, y, z) order
    :returns: the encoded channel
    :rtype: bytearray
    """
    block_size = tuple(block_size)
    # Grid size (number of blocks in the chunk)
    gx = ceil_div(chunk_channel.shape[2], block_size[0])
    gy = ceil_div(chunk_channel.shape[1], block_size[1])
    gz = ceil_div(chunk_channel.shape[0], block_size[2])
    stored_lut_offsets = {}
    # Reserve room for the block headers, which are filled in as we go
    buf = bytearray(gx * gy * gz * 8)
    for z, y, x in np.ndindex((gz, gy, gx)):
        block = chunk_channel[
            z*block_size[2] : (z+1)*block_size[2],
            y*block_size[1] : (y+1)*block_size[1],
            x*block_size[0] : (x+1)*block_size[0]
        ]
        # NOTE(review): block.shape is in (z, y, x) order whereas block_size
        # is (x, y, z), so this comparison only detects full blocks when the
        # block size is the same along x and z -- confirm what pad_block
        # expects before changing it.
        if block.shape != block_size:
            block = pad_block(block, block_size)

        # TODO optimization: to improve additional compression (gzip), sort the
        # list of unique symbols by decreasing frequency using
        # return_counts=True so that low-value symbols are used more often.
        # Alternatively, sort by label value to improve sharing of lookup
        # tables…
        (lookup_table, encoded_values) = np.unique(
            block, return_inverse=True, return_counts=False)
        # NumPy >= 2.0 returns the inverse indices with the shape of the
        # input array instead of a flat array. The packing code needs a flat
        # sequence, so flatten explicitly (a no-op on older NumPy versions).
        encoded_values = encoded_values.reshape(-1)
        bits = number_of_encoding_bits(len(lookup_table))

        # Write look-up table to the buffer (or re-use another one)
        lut_bytes = lookup_table.astype(block.dtype).tobytes()
        if lut_bytes in stored_lut_offsets:
            lookup_table_offset = stored_lut_offsets[lut_bytes]
        else:
            # Offsets are expressed in 32-bit words
            assert len(buf) % 4 == 0
            lookup_table_offset = len(buf) // 4
            buf += lut_bytes
            stored_lut_offsets[lut_bytes] = lookup_table_offset

        assert len(buf) % 4 == 0
        encoded_values_offset = len(buf) // 4
        buf += _pack_encoded_values(encoded_values, bits)

        # The lookup table offset shares a 32-bit word with the number of
        # bits, so it must fit in 24 bits.
        assert lookup_table_offset == (lookup_table_offset & 0xFFFFFF)
        struct.pack_into("<II", buf, 8 * (x + gx * (y + gy * z)),
                         lookup_table_offset | (bits << 24),
                         encoded_values_offset)
    return buf
Esempio n. 5
0
def _unpack_encoded_values(packed_values, bits, num_values):
    assert bits > 0
    bitmask = (1 << bits) - 1
    values_per_32bit = 32 // bits
    padded_values = np.empty(
        values_per_32bit * ceil_div(num_values, values_per_32bit),
        dtype="<I")
    for shift in range(values_per_32bit):
        padded_values[shift::values_per_32bit] = (
            (packed_values >> (shift * bits)) & bitmask)
    return padded_values[:num_values]
    def downscale(self, chunk, downscaling_factors):
        """Downscale ``chunk``, keeping the most frequent value of each block.

        Every output voxel receives the value that occurs most often in the
        corresponding block of input voxels. When several values are tied,
        the smallest one is kept (``np.unique`` sorts the values and
        ``np.argmax`` returns the first maximum).

        :param chunk: 4-D array indexed as (channel, z, y, x)
        :param downscaling_factors: downscaling factors in (x, y, z) order
        :returns: a new array of the same dtype as ``chunk``
        :raises NotImplementedError: if ``check_factors`` rejects the factors
        """
        if not self.check_factors(downscaling_factors):
            raise NotImplementedError
        factor_x = downscaling_factors[0]
        factor_y = downscaling_factors[1]
        factor_z = downscaling_factors[2]
        result = np.empty(
            (chunk.shape[0],
             ceil_div(chunk.shape[1], factor_z),
             ceil_div(chunk.shape[2], factor_y),
             ceil_div(chunk.shape[3], factor_x)),
            dtype=chunk.dtype)
        for channel, out_z, out_y, out_x in np.ndindex(*result.shape):
            z_start = out_z * factor_z
            y_start = out_y * factor_y
            x_start = out_x * factor_x
            # Slicing clips the window at the upper border of the chunk
            window = chunk[channel,
                           z_start:(z_start + factor_z),
                           y_start:(y_start + factor_y),
                           x_start:(x_start + factor_x)]
            values, occurrences = np.unique(window.flat, return_counts=True)
            most_frequent = values[np.argmax(occurrences)]
            result[channel, out_z, out_y, out_x] = most_frequent

        return result
 def load_tilechunk(xmin, xmax, ymin, ymax, zmin, zmax):
     """Assemble the uint8 sub-volume covering the given bounds.

     Every chunk that intersects the requested bounds is read through
     ``pyramid_io.read_chunk``, converted with ``scale_chunk_to_uint8`` and
     copied to its place in the result.

     :returns: uint8 array of shape
               (num_channels, zmax - zmin, ymax - ymin, xmax - xmin)
     """
     tilechunk = np.empty(
         [num_channels, zmax - zmin, ymax - ymin, xmax - xmin],
         dtype=np.uint8)
     # Indices of the chunks that intersect the requested bounds
     x_indices = range(xmin // chunk_size[0], ceil_div(xmax, chunk_size[0]))
     y_indices = range(ymin // chunk_size[1], ceil_div(ymax, chunk_size[1]))
     z_indices = range(zmin // chunk_size[2], ceil_div(zmax, chunk_size[2]))
     for ix in x_indices:
         x0 = chunk_size[0] * ix
         x1 = min(chunk_size[0] * (ix + 1), size[0])
         for iy in y_indices:
             y0 = chunk_size[1] * iy
             y1 = min(chunk_size[1] * (iy + 1), size[1])
             for iz in z_indices:
                 z0 = chunk_size[2] * iz
                 z1 = min(chunk_size[2] * (iz + 1), size[2])
                 data = pyramid_io.read_chunk(key, (x0, x1, y0, y1, z0, z1))
                 data = scale_chunk_to_uint8(data)
                 # Chunk coordinates are absolute: shift them so that they
                 # are relative to the origin of the tilechunk.
                 tilechunk[:, z0 - zmin:z1 - zmin,
                           y0 - ymin:y1 - ymin,
                           x0 - xmin:x1 - xmin] = data
     return tilechunk
Esempio n. 8
0
def _pack_encoded_values(encoded_values, bits):
    # TODO optimize with np.packbits for bits == 1
    if bits == 0:
        return bytes()
    else:
        values_per_32bit = 32 // bits
        assert np.all(encoded_values == encoded_values & ((1 << bits) - 1))
        padded_values = np.empty(
            values_per_32bit * ceil_div(len(encoded_values), values_per_32bit),
            dtype="<I")
        padded_values[:len(encoded_values)] = encoded_values
        padded_values[len(encoded_values):] = 0
        packed_values = functools.reduce(
            np.bitwise_or,
            (padded_values[shift::values_per_32bit] << (shift * bits)
             for shift in range(values_per_32bit)))
        return packed_values.tobytes()
def convert_scale(pyramid_io, level, zoomer_accessor):
    """Convert one scale of a pyramid into PNG tiles sliced along each axis.

    The volume is processed in cubes of at most ``TILE_SIZE`` voxels per side
    ("tilechunks"). Each tilechunk is converted to uint8 and then cut into
    2-D slices along x, y and z; every slice is stored as a PNG image through
    ``zoomer_accessor.store_file``.

    :param pyramid_io: source of the data; must provide ``info``,
                       ``scale_info(key)`` and ``read_chunk(key, coords)``
    :param int level: index of the scale in ``pyramid_io.info["scales"]``
    :param zoomer_accessor: destination; must provide
                            ``store_file(path, data, mime_type=...)``
    """
    # The scale is looked up by its level; the key used afterwards is the
    # one stored in the corresponding scale_info.
    key = pyramid_io.info["scales"][level]["key"]
    scale_info = pyramid_io.scale_info(key)
    chunk_size = scale_info["chunk_sizes"][0]
    key = scale_info["key"]
    size = scale_info["size"]
    dtype = np.dtype(pyramid_io.info["data_type"]).newbyteorder("<")
    num_channels = pyramid_io.info["num_channels"]
    # Only the first channel is written to the tiles
    assert num_channels == 1

    scale_chunk_to_uint8 = get_chunk_scaler_to_uint8(dtype)

    def load_tilechunk(xmin, xmax, ymin, ymax, zmin, zmax):
        # Assemble the uint8 sub-volume covering the given bounds from all
        # the chunks that intersect it.
        ret = np.empty([num_channels, zmax - zmin, ymax - ymin, xmax - xmin],
                       dtype=np.uint8)
        for x_idx in range(xmin // chunk_size[0],
                           ceil_div(xmax, chunk_size[0])):
            for y_idx in range(ymin // chunk_size[1],
                               ceil_div(ymax, chunk_size[1])):
                for z_idx in range(zmin // chunk_size[2],
                                   ceil_div(zmax, chunk_size[2])):
                    chunk_xmin = chunk_size[0] * x_idx
                    chunk_xmax = min(chunk_size[0] * (x_idx + 1), size[0])
                    chunk_ymin = chunk_size[1] * y_idx
                    chunk_ymax = min(chunk_size[1] * (y_idx + 1), size[1])
                    chunk_zmin = chunk_size[2] * z_idx
                    chunk_zmax = min(chunk_size[2] * (z_idx + 1), size[2])
                    chunk = pyramid_io.read_chunk(
                        key, (chunk_xmin, chunk_xmax, chunk_ymin, chunk_ymax,
                              chunk_zmin, chunk_zmax))
                    chunk = scale_chunk_to_uint8(chunk)
                    ret[:, chunk_zmin - zmin:chunk_zmax - zmin,
                        chunk_ymin - ymin:chunk_ymax - ymin,
                        chunk_xmin - xmin:chunk_xmax - xmin] = chunk
        return ret

    def write_tile(tile, tile_path):
        # Encode a 2-D uint8 array as PNG in memory, then store it
        img = PIL.Image.fromarray(tile)
        io_buf = io.BytesIO()
        img.save(io_buf, format="png")
        zoomer_accessor.store_file(tile_path,
                                   io_buf.getvalue(),
                                   mime_type="image/png")

    # The tile writers receive the tilechunk indices explicitly instead of
    # reading the loop variables of the enclosing function.

    def write_x_tiles(tilechunk, x_idx, y_idx, z_idx):
        for x in range(tilechunk.shape[3]):
            # The (z, y) slice is transposed before being written
            tile = tilechunk[0, :, :, x].T
            tile_path = TILE_PATTERN.format("y",
                                            y_idx,
                                            "z",
                                            z_idx,
                                            level=level,
                                            slice_axis="x",
                                            slice_number=x_idx * TILE_SIZE + x)
            write_tile(tile, tile_path)

    def write_y_tiles(tilechunk, x_idx, y_idx, z_idx):
        for y in range(tilechunk.shape[2]):
            tile = tilechunk[0, :, y, :]
            tile_path = TILE_PATTERN.format("z",
                                            z_idx,
                                            "x",
                                            x_idx,
                                            level=level,
                                            slice_axis="y",
                                            slice_number=y_idx * TILE_SIZE + y)
            write_tile(tile, tile_path)

    def write_z_tiles(tilechunk, x_idx, y_idx, z_idx):
        for z in range(tilechunk.shape[1]):
            tile = tilechunk[0, z, :, :]
            tile_path = TILE_PATTERN.format("y",
                                            y_idx,
                                            "x",
                                            x_idx,
                                            level=level,
                                            slice_axis="z",
                                            slice_number=z_idx * TILE_SIZE + z)
            write_tile(tile, tile_path)

    # Number of tilechunks along x, y and z
    tile_grid = (ceil_div(size[0], TILE_SIZE), ceil_div(size[1], TILE_SIZE),
                 ceil_div(size[2], TILE_SIZE))
    # The context manager closes the progress bar even if the conversion is
    # interrupted by an exception.
    with tqdm(total=tile_grid[0] * tile_grid[1] * tile_grid[2],
              desc="converting scale {}".format(key),
              unit="tilechunk",
              leave=True) as progress_bar:
        for x_idx, y_idx, z_idx in np.ndindex(tile_grid):
            xmin = TILE_SIZE * x_idx
            xmax = min(TILE_SIZE * (x_idx + 1), size[0])
            ymin = TILE_SIZE * y_idx
            ymax = min(TILE_SIZE * (y_idx + 1), size[1])
            zmin = TILE_SIZE * z_idx
            zmax = min(TILE_SIZE * (z_idx + 1), size[2])

            tilechunk = load_tilechunk(xmin, xmax, ymin, ymax, zmin, zmax)

            write_x_tiles(tilechunk, x_idx, y_idx, z_idx)
            write_y_tiles(tilechunk, x_idx, y_idx, z_idx)
            write_z_tiles(tilechunk, x_idx, y_idx, z_idx)

            progress_bar.update()
def compute_dyadic_downscaling(info, source_scale_index, downscaler,
                               chunk_reader, chunk_writer):
    """Compute a scale of the pyramid by downscaling the previous scale.

    The scale at ``source_scale_index + 1`` is computed from the scale at
    ``source_scale_index``. Along each axis the size must either be unchanged
    or be halved (rounding up). Every chunk of the new scale is assembled
    from at most two chunks of the old scale along each axis.

    :param dict info: pyramid description with "scales", "data_type" and
                      "num_channels" entries
    :param int source_scale_index: index of the scale to read from
    :param downscaler: object providing ``check_factors(factors)`` and
                       ``downscale(chunk, factors)``
    :param chunk_reader: object providing ``read_chunk(key, coords)``
    :param chunk_writer: object providing
                         ``write_chunk(chunk, key, coords)``
    :raises ValueError: if the sizes of the two scales are not related by
                        factors of 1 or 2
    :raises NotImplementedError: if the downscaler does not support the
                                 required downscaling factors
    """
    old_scale_info = info["scales"][source_scale_index]
    new_scale_info = info["scales"][source_scale_index + 1]
    old_chunk_size = old_scale_info["chunk_sizes"][0]
    new_chunk_size = new_scale_info["chunk_sizes"][0]
    old_key = old_scale_info["key"]
    new_key = new_scale_info["key"]
    old_size = old_scale_info["size"]
    new_size = new_scale_info["size"]
    dtype = np.dtype(info["data_type"]).newbyteorder("<")
    num_channels = info["num_channels"]
    # An axis whose size is unchanged is not downscaled; any other axis is
    # assumed to be halved, which is verified just below.
    downscaling_factors = [
        1 if old == new else 2 for old, new in zip(old_size, new_size)
    ]
    if new_size != [
            ceil_div(old, factor)
            for old, factor in zip(old_size, downscaling_factors)
    ]:
        raise ValueError("Unsupported downscaling factor between scales "
                         "{} and {} (only 1 and 2 are supported)".format(
                             old_key, new_key))

    # check_factors reports support through its return value, so the result
    # must be tested: fail here rather than after chunks have been read.
    if not downscaler.check_factors(downscaling_factors):
        raise NotImplementedError(
            "The downscaler does not support the downscaling factors {} "
            "needed between scales {} and {}".format(
                downscaling_factors, old_key, new_key))

    # Size of an old chunk once downscaled
    half_chunk = [
        osz // f for osz, f in zip(old_chunk_size, downscaling_factors)
    ]
    # Number of old chunks spanned by a new chunk along each axis
    chunk_fetch_factor = [
        nsz // hc for nsz, hc in zip(new_chunk_size, half_chunk)
    ]

    def load_and_downscale_old_chunk(z_idx, y_idx, x_idx):
        # Read the old chunk at the given chunk indices (clipped to the size
        # of the old scale) and return its downscaled version.
        xmin = old_chunk_size[0] * x_idx
        xmax = min(old_chunk_size[0] * (x_idx + 1), old_size[0])
        ymin = old_chunk_size[1] * y_idx
        ymax = min(old_chunk_size[1] * (y_idx + 1), old_size[1])
        zmin = old_chunk_size[2] * z_idx
        zmax = min(old_chunk_size[2] * (z_idx + 1), old_size[2])
        old_chunk_coords = (xmin, xmax, ymin, ymax, zmin, zmax)

        chunk = chunk_reader.read_chunk(old_key, old_chunk_coords)

        return downscaler.downscale(chunk, downscaling_factors)

    chunk_range = (ceil_div(new_size[0], new_chunk_size[0]),
                   ceil_div(new_size[1], new_chunk_size[1]),
                   ceil_div(new_size[2], new_chunk_size[2]))
    # TODO how to do progress report correctly with logging?
    for x_idx, y_idx, z_idx in tqdm(np.ndindex(chunk_range),
                                    total=np.prod(chunk_range),
                                    desc="computing scale {}".format(new_key),
                                    unit="chunks",
                                    leave=True):
        xmin = new_chunk_size[0] * x_idx
        xmax = min(new_chunk_size[0] * (x_idx + 1), new_size[0])
        ymin = new_chunk_size[1] * y_idx
        ymax = min(new_chunk_size[1] * (y_idx + 1), new_size[1])
        zmin = new_chunk_size[2] * z_idx
        zmax = min(new_chunk_size[2] * (z_idx + 1), new_size[2])
        new_chunk_coords = (xmin, xmax, ymin, ymax, zmin, zmax)
        new_chunk = np.empty(
            [num_channels, zmax - zmin, ymax - ymin, xmax - xmin], dtype=dtype)

        # The new chunk is split into up to 8 octants, each one filled from
        # a different old chunk. dx, dy and dz select the lower (0) or upper
        # (1) half along each axis; dz varies fastest.
        for dx, dy, dz in np.ndindex((2, 2, 2)):
            if ((dz and new_chunk.shape[1] <= half_chunk[2])
                    or (dy and new_chunk.shape[2] <= half_chunk[1])
                    or (dx and new_chunk.shape[3] <= half_chunk[0])):
                # The new chunk does not extend into this octant
                continue
            z_slice = (slice(half_chunk[2], None) if dz
                       else slice(None, half_chunk[2]))
            y_slice = (slice(half_chunk[1], None) if dy
                       else slice(None, half_chunk[1]))
            x_slice = (slice(half_chunk[0], None) if dx
                       else slice(None, half_chunk[0]))
            new_chunk[:, z_slice, y_slice, x_slice] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2] + dz,
                    y_idx * chunk_fetch_factor[1] + dy,
                    x_idx * chunk_fetch_factor[0] + dx))

        chunk_writer.write_chunk(new_chunk.astype(dtype), new_key,
                                 new_chunk_coords)