Esempio n. 1
0
class CziFile(object):
    """Read Carl Zeiss Image (CZI) file.

    Attributes
    ----------
    header : FileHeaderSegment
        Global file metadata such as file version and GUID.

    Global image metadata is returned by the 'metadata' method.
    The directories, 'shape', 'start', 'axes', and 'dtype' are decorated
    with 'lazyattr' and are accessed as attributes.

    All attributes are read-only.

    """
    def __init__(self, arg, multifile=True, filesize=None, detectmosaic=True):
        """Open CZI file and read header.

        Raise ValueError if file is not a ZISRAW file.

        Parameters
        ----------
        arg : str or open file
            File name or file handle; passed on to FileHandle.
        multifile : bool
            If True (default), the master file of a multi-file container
            will be opened if applicable.
        filesize : int
            Size of file if arg is a file handle pointing to an
            embedded CZI file.
        detectmosaic : bool
            If True (default), mosaic images will be reconstructed from
            SubBlocks with a tile index.

        Notes
        -----
        CziFile instances created from file name must be closed using the
        'close' method, which is automatically called when using the
        'with' statement.

        """
        self._fh = FileHandle(arg, size=filesize)
        try:
            if self._fh.read(10) != b'ZISRAWFILE':
                raise ValueError('not a CZI file')
            self.header = Segment(self._fh, 0).data()
        except Exception:
            # do not leave the handle open if the header cannot be read
            self._fh.close()
            raise

        if multifile and self.header.file_part and isinstance(arg, basestring):
            # open master file instead
            self._fh.close()
            name, _ = match_filename(arg)
            self._fh = FileHandle(name)
            try:
                self.header = Segment(self._fh, 0).data()
                assert self.header.primary_file_guid == self.header.file_guid
                assert self.header.file_part == 0
            except Exception:
                # close the master file handle too before re-raising
                self._fh.close()
                raise

        if self.header.update_pending:
            warnings.warn('file is pending update')
        self._filter_mosaic = detectmosaic

    def segments(self, kind=None):
        """Return iterator over Segment data of specified kind.

        Segments are read sequentially from the start of the file until
        a SegmentNotFoundError is raised.

        Parameters
        ----------
        kind : bytestring or sequence thereof
            Segment id(s) as listed in SEGMENT_ID.
            If None (default), all segments are returned.

        """
        fpos = 0
        while True:
            self._fh.seek(fpos)
            try:
                segment = Segment(self._fh)
            except SegmentNotFoundError:
                break
            if (kind is None) or (segment.sid in kind):
                yield segment.data()
            # the next segment starts after the allocated space of this one
            fpos = segment.data_offset + segment.allocated_size

    def metadata(self, raw=True):
        """Return data from MetadataSegment as XML (default) or dict.

        The position recorded in the file header is tried first. If no
        metadata segment is found there, a warning is issued and the file
        is searched for the first metadata segment.

        Return None if no Metadata segment is found.

        """
        position = self.header.metadata_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == MetadataSegment.SID:
                return segment.data().data(raw=raw)
        warnings.warn('Metadata segment not found')
        metadata = next(self.segments(MetadataSegment.SID), None)
        if metadata is None:
            return None
        return metadata.data(raw=raw)

    @lazyattr
    def subblock_directory(self):
        """Return list of all DirectoryEntryDV in file.

        Use SubBlockDirectorySegment if exists, else find SubBlockSegments.

        """
        position = self.header.directory_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == SubBlockDirectorySegment.SID:
                return segment.data().entries
        warnings.warn('SubBlockDirectory segment not found')
        # fall back to collecting the entries from the subblock segments
        return [data.directory_entry
                for data in self.segments(SubBlockSegment.SID)]

    @lazyattr
    def attachment_directory(self):
        """Return list of all AttachmentEntryA1 in file.

        Use AttachmentDirectorySegment if exists, else find AttachmentSegments.

        """
        position = self.header.attachment_directory_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == AttachmentDirectorySegment.SID:
                return segment.data().entries
        warnings.warn('AttachmentDirectory segment not found')
        # fall back to collecting the entries from the attachment segments
        return [data.attachment_entry
                for data in self.segments(AttachmentSegment.SID)]

    def subblocks(self):
        """Return iterator over all SubBlock segments in file."""
        for entry in self.subblock_directory:
            yield entry.data_segment()

    def attachments(self):
        """Return iterator over all Attachment segments in file."""
        for entry in self.attachment_directory:
            yield entry.data_segment()

    def save_attachments(self, directory=None):
        """Save all attachments to files.

        Parameters
        ----------
        directory : str
            Directory to save the attachments to. It is created if it does
            not exist. By default, the path of the file handle with
            '.attachments' appended is used.

        """
        if directory is None:
            directory = self._fh.path + '.attachments'
        if not os.path.exists(directory):
            os.makedirs(directory)
        for attachment in self.attachments():
            attachment.save(directory=directory)

    @lazyattr
    def filtered_subblock_directory(self):
        """Return sorted list of DirectoryEntryDV if mosaic, else all.

        If mosaic detection is enabled and at least one entry has a mosaic
        index, only those entries are returned, sorted by mosaic index.

        """
        if not self._filter_mosaic:
            return self.subblock_directory
        filtered = [
            directory_entry for directory_entry in self.subblock_directory
            if directory_entry.mosaic_index is not None
        ]
        if not filtered:
            return self.subblock_directory
        return sorted(filtered, key=lambda x: x.mosaic_index)

    @lazyattr
    def shape(self):
        """Return shape of image data in file.

        The shape is the per-dimension extent spanned by all subblocks
        (ignoring the 'M' dimension), followed by the sample shape of the
        first subblock's dtype, or 1 if that dtype has no sub-array shape.

        """
        stops = [[
            dim.start + dim.size for dim in directory_entry.dimension_entries
            if dim.dimension != 'M'
        ] for directory_entry in self.filtered_subblock_directory]
        stops = numpy.max(stops, axis=0)
        # self.start has one extra trailing entry for the sample dimension
        shape = tuple(int(i - j) for i, j in zip(stops, self.start[:-1]))
        dtype = self.filtered_subblock_directory[0].dtype
        sampleshape = numpy.dtype(dtype).shape
        return shape + (sampleshape if sampleshape else (1, ))

    @lazyattr
    def start(self):
        """Return minimum start indices per dimension of sub images in file.

        The 'M' dimension is ignored and a trailing 0 is appended for the
        sample dimension.

        """
        starts = [[
            dim.start for dim in directory_entry.dimension_entries
            if dim.dimension != 'M'
        ] for directory_entry in self.filtered_subblock_directory]
        return tuple(numpy.min(starts, axis=0)) + (0, )

    @lazyattr
    def axes(self):
        """Return axes of image data in file."""
        return self.filtered_subblock_directory[0].axes

    @lazyattr
    def dtype(self):
        """Return numpy dtype of image data in file."""
        # subblock data can be of different pixel type
        dtype = numpy.dtype(self.filtered_subblock_directory[0].dtype[-2:])
        for directory_entry in self.filtered_subblock_directory:
            dtype = numpy.promote_types(dtype, directory_entry.dtype[-2:])
        return dtype

    def asarray(self, resize=True, order=0, out=None, max_workers=None):
        """Return image data from file(s) as numpy array.

        Parameters
        ----------
        resize : bool
            If True (default), resize sub/supersampled subblock data.
        order : int
            The order of spline interpolation used to resize sub/supersampled
            subblock data. Default is 0 (nearest neighbor).
        out : numpy.ndarray, str, or file-like object; optional
            Buffer where image data will be saved.
            If numpy.ndarray, a writable array of compatible dtype and shape.
            If str or open file, the file name or file object used to
            create a memory-map to an array stored in a binary file on disk.
        max_workers : int
            Maximum number of threads to read and decode subblock data.
            By default up to half the CPU cores are used.

        Notes
        -----
        A ValueError raised while copying a tile into the output array is
        reported as a warning and the tile is skipped. Any other exception
        is propagated, regardless of whether threads are used.

        """
        out = create_output(out, self.shape, self.dtype)

        if max_workers is None:
            max_workers = multiprocessing.cpu_count() // 2

        def func(directory_entry,
                 resize=resize,
                 order=order,
                 start=self.start,
                 out=out):
            """Read, decode, and copy subblock data."""
            subblock = directory_entry.data_segment()
            tile = subblock.data(resize=resize, order=order)
            # position of the tile relative to the minimum start indices
            index = tuple(
                slice(i - j, i - j + k)
                for i, j, k in zip(directory_entry.start, start, tile.shape))
            try:
                out[index] = tile
            except ValueError as e:
                warnings.warn(str(e))

        if max_workers > 1:
            # presumably enables locking in FileHandle for use from
            # multiple threads; verify against FileHandle
            self._fh.lock = True
            try:
                with ThreadPoolExecutor(max_workers) as executor:
                    # Executor.map is lazy about results: exceptions raised
                    # in workers only surface when the results are iterated
                    for _ in executor.map(func,
                                          self.filtered_subblock_directory):
                        pass
            finally:
                # always reset the lock, also if a worker failed
                self._fh.lock = None
        else:
            for directory_entry in self.filtered_subblock_directory:
                func(directory_entry)

        if hasattr(out, 'flush'):
            out.flush()
        return out

    def close(self):
        """Close file handle."""
        self._fh.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __str__(self):
        """Return file name, header, axes, shape, dtype, and metadata."""
        return '\n '.join((
            self._fh.name.capitalize(),
            '(Carl Zeiss Image File)',
            str(self.header),
            'MetadataSegment',
            str(self.axes),
            str(self.shape),
            str(self.dtype),
            pformat(self.metadata()),
        ))
Esempio n. 2
0
class CziFile(object):
    """Carl Zeiss Image (CZI) file.

    Attributes
    ----------
    header : FileHeaderSegment
        Global file metadata such as file version and GUID.
    metadata : etree.ElementTree.Element
        Global image metadata parsed from UTF-8 encoded XML, or None if
        the file contains no metadata segment.

    All attributes are read-only.

    """
    def __init__(self, arg, multifile=True, filesize=None, detectmosaic=True):
        """Open CZI file and read header.

        Raise ValueError if file is not a ZISRAW file.

        Parameters
        ----------
        arg : str or open file
            File name or file handle; passed on to FileHandle.
        multifile : bool
            If True (default), the master file of a multifile CZI file
            will be opened if applicable.
        filesize : int
            Size of file if arg is a file handle pointing to an
            embedded CZI file.
        detectmosaic : bool
            If True (default), mosaic images will be reconstructed from
            SubBlocks with a tile index.

        Notes
        -----
        CziFile instances created from file name must be closed using the
        'close' method, which is automatically called when using the
        'with' statement.

        """
        self._fh = FileHandle(arg, size=filesize)
        try:
            if self._fh.read(10) != b'ZISRAWFILE':
                raise ValueError("not a CZI file")
            self.header = Segment(self._fh, 0).data()
        except Exception:
            # do not leave the handle open if the header cannot be read
            self._fh.close()
            raise

        if multifile and self.header.file_part and isinstance(arg, basestring):
            # open master file instead
            self._fh.close()
            name, _ = match_filename(arg)
            self._fh = FileHandle(name)
            try:
                self.header = Segment(self._fh, 0).data()
                assert (self.header.primary_file_guid ==
                        self.header.file_guid)
                assert (self.header.file_part == 0)
            except Exception:
                # close the master file handle too before re-raising
                self._fh.close()
                raise

        if self.header.update_pending:
            warnings.warn("file is pending update")
        self._filter_mosaic = detectmosaic

    def segments(self, kind=None):
        """Return iterator over Segment data of specified kind.

        Segments are read sequentially from the start of the file until
        a SegmentNotFoundError is raised.

        Parameters
        ----------
        kind : bytestring or sequence thereof
            Segment id(s) as listed in SEGMENT_ID.
            If None (default), all segments are returned.

        """
        fpos = 0
        while True:
            self._fh.seek(fpos)
            try:
                segment = Segment(self._fh)
            except SegmentNotFoundError:
                break
            if (kind is None) or (segment.sid in kind):
                yield segment.data()
            # the next segment starts after the allocated space of this one
            fpos = segment.data_offset + segment.allocated_size

    @lazyattr
    def metadata(self):
        """Return data from MetadataSegment as xml.ElementTree root Element.

        The position recorded in the file header is tried first. If no
        metadata segment is found there, a warning is issued and the file
        is searched for the first metadata segment.

        Return None if no Metadata segment is found.

        """
        position = self.header.metadata_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == MetadataSegment.SID:
                data = segment.data().data()
                return etree.fromstring(data.encode('utf-8'))
        warnings.warn("Metadata segment not found")
        metadata = next(self.segments(MetadataSegment.SID), None)
        if metadata is None:
            return None
        return etree.fromstring(metadata.data().encode('utf-8'))

    @lazyattr
    def subblock_directory(self):
        """Return list of all DirectoryEntryDV in file.

        Use SubBlockDirectorySegment if exists, else find SubBlockSegments.

        """
        position = self.header.directory_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == SubBlockDirectorySegment.SID:
                return segment.data().entries
        warnings.warn("SubBlockDirectory segment not found")
        # fall back to collecting the entries from the subblock segments
        return [data.directory_entry
                for data in self.segments(SubBlockSegment.SID)]

    @lazyattr
    def attachment_directory(self):
        """Return list of all AttachmentEntryA1 in file.

        Use AttachmentDirectorySegment if exists, else find AttachmentSegments.

        """
        position = self.header.attachment_directory_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == AttachmentDirectorySegment.SID:
                return segment.data().entries
        warnings.warn("AttachmentDirectory segment not found")
        # fall back to collecting the entries from the attachment segments
        return [data.attachment_entry
                for data in self.segments(AttachmentSegment.SID)]

    def subblocks(self):
        """Return iterator over all SubBlock segments in file."""
        for entry in self.subblock_directory:
            yield entry.data_segment()

    def attachments(self):
        """Return iterator over all Attachment segments in file."""
        for entry in self.attachment_directory:
            yield entry.data_segment()

    def save_attachments(self, directory=None):
        """Save all attachments to files.

        Parameters
        ----------
        directory : str
            Directory to save the attachments to. It is created if it does
            not exist. By default, the path of the file handle with
            '.attachments' appended is used.

        """
        if directory is None:
            directory = self._fh.path + '.attachments'
        if not os.path.exists(directory):
            os.makedirs(directory)
        for attachment in self.attachments():
            attachment.save(directory=directory)

    @lazyattr
    def filtered_subblock_directory(self):
        """Return sorted list of DirectoryEntryDV if mosaic, else all.

        If mosaic detection is enabled and at least one entry has a mosaic
        index, only those entries are returned, sorted by mosaic index.

        """
        if not self._filter_mosaic:
            return self.subblock_directory
        filtered = [
            directory_entry for directory_entry in self.subblock_directory
            if directory_entry.mosaic_index is not None
        ]
        if not filtered:
            return self.subblock_directory
        return sorted(filtered, key=lambda x: x.mosaic_index)

    @lazyattr
    def shape(self):
        """Return shape of image data in file.

        The shape is the per-dimension extent spanned by all subblocks
        (ignoring the b'M' dimension), followed by the sample shape of the
        first subblock's dtype, or 1 if that dtype has no sub-array shape.

        """
        stops = [[
            dim.start + dim.size for dim in directory_entry.dimension_entries
            if dim.dimension != b'M'
        ] for directory_entry in self.filtered_subblock_directory]
        stops = numpy.max(stops, axis=0)
        # self.start has one extra trailing entry for the sample dimension
        shape = tuple(int(i - j) for i, j in zip(stops, self.start[:-1]))
        dtype = self.filtered_subblock_directory[0].dtype
        sampleshape = numpy.dtype(dtype).shape
        return shape + (sampleshape if sampleshape else (1, ))

    @lazyattr
    def start(self):
        """Return minimum start indices per dimension of sub images in file.

        The b'M' dimension is ignored and a trailing 0 is appended for the
        sample dimension.

        """
        starts = [[
            dim.start for dim in directory_entry.dimension_entries
            if dim.dimension != b'M'
        ] for directory_entry in self.filtered_subblock_directory]
        return tuple(numpy.min(starts, axis=0)) + (0, )

    @lazyattr
    def axes(self):
        """Return axes of image data in file."""
        return self.filtered_subblock_directory[0].axes

    @lazyattr
    def dtype(self):
        """Return dtype of image data in file."""
        # subblock data can be of different pixel type
        dtype = self.filtered_subblock_directory[0].dtype[-2:]
        for directory_entry in self.filtered_subblock_directory:
            dtype = numpy.promote_types(dtype, directory_entry.dtype[-2:])
        return dtype

    def asarray(self, bgr2rgb=False, resize=True, order=1, memmap=False):
        """Return image data from file(s) as numpy array.

        Parameters
        ----------
        bgr2rgb : bool
            If True, exchange red and blue samples if applicable.
        resize : bool
            If True (default), resize sub/supersampled subblock data.
        order : int
            The order of spline interpolation used to resize sub/supersampled
            subblock data. Default is 1 (bilinear).
        memmap : bool
            If True, return an array stored in a binary file on disk.

        Notes
        -----
        A ValueError raised while copying a tile into the output array is
        reported as a warning and the tile is skipped.

        """
        if memmap:
            # NOTE(review): the temporary file object is closed when the
            # 'with' block exits while the memory map is still in use;
            # presumably relies on the mapping outliving the file object.
            # Confirm on all target platforms.
            with tempfile.NamedTemporaryFile() as fh:
                image = numpy.memmap(fh, dtype=self.dtype, shape=self.shape)
        else:
            image = numpy.zeros(self.shape, self.dtype)

        start = self.start
        for directory_entry in self.filtered_subblock_directory:
            subblock = directory_entry.data_segment()
            tile = subblock.data(bgr2rgb=bgr2rgb, resize=resize, order=order)
            # A multidimensional index must be a tuple; NumPy no longer
            # accepts a list of slices for this purpose.
            index = tuple(
                slice(i - j, i - j + k)
                for i, j, k in zip(directory_entry.start, start, tile.shape))
            try:
                image[index] = tile
            except ValueError as e:
                warnings.warn(str(e))
        return image

    def close(self):
        """Close file handle."""
        self._fh.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __str__(self):
        """Return file name, header, axes, shape, dtype, and metadata."""
        metadata = self.metadata
        # metadata is None if the file has no metadata segment
        if metadata is None:
            xml = str(None)
        else:
            xml = str(etree.tostring(metadata))
        return '\n '.join(
            (self._fh.name.capitalize(), "(Carl Zeiss Image File)",
             str(self.header), "MetadataSegment", str(self.axes),
             str(self.shape), str(self.dtype), xml))
Esempio n. 3
0
class CziFile(object):
    """Carl Zeiss Image (CZI) file.

    Attributes
    ----------
    header : FileHeaderSegment
        Global file metadata such as file version and GUID.
    metadata : etree.ElementTree.Element
        Global image metadata parsed from UTF-8 encoded XML, or None if
        the file contains no metadata segment.

    All attributes are read-only.

    """

    def __init__(self, arg, multifile=True, filesize=None, detectmosaic=True):
        """Open CZI file and read header.

        Raise ValueError if file is not a ZISRAW file.

        Parameters
        ----------
        arg : str or open file
            File name or file handle; passed on to FileHandle.
        multifile : bool
            If True (default), the master file of a multifile CZI file
            will be opened if applicable.
        filesize : int
            Size of file if arg is a file handle pointing to an
            embedded CZI file.
        detectmosaic : bool
            If True (default), mosaic images will be reconstructed from
            SubBlocks with a tile index.

        Notes
        -----
        CziFile instances created from file name must be closed using the
        'close' method, which is automatically called when using the
        'with' statement.

        """
        self._fh = FileHandle(arg, size=filesize)
        try:
            if self._fh.read(10) != b'ZISRAWFILE':
                raise ValueError("not a CZI file")
            self.header = Segment(self._fh, 0).data()
        except Exception:
            # do not leave the handle open if the header cannot be read
            self._fh.close()
            raise

        if multifile and self.header.file_part and isinstance(arg, basestring):
            # open master file instead
            self._fh.close()
            name, _ = match_filename(arg)
            self._fh = FileHandle(name)
            try:
                self.header = Segment(self._fh, 0).data()
                assert(self.header.primary_file_guid ==
                       self.header.file_guid)
                assert(self.header.file_part == 0)
            except Exception:
                # close the master file handle too before re-raising
                self._fh.close()
                raise

        if self.header.update_pending:
            warnings.warn("file is pending update")
        self._filter_mosaic = detectmosaic

    def segments(self, kind=None):
        """Return iterator over Segment data of specified kind.

        Segments are read sequentially from the start of the file until
        a SegmentNotFoundError is raised.

        Parameters
        ----------
        kind : bytestring or sequence thereof
            Segment id(s) as listed in SEGMENT_ID.
            If None (default), all segments are returned.

        """
        fpos = 0
        while True:
            self._fh.seek(fpos)
            try:
                segment = Segment(self._fh)
            except SegmentNotFoundError:
                break
            if (kind is None) or (segment.sid in kind):
                yield segment.data()
            # the next segment starts after the allocated space of this one
            fpos = segment.data_offset + segment.allocated_size

    @lazyattr
    def metadata(self):
        """Return data from MetadataSegment as xml.ElementTree root Element.

        The position recorded in the file header is tried first. If no
        metadata segment is found there, a warning is issued and the file
        is searched for the first metadata segment.

        Return None if no Metadata segment is found.

        """
        position = self.header.metadata_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == MetadataSegment.SID:
                data = segment.data().data()
                return etree.fromstring(data.encode('utf-8'))
        warnings.warn("Metadata segment not found")
        metadata = next(self.segments(MetadataSegment.SID), None)
        if metadata is None:
            return None
        return etree.fromstring(metadata.data().encode('utf-8'))

    @lazyattr
    def subblock_directory(self):
        """Return list of all DirectoryEntryDV in file.

        Use SubBlockDirectorySegment if exists, else find SubBlockSegments.

        """
        position = self.header.directory_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == SubBlockDirectorySegment.SID:
                return segment.data().entries
        warnings.warn("SubBlockDirectory segment not found")
        # fall back to collecting the entries from the subblock segments
        return [data.directory_entry for data in
                self.segments(SubBlockSegment.SID)]

    @lazyattr
    def attachment_directory(self):
        """Return list of all AttachmentEntryA1 in file.

        Use AttachmentDirectorySegment if exists, else find AttachmentSegments.

        """
        position = self.header.attachment_directory_position
        if position:
            segment = Segment(self._fh, position)
            if segment.sid == AttachmentDirectorySegment.SID:
                return segment.data().entries
        warnings.warn("AttachmentDirectory segment not found")
        # fall back to collecting the entries from the attachment segments
        return [data.attachment_entry for data in
                self.segments(AttachmentSegment.SID)]

    def subblocks(self):
        """Return iterator over all SubBlock segments in file."""
        for entry in self.subblock_directory:
            yield entry.data_segment()

    def attachments(self):
        """Return iterator over all Attachment segments in file."""
        for entry in self.attachment_directory:
            yield entry.data_segment()

    def save_attachments(self, directory=None):
        """Save all attachments to files.

        Parameters
        ----------
        directory : str
            Directory to save the attachments to. It is created if it does
            not exist. By default, the path of the file handle with
            '.attachments' appended is used.

        """
        if directory is None:
            directory = self._fh.path + '.attachments'
        if not os.path.exists(directory):
            os.makedirs(directory)
        for attachment in self.attachments():
            attachment.save(directory=directory)

    @lazyattr
    def filtered_subblock_directory(self):
        """Return sorted list of DirectoryEntryDV if mosaic, else all.

        If mosaic detection is enabled and at least one entry has a mosaic
        index, only those entries are returned, sorted by mosaic index.

        """
        if not self._filter_mosaic:
            return self.subblock_directory
        filtered = [directory_entry
                    for directory_entry in self.subblock_directory
                    if directory_entry.mosaic_index is not None]
        if not filtered:
            return self.subblock_directory
        return sorted(filtered, key=lambda x: x.mosaic_index)

    @lazyattr
    def shape(self):
        """Return shape of image data in file.

        The shape is the per-dimension extent spanned by all subblocks
        (ignoring the b'M' dimension), followed by the sample shape of the
        first subblock's dtype, or 1 if that dtype has no sub-array shape.

        """
        stops = [[dim.start + dim.size
                  for dim in directory_entry.dimension_entries
                  if dim.dimension != b'M']
                 for directory_entry in self.filtered_subblock_directory]
        stops = numpy.max(stops, axis=0)
        # self.start has one extra trailing entry for the sample dimension
        shape = tuple(int(i-j) for i, j in zip(stops, self.start[:-1]))
        dtype = self.filtered_subblock_directory[0].dtype
        sampleshape = numpy.dtype(dtype).shape
        return shape + (sampleshape if sampleshape else (1,))

    @lazyattr
    def start(self):
        """Return minimum start indices per dimension of sub images in file.

        The b'M' dimension is ignored and a trailing 0 is appended for the
        sample dimension.

        """
        starts = [[dim.start
                   for dim in directory_entry.dimension_entries
                   if dim.dimension != b'M']
                  for directory_entry in self.filtered_subblock_directory]
        return tuple(numpy.min(starts, axis=0)) + (0,)

    @lazyattr
    def axes(self):
        """Return axes of image data in file."""
        return self.filtered_subblock_directory[0].axes

    @lazyattr
    def dtype(self):
        """Return dtype of image data in file."""
        # subblock data can be of different pixel type
        dtype = self.filtered_subblock_directory[0].dtype[-2:]
        for directory_entry in self.filtered_subblock_directory:
            dtype = numpy.promote_types(dtype, directory_entry.dtype[-2:])
        return dtype

    def asarray(self, bgr2rgb=False, resize=True, order=1, memmap=False):
        """Return image data from file(s) as numpy array.

        Parameters
        ----------
        bgr2rgb : bool
            If True, exchange red and blue samples if applicable.
        resize : bool
            If True (default), resize sub/supersampled subblock data.
        order : int
            The order of spline interpolation used to resize sub/supersampled
            subblock data. Default is 1 (bilinear).
        memmap : bool
            If True, return an array stored in a binary file on disk.

        Notes
        -----
        A ValueError raised while copying a tile into the output array is
        reported as a warning and the tile is skipped.

        """
        if memmap:
            # NOTE(review): the temporary file object is closed when the
            # 'with' block exits while the memory map is still in use;
            # presumably relies on the mapping outliving the file object.
            # Confirm on all target platforms.
            with tempfile.NamedTemporaryFile() as fh:
                image = numpy.memmap(fh, dtype=self.dtype, shape=self.shape)
        else:
            image = numpy.zeros(self.shape, self.dtype)

        start = self.start
        for directory_entry in self.filtered_subblock_directory:
            subblock = directory_entry.data_segment()
            tile = subblock.data(bgr2rgb=bgr2rgb, resize=resize, order=order)
            # A multidimensional index must be a tuple; NumPy no longer
            # accepts a list of slices for this purpose.
            index = tuple(slice(i-j, i-j+k) for i, j, k in
                          zip(directory_entry.start, start, tile.shape))
            try:
                image[index] = tile
            except ValueError as e:
                warnings.warn(str(e))
        return image

    def close(self):
        """Close file handle."""
        self._fh.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __str__(self):
        """Return file name, header, axes, shape, dtype, and metadata."""
        metadata = self.metadata
        # metadata is None if the file has no metadata segment
        if metadata is None:
            xml = str(None)
        else:
            xml = str(etree.tostring(metadata))
        return '\n '.join((
            self._fh.name.capitalize(),
            "(Carl Zeiss Image File)",
            str(self.header),
            "MetadataSegment",
            str(self.axes),
            str(self.shape),
            str(self.dtype),
            xml))