예제 #1
0
def read_preamble(fp: BinaryIO, force: bool) -> Optional[bytes]:
    """Return the 128-byte DICOM preamble in `fp` if present.

    `fp` should be positioned at the start of the file-like. If the preamble
    and prefix are found then after reading `fp` will be positioned at the
    first byte after the prefix (byte offset 133). If either the preamble or
    prefix are missing and `force` is ``True`` then after reading `fp` will be
    positioned at the start of the file-like.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate prefix
        ('DICM') is found at byte offset 128. Returns ``None`` if the 'DICM'
        prefix is not found and `force` is ``True``.

    Raises
    ------
    InvalidDicomError
        If `force` is ``False`` and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading File Meta Information preamble...")
    preamble = fp.read(128)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug(f"{fp.tell() - 128:08x}: {sample}")

    logger.debug("Reading File Meta Information prefix...")
    magic = fp.read(4)
    if magic != b"DICM" and force:
        logger.info(
            "File is not conformant with the DICOM File Format: 'DICM' "
            "prefix is missing from the File Meta Information header "
            "or the header itself is missing. Assuming no header and "
            "continuing.")
        fp.seek(0)
        return None

    if magic != b"DICM" and not force:
        raise InvalidDicomError(
            "File is missing DICOM File Meta Information header or the 'DICM' "
            "prefix is missing from the header. Use force=True to force "
            "reading.")
    else:
        logger.debug(f"{fp.tell() - 4:08x}: 'DICM' prefix found")

    return preamble
예제 #2
0
def _read_file_meta_info(fp):
    """Return the file meta information.
    fp must be set after the 128 byte preamble and 'DICM' marker
    """
    # File meta info always LittleEndian, Explicit VR. After will change these
    #    to the transfer syntax values set in the meta info

    # Get group length data element, whose value is the length of the meta_info
    fp_save = fp.tell()  # in case need to rewind
    debugging = config.debugging
    if debugging:
        logger.debug("Try to read group length info...")
    bytes_read = fp.read(8)
    group, elem, VR, length = unpack("<HH2sH", bytes_read)
    if debugging:
        debug_msg = "{0:08x}: {1}".format(fp.tell() - 8, bytes2hex(bytes_read))
    if not in_py2:
        VR = VR.decode(default_encoding)
    if VR in extra_length_VRs:
        bytes_read = fp.read(4)
        length = unpack("<L", bytes_read)[0]
        if debugging:
            debug_msg += " " + bytes2hex(bytes_read)
    if debugging:
        debug_msg = "{0:<47s}  ({1:04x}, {2:04x}) {3:2s} Length: {4:d}".format(debug_msg, group, elem, VR, length)
        logger.debug(debug_msg)

    # Store meta group length if it exists, then read until not group 2
    if group == 2 and elem == 0:
        bytes_read = fp.read(length)
        if debugging:
            logger.debug("{0:08x}: {1}".format(fp.tell() - length, bytes2hex(bytes_read)))
        group_length = unpack("<L", bytes_read)[0]
        expected_ds_start = fp.tell() + group_length
        if debugging:
            msg = "value (group length) = {0:d}".format(group_length)
            msg += "  regular dataset should start at {0:08x}".format(expected_ds_start)
            logger.debug(" " * 10 + msg)
    else:
        expected_ds_start = None
        if debugging:
            logger.debug(" " * 10 + "(0002,0000) Group length not found.")

    # Changed in pydicom 0.9.7 -- don't trust the group length, just read
    #    until no longer group 2 data elements. But check the length and
    #    give a warning if group 2 ends at different location.
    # Rewind to read the first data element as part of the file_meta dataset
    if debugging:
        logger.debug("Rewinding and reading whole dataset " "including this first data element")
    fp.seek(fp_save)
    file_meta = read_dataset(fp, is_implicit_VR=False, is_little_endian=True, stop_when=not_group2)
    fp_now = fp.tell()
    if expected_ds_start and fp_now != expected_ds_start:
        logger.info("*** Group length for file meta dataset " "did not match end of group 2 data ***")
    else:
        if debugging:
            logger.debug("--- End of file meta data found " "as expected ---------")
    return file_meta
예제 #3
0
def read_preamble(fp, force):
    """Return the 128-byte DICOM preamble in `fp` if present.

    `fp` should be positioned at the start of the file-like. If the preamble
    and prefix are found then after reading `fp` will be positioned at the
    first byte after the prefix (byte offset 133). If either the preamble or
    prefix are missing and `force` is True then after reading `fp` will be
    positioned at the start of the file-like.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : str/bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate prefix
        ('DICM') is found at byte offset 128. Returns None if the 'DICM' prefix
        is not found and `force` is True.

    Raises
    ------
    InvalidDicomError
        If `force` is False and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading File Meta Information preamble...")
    preamble = fp.read(128)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 128, sample))

    logger.debug("Reading File Meta Information prefix...")
    magic = fp.read(4)
    if magic != b"DICM" and force:
        logger.info(
            "File is not conformant with the DICOM File Format: 'DICM' "
            "prefix is missing from the File Meta Information header "
            "or the header itself is missing. Assuming no header and "
            "continuing.")
        preamble = None
        fp.seek(0)
    elif magic != b"DICM" and not force:
        raise InvalidDicomError("File is missing DICOM File Meta Information "
                                "header or the 'DICM' prefix is missing from "
                                "the header. Use force=True to force reading.")
    else:
        logger.debug("{0:08x}: 'DICM' prefix found".format(fp.tell() - 4))
    return preamble
예제 #4
0
    def test_bytes_to_hex(self):
        """Test utils.hexutil.hex2bytes"""
        hexstring = "00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f"
        bytestring = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09' \
                     b'\x0A\x0B\x0C\x0D\x0E\x0F'
        assert bytes2hex(bytestring) == hexstring

        hexstring = "00 10 20 30 40 50 60 70 80 90 a0 b0 c0 d0 e0 f0"
        bytestring = b'\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90' \
                     b'\xA0\xB0\xC0\xD0\xE0\xF0'
        assert bytes2hex(bytestring) == hexstring
예제 #5
0
    def test_bytes_to_hex(self):
        """Test utils.hexutil.hex2bytes"""
        hexstring = "00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f"
        bytestring = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09' \
                     b'\x0A\x0B\x0C\x0D\x0E\x0F'
        assert bytes2hex(bytestring) == hexstring

        hexstring = "00 10 20 30 40 50 60 70 80 90 a0 b0 c0 d0 e0 f0"
        bytestring = b'\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90' \
                     b'\xA0\xB0\xC0\xD0\xE0\xF0'
        assert bytes2hex(bytestring) == hexstring
예제 #6
0
 def test_empty_AT(self):
     """Write empty AT correctly.........."""
     # Was issue 74
     data_elem = DataElement(0x00280009, "AT", [])
     expected = hex2bytes((
         " 28 00 09 00"  # (0028,0009) Frame Increment Pointer
         " 00 00 00 00"  # length 0
     ))
     write_data_element(self.f1, data_elem)
     got = self.f1.parent.getvalue()
     msg = ("Did not write zero-length AT value correctly. "
            "Expected %r, got %r") % (bytes2hex(expected), bytes2hex(got))
     msg = "%r %r" % (type(expected), type(got))
     msg = "'%r' '%r'" % (expected, got)
     self.assertEqual(expected, got, msg)
예제 #7
0
 def test_empty_AT(self):
     """Write empty AT correctly.........."""
     # Was issue 74
     data_elem = DataElement(0x00280009, "AT", [])
     expected = hex2bytes((
         " 28 00 09 00"   # (0028,0009) Frame Increment Pointer
         " 00 00 00 00"   # length 0
     ))
     write_data_element(self.f1, data_elem)
     got = self.f1.parent.getvalue()
     msg = ("Did not write zero-length AT value correctly. "
            "Expected %r, got %r") % (bytes2hex(expected), bytes2hex(got))
     msg = "%r %r" % (type(expected), type(got))
     msg = "'%r' '%r'" % (expected, got)
     self.assertEqual(expected, got, msg)
예제 #8
0
def read_preamble(fp, force):
    """Return the 128-byte DICOM preamble in `fp` if present.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : str/bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate prefix
        ('DICM') is found at byte offset 128. Returns None if the 'DICM' prefix
        is not found and `force` is True.

    Raises
    ------
    InvalidDicomError
        If `force` is False and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))
    magic = fp.read(4)
    if magic != b"DICM":
        if force:
            logger.info(
                "File is not a conformant DICOM file; 'DICM' prefix is "
                "missing from the file header or the header is "
                "missing. Assuming no header and continuing.")
            preamble = None
            fp.seek(0)
        else:
            raise InvalidDicomError("File is missing DICOM header or 'DICM' "
                                    "prefix is missing from the header. Use "
                                    "force=True to force reading.")
    else:
        logger.debug("{0:08x}: 'DICM' prefix found".format(fp.tell() - 4))
    return preamble
예제 #9
0
def read_preamble(fp, force):
    """Read and return the DICOM preamble.

    Parameters
    ----------
    fp : file-like object
    force : boolean
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : DICOM preamble, None
        The DICOM preamble will be returned if appropriate
        header ('DICM') is found. Returns None if no header
        is found.

    Raises
    ------
    InvalidDicomError
        If force flag is false and no appropriate header information
        found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))
    magic = fp.read(4)
    if magic != b"DICM":
        if force:
            logger.info("File is not a standard DICOM file; 'DICM' header is "
                        "missing. Assuming no header and continuing")
            preamble = None
            fp.seek(0)
        else:
            raise InvalidDicomError("File is missing 'DICM' marker. "
                                    "Use force=True to force reading")
    else:
        logger.debug("{0:08x}: 'DICM' marker found".format(fp.tell() - 4))
    return preamble
예제 #10
0
def read_preamble(fp, force):
    """Read and return the DICOM preamble.

    Parameters
    ----------
    fp : file-like object
    force : boolean
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : DICOM preamble, None
        The DICOM preamble will be returned if appropriate
        header ('DICM') is found. Returns None if no header
        is found.

    Raises
    ------
    InvalidDicomError
        If force flag is false and no appropriate header information
        found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))
    magic = fp.read(4)
    if magic != b"DICM":
        if force:
            logger.info("File is not a standard DICOM file; 'DICM' header is "
                        "missing. Assuming no header and continuing")
            preamble = None
            fp.seek(0)
        else:
            raise InvalidDicomError("File is missing 'DICM' marker. "
                                    "Use force=True to force reading")
    else:
        logger.debug("{0:08x}: 'DICM' marker found".format(fp.tell() - 4))
    return preamble
예제 #11
0
def read_sequence_item(fp,
                       is_implicit_VR,
                       is_little_endian,
                       encoding,
                       offset=0):
    """Read and return a single :class:`~pydicom.sequence.Sequence` item, i.e.
    a :class:`~pydicom.dataset.Dataset`.
    """
    seq_item_tell = fp.tell() + offset
    if is_little_endian:
        tag_length_format = "<HHL"
    else:
        tag_length_format = ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except BaseException:
        raise IOError("No tag to read at file position "
                      "{0:05x}".format(fp.tell() + offset))
    tag = (group, element)
    if tag == SequenceDelimiterTag:  # No more items, time to stop reading
        logger.debug("{0:08x}: {1}".format(fp.tell() - 8 + offset,
                                           "End of Sequence"))
        if length != 0:
            logger.warning("Expected 0x00000000 after delimiter, found 0x%x, "
                           "at position 0x%x" %
                           (length, fp.tell() - 4 + offset))
        return None
    if tag != ItemTag:
        logger.warning("Expected sequence item with tag %s at file position "
                       "0x%x" % (ItemTag, fp.tell() - 4 + offset))
    else:
        logger.debug("{0:08x}: {1}  Found Item tag (start of item)".format(
            fp.tell() - 4 + offset, bytes2hex(bytes_read)))
    if length == 0xFFFFFFFF:
        ds = read_dataset(fp,
                          is_implicit_VR,
                          is_little_endian,
                          bytelength=None,
                          parent_encoding=encoding,
                          at_top_level=False)
        ds.is_undefined_length_sequence_item = True
    else:
        ds = read_dataset(fp,
                          is_implicit_VR,
                          is_little_endian,
                          length,
                          parent_encoding=encoding,
                          at_top_level=False)
        ds.is_undefined_length_sequence_item = False
        logger.debug("%08x: Finished sequence item" % (fp.tell() + offset, ))
    ds.seq_item_tell = seq_item_tell
    return ds
예제 #12
0
def read_sequence_item(fp, is_implicit_VR, is_little_endian, encoding,
                       offset=0):
    """Read and return a single sequence item, i.e. a Dataset"""
    seq_item_tell = fp.tell() + offset
    if is_little_endian:
        tag_length_format = "<HHL"
    else:
        tag_length_format = ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except BaseException:
        raise IOError("No tag to read at file position "
                      "{0:05x}".format(fp.tell() + offset))
    tag = (group, element)
    if tag == SequenceDelimiterTag:  # No more items, time to stop reading
        logger.debug(
            "{0:08x}: {1}".format(fp.tell() - 8 + offset, "End of Sequence"))
        if length != 0:
            logger.warning("Expected 0x00000000 after delimiter, found 0x%x, "
                           "at position 0x%x" % (
                               length, fp.tell() - 4 + offset))
        return None
    if tag != ItemTag:
        logger.warning("Expected sequence item with tag %s at file position "
                       "0x%x" % (ItemTag, fp.tell() - 4 + offset))
    else:
        logger.debug("{0:08x}: {1}  Found Item tag (start of item)".format(
            fp.tell() - 4 + offset, bytes2hex(bytes_read)))
    if length == 0xFFFFFFFF:
        ds = read_dataset(fp, is_implicit_VR, is_little_endian,
                          bytelength=None, parent_encoding=encoding)
        ds.is_undefined_length_sequence_item = True
    else:
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, length,
                          parent_encoding=encoding)
        ds.is_undefined_length_sequence_item = False
        logger.debug("%08x: Finished sequence item" % (fp.tell() + offset,))
    ds.seq_item_tell = seq_item_tell
    return ds
예제 #13
0
def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding,
                           specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True,
        read_data_element will just return.
    defer_size : int, str, None, optional
        See ``dcmread`` for parameter info.
    encoding :
        Encoding scheme
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    VR : None if implicit VR, otherwise the VR read from the file
    length :
        the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL)
    value_bytes :
        the raw bytes from the DICOM file
        (not parsed into python types)
    is_little_endian : boolean
        True if transfer syntax is little endian; else False.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    tag_set = set()
    has_specific_char_set = True
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, (str, compat.text_type)):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        has_specific_char_set = Tag(0x08, 0x05) in tag_set
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = len(tag_set) > 0

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size
                    and tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(
                                     value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                from pydicom.values import convert_string
                encoding = convert_string(value,
                                          is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)
                if not has_specific_char_set:
                    continue

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag,
                                  VR,
                                  seq,
                                  value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value,
                                              is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)
                    if not has_specific_char_set:
                        continue

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
예제 #14
0
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None, encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True,
        read_data_element will raise StopIteration.
    defer_size : int, str, None, optional
        See ``read_file`` for parameter info.
    encoding :
        Encoding scheme

    Returns
    -------
    VR : None if implicit VR, otherwise the VR read from the file
    length :
        the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL)
    value_bytes :
        the raw bytes from the DICOM file
        (not parsed into python types)
    is_little_endian : boolean
        True if transfer syntax is little endian; else False.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            raise StopIteration  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                raise StopIteration

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                                                           bytes2hex(value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == (0x08, 0x05):
                from pydicom.values import convert_string
                encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                # Store the encoding value in the generator for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set, then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                    # Store the encoding value in the generator for use with future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
예제 #15
0
def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, ``read_data_element`` will just return.
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme
    specific_tags : list or None
        See :func:`dcmread` for parameter info.

    Yields
    -------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"

    # assign implicit VR struct to variable as use later if VR assumed missing
    implicit_VR_struct = Struct(endian_chr + "HHL")
    if is_implicit_VR:
        element_struct = implicit_VR_struct
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    tag_set = {Tag(tag) for tag in specific_tags} if specific_tags else set()
    has_tag_set = bool(tag_set)
    if has_tag_set:
        tag_set.add(Tag(0x00080005))  # Specific Character Set

    while True:
        # VR: Optional[str]

        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(bytes_read)}"

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            # defend against switching to implicit VR, some writer do in SQ's
            # issue 1067, issue 1035

            if not (b'AA' <= VR <= b'ZZ') and config.assume_implicit_vr_switch:
                # invalid VR, must be 2 cap chrs, assume implicit and continue
                VR = None
                group, elem, length = implicit_VR_struct.unpack(bytes_read)
            else:
                VR = VR.decode(default_encoding)
                if VR in extra_length_VRs:
                    bytes_read = fp_read(4)
                    length = extra_length_unpack(bytes_read)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(bytes_read)

        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = (
                    fp_read(length) if length > 0
                    else cast(
                        Optional[bytes], empty_value_for_VR(VR, raw=True)
                    )
                )
                if debugging:
                    dotdot = "..." if length > 20 else "   "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian
                )
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # VR UN with undefined length shall be handled as SQ
            # see PS 3.5, section 6.2.2
            if VR == 'UN':
                VR = 'SQ'
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None or VR == 'UN' and config.replace_un_with_known_vr:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = _unpack_tag(fp_read(4), endian_chr)
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    logger_debug(
                        f"{fp_tell():08X}: Reading/parsing undefined length "
                        "sequence"
                    )

                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue

                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(
                    fp, is_little_endian, delimiter, defer_size
                )

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
예제 #16
0
def _read_file_meta_info(fp):
    """Return the file meta information.
    fp must be set after the 128 byte preamble and 'DICM' marker
    """
    # File meta info always LittleEndian, Explicit VR. After will change these
    #    to the transfer syntax values set in the meta info

    # Get group length data element, whose value is the length of the meta_info
    fp_save = fp.tell()  # in case need to rewind
    debugging = config.debugging
    if debugging:
        logger.debug("Try to read group length info...")
    bytes_read = fp.read(8)
    group, elem, VR, length = unpack("<HH2sH", bytes_read)
    if debugging:
        debug_msg = "{0:08x}: {1}".format(fp.tell() - 8, bytes2hex(bytes_read))
    if not in_py2:
        VR = VR.decode(default_encoding)
    if VR in extra_length_VRs:
        bytes_read = fp.read(4)
        length = unpack("<L", bytes_read)[0]
        if debugging:
            debug_msg += " " + bytes2hex(bytes_read)
    if debugging:
        debug_msg = "{0:<47s}  ({1:04x}, {2:04x}) {3:2s} Length: {4:d}".format(
            debug_msg, group, elem, VR, length)
        logger.debug(debug_msg)

    # Store meta group length if it exists, then read until not group 2
    if group == 2 and elem == 0:
        bytes_read = fp.read(length)
        if debugging:
            logger.debug("{0:08x}: {1}".format(fp.tell() - length,
                                               bytes2hex(bytes_read)))
        group_length = unpack("<L", bytes_read)[0]
        expected_ds_start = fp.tell() + group_length
        if debugging:
            msg = "value (group length) = {0:d}".format(group_length)
            msg += "  regular dataset should start at {0:08x}".format(
                expected_ds_start)
            logger.debug(" " * 10 + msg)
    else:
        expected_ds_start = None
        if debugging:
            logger.debug(" " * 10 + "(0002,0000) Group length not found.")

    # Changed in pydicom 0.9.7 -- don't trust the group length, just read
    #    until no longer group 2 data elements. But check the length and
    #    give a warning if group 2 ends at different location.
    # Rewind to read the first data element as part of the file_meta dataset
    if debugging:
        logger.debug("Rewinding and reading whole dataset "
                     "including this first data element")
    fp.seek(fp_save)
    file_meta = read_dataset(fp,
                             is_implicit_VR=False,
                             is_little_endian=True,
                             stop_when=not_group2)
    fp_now = fp.tell()
    if expected_ds_start and fp_now != expected_ds_start:
        logger.info("*** Group length for file meta dataset "
                    "did not match end of group 2 data ***")
    else:
        if debugging:
            logger.debug("--- End of file meta data found "
                         "as expected ---------")
    return file_meta