Python RawDataElement 예제들, pydicom.dataelem.RawDataElement Python 예제들

예제 #1

0

파일 보기

파일: test_config.py 프로젝트: pydicom/pydicom

 def test_default_for_reading_validation_mode(self):
     """Converting the raw element built here must emit a ``UserWarning``."""
     unknown_tag = Tag(0x88880002)
     raw_elem = RawDataElement(
         unknown_tag, None, 4, b'unknown', 0, True, True)
     # The conversion runs inside the context so the warning is captured.
     with pytest.warns(UserWarning):
         DataElement_from_raw(raw_elem)

예제 #2

0

파일 보기

def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, the file is rewound to the start of the current
        data element and the generator returns.
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme. Replaced internally once a defined-length
        *Specific Character Set* (0008,0005) element has been read.
    specific_tags : list or None
        See :func:`dcmread` for parameter info. When non-empty, the
        *Specific Character Set* tag is added to the set automatically.

    Yields
    ------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"

    # assign implicit VR struct to variable as use later if VR assumed missing
    implicit_VR_struct = Struct(endian_chr + "HHL")
    if is_implicit_VR:
        element_struct = implicit_VR_struct
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # Normalise the requested tags; an empty/None list disables filtering.
    tag_set = {Tag(tag) for tag in specific_tags} if specific_tags else set()
    has_tag_set = bool(tag_set)
    if has_tag_set:
        tag_set.add(Tag(0x00080005))  # Specific Character Set

    while True:
        # VR: Optional[str]
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(bytes_read)}"

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            vr = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, vr, length = element_struct_unpack(bytes_read)
            # defend against switching to implicit VR, some writer do in SQ's
            # issue 1067, issue 1035

            if not (b'AA' <= vr <= b'ZZ') and config.assume_implicit_vr_switch:
                # invalid VR, must be 2 cap chrs, assume implicit and continue
                # The same 8 bytes are re-interpreted with the implicit layout.
                vr = None
                group, elem, length = implicit_VR_struct.unpack(bytes_read)
            else:
                vr = vr.decode(default_encoding)
                if vr in EXPLICIT_VR_LENGTH_32:
                    # The real length follows the 8-byte header as 4 bytes.
                    bytes_read = fp_read(4)
                    length = extra_length_unpack(bytes_read)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(bytes_read)

        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += f" {vr} "
            if length != 0xFFFFFFFF:
                debug_msg += f"Length: {length}"
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, vr, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # Header is 8 bytes, plus the 4-byte length for special VRs.
                rewind_length = 8
                if not is_implicit_VR and vr in EXPLICIT_VR_LENGTH_32:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size
                    and tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                # Note: this message is logged without checking `debugging`.
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # Zero-length values are replaced by the VR's raw empty value.
                value = (fp_read(length) if length > 0 else cast(
                    Optional[bytes], empty_value_for_VR(vr, raw=True)))
                if debugging:
                    # Only the first 20 bytes of the value are logged.
                    dotdot = "..." if length > 20 else "   "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, vr, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # VR UN with undefined length shall be handled as SQ
            # see PS 3.5, section 6.2.2
            if vr == VR_.UN and config.settings.infer_sq_for_un_vr:
                vr = VR_.SQ
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            # Precedence note: `and` binds tighter than `or`, so this reads
            # `vr is None or (vr == UN and config.replace_un_with_known_vr)`.
            if vr is None or vr == VR_.UN and config.replace_un_with_known_vr:
                try:
                    vr = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = _unpack_tag(fp_read(4), endian_chr)
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        vr = VR_.SQ

            if vr == VR_.SQ:
                if debugging:
                    logger_debug(
                        f"{fp_tell():08X}: Reading/parsing undefined length "
                        "sequence")

                # The sequence is parsed even when it will be filtered out
                # below, which moves the file position past it.
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue

                yield DataElement(tag,
                                  vr,
                                  seq,
                                  value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, vr, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)

예제 #3

0

파일 보기

파일: filereader.py 프로젝트: sjoerdk/pydicom

def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding,
                           specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, the file is rewound to the start of the current
        data element and the generator returns.
    defer_size : int, str, None, optional
        See :func:`dcmread` for parameter info.
    encoding :
        Encoding scheme. Replaced internally once a *Specific Character Set*
        (0008,0005) element has been read.
    specific_tags : list or None
        See :func:`dcmread` for parameter info. Entries may be keyword
        strings or tags; entries of any other type are ignored. When a list
        is given (even an empty one), *Specific Character Set* is always
        added to the set of tags to read.

    Yields
    ------
    RawDataElement or DataElement
        ``DataElement`` for undefined length elements identified as ``SQ``,
        ``RawDataElement`` (tag, VR, length, value, value_tell,
        is_implicit_VR, is_little_endian) otherwise. The VR is ``None`` for
        implicit VR data with a defined length, and the length may be the
        DICOM "undefined length" ``0xFFFFFFFF``.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # Build the set of tags to keep; keywords are converted to tags first and
    # anything that is not a BaseTag afterwards is silently dropped.
    tag_set = set()
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, (str, compat.text_type)):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        # Specific Character Set is always kept when filtering is requested
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = len(tag_set) > 0

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # The real length follows the 8-byte header as 4 bytes.
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # Header is 8 bytes, plus the 4-byte length for special VRs.
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size
                    and tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                # Note: this message is logged without checking `debugging`.
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # Zero-length values are replaced by the VR's raw empty value.
                value = (fp_read(length)
                         if length > 0 else empty_value_for_VR(VR, raw=True))
                if debugging:
                    # Only the first 12 bytes of the value are logged.
                    dotdot = "..." if length > 12 else "   "
                    displayed_value = value[:12] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                from pydicom.values import convert_string
                # NOTE(review): `value` is passed through unchanged even when
                # the element is empty -- confirm convert_string accepts
                # whatever empty_value_for_VR(VR, raw=True) returns.
                encoding = convert_string(value, is_little_endian)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                # The sequence is parsed even when it will be filtered out
                # below, which moves the file position past it.
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag,
                                  VR,
                                  seq,
                                  value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value, is_little_endian)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)

예제 #4

0

파일 보기

파일: test_dataelem.py 프로젝트: rhaxton/pydicom

 def testTagWithoutEncodingPython3(self):
     """RawDataElement: raises if no encoding given in Python 3."""
     # NOTE(review): the second argument handed to assertRaises is an
     # already-constructed RawDataElement rather than a callable followed by
     # its arguments, so no conversion of the raw element is performed here.
     # Presumably the intent was to pass the converting function and the raw
     # element separately -- confirm against the intended API before changing.
     self.assertRaises(
         TypeError,
         RawDataElement(Tag(0x00104000), 'LT', 14, b'comment1\\comment2', 0,
                        False, True))

예제 #5

0

파일 보기

파일: test_dataelem.py 프로젝트: rhaxton/pydicom

 def test_unknown_vr(self):
     """Converting a raw element whose VR is 'AA' must raise
     ``NotImplementedError``."""
     element_tag = Tag(0x00080000)
     raw_elem = RawDataElement(
         element_tag, 'AA', 8, b'20170101', 0, False, True)
     # The conversion is the only statement expected to raise.
     with pytest.raises(NotImplementedError):
         DataElement_from_raw(raw_elem, default_encoding)

예제 #6

0

파일 보기

 def test_data_element_without_encoding(self):
     """A raw 'LT' element converts without an explicit encoding argument."""
     comment_tag = Tag(0x00104000)
     raw_bytes = b'comment\\comment2\\comment3'
     raw_elem = RawDataElement(
         comment_tag, 'LT', 23, raw_bytes, 0, False, True)
     converted = DataElement_from_raw(raw_elem)
     # The converted element must resolve to the expected element name.
     assert 'Patient Comments' == converted.name

예제 #7

0

파일 보기

파일: test_dataelem.py 프로젝트: rhaxton/pydicom

 def testTagWithoutEncodingPython2(self):
     """RawDataElement: conversion succeeds with no encoding argument."""
     comment_tag = Tag(0x00104000)
     raw_bytes = b'comment\\comment2\\comment3'
     raw_elem = RawDataElement(
         comment_tag, 'LT', 23, raw_bytes, 0, False, True)
     converted = DataElement_from_raw(raw_elem)
     # The converted element must resolve to the expected element name.
     self.assertEqual(converted.name, 'Patient Comments')

예제 #8

0

파일 보기

파일: test_dataelem.py 프로젝트: sehriff/pydicom

 def setUp(self):
     """Create the raw data element fixture shared by the tests."""
     # Positional fields of a raw data element, in order:
     #   tag, VR, length, value, value_tell, is_implicit_VR, is_little_endian
     # The tag is unknown (not in the DICOM dictionary), non-private and
     # not in group 0 for this test.
     unknown_tag = Tag(0x88880002)
     self.raw1 = RawDataElement(unknown_tag, None, 4, 0x1111, 0, True, True)

예제 #9

0

파일 보기

파일: filereader.py 프로젝트: wassname/pydicom

def read_deferred_data_element(fileobj_type, filename_or_obj, timestamp,
                               raw_data_elem):
    """Read the previously deferred value from the file into memory
    and return a raw data element.

    .. note::

        This is called internally by pydicom and will normally not be
        needed in user code.

    Parameters
    ----------
    fileobj_type : type
        The type of the original file object.
    filename_or_obj : str or file-like
        The filename of the original file if one exists, or the file-like
        object where the data element persists.
    timestamp : time or None
        The time the original file has been read, if not a file-like.
    raw_data_elem : dataelem.RawDataElement
        The raw data element with no value set.

    Returns
    -------
    dataelem.RawDataElement
        The data element with the value set. If `filename_or_obj` is
        ``None`` there is nothing to re-read from, and `raw_data_elem` is
        returned unchanged (its value is still unset).

    Raises
    ------
    IOError
        If `filename_or_obj` is a filename and the corresponding file does
        not exist.
    ValueError
        If the VR or tag of `raw_data_elem` does not match the read value.
    """
    logger.debug("Reading deferred element %r", str(raw_data_elem.tag))

    # Without a source the value cannot be re-read. This variant of the
    # function deliberately hands the element back as-is instead of raising.
    if filename_or_obj is None:
        return raw_data_elem

    is_filename = isinstance(filename_or_obj, str)

    # Check that the file is the same as when originally read
    if is_filename and not os.path.exists(filename_or_obj):
        raise IOError("Deferred read -- original file "
                      "{0:s} is missing".format(filename_or_obj))
    if timestamp is not None:
        statinfo = os.stat(filename_or_obj)
        if statinfo.st_mtime != timestamp:
            warnings.warn("Deferred read warning -- file modification time "
                          "has changed.")

    # Open the file, position to the right place
    fp = (fileobj_type(filename_or_obj, 'rb')
          if is_filename else filename_or_obj)
    try:
        is_implicit_VR = raw_data_elem.is_implicit_VR
        is_little_endian = raw_data_elem.is_little_endian
        # value_tell points at the value; step back over the element header
        offset = data_element_offset_to_value(is_implicit_VR,
                                              raw_data_elem.VR)
        fp.seek(raw_data_elem.value_tell - offset)
        elem_gen = data_element_generator(fp, is_implicit_VR,
                                          is_little_endian,
                                          defer_size=None)

        # Read the data element and check matches what was stored before
        data_elem = next(elem_gen)
    finally:
        # Close even when seeking or parsing fails so the handle never leaks
        fp.close()

    # `!s` keeps the message text identical for string VRs while still
    # producing the ValueError (not a formatting TypeError) if a VR is None.
    if data_elem.VR != raw_data_elem.VR:
        raise ValueError("Deferred read VR {0!s} does not match "
                         "original {1!s}".format(data_elem.VR,
                                                 raw_data_elem.VR))
    if data_elem.tag != raw_data_elem.tag:
        raise ValueError("Deferred read tag {0!r} does not match "
                         "original {1!r}".format(data_elem.tag,
                                                 raw_data_elem.tag))

    # Everything is ok, now this object should act like usual DataElement
    return data_elem

예제 #10

0

파일 보기

    def test_get_item_slice(self):
        """Test Dataset.get_item with slice argument"""
        # adapted from test_getitem_slice
        # Build a dataset mixing elements set via attributes/add_new with
        # elements stored directly as RawDataElement instances.
        ds = Dataset()
        ds.CommandGroupLength = 120  # 0000,0000
        ds.CommandLengthToEnd = 111  # 0000,0001
        ds.Overlays = 12  # 0000,51B0
        ds.LengthToEnd = 12  # 0008,0001
        ds.SOPInstanceUID = '1.2.3.4'  # 0008,0018
        ds.SkipFrameRangeFlag = 'TEST'  # 0008,9460
        ds.add_new(0x00090001, 'PN', 'CITIZEN^1')
        ds.add_new(0x00090002, 'PN', 'CITIZEN^2')
        ds.add_new(0x00090003, 'PN', 'CITIZEN^3')
        elem = RawDataElement(0x00090004, 'PN', 9, b'CITIZEN^4', 0, True, True)
        ds.__setitem__(0x00090004, elem)
        elem = RawDataElement(0x00090005, 'PN', 9, b'CITIZEN^5', 0, True, True)
        ds.__setitem__(0x00090005, elem)
        elem = RawDataElement(0x00090006, 'PN', 9, b'CITIZEN^6', 0, True, True)
        ds.__setitem__(0x00090006, elem)
        ds.PatientName = 'CITIZEN^Jan'  # 0010,0010
        elem = RawDataElement(0x00100020, 'LO', 5, b'12345', 0, True, True)
        ds.__setitem__(0x00100020, elem)  # Patient ID
        ds.ExaminedBodyThickness = 1.223  # 0010,9431
        ds.BeamSequence = [Dataset()]  # 300A,00B0
        ds.BeamSequence[0].PatientName = 'ANON'

        # Slice all items - should return original dataset
        assert ds.get_item(slice(None, None)) == ds

        # Slice starting from and including (0008,0001)
        test_ds = ds.get_item(slice(0x00080001, None))
        assert 'CommandGroupLength' not in test_ds
        assert 'CommandLengthToEnd' not in test_ds
        assert 'Overlays' not in test_ds
        assert 'LengthToEnd' in test_ds
        assert 'BeamSequence' in test_ds

        # Slice ending at and not including (0009,0002)
        test_ds = ds.get_item(slice(None, 0x00090002))
        assert 'CommandGroupLength' in test_ds
        assert 'CommandLengthToEnd' in test_ds
        assert 'Overlays' in test_ds
        assert 'LengthToEnd' in test_ds
        assert 0x00090001 in test_ds
        assert 0x00090002 not in test_ds
        assert 'BeamSequence' not in test_ds

        # Slice with a step - every second tag
        # Should return zeroth tag, then second, fourth, etc...
        test_ds = ds.get_item(slice(None, None, 2))
        assert 'CommandGroupLength' in test_ds
        assert 'CommandLengthToEnd' not in test_ds
        assert 0x00090001 in test_ds
        assert 0x00090002 not in test_ds

        # Slice starting at and including (0008,0018) and ending at and not
        #   including (0009,0006)
        test_ds = ds.get_item(slice(0x00080018, 0x00090006))
        assert 'SOPInstanceUID' in test_ds
        assert 0x00090005 in test_ds
        assert 0x00090006 not in test_ds

        # Slice starting at and including (0008,0018) and ending at and not
        #   including (0009,0008), every third element
        test_ds = ds.get_item(slice(0x00080018, 0x00090008, 3))
        assert 'SOPInstanceUID' in test_ds
        assert 0x00090001 not in test_ds
        assert 0x00090002 in test_ds
        # (0009,0002) was added with add_new, so it is not a raw element
        assert not test_ds.get_item(0x00090002).is_raw
        assert 0x00090003 not in test_ds
        assert 0x00090004 not in test_ds
        assert 0x00090005 in test_ds
        # (0009,0005) was stored as a RawDataElement and must stay raw
        assert test_ds.get_item(0x00090005).is_raw
        assert 0x00090006 not in test_ds

        # Slice starting and ending (and not including) (0008,0018)
        assert ds.get_item(slice((0x0008, 0x0018),
                                 (0x0008, 0x0018))) == Dataset()

        # Test slicing using other acceptable Tag initialisations
        assert 'SOPInstanceUID' in ds.get_item(slice(0x00080018, 0x00080019))
        assert 'SOPInstanceUID' in ds.get_item(
            slice((0x0008, 0x0018), (0x0008, 0x0019)))
        assert 'SOPInstanceUID' in ds.get_item(
            slice('0x00080018', '0x00080019'))

예제 #11

0

파일 보기

파일: filereader.py 프로젝트: parneshr/pydicom

def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
        Source of the encoded data elements; its ``read``, ``tell`` and
        ``seek`` methods are used.
    is_implicit_VR : boolean
        True if the data is encoded as implicit VR, False for explicit VR.
    is_little_endian : boolean
        True if the data is encoded as little endian, False for big endian.
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True, the file is rewound
        to the start of the current data element and the generator ends.
        Note that VR is None when reading implicit VR data.
    defer_size : int, str, None, optional
        See ``read_file`` for parameter info. Within this function it is
        compared directly against the element length; elements with a
        defined length greater than it are skipped and yielded with a
        value of None.
    encoding :
        Encoding scheme. Passed on to ``read_sequence`` for undefined
        length sequences, and replaced by the converted value of
        (0008,0005) Specific Character Set once that element is read.

    Yields
    ------
    RawDataElement or DataElement
        A ``DataElement`` for an undefined length sequence (SQ), which is
        parsed immediately; a ``RawDataElement`` otherwise. The raw element
        carries the tag, the VR (None if implicit VR and not looked up),
        the length as in the DICOM data element (could be DICOM
        "undefined length" 0xFFFFFFFF), the raw value bytes (not parsed
        into python types), the file position of the value, and the
        ``is_implicit_VR`` / ``is_little_endian`` flags.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            # At end of file. Use a plain return: under PEP 479 a
            # StopIteration raised inside a generator body is converted
            # into a RuntimeError.
            return
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # The 2-byte field just read was the reserved bytes; the
                # real length follows as a 4-byte value
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                # End the generator (see the PEP 479 note above)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # Instead of reading all data at once, read in 1Gb chunks.
                # For some reason the reads don't work correctly for data
                # elements greater than 4Gb but this might be an underlying
                # issue in system specific python implementations.
                # This is a fix for large data elements which should have
                # no performance implications for "normal" data.
                buf_size = 1073741824
                if length > buf_size:
                    # Collect the chunks and join once instead of
                    # re-concatenating the growing value on every read
                    chunks = []
                    len_to_read = length
                    while len_to_read > 0:
                        chunk_size = min(buf_size, len_to_read)
                        chunks.append(fp_read(chunk_size))
                        len_to_read -= chunk_size
                    value = b"".join(chunks)
                else:
                    value = fp_read(length)

                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(
                                     value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            # NOTE(review): if this element was deferred above, value is None
            # here -- confirm convert_string copes or that this cannot occur.
            if tag == (0x08, 0x05):
                from pydicom.values import convert_string
                encoding = convert_string(value,
                                          is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator for use with
                # future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length, encoding)
                yield DataElement(tag,
                                  VR,
                                  seq,
                                  value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set, then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value,
                                              is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use with
                    # future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)

예제 #12

0

파일 보기

파일: test_dataelem.py 프로젝트: dillonwilliams/pydicom

 def test_wrong_bytes_length_exception(self, accept_wrong_length):
     """Converting a raw element with a wrong byte count must raise."""
     # Raw 'FD' element at tag 0x00190000 with length 1 and value b'1'
     tag = Tag(0x00190000)
     bad_raw = RawDataElement(tag, 'FD', 1, b'1', 0, False, True)
     with pytest.raises(BytesLengthException):
         DataElement_from_raw(bad_raw)