def test_default_for_reading_validation_mode(self):
    """Expect a UserWarning when converting tag (8888,0002) with no VR."""
    unknown_tag = Tag(0x88880002)
    raw_elem = RawDataElement(
        unknown_tag, None, 4, b'unknown', 0, True, True
    )
    with pytest.warns(UserWarning):
        DataElement_from_raw(raw_elem)
def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from. It is read with ``read()`` and
        repositioned with ``tell()``/``seek()``.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, the file is rewound to the start of the current
        data element and the generator stops. Note that the VR passed to
        the callable is ``None`` when it is not known (implicit VR).
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info. Converted with
        ``size_in_bytes`` before use.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme. Replaced internally whenever a
        (0008,0005) *Specific Character Set* element of defined length is
        read, and passed on to ``read_sequence`` for nested sequences.
    specific_tags : list or None
        See :func:`dcmread` for parameter info. When non-empty, only these
        tags plus (0008,0005) are yielded.

    Yields
    ------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"

    # Keep the implicit VR struct in its own variable: it is also needed in
    # explicit VR mode when an element turns out to have no valid VR
    implicit_VR_struct = Struct(endian_chr + "HHL")
    if is_implicit_VR:
        element_struct = implicit_VR_struct
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # An empty or missing `specific_tags` means "no filtering"
    tag_set = {Tag(tag) for tag in specific_tags} if specific_tags else set()
    has_tag_set = bool(tag_set)
    if has_tag_set:
        # Always read Specific Character Set, it drives the text decoding
        tag_set.add(Tag(0x00080005))  # Specific Character Set

    while True:
        # VR: Optional[str]

        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(bytes_read)}"

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            vr = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, vr, length = element_struct_unpack(bytes_read)
            # defend against switching to implicit VR, some writers do in SQ's
            # issue 1067, issue 1035

            if not (b'AA' <= vr <= b'ZZ') and config.assume_implicit_vr_switch:
                # invalid VR, must be 2 cap chrs, assume implicit and
                # re-interpret the same 8 bytes as tag + 4-byte length
                vr = None
                group, elem, length = implicit_VR_struct.unpack(bytes_read)
            else:
                vr = vr.decode(default_encoding)
                if vr in EXPLICIT_VR_LENGTH_32:
                    # the 2-byte field was reserved; the real length follows
                    bytes_read = fp_read(4)
                    length = extra_length_unpack(bytes_read)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(bytes_read)

        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += f" {vr} "
            if length != 0xFFFFFFFF:
                debug_msg += f"Length: {length}"
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, vr, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # 8 header bytes, plus the extra 4-byte length if it was read
                rewind_length = 8
                if not is_implicit_VR and vr in EXPLICIT_VR_LENGTH_32:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # zero-length elements get the raw empty value for their VR
                value = (
                    fp_read(length) if length > 0
                    else cast(
                        Optional[bytes], empty_value_for_VR(vr, raw=True)
                    )
                )
                if debugging:
                    dotdot = "..." if length > 20 else " "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian
                )
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, vr, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # VR UN with undefined length shall be handled as SQ
            # see PS 3.5, section 6.2.2
            if vr == VR_.UN and config.settings.infer_sq_for_un_vr:
                vr = VR_.SQ
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            # (`and` binds tighter than `or`: a missing VR is always looked
            #  up, UN only when replace_un_with_known_vr is enabled)
            if vr is None or vr == VR_.UN and config.replace_un_with_known_vr:
                try:
                    vr = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = _unpack_tag(fp_read(4), endian_chr)
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        vr = VR_.SQ

            if vr == VR_.SQ:
                if debugging:
                    logger_debug(
                        f"{fp_tell():08X}: Reading/parsing undefined length "
                        "sequence"
                    )

                # The sequence is always parsed, even when it is filtered
                # out below, so that the file position moves past it
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue

                yield DataElement(tag, vr, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(
                    fp, is_little_endian, delimiter, defer_size
                )

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, vr, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding,
                           specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, the file is rewound to the start of the current
        data element and the generator stops.
    defer_size : int, str, None, optional
        See :func:`dcmread` for parameter info.
    encoding :
        Encoding scheme
    specific_tags : list or None
        See :func:`dcmread` for parameter info. Keyword strings are resolved
        with ``tag_for_keyword``; entries that are neither strings nor
        ``BaseTag`` instances are ignored. When not ``None``, (0008,0005)
        *Specific Character Set* is always read as well.

    Yields
    ------
    RawDataElement or DataElement
        ``DataElement`` for sequences of undefined length,
        ``RawDataElement`` (value not yet converted, ``None`` if deferred)
        for everything else.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    # Imported here rather than inside the read loop
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    tag_set = set()
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, (str, compat.text_type)):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        # Specific Character Set is needed to decode the other elements
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = bool(tag_set)

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # the 2-byte field was reserved; the real length follows
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # 8 header bytes, plus the extra 4-byte length if it was read
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = (fp_read(length) if length > 0
                         else empty_value_for_VR(VR, raw=True))
                if debugging:
                    dotdot = "..." if length > 12 else " "
                    displayed_value = value[:12] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # A zero-length element may carry None as its raw value
                # (empty_value_for_VR); treat that as an empty byte string
                # instead of passing None to convert_string
                encoding = convert_string(value or b'', is_little_endian)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                # The sequence is always parsed, even when it is filtered
                # out below, so that the file position moves past it
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    # `value or b''` guards against a None value here too
                    # (presumably when the read was deferred - verify
                    # against read_undefined_length_value)
                    encoding = convert_string(value or b'', is_little_endian)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def testTagWithoutEncodingPython3(self):
    """RawDataElement: raises if no encoding given in Python 3."""
    raw = RawDataElement(Tag(0x00104000), 'LT', 14,
                         b'comment1\\comment2',
                         0, False, True)
    # The conversion itself must run inside assertRaises. Handing the
    # RawDataElement instance to assertRaises as the "callable" would raise
    # TypeError merely because a namedtuple is not callable, so the test
    # would pass without ever exercising the conversion.
    with self.assertRaises(TypeError):
        DataElement_from_raw(raw)
def test_unknown_vr(self):
    """Converting a raw element with VR 'AA' raises NotImplementedError."""
    tag = Tag(0x00080000)
    raw_elem = RawDataElement(tag, 'AA', 8, b'20170101', 0, False, True)
    with pytest.raises(NotImplementedError):
        DataElement_from_raw(raw_elem, default_encoding)
def test_data_element_without_encoding(self):
    """A raw LT element converts without passing an encoding."""
    tag = Tag(0x00104000)
    raw_elem = RawDataElement(
        tag, 'LT', 23, b'comment\\comment2\\comment3', 0, False, True
    )
    converted = DataElement_from_raw(raw_elem)
    assert converted.name == 'Patient Comments'
def testTagWithoutEncodingPython2(self):
    """RawDataElement: conversion works without an encoding (Python 2)."""
    tag = Tag(0x00104000)
    raw_elem = RawDataElement(
        tag, 'LT', 23, b'comment\\comment2\\comment3', 0, False, True
    )
    converted = DataElement_from_raw(raw_elem)
    self.assertEqual(converted.name, 'Patient Comments')
def setUp(self):
    # RawDataElement arguments, in order:
    #   tag, VR, length, value, value_tell, is_implicit_VR, is_little_endian
    # The tag is unknown (not in DICOM dict), non-private and non-group 0
    # for this test.
    unknown_tag = Tag(0x88880002)
    self.raw1 = RawDataElement(
        unknown_tag, None, 4, 0x1111, 0, True, True
    )
def read_deferred_data_element(fileobj_type, filename_or_obj, timestamp,
                               raw_data_elem):
    """Read the previously deferred value from the file into memory
    and return a raw data element.

    .. note:

        This is called internally by pydicom and will normally not be
        needed in user code.

    Parameters
    ----------
    fileobj_type : type
        The type of the original file object. Called as
        ``fileobj_type(filename, 'rb')`` to re-open a file given by name.
    filename_or_obj : str or file-like
        The filename of the original file if one exists, or the file-like
        object where the data element persists.
    timestamp : time or None
        The time the original file has been read, if not a file-like.
        When given, a changed modification time only produces a warning.
    raw_data_elem : dataelem.RawDataElement
        The raw data element with no value set.

    Returns
    -------
    dataelem.RawDataElement
        The data element with the value set.

    Raises
    ------
    IOError
        If `filename_or_obj` is ``None``.
    IOError
        If `filename_or_obj` is a filename and the corresponding file does
        not exist.
    ValueError
        If the VR or tag of `raw_data_elem` does not match the read value.
    """
    logger.debug("Reading deferred element %r", str(raw_data_elem.tag))

    # If it wasn't read from a file, there is nothing to re-read from.
    # Fail loudly rather than handing back the element with no value.
    if filename_or_obj is None:
        raise IOError("Deferred read -- original filename not stored. "
                      "Cannot re-open")

    is_filename = isinstance(filename_or_obj, str)

    # Check that the file is the same as when originally read
    if is_filename and not os.path.exists(filename_or_obj):
        raise IOError("Deferred read -- original file "
                      "{0:s} is missing".format(filename_or_obj))

    if timestamp is not None:
        statinfo = os.stat(filename_or_obj)
        if statinfo.st_mtime != timestamp:
            warnings.warn("Deferred read warning -- file modification time "
                          "has changed.")

    # Open the file, position to the right place
    fp = (fileobj_type(filename_or_obj, 'rb')
          if is_filename else filename_or_obj)
    try:
        is_implicit_VR = raw_data_elem.is_implicit_VR
        is_little_endian = raw_data_elem.is_little_endian
        # Step back from the value to the start of the element header so
        # the generator can parse the whole element again
        offset = data_element_offset_to_value(is_implicit_VR,
                                              raw_data_elem.VR)
        fp.seek(raw_data_elem.value_tell - offset)
        elem_gen = data_element_generator(fp, is_implicit_VR,
                                          is_little_endian, defer_size=None)

        # Read the data element and check matches what was stored before
        data_elem = next(elem_gen)
    finally:
        # Close even when seeking or reading fails, so no handle is leaked
        fp.close()

    if data_elem.VR != raw_data_elem.VR:
        # `!s` rather than `:s`: a VR of None must not turn this ValueError
        # into a TypeError from str formatting
        raise ValueError("Deferred read VR {0!s} does not match "
                         "original {1!s}".format(data_elem.VR,
                                                 raw_data_elem.VR))
    if data_elem.tag != raw_data_elem.tag:
        raise ValueError("Deferred read tag {0!r} does not match "
                         "original {1!r}".format(data_elem.tag,
                                                 raw_data_elem.tag))

    # Everything is ok, now this object should act like usual DataElement
    return data_elem
def test_get_item_slice(self):
    """Test Dataset.get_item with slice argument"""
    # adapted from test_getitem_slice
    ds = Dataset()
    ds.CommandGroupLength = 120  # 0000,0000
    ds.CommandLengthToEnd = 111  # 0000,0001
    ds.Overlays = 12  # 0000,51B0
    ds.LengthToEnd = 12  # 0008,0001
    ds.SOPInstanceUID = '1.2.3.4'  # 0008,0018
    ds.SkipFrameRangeFlag = 'TEST'  # 0008,9460

    # Group 0009: three converted elements followed by three raw ones
    converted_private = (
        (0x00090001, 'CITIZEN^1'),
        (0x00090002, 'CITIZEN^2'),
        (0x00090003, 'CITIZEN^3'),
    )
    for private_tag, person in converted_private:
        ds.add_new(private_tag, 'PN', person)

    raw_private = (
        (0x00090004, b'CITIZEN^4'),
        (0x00090005, b'CITIZEN^5'),
        (0x00090006, b'CITIZEN^6'),
    )
    for private_tag, raw_person in raw_private:
        ds[private_tag] = RawDataElement(
            private_tag, 'PN', 9, raw_person, 0, True, True
        )

    ds.PatientName = 'CITIZEN^Jan'  # 0010,0010
    # Patient ID, stored as a raw element
    ds[0x00100020] = RawDataElement(
        0x00100020, 'LO', 5, b'12345', 0, True, True
    )
    ds.ExaminedBodyThickness = 1.223  # 0010,9431
    ds.BeamSequence = [Dataset()]  # 300A,00B0
    ds.BeamSequence[0].PatientName = 'ANON'

    # Slice all items - should return original dataset
    assert ds.get_item(slice(None, None)) == ds

    # Slice starting from and including (0008,0001)
    subset = ds.get_item(slice(0x00080001, None))
    for keyword in ('CommandGroupLength', 'CommandLengthToEnd', 'Overlays'):
        assert keyword not in subset
    for keyword in ('LengthToEnd', 'BeamSequence'):
        assert keyword in subset

    # Slice ending at and not including (0009,0002)
    subset = ds.get_item(slice(None, 0x00090002))
    for keyword in ('CommandGroupLength', 'CommandLengthToEnd',
                    'Overlays', 'LengthToEnd'):
        assert keyword in subset
    assert 0x00090001 in subset
    assert 0x00090002 not in subset
    assert 'BeamSequence' not in subset

    # Slice with a step - every second tag
    # Should return zeroth tag, then second, fourth, etc...
    subset = ds.get_item(slice(None, None, 2))
    assert 'CommandGroupLength' in subset
    assert 'CommandLengthToEnd' not in subset
    assert 0x00090001 in subset
    assert 0x00090002 not in subset

    # Slice starting at and including (0008,0018) and ending at and not
    # including (0009,0006)
    subset = ds.get_item(slice(0x00080018, 0x00090006))
    assert 'SOPInstanceUID' in subset
    assert 0x00090005 in subset
    assert 0x00090006 not in subset

    # Slice starting at and including (0008,0018) and ending at and not
    # including (0009,0008), every third element
    subset = ds.get_item(slice(0x00080018, 0x00090008, 3))
    assert 'SOPInstanceUID' in subset
    assert 0x00090001 not in subset
    assert 0x00090002 in subset
    # elements added with add_new stay converted, raw ones stay raw
    assert not subset.get_item(0x00090002).is_raw
    assert 0x00090003 not in subset
    assert 0x00090004 not in subset
    assert 0x00090005 in subset
    assert subset.get_item(0x00090005).is_raw
    assert 0x00090006 not in subset

    # Slice starting and ending (and not including) (0008,0018)
    empty_slice = slice((0x0008, 0x0018), (0x0008, 0x0018))
    assert ds.get_item(empty_slice) == Dataset()

    # Test slicing using other acceptable Tag initialisations
    assert 'SOPInstanceUID' in ds.get_item(slice(0x00080018, 0x00080019))
    assert 'SOPInstanceUID' in ds.get_item(
        slice((0x0008, 0x0018), (0x0008, 0x0019)))
    assert 'SOPInstanceUID' in ds.get_item(
        slice('0x00080018', '0x00080019'))
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None,
                           encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True, the file is rewound
        to the start of the current data element and the generator stops.
    defer_size : int, str, None, optional
        See ``read_file`` for parameter info. It is compared directly with
        the element length here.
    encoding :
        Encoding scheme

    Yields
    ------
    RawDataElement or DataElement
        ``DataElement`` for sequences of undefined length,
        ``RawDataElement`` (value not yet converted, ``None`` if deferred)
        for everything else.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    # Imported once here rather than in two places inside the read loop
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack

    # Values larger than this are read in pieces of this size (1 GiB)
    buf_size = 1073741824

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            # At end of file. A generator must finish with `return`:
            # raising StopIteration inside a generator body is turned into
            # RuntimeError by PEP 479 (always on from Python 3.7).
            return
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # the 2-byte field was reserved; the real length follows
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # 8 header bytes, plus the extra 4-byte length if it was read
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return  # see the PEP 479 note at the end-of-file check

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # Instead of reading all data at once, read in 1Gb chunks.
                # For some reason the reads don't work correctly for data
                # elements greater than 4Gb, but this might be an underlying
                # issue in system specific python implementations.
                # "Normal" sized data still takes the single-read path.
                if length > buf_size:
                    # Collect the pieces and join once, rather than
                    # re-copying the growing value on every concatenation
                    chunks = []
                    len_to_read = length
                    while len_to_read > 0:
                        chunk_len = min(len_to_read, buf_size)
                        chunks.append(fp_read(chunk_len))
                        len_to_read -= chunk_len
                    value = b"".join(chunks)
                else:
                    value = fp_read(length)

                if debugging:
                    dotdot = " "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(value[:12]),
                                  dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == (0x08, 0x05):
                encoding = convert_string(value, is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator for use with
                # future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is
                    # thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    encoding = convert_string(value, is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use with
                    # future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def test_wrong_bytes_length_exception(self, accept_wrong_length):
    """A one-byte value for an 'FD' element raises BytesLengthException."""
    tag = Tag(0x00190000)
    raw_elem = RawDataElement(tag, 'FD', 1, b'1', 0, False, True)
    with pytest.raises(BytesLengthException):
        DataElement_from_raw(raw_elem)