def dataElementToValue(dataElement, datasetencoding, subvalue=None):
    """Convert a data element's value into a plain, serialisable value.

    Parameters
    ----------
    dataElement :
        Element whose ``value`` and ``VR`` attributes are read.
    datasetencoding :
        Character set name handed to ``convert_encodings`` when a ``bytes``
        value has to be decoded.
    subvalue : optional
        A single item of a multi-valued element. When it is not ``None`` it
        is converted instead of ``dataElement.value``.

    Returns
    -------
    ``None`` for ``None``/empty-string values; the value itself for exact
    ``str``/``int``/``float``/``list`` instances; a decoded ``str`` for
    ``bytes``; a list of converted items for ``MultiValue``; otherwise a
    representation chosen by ``dataElement.VR`` (falling back to the
    value's ``original_string`` attribute).
    """
    # Identity checks against None avoid invoking a value's own
    # __eq__/__ne__, which is what ``!= None`` / ``== None`` would do.
    value = dataElement.value if subvalue is None else subvalue
    if value is None or value == '':
        return None

    # Exact type matches are deliberate: subclasses of the builtin types
    # must fall through to the VR-specific handling below.
    # NOTE(review): presumably this is for value classes such as an
    # int-derived 'IS' type -- confirm against the caller's data model.
    value_type = type(value)
    if value_type in (str, int, float, list):
        return value
    if value_type is bytes:
        pythonencoding = convert_encodings([datasetencoding])
        return value.decode(pythonencoding[0])
    if value_type is MultiValue:
        # Convert every item with the same element context (VR, encoding).
        return [dataElementToValue(dataElement, datasetencoding, item)
                for item in value]

    vr = dataElement.VR
    if vr == 'PN':
        text = str(value)
        return {'Alphabetic': text} if text else None
    if vr == 'DA':
        return [value.isoformat()]
    if vr == 'IS':
        return int(value)
    if vr in ('UI', 'AT'):
        return str(value)
    if vr == 'SQ':
        return [datasetToJSON(item, datasetencoding) for item in value]
    return value.original_string
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage
    rules.

    The element goes out as tag, VR (explicit VR only), a length slot and
    the value. The length slot is written as a placeholder first and
    patched after the value has been written, unless the element has
    undefined length, in which case a sequence delimiter is appended.

    Raises
    ------
    ValueError
        Explicit VR is in use and the element's VR is not two characters.
    NotImplementedError
        No writer is registered for the element's VR.
    """
    fp.write_tag(data_element.tag)

    vr = data_element.VR
    explicit_vr = not fp.is_implicit_VR
    if explicit_vr:
        if len(vr) != 2:
            msg = ("Cannot write ambiguous VR of '%s' for data element "
                   "with tag %r." % (vr, data_element.tag))
            msg = msg + ("\nSet the correct VR before writing, "
                         "or use an implicit VR transfer syntax")
            raise ValueError(msg)
        if in_py2:
            fp.write(vr)
        else:
            fp.write(bytes(vr, default_encoding))
        if vr in extra_length_VRs:
            # two reserved bytes precede the 4-byte length
            fp.write_US(0)

    if vr not in writers:
        raise NotImplementedError(
            "write_data_element: unknown Value Representation "
            "'{0}'".format(vr))

    # Explicit VR uses a 2-byte length except for the extra-length VRs.
    short_length = explicit_vr and vr not in extra_length_VRs

    # Remember where the length goes so it can be patched afterwards.
    length_location = fp.tell()
    if short_length:
        fp.write_US(0)
    else:
        fp.write_UL(0xFFFFFFFF)

    encoding = convert_encodings(encoding)
    write_value, write_param = writers[vr]
    if vr in text_VRs:
        write_value(fp, data_element, encoding=encoding[1])
    elif vr in ('PN', 'SQ'):
        write_value(fp, data_element, encoding=encoding)
    elif write_param is not None:
        # numeric writers share one function, told apart by a format param
        write_value(fp, data_element, write_param)
    else:
        write_value(fp, data_element)

    is_undefined_length = bool(
        getattr(data_element, "is_undefined_length", False))

    # Go back and fill in the real length, then return to the end.
    location = fp.tell()
    fp.seek(length_location)
    if short_length:
        fp.write_US(location - length_location - 2)
    elif not is_undefined_length:
        fp.write_UL(location - length_location - 4)
    fp.seek(location)

    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of the delimiter item
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage
    rules.

    The element is written as tag, VR (explicit VR only), length and value.
    The length slot is first written as a placeholder and patched once the
    value has been written. Elements flagged with ``is_undefined_length``
    keep the 0xFFFFFFFF placeholder and are closed with a sequence
    delimiter.

    Parameters
    ----------
    fp :
        Output object; its ``write``, ``write_tag``, ``write_US``,
        ``write_UL``, ``tell``, ``seek`` and ``is_implicit_VR`` are used.
    data_element :
        Element to write; ``tag`` and ``VR`` are read, plus
        ``is_undefined_length`` when the attribute exists.
    encoding :
        Passed through ``convert_encodings``; the result is handed to the
        text, 'PN' and 'SQ' writers.

    Raises
    ------
    ValueError
        Explicit VR is in use and the element's VR is not two characters.
    NotImplementedError
        No writer is registered for the element's VR.
    """
    fp.write_tag(data_element.tag)

    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = ("Cannot write ambiguous VR of '%s' for data element "
                   "with tag %r." % (VR, data_element.tag))
            msg += ("\nSet the correct VR before writing, "
                    "or use an implicit VR transfer syntax")
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes

    if VR not in writers:
        raise NotImplementedError(
            "write_data_element: unknown Value Representation "
            "'{0}'".format(VR))

    # The width of the length field must be decided from the same set that
    # decided whether the two reserved bytes were written above. A separate
    # hard-coded VR list could drift from ``extra_length_VRs`` and produce
    # reserved bytes followed by a 2-byte length.
    uses_short_length = (not fp.is_implicit_VR
                         and VR not in extra_length_VRs)

    length_location = fp.tell()  # save location for later
    if uses_short_length:
        fp.write_US(0)  # explicit VR length field is only 2 bytes
    else:
        # placeholder; real length is filled in later unless undefined
        fp.write_UL(0xFFFFFFFF)

    encoding = convert_encodings(encoding)
    writer_function, writer_param = writers[VR]
    if VR in text_VRs:
        writer_function(fp, data_element, encoding=encoding[1])
    elif VR in ('PN', 'SQ'):
        writer_function(fp, data_element, encoding=encoding)
    elif writer_param is not None:
        # many numeric types share a writer, distinguished by a format param
        writer_function(fp, data_element, writer_param)
    else:
        writer_function(fp, data_element)

    is_undefined_length = bool(
        getattr(data_element, "is_undefined_length", False))

    # Patch the length slot with the number of value bytes just written.
    location = fp.tell()
    fp.seek(length_location)
    if uses_short_length:
        fp.write_US(location - length_location - 2)  # 2 is length of US
    elif not is_undefined_length:
        fp.write_UL(location - length_location - 4)  # 4 is length of UL
    fp.seek(location)  # ready for next data_element

    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
def _character_set(self):
    """Return the encodings to use for this dataset.

    The converted 'SpecificCharacterSet' value is returned when that entry
    is present and non-empty; otherwise the parent encoding is returned.
    """
    specific = self.get('SpecificCharacterSet', None)
    if specific:
        return convert_encodings(specific)
    return self._parent_encoding
def _character_set(self):
    """Return the converted "SpecificCharacterSet" value, or the parent
    encoding when that entry is missing or empty.
    """
    declared = self.get("SpecificCharacterSet", None)
    return convert_encodings(declared) if declared else self._parent_encoding
def _character_set(self):
    """Encodings derived from the Dataset's SpecificCharacterSet value.

    Falls back to the parent encoding when SpecificCharacterSet is absent
    or empty.
    """
    value = self.get('SpecificCharacterSet', None)
    if value:
        encodings = convert_encodings(value)
    else:
        encodings = self._parent_encoding
    return encodings
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Read the next dataset from ``fp`` and return it as a Dataset.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until the end of the file or an item delimiter tag,
        else the number of bytes (counted from the starting position)
        after which reading stops.
    stop_when : None, optional
        Callback that can terminate reading; forwarded to
        data_element_generator.
    defer_size : int, None, optional
        Forwarded to data_element_generator.
    parent_encoding :
        Encoding used when the dataset holds no Specific Character Set
        (0008,0005) element.
    specific_tags : list or None
        Forwarded to data_element_generator.

    Returns
    -------
    a Dataset instance built from the raw data elements that were read.
    EOFError and NotImplementedError raised while reading are logged and
    the elements read up to that point are still returned.
    """
    start_offset = fp.tell()
    elements = {}
    generator = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                       stop_when, defer_size,
                                       parent_encoding, specific_tags)
    try:
        while True:
            if bytelength is not None:
                if fp.tell() - start_offset >= bytelength:
                    break
            element = next(generator)
            tag = element.tag
            # An item delimiter means this dataset was a sequence item.
            if tag == BaseTag(0xFFFEE00D):
                break
            elements[tag] = element
    except StopIteration:
        pass
    except EOFError as details:
        source_name = getattr(fp, "name", "<no filename>")
        logger.error(str(details) + " in file " + source_name)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(elements)
    charset_tag = 0x00080005
    if charset_tag in elements:
        encoding = convert_encodings(
            DataElement_from_raw(elements[charset_tag]))
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None,
                           encoding=default_encoding, specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns True or False. If it returns
        True, the file is rewound to the start of the current data element
        and the generator ends.
    defer_size : int, str, None, optional
        See ``dcmread`` for parameter info. Defined-length values longer
        than this are skipped and yielded with a value of None (Specific
        Character Set is never deferred).
    encoding :
        Encoding scheme; replaced by the converted Specific Character Set
        (0008,0005) value once that element has been read.
    specific_tags : list or None
        See ``dcmread`` for parameter info. When given, elements whose tag
        is not listed are skipped; Specific Character Set is always read
        but only yielded if it was listed.

    Yields
    ------
    RawDataElement or DataElement
        A RawDataElement built from tag, VR (None if implicit VR), length
        (could be DICOM "undefined length" 0xFFFFFFFF), the raw value
        bytes, the value's file position, is_implicit_VR and
        is_little_endian; or a DataElement holding the parsed sequence for
        an undefined length SQ.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # Build the set of tags to keep. Specific Character Set is always added
    # so the encoding can be tracked; has_specific_char_set remembers
    # whether the caller actually asked for it (and so whether to yield it).
    tag_set = set()
    has_specific_char_set = True
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, (str, compat.text_type)):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        has_specific_char_set = Tag(0x08, 0x05) in tag_set
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = len(tag_set) > 0

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # the 2 bytes just read were reserved; the real length is
                # in the following 4 bytes
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # header is 8 bytes, plus 4 more for the extra-length VRs
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = " "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                                                           bytes2hex(
                                                               value[:12]),
                                                           dotdot,
                                                           value[:12],
                                                           dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                from pydicom.values import convert_string
                encoding = convert_string(value, is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)
                if not has_specific_char_set:
                    # read only to track the encoding; caller did not ask
                    # for this element
                    continue

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                # the sequence is parsed even when it will be skipped, which
                # moves the file position past it
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value, is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)
                    if not has_specific_char_set:
                        continue

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def write_data_element(fp, data_element, encodings=None):
    """Write the data_element to file fp according to dicom media storage
    rules.

    The value is first rendered into an in-memory buffer so its length is
    known before the length field is written; no seeking back in ``fp`` is
    needed.

    Raises
    ------
    ValueError
        Explicit VR is in use and the VR is not two characters, or Pixel
        Data with undefined length does not start with an item tag.
    NotImplementedError
        The element is not raw and no writer is registered for its VR.
    """
    fp.write_tag(data_element.tag)

    vr = data_element.VR
    if not fp.is_implicit_VR:
        if len(vr) != 2:
            raise ValueError(
                "Cannot write ambiguous VR of '{}' for data element with "
                "tag {}.\nSet the correct VR before writing, or use an "
                "implicit VR transfer syntax".format(
                    vr, repr(data_element.tag)))
        if in_py2:
            fp.write(vr)
        else:
            fp.write(bytes(vr, default_encoding))
        if vr in extra_length_VRs:
            fp.write_US(0)  # two reserved bytes

    # Render the value into a scratch buffer sharing fp's transfer syntax.
    value_buffer = DicomBytesIO()
    value_buffer.is_little_endian = fp.is_little_endian
    value_buffer.is_implicit_VR = fp.is_implicit_VR

    if data_element.is_raw:
        # raw values are already encoded bytes
        value_buffer.write(data_element.value)
        is_undefined_length = data_element.length == 0xFFFFFFFF
    else:
        if vr not in writers:
            raise NotImplementedError(
                "write_data_element: unknown Value Representation "
                "'{0}'".format(vr))
        encodings = convert_encodings(encodings or [default_encoding])
        write_value, write_param = writers[vr]
        is_undefined_length = data_element.is_undefined_length
        if vr in text_VRs or vr in ('PN', 'SQ'):
            write_value(value_buffer, data_element, encodings=encodings)
        elif write_param is not None:
            # numeric writers share one function, told apart by a format
            write_value(value_buffer, data_element, write_param)
        else:
            write_value(value_buffer, data_element)

    # Pixel data with undefined length has to be encapsulated, i.e. start
    # with an item tag in the byte order being written (see #238).
    if is_undefined_length and data_element.tag == 0x7fe00010:
        if fp.is_little_endian:
            item_tag = b'\xfe\xff\x00\xe0'
        else:
            item_tag = b'\xff\xfe\xe0\x00'
        if not data_element.value.startswith(item_tag):
            raise ValueError('Pixel Data with undefined length must '
                             'start with an item tag')

    value_length = value_buffer.tell()
    if is_undefined_length:
        fp.write_UL(0xFFFFFFFF)
    elif not fp.is_implicit_VR and vr not in extra_length_VRs:
        fp.write_US(value_length)  # explicit VR length field is 2 bytes
    else:
        fp.write_UL(value_length)

    fp.write(value_buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of the delimiter item
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding,
                 specific_tags=None, at_top_level=True):
    """Read the next dataset from `fp` and return it as a
    :class:`~pydicom.dataset.Dataset`.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR. At top level the
        value is re-evaluated with :func:`_is_implicit_vr` before reading.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until the end of the file or an item delimiter
        tag, else the number of bytes (counted from the starting position)
        after which reading stops.
    stop_when : None, optional
        Callback that can terminate reading; forwarded to
        :func:`data_element_generator`.
    defer_size : int, None, optional
        Forwarded to :func:`data_element_generator`.
    parent_encoding :
        Encoding used when the dataset holds no (0008,0005) *Specific
        Character Set* element.
    specific_tags : list or None
        Forwarded to :func:`data_element_generator`.
    at_top_level: bool
        If dataset is top level (not within a sequence). When ``False``
        the explicit/implicit VR heuristic is not applied.

    Returns
    -------
    dataset.Dataset
        A Dataset built from the raw data elements that were read. On
        ``EOFError`` a ``UserWarning`` is issued (or the error re-raised
        when ``config.enforce_valid_values`` is set); ``NotImplementedError``
        is logged. In both cases the elements read so far are returned.
    """
    elements = {}
    start_offset = fp.tell()
    if at_top_level:
        is_implicit_VR = _is_implicit_vr(fp, is_implicit_VR,
                                         is_little_endian, stop_when)
    fp.seek(start_offset)
    generator = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                       stop_when, defer_size,
                                       parent_encoding, specific_tags)
    try:
        while True:
            if bytelength is not None:
                if fp.tell() - start_offset >= bytelength:
                    break
            element = next(generator)
            tag = element.tag
            # An item delimiter means this dataset was a sequence item.
            if tag == BaseTag(0xFFFEE00D):
                break
            elements[tag] = element
    except StopIteration:
        pass
    except EOFError as details:
        if config.enforce_valid_values:
            raise
        source_name = getattr(fp, "name", "<no filename>")
        warnings.warn(str(details) + " in file " + source_name, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(elements)
    charset_tag = 0x00080005
    if charset_tag in elements:
        encoding = convert_encodings(
            DataElement_from_raw(elements[charset_tag]))
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
def write_data_element(
    fp: DicomIO,
    elem: Union[DataElement, RawDataElement],
    encodings: Optional[Union[str, List[str]]] = None
) -> None:
    """Write the data_element to file fp according to dicom media storage
    rules.

    The value is rendered into an in-memory buffer first so that its
    length is known before the VR and length fields are written. With an
    explicit VR transfer syntax, a value too long for a 2-byte length is
    written with VR 'UN' and a warning is issued.

    Raises
    ------
    ValueError
        Explicit VR is in use and the VR is set but not two characters
        long, or Pixel Data with undefined length is not encapsulated.
    NotImplementedError
        The element is not raw and no writer is registered for its VR.
    """
    fp.write_tag(elem.tag)

    # Scratch buffer sharing fp's transfer syntax; avoids seeking in fp.
    value_buffer = DicomBytesIO()
    value_buffer.is_little_endian = fp.is_little_endian
    value_buffer.is_implicit_VR = fp.is_implicit_VR

    explicit_vr = not fp.is_implicit_VR

    VR: Optional[str] = elem.VR
    if explicit_vr and VR and len(VR) != 2:
        raise ValueError(
            f"Cannot write ambiguous VR of '{VR}' for data element with "
            f"tag {repr(elem.tag)}.\nSet the correct VR before "
            f"writing, or use an implicit VR transfer syntax"
        )

    if elem.is_raw:
        elem = cast(RawDataElement, elem)
        # raw values are already encoded bytes
        value_buffer.write(cast(bytes, elem.value))
        is_undefined_length = elem.length == 0xFFFFFFFF
    else:
        elem = cast(DataElement, elem)
        if VR not in writers:
            raise NotImplementedError(
                f"write_data_element: unknown Value Representation '{VR}'"
            )

        encodings = convert_encodings(encodings or [default_encoding])
        fn, param = writers[VR]
        is_undefined_length = elem.is_undefined_length
        if not elem.is_empty:
            takes_encodings = VR in text_VRs or VR in ('PN', 'SQ')
            if takes_encodings:
                fn(value_buffer, elem, encodings=encodings)  # type: ignore[operator]
            elif param is not None:
                # numeric writers share one function, told apart by a
                # format parameter
                fn(value_buffer, elem, param)  # type: ignore[operator]
            else:
                fn(value_buffer, elem)  # type: ignore[operator]

    # Pixel data with undefined length has to be encapsulated, i.e. start
    # with an item tag in the byte order being written (see #238).
    if is_undefined_length and elem.tag == 0x7fe00010:
        encap_item = (
            b'\xfe\xff\x00\xe0' if fp.is_little_endian
            else b'\xff\xfe\xe0\x00'  # non-conformant endianness
        )
        if not cast(bytes, elem.value).startswith(encap_item):
            raise ValueError(
                "(7FE0,0010) Pixel Data has an undefined length indicating "
                "that it's compressed, but the data isn't encapsulated as "
                "required. See pydicom.encaps.encapsulate() for more "
                "information"
            )

    value_length = value_buffer.tell()
    if (explicit_vr and VR not in extra_length_VRs
            and not is_undefined_length and value_length > 0xffff):
        # see PS 3.5, section 6.2.2 for handling of this case
        warnings.warn(
            f"The value for the data element {elem.tag} exceeds the "
            f"size of 64 kByte and cannot be written in an explicit transfer "
            f"syntax. The data element VR is changed from '{VR}' to 'UN' "
            f"to allow saving the data."
        )
        VR = 'UN'

    # The VR is written only now because it may have been switched to 'UN'.
    if explicit_vr:
        VR = cast(str, VR)
        fp.write(bytes(VR, default_encoding))
        if VR in extra_length_VRs:
            fp.write_US(0)  # two reserved bytes

    if is_undefined_length:
        fp.write_UL(0xFFFFFFFF)
    elif explicit_vr and VR not in extra_length_VRs:
        fp.write_US(value_length)  # explicit VR length field is 2 bytes
    else:
        fp.write_UL(value_length)

    fp.write(value_buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of the delimiter item
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage
    rules.

    The element goes out as tag, VR (explicit VR only), a length slot and
    the value. The length slot is written as a placeholder first and
    patched after the value has been written, unless the element has
    undefined length, in which case a sequence delimiter is appended.

    Raises
    ------
    ValueError
        Explicit VR is in use and the VR is not two characters, or Pixel
        Data with undefined length does not start with an item tag.
    NotImplementedError
        No writer is registered for the element's VR.
    """
    fp.write_tag(data_element.tag)

    vr = data_element.VR
    explicit_vr = not fp.is_implicit_VR
    if explicit_vr:
        if len(vr) != 2:
            raise ValueError(
                "Cannot write ambiguous VR of '{}' for data element with "
                "tag {}.\nSet the correct VR before writing, or use an "
                "implicit VR transfer syntax".format(
                    vr, repr(data_element.tag)))
        if in_py2:
            fp.write(vr)
        else:
            fp.write(bytes(vr, default_encoding))
        if vr in extra_length_VRs:
            fp.write_US(0)  # two reserved bytes

    if vr not in writers:
        raise NotImplementedError(
            "write_data_element: unknown Value Representation '{0}'".format(
                vr))

    # Explicit VR uses a 2-byte length except for the extra-length VRs.
    short_length = explicit_vr and vr not in extra_length_VRs

    # Remember where the length goes so it can be patched afterwards.
    length_location = fp.tell()
    if short_length:
        fp.write_US(0)
    else:
        fp.write_UL(0xFFFFFFFF)

    encoding = convert_encodings(encoding)
    write_value, write_param = writers[vr]
    if vr in text_VRs:
        write_value(fp, data_element, encoding=encoding[1])
    elif vr in ('PN', 'SQ'):
        write_value(fp, data_element, encoding=encoding)
    elif write_param is not None:
        # numeric writers share one function, told apart by a format param
        write_value(fp, data_element, write_param)
    else:
        write_value(fp, data_element)

    is_undefined_length = bool(
        getattr(data_element, "is_undefined_length", False))

    # Pixel data with undefined length has to be encapsulated, i.e. start
    # with an item tag in the byte order being written (see #238).
    if is_undefined_length and data_element.tag == 0x7fe00010:
        if fp.is_little_endian:
            item_tag = b'\xfe\xff\x00\xe0'
        else:
            item_tag = b'\xff\xfe\xe0\x00'
        if not data_element.value.startswith(item_tag):
            raise ValueError('Pixel Data with undefined length must '
                             'start with an item tag')

    # Go back and fill in the real length, then return to the end.
    location = fp.tell()
    fp.seek(length_location)
    if short_length:
        fp.write_US(location - length_location - 2)
    elif not is_undefined_length:
        fp.write_UL(location - length_location - 4)
    fp.seek(location)

    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of the delimiter item
def write_data_element(fp, data_element, encodings=None):
    """Write the data_element to file fp according to dicom media storage
    rules.

    The value is first rendered into an in-memory buffer so its length is
    known before the length field is written; no seeking back in ``fp`` is
    needed.

    Raises
    ------
    ValueError
        Explicit VR is in use and the VR is not two characters; Pixel Data
        with undefined length does not start with an item tag; or the
        value length does not fit the 2-byte explicit VR length field.
    NotImplementedError
        The element is not raw and no writer is registered for its VR.
    """
    fp.write_tag(data_element.tag)

    vr = data_element.VR
    if not fp.is_implicit_VR:
        if len(vr) != 2:
            raise ValueError(
                "Cannot write ambiguous VR of '{}' for data element with "
                "tag {}.\nSet the correct VR before writing, or use an "
                "implicit VR transfer syntax".format(
                    vr, repr(data_element.tag)))
        if in_py2:
            fp.write(vr)
        else:
            fp.write(bytes(vr, default_encoding))
        if vr in extra_length_VRs:
            fp.write_US(0)  # two reserved bytes

    # Render the value into a scratch buffer sharing fp's transfer syntax.
    value_buffer = DicomBytesIO()
    value_buffer.is_little_endian = fp.is_little_endian
    value_buffer.is_implicit_VR = fp.is_implicit_VR

    if data_element.is_raw:
        # raw values are already encoded bytes
        value_buffer.write(data_element.value)
        is_undefined_length = data_element.length == 0xFFFFFFFF
    else:
        if vr not in writers:
            raise NotImplementedError(
                "write_data_element: unknown Value Representation "
                "'{0}'".format(vr))
        encodings = convert_encodings(encodings or [default_encoding])
        write_value, write_param = writers[vr]
        is_undefined_length = data_element.is_undefined_length
        if vr in text_VRs or vr in ('PN', 'SQ'):
            write_value(value_buffer, data_element, encodings=encodings)
        elif write_param is not None:
            # numeric writers share one function, told apart by a format
            write_value(value_buffer, data_element, write_param)
        else:
            write_value(value_buffer, data_element)

    # Pixel data with undefined length has to be encapsulated, i.e. start
    # with an item tag in the byte order being written (see #238).
    if is_undefined_length and data_element.tag == 0x7fe00010:
        if fp.is_little_endian:
            item_tag = b'\xfe\xff\x00\xe0'
        else:
            item_tag = b'\xff\xfe\xe0\x00'
        if not data_element.value.startswith(item_tag):
            raise ValueError('Pixel Data with undefined length must '
                             'start with an item tag')

    value_length = value_buffer.tell()
    if is_undefined_length:
        fp.write_UL(0xFFFFFFFF)
    elif not fp.is_implicit_VR and vr not in extra_length_VRs:
        try:
            fp.write_US(value_length)  # explicit VR length field is 2 bytes
        except struct.error:
            # the length does not fit into an unsigned 16-bit field
            raise ValueError(
                'The value for the data element {} exceeds the size '
                'of 64 kByte and cannot be written in an explicit transfer '
                'syntax. You can save it using Implicit Little Endian '
                'transfer syntax, or you have to truncate the value to not '
                'exceed the maximum size of 64 kByte.'
                .format(data_element.tag))
    else:
        fp.write_UL(value_length)

    fp.write(value_buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of the delimiter item
def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Lazily yield the data elements found in `fp`, one at a time.

    .. note::

        This function is used internally - usually there is no need to call
        it from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False``
        otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. Otherwise a
        callable taking tag, VR and length and returning ``True`` or
        ``False``. When it returns ``True`` the file is rewound to the
        start of the current data element and the generator finishes.
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme
    specific_tags : list or None
        See :func:`dcmread` for parameter info.

    Yields
    -------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    endian_chr = "<" if is_little_endian else ">"

    # The implicit VR layout is kept separately: it is also needed as a
    # fallback when an explicit VR stream turns out to lack a VR
    implicit_VR_struct = Struct(endian_chr + "HHL")
    # Explicit VR layout: tag, VR, 2-byte length (or 0 if special VRs)
    element_struct = (
        implicit_VR_struct if is_implicit_VR
        else Struct(endian_chr + "HH2sH")
    )
    extra_length_struct = Struct(endian_chr + "L")  # for special VRs

    # Bind frequently used callables to locals for faster lookup
    extra_length_unpack = extra_length_struct.unpack
    element_struct_unpack = element_struct.unpack
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging

    defer_size = size_in_bytes(defer_size)

    charset_tag = BaseTag(0x00080005)  # Specific Character Set
    wanted_tags = (
        {Tag(tag) for tag in specific_tags} if specific_tags else set()
    )
    filter_tags = bool(wanted_tags)
    if filter_tags:
        # always needed to decode the other elements
        wanted_tags.add(Tag(0x00080005))

    while True:
        # Read tag, VR, length, get ready to read value
        header = fp_read(8)
        if len(header) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(header)}"

        VR: Optional[str]
        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(header)
        else:  # explicit VR
            group, elem, raw_vr, length = element_struct_unpack(header)
            # defend against switching to implicit VR, some writer do in SQ's
            # issue 1067, issue 1035
            if (not (b'AA' <= raw_vr <= b'ZZ')
                    and config.assume_implicit_vr_switch):
                # invalid VR, must be 2 cap chrs, assume implicit and continue
                VR = None
                group, elem, length = implicit_VR_struct.unpack(header)
            else:
                VR = raw_vr.decode(default_encoding)
                if VR in extra_length_VRs:
                    # the real length follows in an additional 4 bytes
                    extra = fp_read(4)
                    length = extra_length_unpack(extra)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(extra)

        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        # XXX VR may be None here!! Should stop_when just take tag?
        if stop_when is not None and stop_when(tag, VR, length):
            if debugging:
                logger_debug("Reading ended by stop_when callback. "
                             "Rewinding to start of data element.")
            rewind_length = 8
            if not is_implicit_VR and VR in extra_length_VRs:
                rewind_length += 4
            fp.seek(value_tell - rewind_length)
            return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if filter_tags and tag not in wanted_tags:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if (defer_size is not None and length > defer_size
                    and tag != charset_tag):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                if length > 0:
                    value = fp_read(length)
                else:
                    value = cast(
                        Optional[bytes], empty_value_for_VR(VR, raw=True)
                    )
                if debugging:
                    dotdot = "..." if length > 20 else " "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == charset_tag:
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian
                )
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)
            continue

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter

        # VR UN with undefined length shall be handled as SQ
        # see PS 3.5, section 6.2.2
        if VR == 'UN':
            VR = 'SQ'

        # Try to look up type to see if is a SQ
        # if private tag, won't be able to look it up in dictionary,
        #   in which case just ignore it and read the bytes unless it is
        #   identified as a Sequence
        # (the 'UN' arm cannot match here: UN was mapped to SQ just above)
        if VR is None or (VR == 'UN' and config.replace_un_with_known_vr):
            try:
                VR = dictionary_VR(tag)
            except KeyError:
                # Look ahead to see if it consists of items
                # and is thus a SQ
                next_tag = _unpack_tag(fp_read(4), endian_chr)
                # Rewind the file
                fp.seek(fp_tell() - 4)
                if next_tag == ItemTag:
                    VR = 'SQ'

        if VR == 'SQ':
            if debugging:
                logger_debug(
                    f"{fp_tell():08X}: Reading/parsing undefined length "
                    "sequence"
                )
            seq = read_sequence(fp, is_implicit_VR,
                                is_little_endian, length, encoding)
            # the sequence has to be parsed even if it is discarded
            if filter_tags and tag not in wanted_tags:
                continue
            yield DataElement(tag, VR, seq, value_tell,
                              is_undefined_length=True)
        else:
            if debugging:
                logger_debug("Reading undefined length data element")
            value = read_undefined_length_value(
                fp, is_little_endian, SequenceDelimiterTag, defer_size
            )
            # tags with undefined length are skipped after read
            if filter_tags and tag not in wanted_tags:
                continue
            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDelimiterTag, else
        a fixed number of bytes to read
    stop_when : None, optional
        optional call_back function which can terminate reading.
        See help for data_element_generator for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``dcmread`` for more parameter info.
    parent_encoding :
        optional encoding to use as a default in case
        a Specific Character Set (0008,0005) isn't specified
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    a Dataset instance

    Notes
    -----
    ``EOFError`` and ``NotImplementedError`` raised while reading are
    logged and the elements read so far are returned.

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of Dicom `DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    # The check may consume bytes from fp, so rewind afterwards
    is_implicit_VR = _is_implicit_vr(
        fp, is_implicit_VR, is_little_endian, stop_when)
    fp.seek(fp_start)
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(str(details) + " in file " +
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    if 0x00080005 in raw_data_elements:
        # Hand the element's *value* (the character set term(s)) to
        # convert_encodings, not the DataElement object itself
        char_set = DataElement_from_raw(raw_data_elements[0x00080005]).value
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
def read_dataset(fp: BinaryIO,
                 is_implicit_VR: bool,
                 is_little_endian: bool,
                 bytelength: Optional[int] = None,
                 stop_when: Optional[Callable[[BaseTag,
                                               Optional[str],
                                               int], bool]] = None,
                 defer_size: Optional[Union[str, int, float]] = None,
                 parent_encoding: Union[
                     str, MutableSequence[str]] = default_encoding,
                 specific_tags: Optional[List[BaseTag]] = None,
                 at_top_level: bool = True) -> Dataset:
    """Read the next dataset from `fp` and return it as a
    :class:`~pydicom.dataset.Dataset`.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a
        fixed number of bytes to read
    stop_when : None, optional
        Optional call_back function which can terminate reading. See help
        for :func:`data_element_generator` for details
    defer_size : int, str or float, optional
        Size to avoid loading large elements in memory. See :func:`dcmread`
        for more parameter info.
    parent_encoding : str or List[str]
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list of BaseTag, optional
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        If dataset is top level (not within a sequence).
        Used to turn off explicit VR heuristic within sequences

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    Notes
    -----
    An ``EOFError`` raised while reading is re-raised when
    ``config.settings.reading_validation_mode`` is ``config.RAISE`` and
    turned into a ``UserWarning`` otherwise; a ``NotImplementedError`` is
    logged. In the non-raising cases the elements read so far are returned.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    start = fp.tell()
    # The check may consume bytes from fp, so rewind afterwards
    is_implicit_VR = _is_implicit_vr(
        fp, is_implicit_VR, is_little_endian, stop_when,
        is_sequence=not at_top_level
    )
    fp.seek(start)

    elements: Dict[BaseTag, Union[RawDataElement, DataElement]] = {}
    generator = data_element_generator(
        fp,
        is_implicit_VR,
        is_little_endian,
        stop_when,
        defer_size,
        parent_encoding,
        specific_tags,
    )
    # An Item Delimitation tag ends a dataset that is a sequence item
    item_delimiter = BaseTag(0xFFFEE00D)

    try:
        # Read data elements. Stop on some errors, but return what was read
        while bytelength is None or fp.tell() - start < bytelength:
            element = next(generator)
            if element.tag == item_delimiter:
                break
            elements[element.tag] = element
    except StopIteration:
        pass
    except EOFError as exc:
        if config.settings.reading_validation_mode == config.RAISE:
            raise
        warnings.warn(
            str(exc) + " in file " + getattr(fp, "name", "<no filename>"),
            UserWarning
        )
    except NotImplementedError as exc:
        logger.error(exc)

    ds = Dataset(elements)

    encoding: Union[str, MutableSequence[str]]
    if 0x00080005 not in elements:
        encoding = parent_encoding  # -> Union[str, MutableSequence[str]]
    else:
        charset_elem = cast(RawDataElement, elements[BaseTag(0x00080005)])
        char_set = cast(
            Optional[Union[str, MutableSequence[str]]],
            DataElement_from_raw(charset_elem).value
        )
        encoding = convert_encodings(char_set)  # -> List[str]

    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
def write_data_element(fp, data_element, encodings=None):
    """Write the data_element to file fp according to dicom media storage
    rules.

    The value is serialized into an in-memory buffer and validated before
    anything - including the tag - is written to `fp`, so a failing element
    leaves `fp` untouched.

    Parameters
    ----------
    fp : file-like
        Target to write to. Must provide ``write_tag``, ``write_US``,
        ``write_UL`` and ``write`` as well as the ``is_implicit_VR`` and
        ``is_little_endian`` attributes.
    data_element
        The element to write. Its ``tag``, ``VR``, ``value``, ``is_raw``,
        ``is_empty`` and ``length`` / ``is_undefined_length`` attributes
        are read.
    encodings : list of str, optional
        Character encodings handed to the writers of text, PN and SQ
        elements. Defaults to ``[default_encoding]``.

    Raises
    ------
    ValueError
        If the VR is ambiguous (not 2 characters) in an explicit VR
        transfer syntax, or if Pixel Data with undefined length is not
        encapsulated.
    NotImplementedError
        If no writer is registered for the element's VR.

    Warns
    -----
    A warning is issued if the value is too long for the 2-byte length
    field of an explicit VR element; the element is then written with VR
    "UN" instead.
    """
    VR = data_element.VR
    if not fp.is_implicit_VR and len(VR) != 2:
        msg = ("Cannot write ambiguous VR of '{}' for data element with "
               "tag {}.\nSet the correct VR before writing, or use an "
               "implicit VR transfer syntax".format(
                   VR, repr(data_element.tag)))
        raise ValueError(msg)

    # write into a buffer to avoid seeking back which can be expensive
    buffer = DicomBytesIO()
    buffer.is_little_endian = fp.is_little_endian
    buffer.is_implicit_VR = fp.is_implicit_VR

    if data_element.is_raw:
        # raw data element values can be written as they are
        buffer.write(data_element.value)
        is_undefined_length = data_element.length == 0xFFFFFFFF
    else:
        if VR not in writers:
            raise NotImplementedError(
                "write_data_element: unknown Value Representation "
                "'{0}'".format(VR))

        encodings = encodings or [default_encoding]
        encodings = convert_encodings(encodings)
        writer_function, writer_param = writers[VR]
        is_undefined_length = data_element.is_undefined_length
        if not data_element.is_empty:
            if VR in text_VRs or VR in ('PN', 'SQ'):
                writer_function(buffer, data_element, encodings=encodings)
            else:
                # Many numeric types use the same writer but with
                # numeric format parameter
                if writer_param is not None:
                    writer_function(buffer, data_element, writer_param)
                else:
                    writer_function(buffer, data_element)

    # valid pixel data with undefined length shall contain encapsulated
    # data, e.g. sequence items - raise ValueError otherwise (see #238)
    if is_undefined_length and data_element.tag == 0x7fe00010:
        encap_item = b'\xfe\xff\x00\xe0'
        if not fp.is_little_endian:
            # Non-conformant endianness
            encap_item = b'\xff\xfe\xe0\x00'
        if not data_element.value.startswith(encap_item):
            raise ValueError(
                "(7FE0,0010) Pixel Data has an undefined length indicating "
                "that it's compressed, but the data isn't encapsulated as "
                "required. See pydicom.encaps.encapsulate() for more "
                "information")

    value_length = buffer.tell()
    if (not fp.is_implicit_VR and VR not in extra_length_VRs and
            not is_undefined_length and value_length > 0xffff):
        # see PS 3.5, section 6.2.2 for handling of this case
        msg = ('The value for the data element {} exceeds the size '
               'of 64 kByte and cannot be written in an explicit transfer '
               'syntax. The data element VR is changed from "{}" to "UN" '
               'to allow saving the data.'.format(data_element.tag, VR))
        warnings.warn(msg)
        VR = 'UN'

    # All checks passed - only now start writing to fp, beginning with
    # the element's tag
    fp.write_tag(data_element.tag)

    # write the VR for explicit transfer syntax
    if not fp.is_implicit_VR:
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes

    if (not fp.is_implicit_VR and VR not in extra_length_VRs and
            not is_undefined_length):
        fp.write_US(value_length)  # Explicit VR length field is 2 bytes
    else:
        # write the proper length of the data_element in the length slot,
        # unless is SQ with undefined length.
        fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)

    fp.write(buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True, the file is rewound
        to the start of the current data element and the generator
        finishes.
    defer_size : int, str, None, optional
        See ``read_file`` for parameter info.
    encoding :
        Encoding scheme

    Yields
    ------
    RawDataElement or DataElement
        A ``DataElement`` for sequences of undefined length, a
        ``RawDataElement`` (tag, VR, length, raw value bytes, position of
        the value in the file, is_implicit_VR, is_little_endian)
        otherwise. VR is None if implicit VR; the length can be the DICOM
        "undefined length" 0xFFFFFFFF; the value is None if reading was
        deferred.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            # at end of file; a generator must finish with ``return`` -
            # raising StopIteration inside it is turned into a
            # RuntimeError (PEP 479)
            return
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        # (0008,0005) Specific Character Set determines the encoding used
        # for the elements that follow
        is_char_set = tag == (0x08, 0x05)
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # Never defer Specific Character Set: its value is needed
            # right below to set up the encoding
            if (defer_size is not None and length > defer_size and
                    not is_char_set):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = " "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                                                           bytes2hex(
                                                               value[:12]),
                                                           dotdot,
                                                           value[:12],
                                                           dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if is_char_set:
                from pydicom.values import convert_string
                encoding = convert_string(value, is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator for use
                # with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and
                    # is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if is_char_set:
                    from pydicom.values import convert_string
                    encoding = convert_string(value, is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)