def test_nested_private_SQ(self):
    """Can successfully read a private SQ which contains additional SQs.

    Regression test for issue 113. For a private SQ of undefined length the
    reader finds the end of the sequence by looking for the SQ termination
    sequence. With nested sequences the first terminator encountered belongs
    to the nested sequence rather than its parent, which used to prevent the
    remainder of the file from being read properly.
    """
    dataset = dcmread(nested_priv_SQ_name)

    # The whole dataset must have been read in
    assert TupleTag((0x7fe0, 0x10)) in dataset

    # The outer private element must really be a sequence
    private_tag = TupleTag((0x01, 0x01))
    outer_seq = dataset[private_tag]
    assert 'SQ' == outer_seq.VR

    # ... and so must the private sequence nested inside its first item
    inner_seq = outer_seq[0][private_tag]
    assert 'SQ' == inner_seq.VR

    # Finally check the parsed values at both nesting levels
    assert b'Double Nested SQ' == inner_seq[0][private_tag].value
    assert b'Nested SQ' == outer_seq[0][0x01, 0x02].value
def absorb_delimiter_item(
    fp: BinaryIO, is_little_endian: bool, delimiter: BaseTag
) -> None:
    """Read (and ignore) undefined length sequence or item terminators.

    Parameters
    ----------
    fp : file-like
        The file-like to read from; 8 bytes (tag + 4-byte length) are
        consumed from the current position.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False``
        otherwise.
    delimiter : BaseTag
        The delimiter tag expected at the current position.

    Notes
    -----
    If the tag that was read is not `delimiter`, a warning is logged and
    the file position is rewound by the 8 bytes just read, so the caller
    can carry on parsing from the unexpected element.
    """
    struct_format = "<HHL" if is_little_endian else ">HHL"
    group, elem, length = unpack(struct_format, fp.read(8))
    tag = TupleTag((group, elem))
    if tag != delimiter:
        # ``Logger.warn`` is a deprecated alias that newer Python versions
        # no longer provide; ``Logger.warning`` is the supported spelling.
        logger.warning(
            "Did not find expected delimiter "
            f"'{dictionary_description(delimiter)}', instead found "
            f"{tag} at file position 0x{fp.tell() - 8:X}"
        )
        fp.seek(fp.tell() - 8)
        return

    logger.debug("%04x: Found Delimiter '%s'", fp.tell() - 8,
                 dictionary_description(delimiter))

    # A delimiter item is expected to carry a zero length; anything else is
    # only reported at debug level.
    if length == 0:
        logger.debug("%04x: Read 0 bytes after delimiter", fp.tell() - 4)
    else:
        logger.debug("%04x: Expected 0x00000000 after delimiter, found 0x%x",
                     fp.tell() - 4, length)
def test_no_transfer_syntax_in_meta(self):
    """Read file with file_meta, but has no TransferSyntaxUID in it.

    Regression test for issue 258: when file_meta is present but carries no
    TransferSyntaxUID, the default transfer syntax should be assumed.
    """
    # The test file uses the default transfer syntax
    dataset = dcmread(meta_missing_tsyntax_name)

    # Reuse one check from the nested private sequence test to make sure
    # the file was read correctly
    assert TupleTag((0x7fe0, 0x10)) in dataset
def testNoTransferSyntaxInMeta(self):
    """Read file with file_meta, but has no TransferSyntaxUID in it.

    Regression test for issue 258: when file_meta is present but carries no
    TransferSyntaxUID, the default transfer syntax should be assumed.
    """
    # The test file uses the DICOM default transfer syntax
    dataset = read_file(meta_missing_tsyntax_name)

    # Reuse one check from the nested private sequence test to make sure
    # the file was read correctly
    pixel_data_present = TupleTag((0x7fe0, 0x10)) in dataset
    self.assertTrue(
        pixel_data_present,
        "Failed to properly read a file with no "
        "Transfer Syntax in file_meta"
    )
def _is_implicit_vr(fp, implicit_vr_is_assumed, is_little_endian, stop_when):
    """Check if the real VR is explicit or implicit.

    Reads the first tag (4 bytes) and the two bytes following it from `fp`
    and decides from those two bytes whether a VR is present. The file
    position is not restored by this function.

    Parameters
    ----------
    fp : an opened file object
    implicit_vr_is_assumed : bool
        True if implicit VR is assumed.
        If this does not match with the real transfer syntax, a user warning
        will be issued.
    is_little_endian : bool
        True if file has little endian transfer syntax.
        Needed to interpret the first tag.
    stop_when : None, optional
        Optional call_back function which can terminate reading.
        Needed to check if the next tag still belongs to the read dataset.

    Returns
    -------
    True if implicit VR is used, False otherwise.

    Raises
    ------
    InvalidDicomError
        If the detected VR kind differs from the assumed one and
        ``config.enforce_valid_values`` is set.
    """
    raw_tag = fp.read(4)
    vr = fp.read(2)
    if len(vr) < 2:
        # Not enough data to decide - trust the assumption
        return implicit_vr_is_assumed

    # It is sufficient to check if the VR is in valid ASCII range, as it is
    # extremely unlikely that the tag length accidentally has such a
    # representation - this would need the first tag to be longer than 16kB
    # (e.g. it should be > 0x4141 = 16705 bytes)
    if in_py2:
        first, second = ord(vr[0]), ord(vr[1])
    else:
        first, second = vr[0], vr[1]
    looks_like_vr = 0x40 < first < 0x5B and 0x40 < second < 0x5B
    found_implicit = not looks_like_vr

    if found_implicit == implicit_vr_is_assumed:
        return found_implicit

    # Mismatch: first check if the tag still belongs to the dataset if
    # stop_when is given - if not, the dataset is empty and we just return
    byte_order = "<" if is_little_endian else ">"
    tag = TupleTag(unpack(byte_order + "HH", raw_tag))
    if stop_when is not None and stop_when(tag, vr, 0):
        return found_implicit

    # Got to the real problem - warn or raise depending on config
    if found_implicit:
        found_vr, expected_vr = 'implicit', 'explicit'
    else:
        found_vr, expected_vr = 'explicit', 'implicit'
    message = ('Expected {0} VR, but found {1} VR - using {1} VR for '
               'reading'.format(expected_vr, found_vr))
    if config.enforce_valid_values:
        raise InvalidDicomError(message)
    warnings.warn(message, UserWarning)
    return found_implicit
def read_frame_raw(self, index: int) -> bytes:
    """Reads the raw pixel data of an individual frame item.

    Parameters
    ----------
    index: int
        Zero-based frame index

    Returns
    -------
    bytes
        Pixel data of a given frame item encoded in the transfer syntax.

    Raises
    ------
    ValueError
        When `index` is not smaller than the number of frames, or when an
        unexpected tag is encountered while reading an encapsulated frame
    IOError
        When frame could not be read

    """
    # The index is zero-based, so ``index == number_of_frames`` is already
    # out of range and must be rejected here rather than failing later on
    # the offset table lookup.
    if index >= self.number_of_frames:
        raise ValueError('Frame index exceeds number of frames in image.')
    logger.debug(f'read frame #{index}')

    frame_offset = self._basic_offset_table[index]
    self._fp.seek(self._first_frame_offset + frame_offset, 0)
    if self.metadata.file_meta.TransferSyntaxUID.is_encapsulated:
        try:
            stop_at = self._basic_offset_table[index + 1] - frame_offset
        except IndexError:
            # For the last frame, there is no next offset available.
            stop_at = -1
        n = 0
        # A frame may consist of multiple items (fragments).
        fragments = []
        while True:
            tag = TupleTag(self._fp.read_tag())
            # Stop once the bytes consumed reach the next frame's offset or
            # the end of the encapsulated pixel data is reached.
            if n == stop_at or int(tag) == SequenceDelimiterTag:
                break
            if int(tag) != ItemTag:
                raise ValueError(f'Failed to read frame #{index}.')
            length = self._fp.read_UL()
            fragments.append(self._fp.read(length))
            # item tag (4 bytes) + item length (4 bytes) + item value
            n += 4 + 4 + length
        frame_data = b''.join(fragments)
    else:
        frame_data = self._fp.read(self._bytes_per_frame_uncompressed)

    if len(frame_data) == 0:
        raise IOError(f'Failed to read frame #{index}.')

    return frame_data
def testNestedPrivateSQ(self):
    """Can successfully read a private SQ which contains additional SQ's.

    Regression test for issue 113. For a private SQ of undefined length the
    reader finds the end of the sequence by looking for the SQ termination
    sequence. With nested sequences the first terminator encountered belongs
    to the nested sequence rather than its parent, which used to prevent the
    remainder of the file from being read properly.
    """
    dataset = read_file(nested_priv_SQ_name)

    # The whole dataset must have been read in
    self.assertTrue(
        TupleTag((0x7fe0, 0x10)) in dataset,
        "Entire dataset was not parsed properly. PixelData is "
        "not present"
    )

    # The outer private element must really be a sequence
    private_tag = TupleTag((0x01, 0x01))
    outer_seq = dataset[private_tag]
    self.assertEqual(outer_seq.VR, 'SQ',
                     "First level sequence not parsed properly")

    # ... and so must the private sequence nested inside its first item
    inner_seq = outer_seq[0][private_tag]
    self.assertEqual(inner_seq.VR, 'SQ',
                     "Second level sequence not parsed properly")

    # Finally check the parsed values at both nesting levels
    mismatch = "Expected a value of %s, got %s'"

    wanted = b'Double Nested SQ'
    actual = inner_seq[0][private_tag].value
    self.assertEqual(actual, wanted, mismatch % (wanted, actual))

    wanted = b'Nested SQ'
    actual = outer_seq[0][0x01, 0x02].value
    self.assertEqual(actual, wanted, mismatch % (wanted, actual))
def absorb_delimiter_item(fp, is_little_endian, delimiter):
    """Read (and ignore) undefined length sequence or item terminators.

    Eight bytes (tag + 4-byte length) are read from `fp`. If the tag is not
    the expected `delimiter`, a warning is logged and the file position is
    rewound by those 8 bytes so the caller can continue parsing from the
    unexpected element.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_little_endian : bool
        True if the data is encoded as little endian, False otherwise.
    delimiter : tag
        The delimiter tag expected at the current position.
    """
    struct_format = "<HHL" if is_little_endian else ">HHL"
    group, elem, length = unpack(struct_format, fp.read(8))
    tag = TupleTag((group, elem))
    if tag != delimiter:
        # ``Logger.warn`` is a deprecated alias that newer Python versions
        # no longer provide; ``Logger.warning`` is the supported spelling.
        # The message text is unchanged, only formatted lazily.
        logger.warning(
            "Did not find expected delimiter '%s'"
            ", instead found %s at file position 0x%x",
            dictionary_description(delimiter), str(tag), fp.tell() - 8
        )
        fp.seek(fp.tell() - 8)
        return

    logger.debug("%04x: Found Delimiter '%s'", fp.tell() - 8,
                 dictionary_description(delimiter))

    # A delimiter item is expected to carry a zero length; anything else is
    # only reported at debug level.
    if length == 0:
        logger.debug("%04x: Read 0 bytes after delimiter", fp.tell() - 4)
    else:
        logger.debug("%04x: Expected 0x00000000 after delimiter, found 0x%x",
                     fp.tell() - 4, length)
def _get_bot(fp: DicomFile, number_of_frames: int) -> List[int]:
    """Tries to read the value of the Basic Offset Table (BOT) item and builds
    it in case it is empty.

    Parameters
    ----------
    fp: pydicom.filebase.DicomFile
        Pointer for DICOM PS3.10 file stream positioned at the first byte of
        the Pixel Data element
    number_of_frames: int
        Number of frames contained in the Pixel Data element

    Returns
    -------
    List[int]
        Offset of each Frame item in bytes from the first byte of the Pixel
        Data element following the BOT item

    Raises
    ------
    ValueError
        When the tag following the BOT item is not an Item tag

    Note
    ----
    Moves the pointer to the first byte of the open file following the BOT
    item (the first byte of the first Frame item).

    """
    logger.debug('read Basic Offset Table')
    offsets = _read_bot(fp)

    # Peek at the tag after the BOT item: it has to start the first frame
    # item. The pointer is put back afterwards.
    start_of_frames = fp.tell()
    if int(TupleTag(fp.read_tag())) != ItemTag:
        raise ValueError('Reading of Basic Offset Table failed')
    fp.seek(start_of_frames, 0)

    # Basic Offset Table item must be present, but it may be empty
    n_offsets = len(offsets)
    if n_offsets == 0:
        logger.debug('Basic Offset Table item is empty')
    if n_offsets == number_of_frames:
        return offsets

    # The table does not describe every frame, so build it from the stream
    logger.debug('build Basic Offset Table item')
    return _build_bot(fp, number_of_frames=number_of_frames)
def convert_tag(byte_string, is_little_endian, offset=0):
    """Return a decoded :class:`BaseTag<pydicom.tag.BaseTag>` from the encoded
    `byte_string`.

    The four bytes starting at `offset` are interpreted as two unsigned
    16-bit integers (group, element).

    Parameters
    ----------
    byte_string : bytes
        The encoded tag.
    is_little_endian : bool
        ``True`` if the encoding is little endian, ``False`` otherwise.
    offset : int, optional
        The byte offset in `byte_string` to the start of the tag.

    Returns
    -------
    BaseTag
        The decoded tag.
    """
    struct_format = "<HH" if is_little_endian else ">HH"
    encoded_tag = byte_string[offset:offset + 4]
    return TupleTag(unpack(struct_format, encoded_tag))
def convert_tag(
    byte_string: bytes, is_little_endian: bool, offset: int = 0
) -> BaseTag:
    """Return a decoded :class:`BaseTag<pydicom.tag.BaseTag>` from the encoded
    `byte_string`.

    The four bytes starting at `offset` are interpreted as two unsigned
    16-bit integers (group, element).

    Parameters
    ----------
    byte_string : bytes
        The encoded tag.
    is_little_endian : bool
        ``True`` if the encoding is little endian, ``False`` otherwise.
    offset : int, optional
        The byte offset in `byte_string` to the start of the tag.

    Returns
    -------
    BaseTag
        The decoded tag.
    """
    if is_little_endian:
        fmt = "<HH"
    else:
        fmt = ">HH"

    encoded_tag = byte_string[offset:offset + 4]
    group_and_element = unpack(fmt, encoded_tag)
    return TupleTag(cast(Tuple[int, int], group_and_element))
def _read_bot(fp: DicomFile) -> List[int]:
    """Reads the Basic Offset Table (BOT) item of an encapsulated Pixel Data
    element.

    Parameters
    ----------
    fp: pydicom.filebase.DicomFile
        Pointer for DICOM PS3.10 file stream positioned at the first byte of
        the Pixel Data element

    Returns
    -------
    List[int]
        Offset of each Frame item in bytes from the first byte of the Pixel
        Data element following the BOT item

    Note
    ----
    Moves the pointer to the first byte of the open file following the BOT
    item (the first byte of the first Frame item).

    Raises
    ------
    IOError
        When file pointer is not positioned at first byte of Pixel Data element

    """
    element_tag = TupleTag(fp.read_tag())
    if int(element_tag) not in _PIXEL_DATA_TAGS:
        raise IOError(
            'Expected file pointer at first byte of Pixel Data element.'
        )

    # Skip Pixel Data element header (tag, VR, length). The 4 bytes of the
    # tag have been consumed above, hence the relative seek is shortened.
    header_length = data_element_offset_to_value(fp.is_implicit_VR, 'OB')
    fp.seek(header_length - 4, 1)

    # Only the offsets are of interest; the "is empty" flag is discarded
    _, frame_offsets = get_frame_offsets(fp)
    return frame_offsets
def desensitization(save_path, filepath, originalDataPath, tuomin_path):
    """Anonymize one DICOM file and write the result below `save_path`.

    The file at `filepath` is read, three name/address elements are
    overwritten with a fixed placeholder (or added if missing), a list of
    further elements is removed, and the dataset is saved to a path that
    mirrors the file's location relative to `originalDataPath`. The saved
    file is then passed to the external ``gdcmconv -raw`` command and one
    line is written to the log file `tuomin_path`.

    Parameters
    ----------
    save_path : str
        Root folder for the anonymized output.
    filepath : str
        Path of the DICOM file to anonymize ('/' separated).
    originalDataPath : str
        Prefix stripped from `filepath` to get the relative output folder;
        also used as prefix of the error log file name.
    tuomin_path : str
        File name handed to ``logging.basicConfig`` for the anonymization
        log.

    Notes
    -----
    Any exception raised while processing is swallowed by the outer bare
    ``except`` and only the file path is meant to be written to an error
    log.
    """
    # Elements that are overwritten with a placeholder (added when absent)
    necessary_tag = [
        (0x0008, 0x0090),  # Referring Physician's Name
        (0x0010, 0x0010),  # Patient's Name
        (0x0010, 0x1040),  # Patient's Address
    ]
    # Elements that are removed from the dataset when present
    unnecessar_tag = [
        (0x0008, 0x0080),  # Institution Name
        (0x0008, 0x0081),  # Institution Address
        (0x0008, 0x0092),  # Referring Physician Address
        (0x0008, 0x1040),  # Institutional Department Name
        (0x0008, 0x1048),  # Physician(s) of Record
        (0x0008, 0x1050),  # Performing Physician's Name
        (0x0010, 0x2154),  # Patient's Telephone Numbers
        (0x0032, 0x1032),  # Requesting Physician
        (0x0008, 0x0000),  # [Patient's Name]  NOTE(review): label looks wrong for this tag - confirm
        (0x0018, 0x1151),
    ]
    # Tag anonymization
    try:
        info = pydicom.read_file(filepath, force=True)
        # Build an identifier for the log line: series UID (plus instance
        # number when available), falling back to the study UID if that fails
        try:
            if hasattr(info, 'InstanceNumber'):
                ID = str(info.SeriesInstanceUID) + '_' + str(info.InstanceNumber)
            else:
                ID = str(info.SeriesInstanceUID)
        except:
            if hasattr(info, 'InstanceNumber'):
                ID = str(info.StudyInstanceUID) + '_' + str(info.InstanceNumber)
            else:
                ID = str(info.StudyInstanceUID)
        # NOTE(review): this local is never used afterwards; the attribute
        # access itself raises if SeriesInstanceUID is missing, which sends
        # control to the outer except - confirm whether that is intended
        SeriesInstanceUID = info.SeriesInstanceUID
        # Mirror the source folder layout below save_path
        file_name = filepath.split('/')[-1]
        folder_name = filepath.replace(originalDataPath, '')
        folder_name1 = folder_name.replace(file_name, '')
        folder_path = save_path + folder_name1
        try:
            os.makedirs(folder_path)
        except OSError:
            # Best effort: e.g. the folder already exists
            pass
        for necessary in necessary_tag:
            name = necessary[1]  # element number of the tag
            tag = TupleTag(necessary)
            try:
                info[tag].value = 'Anonymized by inferVISION '
                # print info[tag].value
            except KeyError:
                # Element is absent: add it, picking the tag by its element
                # number (144 == 0x0090, 16 == 0x0010, 4160 == 0x1040)
                if name == 144:
                    info.add_new(0x80090, 'PN', 'Anonymized by inferVISION ')
                if name == 16:
                    info.add_new(0x100010, 'PN', 'Anonymized by inferVISION ')
                if name == 4160:
                    info.add_new(0x101040, 'LO', 'Anonymized by inferVISION ')
        for unnecessar in unnecessar_tag:
            tag = TupleTag(unnecessar)
            info.pop(tag, None)
        print(filepath + " Sensitive information has been deleted!!!!")
        #
        out_path = os.path.join(folder_path, file_name)
        # try:
        #     st = info.SliceThickness
        try:
            # info[0x0018,0x1151].value = int(float(info[0x0018,0x1151].value))
            info.save_as(out_path)
        except:
            # Saving failed because file_meta is missing in the source file,
            # so the file could not be rewritten; fill in the meta elements
            # and try once more.
            # NOTE(review): ImplementationClassUID is set to the SOP Class
            # UID here - confirm that this is intended
            info.file_meta.MediaStorageSOPClassUID = info.SOPClassUID
            info.file_meta.MediaStorageSOPInstanceUID = info.SOPInstanceUID
            info.file_meta.ImplementationClassUID = info.SOPClassUID
            info.save_as(out_path)
        # Convert the saved file in place with the external gdcmconv tool.
        # NOTE(review): the command line is built by string formatting, so
        # paths containing spaces or shell metacharacters will break it
        os.system('gdcmconv -raw %s %s' % (out_path, out_path))
        logging.basicConfig(level=logging.INFO,
                            format = ' %(name)s - %(levelname)s -%(module)s: %(message)s',
                            filename=tuomin_path,  # path of the anonymization log
                            filemode='w')
        # format = '%(asctime)s - %(name)s - %(levelname)s -%(module)s: %(message)s',
        # datefmt = '%Y-%m-%d %H:%M:%S %p',
        # level = 10
        # NOTE(review): the date in the log line is a hard-coded literal
        logging.info(' %s\t%s\t%s\t%s\t' % ('2018-01-02',ID, filepath, "Sensitive information has been removed!!"))
    except:
        # NOTE(review): `i` is not defined in this function - presumably a
        # module-level name; verify. The statement below uses the Python 2
        # "print >> file" form, which does not write to the file on Python 3
        with open(originalDataPath + str(i) + '_error.log', 'a+') as errorfile:
            print >> errorfile, filepath
def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, the file is rewound to the start of the current
        data element and the generator ends.
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme
    specific_tags : list or None
        See :func:`dcmread` for parameter info.

    Yields
    -------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"

    # assign implicit VR struct to variable as use later if VR assumed missing
    implicit_VR_struct = Struct(endian_chr + "HHL")
    if is_implicit_VR:
        element_struct = implicit_VR_struct
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # When specific tags are requested, Specific Character Set is always
    # added to the set so it is never skipped
    tag_set = {Tag(tag) for tag in specific_tags} if specific_tags else set()
    has_tag_set = bool(tag_set)
    if has_tag_set:
        tag_set.add(Tag(0x00080005))  # Specific Character Set

    while True:
        # VR: Optional[str]

        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(bytes_read)}"

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            # defend against switching to implicit VR, some writer do in SQ's
            # issue 1067, issue 1035

            if not (b'AA' <= VR <= b'ZZ') and config.assume_implicit_vr_switch:
                # invalid VR, must be 2 cap chrs, assume implicit and continue
                # (the same 8 bytes are re-interpreted as tag + 4-byte length)
                VR = None
                group, elem, length = implicit_VR_struct.unpack(bytes_read)
            else:
                VR = VR.decode(default_encoding)
                if VR in extra_length_VRs:
                    # the 2-byte field was reserved; the real length follows
                    bytes_read = fp_read(4)
                    length = extra_length_unpack(bytes_read)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(bytes_read)

        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # 8 header bytes, plus the 4-byte length of the special VRs
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # zero-length values are replaced by the raw empty value
                # for the VR instead of reading from the file
                value = (
                    fp_read(length) if length > 0
                    else cast(
                        Optional[bytes], empty_value_for_VR(VR, raw=True)
                    )
                )
                if debugging:
                    dotdot = "..." if length > 20 else " "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian
                )
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # VR UN with undefined length shall be handled as SQ
            # see PS 3.5, section 6.2.2
            if VR == 'UN':
                VR = 'SQ'
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            # NOTE(review): VR cannot be 'UN' at this point because of the
            # assignment just above, so only the `VR is None` part can match
            if VR is None or VR == 'UN' and config.replace_un_with_known_vr:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = _unpack_tag(fp_read(4), endian_chr)
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    logger_debug(
                        f"{fp_tell():08X}: Reading/parsing undefined length "
                        "sequence"
                    )

                # The sequence is parsed even when the tag is not wanted,
                # which moves the file position past it
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue

                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(
                    fp, is_little_endian, delimiter, defer_size
                )

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def testGoodTuple(self):
    """Tags can be constructed with two-tuple of 2-byte integers."""
    # Construction with the largest group number must simply not raise
    TupleTag((0xFFFF, 0xFFee))

    # The group of a constructed tag must be the first tuple member
    tag = TupleTag((0x300a, 0x00b0))
    failure_message = "Expected tag.group 0x300a, got %r" % tag.group
    self.assertEqual(tag.group, 0x300a, failure_message)
def open(self) -> None:
    """Opens file and reads metadata from it.

    After this call the file stays open, the metadata (everything before
    the Pixel Data element) has been read, the offset of the first frame is
    known and a table with the offset of each frame is available.

    Raises
    ------
    FileNotFoundError
        When file cannot be found
    OSError
        When file cannot be opened
    IOError
        When DICOM metadata cannot be read from file, or when the Basic
        Offset Table of encapsulated Pixel Data cannot be obtained
    ValueError
        When DICOM dataset contained in file does not represent an image,
        or when the number of offsets does not match the number of frames
    AttributeError
        When the image has more than one sample per pixel but no ICC
        Profile is found in the metadata

    Note
    ----
    Builds a Basic Offset Table to speed up subsequent frame-level access.

    """
    logger.debug('read File Meta Information')
    file_meta = read_file_meta_info(self.filename)
    transfer_syntax_uid = UID(file_meta.TransferSyntaxUID)
    try:
        # The encoding flags are taken from the transfer syntax and set on
        # the file object before the dataset is read
        self._fp = DicomFile(str(self.filename), mode='rb')
        self._fp.is_little_endian = transfer_syntax_uid.is_little_endian
        self._fp.is_implicit_VR = transfer_syntax_uid.is_implicit_VR
    except FileNotFoundError:
        raise FileNotFoundError(f'File not found: "{self.filename}"')
    except Exception:
        raise OSError(
            f'Could not open file for reading: "{self.filename}"')
    logger.debug('read metadata elements')
    try:
        self._metadata = dcmread(self._fp, stop_before_pixels=True)
    except Exception as err:
        raise IOError(
            f'DICOM metadata cannot be read from file "{self.filename}": '
            f'"{err}"')
    # Reading stopped before the pixels, so the current position is taken
    # as the start of the Pixel Data element
    self._pixel_data_offset = self._fp.tell()
    # Determine whether dataset contains a Pixel Data element
    try:
        tag = TupleTag(self._fp.read_tag())
    except EOFError:
        raise ValueError(
            'Dataset does not represent an image information entity.')
    if int(tag) not in _PIXEL_DATA_TAGS:
        raise ValueError(
            'Dataset does not represent an image information entity.')
    self._as_float = False
    if int(tag) in _FLOAT_PIXEL_DATA_TAGS:
        self._as_float = True
    # Reset the file pointer to the beginning of the Pixel Data element
    self._fp.seek(self._pixel_data_offset, 0)

    # Build the ICC Transformation object. This takes some time and should
    # be done only once to speedup subsequent color corrections.
    # NOTE(review): `self.metadata` is presumably a property exposing
    # `self._metadata` assigned above - confirm against the class
    if self.metadata.SamplesPerPixel == 1:
        self._color_manager = None
    else:
        try:
            icc_profile = self.metadata.ICCProfile
        except AttributeError:
            # Fall back to the profile of the first optical path item
            try:
                if len(self.metadata.OpticalPathSequence) > 1:
                    # This should not happen in case of a color image.
                    logger.warning(
                        'color image contains more than one optical path')
                optical_path_item = self.metadata.OpticalPathSequence[0]
                icc_profile = optical_path_item.ICCProfile
            except (IndexError, AttributeError):
                raise AttributeError(
                    'No ICC Profile found in image metadata.')
        try:
            self._color_manager = ColorManager(icc_profile)
        except ValueError:
            # An unreadable profile is not fatal: carry on without a
            # color manager
            logger.warning('could not read ICC Profile')
            self._color_manager = None

    logger.debug('build Basic Offset Table')
    transfer_syntax_uid = self.metadata.file_meta.TransferSyntaxUID
    if transfer_syntax_uid.is_encapsulated:
        try:
            self._basic_offset_table = _get_bot(
                self._fp, number_of_frames=self.number_of_frames)
        except Exception as err:
            raise IOError(f'Failed to build Basic Offset Table: "{err}"')
        # The position after obtaining the table is used as the start of
        # the first frame
        self._first_frame_offset = self._fp.tell()
    else:
        # Native pixel data: the first frame starts right after the element
        # header, and frame offsets are computed instead of read
        if self._fp.is_implicit_VR:
            header_offset = 4 + 4  # tag and length
        else:
            header_offset = 4 + 2 + 2 + 4  # tag, VR, reserved and length
        self._first_frame_offset = self._pixel_data_offset + header_offset
        n_pixels = self._pixels_per_frame
        bits_allocated = self.metadata.BitsAllocated
        if bits_allocated == 1:
            # One bit per pixel: byte offset is the pixel count of the
            # preceding frames divided by 8, rounded down
            self._basic_offset_table = [
                int(np.floor(i * n_pixels / 8))
                for i in range(self.number_of_frames)
            ]
        else:
            self._basic_offset_table = [
                i * self._bytes_per_frame_uncompressed
                for i in range(self.number_of_frames)
            ]

    if len(self._basic_offset_table) != self.number_of_frames:
        raise ValueError(
            'Length of Basic Offset Table does not match Number of Frames.'
        )
def convert_tag(byte_string, is_little_endian, offset=0):
    """Decode the 4-byte tag found at `offset` in `byte_string`.

    The bytes are read as two unsigned 16-bit integers (group, element)
    using little endian byte order if `is_little_endian` is true, big
    endian otherwise, and passed to ``TupleTag``.
    """
    struct_format = "<HH" if is_little_endian else ">HH"
    encoded_tag = byte_string[offset:offset + 4]
    return TupleTag(unpack(struct_format, encoded_tag))
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None):
    """Create a generator yielding the raw data elements read from `fp`.

    :param fp: file-like object to read from
    :param is_implicit_VR: True if the data is encoded as implicit VR
    :param is_little_endian: True if the data is encoded as little endian
    :param stop_when: optional callable taking (group, elem); when it
        returns True the file is rewound to the start of the current data
        element and the generator ends
    :param defer_size: values longer than this size are skipped and yielded
        as None
    :return: yields ((group, elem), VR, length, value, value_tell); for an
        undefined length sequence the value is None
    :raises NotImplementedError: for an undefined length element that is
        not identified as a sequence
    """
    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            # At end of file. A generator must finish with ``return``:
            # raising StopIteration inside a generator body is turned into
            # RuntimeError by PEP 479.
            return
        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if VR in extra_length_VRs_b:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        if stop_when is not None:
            if stop_when(group, elem):
                # 8 header bytes, plus the 4-byte length of the special VRs
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs_b:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return  # see the note on PEP 479 above

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
            yield ((group, elem), VR, length, value, value_tell)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR((group, elem))
                except KeyError:
                    # Look ahead to see if it consists of items and
                    # is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = b'SQ'
            if VR == b'SQ':
                yield ((group, elem), VR, length, None, value_tell)
            else:
                raise NotImplementedError("This reader does not handle "
                                          "undefined length except for SQ")
def test_tuple_tag(self):
    """Test quick tag construction with TupleTag."""
    # A (group, element) tuple must give the same tag as the combined
    # 32-bit value
    from_tuple = TupleTag((0xFFFF, 0xFFee))
    from_int = BaseTag(0xFFFFFFEE)
    self.assertEqual(from_tuple, from_int)
def test_tuple_tag(self):
    """Test quick tag construction with TupleTag."""
    # A (group, element) tuple must give the same tag as the combined
    # 32-bit value
    from_tuple = TupleTag((0xFFFF, 0xFFee))
    from_int = BaseTag(0xFFFFFFEE)
    assert from_tuple == from_int
def _unpack_tag(b: bytes, endianness: str) -> BaseTag:
    """Return the tag encoded in `b`.

    `b` is unpacked as two unsigned 16-bit integers (group, element) with
    `endianness` used as the struct byte order prefix.
    """
    group_and_element = unpack(f"{endianness}HH", b)
    return TupleTag(cast(Tuple[int, int], group_and_element))
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None,
                           encoding=default_encoding, specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
        Must provide ``read``, ``tell`` and ``seek``.
    is_implicit_VR : boolean
        True if element headers carry no VR field.
    is_little_endian : boolean
        True if element headers are little endian.
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        Otherwise called as ``stop_when(tag, VR, length)`` after each
        element header is read (``VR`` is None for implicit VR). If it
        returns True, the file is rewound to the start of that data element
        and the generator ends.
    defer_size : int, str, None, optional
        See ``dcmread`` for parameter info.
    encoding :
        Encoding scheme. Passed to ``read_sequence`` for undefined length
        sequences; replaced when a Specific Character Set (0008,0005)
        element is read.
    specific_tags : list or None
        See ``dcmread`` for parameter info. Keywords are converted to tags;
        Specific Character Set is always read, but only yielded if it was
        requested.

    Yields
    ------
    RawDataElement
        ``RawDataElement(tag, VR, length, value, value_tell,
        is_implicit_VR, is_little_endian)`` for elements with a defined
        length and for undefined length elements that are not sequences.
        ``VR`` is None if implicit VR and not looked up; ``length`` is the
        length as in the DICOM data element (could be DICOM "undefined
        length" 0xffffffff); ``value`` is the raw bytes from the file (not
        parsed into python types), or None when the read was deferred.
    DataElement
        ``DataElement(tag, VR, seq, value_tell, is_undefined_length=True)``
        for undefined length sequences, with ``seq`` being the result of
        ``read_sequence``.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # Build the set of tags to keep. Specific Character Set is always added
    # so the encoding can be tracked; has_specific_char_set remembers whether
    # the caller actually asked for it (and so whether it gets yielded).
    tag_set = set()
    has_specific_char_set = True
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, (str, compat.text_type)):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        has_specific_char_set = Tag(0x08, 0x05) in tag_set
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = len(tag_set) > 0

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # Special VRs carry their real length in the next 4 bytes
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # Header is 8 bytes, plus 4 for the extra length field
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = " "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                                                           bytes2hex(
                                                               value[:12]),
                                                           dotdot,
                                                           value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                from pydicom.values import convert_string
                encoding = convert_string(value, is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)
                # Read only to track the encoding; the caller did not ask
                # for this element, so do not yield it
                if not has_specific_char_set:
                    continue

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            # in which case just ignore it and read the bytes unless it is
            # identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                # The sequence is always read, even when it will be
                # filtered out below
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value, is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)
                    if not has_specific_char_set:
                        continue

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def _build_bot(fp: DicomFile, number_of_frames: int) -> List[int]:
    """Builds a Basic Offset Table (BOT) item of an encapsulated Pixel Data
    element.

    Parameters
    ----------
    fp: pydicom.filebase.DicomFile
        Pointer for DICOM PS3.10 file stream positioned at the first byte of
        the Pixel Data element following the empty Basic Offset Table (BOT)
    number_of_frames: int
        Total number of frames in the dataset

    Returns
    -------
    List[int]
        Offset of each Frame item in bytes from the first byte of the Pixel
        Data element following the BOT item

    Note
    ----
    Moves the pointer back to the first byte of the Pixel Data element
    following the BOT item (the first byte of the first Frame item), both on
    success and before raising any of the errors listed below.

    Raises
    ------
    IOError
        When file pointer is not positioned at first byte of first Frame item
        after Basic Offset Table item or when parsing of Frame item headers
        fails
    ValueError
        When the number of offsets doesn't match the specified number of frames

    """
    initial_position = fp.tell()
    offset_values = []
    i = 0
    while True:
        frame_position = fp.tell()
        tag = TupleTag(fp.read_tag())
        if int(tag) == SequenceDelimiterTag:
            break
        if int(tag) != ItemTag:
            fp.seek(initial_position, 0)
            raise IOError(
                'Building Basic Offset Table (BOT) failed. '
                f'Expected tag of Frame item #{i} at position {frame_position}.'
            )
        length = fp.read_UL()
        if length % 2:
            fp.seek(initial_position, 0)
            raise IOError('Building Basic Offset Table (BOT) failed. '
                          f'Length of Frame item #{i} is not a multiple of 2.')
        elif length == 0:
            fp.seek(initial_position, 0)
            raise IOError('Building Basic Offset Table (BOT) failed. '
                          f'Length of Frame item #{i} is zero.')

        # NOTE(review): the second positional argument presumably requests an
        # exact-length read -- confirm against DicomFile.read
        first_two_bytes = fp.read(2, 1)
        if not fp.is_little_endian:
            first_two_bytes = first_two_bytes[::-1]

        # In case of fragmentation, we only want to get the offsets to the
        # first fragment of a given frame. We can identify those based on the
        # JPEG and JPEG 2000 markers that should be found at the beginning and
        # end of the compressed byte stream.
        if first_two_bytes in _START_MARKERS:
            offset_values.append(frame_position - initial_position)

        i += 1
        fp.seek(length - 2, 1)  # minus the first two bytes

    # Restore the pointer before validating so that the ValueError path leaves
    # the stream in the same state as the IOError paths and the success path.
    fp.seek(initial_position, 0)

    if len(offset_values) != number_of_frames:
        raise ValueError(
            'Number of frame items does not match specified Number of Frames.')

    return offset_values