def _write_file_meta_info(fp, meta_dataset):
    """Write the DICOM group 2 File Meta Information to ``fp``.

    The ``DICM`` marker is written first, so ``fp`` should already be
    positioned past the 128 byte preamble. File meta information is always
    Little Endian, Explicit VR, so ``fp`` is switched to that mode here; it
    is changed afterwards to the transfer syntax given in the meta info.

    ``meta_dataset`` is modified: a File Meta Information Version element
    (0002,0001) is added when absent, and the group length element
    (0002,0000) is set to a placeholder of 0 before the dataset is written.
    The real group length is then written over the placeholder in the file.

    Raises ValueError if any of the required elements (0002,0002),
    (0002,0003), (0002,0010), (0002,0012) is not in ``meta_dataset``. A
    dataset that came from read_file() should already contain them.
    """
    fp.write(b'DICM')

    # Group 2 is always written Little Endian / Explicit VR
    fp.is_little_endian = True
    fp.is_implicit_VR = False

    version_key = (2, 1)  # file meta information version
    if Tag(version_key) not in meta_dataset:
        meta_dataset.add_new(version_key, 'OB', b"\0\1")

    # Collect every required meta info tag that is absent
    required_tags = [Tag((2, element)) for element in (2, 3, 0x10, 0x12)]
    missing = [tag for tag in required_tags if tag not in meta_dataset]
    if missing:
        raise ValueError(
            "Missing required tags {0} for file meta information".format(
                str(missing)))

    # Placeholder group length; the real value is patched in below
    group_length_key = (2, 0)
    meta_dataset[group_length_key] = DataElement(group_length_key, 'UL', 0)
    # Bytes taken by the group length element itself (explicit VR), which
    # the group length value does not count
    group_length_element_size = 12

    meta_start = fp.tell()
    bytes_written = write_dataset(fp, meta_dataset)
    meta_end = fp.tell()

    # Go back and overwrite the placeholder with the actual group length
    fp.seek(meta_start)
    write_data_element(
        fp,
        DataElement(group_length_key, 'UL',
                    bytes_written - group_length_element_size))

    # Leave the file positioned ready for the remainder of the file
    fp.seek(meta_end)
def test_matching_tags(self):
    """Dataset: key and data_element.tag mismatch raises ValueError.....

    Storing an element whose own tag is (300a,00b2) under the key
    (0010,0010) must be rejected.
    """
    dataset = Dataset()
    mismatched_element = DataElement((0x300a, 0x00b2), "SH", "unit001")

    def store_under_other_key():
        # The key deliberately differs from the element's tag
        dataset[0x10, 0x10] = mismatched_element

    self.assertRaises(ValueError, store_under_other_key)
def read_sequence_item(fp, is_implicit_VR, is_little_endian):
    """Read and return a single sequence item, i.e. a Dataset.

    An 8-byte item header (group, element, 4-byte length) is read from the
    current position of ``fp``, then the item contents are parsed with
    read_dataset().

    fp -- file-like object positioned at the start of an item header
    is_implicit_VR -- passed through to read_dataset()
    is_little_endian -- selects the byte order used to unpack the header,
        and is passed through to read_dataset()

    Returns the item as a Dataset, or None when the header is the Sequence
    Delimiter tag (no more items). An item of undefined length gets
    ``is_undefined_length_sequence_item = True`` set on the Dataset.

    Raises IOError if the 8-byte header cannot be read and unpacked.
    """
    tag_length_format = "<HHL" if is_little_endian else ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate
        raise IOError("No tag to read at file position %05x" % fp.tell())

    tag = (group, element)
    if tag == SequenceDelimiterTag:  # No more items, time to stop reading
        logger.debug("%08x: %s" % (fp.tell() - 8, "End of Sequence"))
        if length != 0:
            logger.warning("Expected 0x00000000 after delimiter, found 0x%x, at position 0x%x" % (length, fp.tell() - 4))
        return None

    if tag != ItemTag:
        # Only warn; the contents are still read as an item below
        logger.warning("Expected sequence item with tag %s at file position 0x%x" % (ItemTag, fp.tell() - 4))
    else:
        logger.debug("%08x: %s Found Item tag (start of item)" % (fp.tell() - 4, bytes2hex(bytes_read)))

    # 0xFFFFFFFF is the DICOM "undefined length" marker
    if length == 0xFFFFFFFF:
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None)
        ds.is_undefined_length_sequence_item = True
    else:
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, length)
    logger.debug("%08x: Finished sequence item" % fp.tell())
    return ds
def testUpdate(self):
    """Dataset: update() method works with tag or name...................

    One entry is keyed by DICOM name with a plain value, the other by tag
    with a ready-made DataElement.
    """
    dataset = self.dummy_dataset()
    tag_key = (0x10, 0x12)
    updates = {
        'PatientName': 'John',
        tag_key: DataElement(tag_key, 'PN', 'Johnny'),
    }
    dataset.update(updates)

    name_value = dataset[0x10, 0x10].value
    self.assertEqual(name_value, 'John', "named data_element not set")
    tag_value = dataset[0x10, 0x12].value
    self.assertEqual(tag_value, 'Johnny', "set by tag failed")
def __setattr__(self, name, value):
    """Intercept any attempt to set a value for an instance attribute.

    If tag_for_name() maps ``name`` to a tag, the value is stored in the
    data_element for that tag: a new DataElement is created (with the VR
    from dictionaryVR()) when this instance does not hold the tag yet,
    otherwise the existing data_element has its value replaced.

    Else, an ordinary instance (python) attribute is set, as any other
    class would do.
    """
    tag = tag_for_name(name)
    if tag is None:
        # name not in dicom dictionary - plain non-dicom instance attribute
        # XXX note if user mis-spells a dicom data_element - no error!!!
        self.__dict__[name] = value
        return

    if tag in self:
        # already have this data_element, just change its value
        element = self[tag]
        element.value = value
    else:
        # don't have this tag yet -> create the data_element instance
        element = DataElement(tag, dictionaryVR(tag), value)

    # Either way, (re)store the data_element in this dict
    self[tag] = element
def test_empty_AT(self):
    """Write empty AT correctly..........

    An AT element with an empty value list must be written as the tag
    followed by a zero length and no value bytes.
    """
    # Was issue 74
    data_elem = DataElement(0x00280009, "AT", [])
    expected = hex2bytes((
        " 28 00 09 00"  # (0028,0009) Frame Increment Pointer
        " 00 00 00 00"  # length 0
    ))
    write_data_element(self.f1, data_elem)
    got = self.f1.parent.getvalue()
    # Single descriptive failure message; it used to be overwritten by two
    # leftover debugging assignments before reaching assertEqual
    msg = ("Did not write zero-length AT value correctly. "
           "Expected %r, got %r") % (bytes2hex(expected), bytes2hex(got))
    self.assertEqual(expected, got, msg)
def run(self, ident_dir, clean_dir):
    """Anonymize every DICOM file found under ``ident_dir``.

    Walks ``ident_dir`` (files whose name starts with '.' are skipped),
    reads each file, anonymizes it with self.anonymize() and saves the
    result under the directory given by self.destination().

    Files that cannot be parsed as DICOM, that self.check_quarantine()
    flags, or for which self.anonymize() raises ValueError are handed to
    self.quarantine_file() and processing continues with the next file.

    ident_dir -- root directory of the identified (source) files
    clean_dir -- root directory passed to self.destination() for output

    Returns False, after calling self.close_all(), as soon as a file
    cannot be read or written because of an IOError. The code visible here
    has no explicit return value on the success path.
    """
    # Get first date for tags set in relative_dates
    date_adjust = None
    audit_date_correct = None
    if self.relative_dates is not None:
        first_dates = self.get_first_date(ident_dir, self.relative_dates)
        date_adjust = {
            tag: first_date - datetime(1970, 1, 1)
            for tag, first_date in first_dates.items()
        }

    for root, _, files in os.walk(ident_dir):
        for filename in files:
            if filename.startswith('.'):
                continue
            source_path = os.path.join(root, filename)
            try:
                ds = dicom.read_file(source_path)
            except IOError:
                logger.error('Error reading file %s' % source_path)
                self.close_all()
                return False
            except InvalidDicomError:  # DICOM formatting error
                self.quarantine_file(source_path, ident_dir,
                                     'Could not read DICOM file.')
                continue

            move, reason = self.check_quarantine(ds)
            if move:
                self.quarantine_file(source_path, ident_dir, reason)
                continue

            # Store adjusted dates for recovery
            obfusc_dates = None
            if self.relative_dates is not None:
                obfusc_dates = {
                    tag: datetime.strptime(ds[tag].value, '%Y%m%d') -
                    date_adjust[tag]
                    for tag in self.relative_dates
                }

            # Keep CSA Headers
            csa_headers = dict()
            if self.keep_csa_headers and (0x29, 0x10) in ds:
                csa_headers[(0x29, 0x10)] = ds[(0x29, 0x10)]
                for offset in (0x10, 0x20):
                    csa_tag = (0x29, (0x10 * 0x0100) + offset)
                    # Either element may be absent even when (0029,0010)
                    # is present; keep only what is actually there
                    # instead of failing with KeyError
                    if csa_tag in ds:
                        csa_headers[csa_tag] = ds[csa_tag]

            destination_dir = self.destination(source_path, clean_dir,
                                               ident_dir)
            if not os.path.exists(destination_dir):
                os.makedirs(destination_dir)

            try:
                ds, study_pk = self.anonymize(ds)
            except ValueError as e:
                self.quarantine_file(
                    source_path, ident_dir,
                    'Error running anonymize function. There may be a '
                    'DICOM element value that does not match the specified'
                    ' Value Representation (VR). Error was: %s' % e)
                continue

            # Recover relative dates
            if self.relative_dates is not None:
                for tag in self.relative_dates:
                    relative_date = obfusc_dates[tag].strftime('%Y%m%d')
                    # Audit only when the study differs from the one
                    # recorded after the previous file
                    if audit_date_correct != study_pk and tag in AUDIT:
                        self.audit.update(ds[tag], relative_date, study_pk)
                    ds[tag].value = relative_date
                audit_date_correct = study_pk

            # Restore CSA Header
            for tag in csa_headers:
                ds[tag] = csa_headers[tag]

            # Set Patient Identity Removed to YES
            t = Tag((0x12, 0x62))
            ds[t] = DataElement(t, 'CS', 'YES')

            # Set the De-identification method code sequence
            method_ds = Dataset()
            t = dicom.tag.Tag((0x8, 0x102))
            if self.profile == 'clean':
                method_ds[t] = DataElement(
                    t, 'DS', MultiValue(DS, ['113100', '113105']))
            else:
                method_ds[t] = DataElement(t, 'DS',
                                           MultiValue(DS, ['113100']))
            t = dicom.tag.Tag((0x12, 0x64))
            ds[t] = DataElement(t, 'SQ', Sequence([method_ds]))

            out_filename = (ds[SOP_INSTANCE_UID].value
                            if self.rename else filename)
            clean_name = os.path.join(destination_dir, out_filename)
            try:
                ds.save_as(clean_name)
            except IOError:
                logger.error('Error writing file %s' % clean_name)
                self.close_all()
                return False
def testBackslash(self):
    """DataElement: String with '\\' sets multi-valued data_element."""
    # Raw string keeps the two backslashes as literal value separators
    backslash_separated = r"42.1\42.2\42.3"
    element = DataElement((1, 2), "DS", backslash_separated)
    self.assertEqual(element.VM, 3, "Did not get a mult-valued value")
def setUp(self):
    """Create the DataElement fixtures: one SH, one IS, two DS."""
    tag = (1, 2)  # every fixture uses the same tag
    self.data_elementSH = DataElement(tag, "SH", "hello")
    self.data_elementIS = DataElement(tag, "IS", "42")
    self.data_elementDS = DataElement(tag, "DS", "42.00001")
    multi_values = ['42.1', '42.2', '42.3']
    self.data_elementMulti = DataElement(tag, "DS", multi_values)
def add_new(self, tag, VR, value):
    """Create a new DataElement instance and add it to this Dataset.

    tag, VR, value -- passed unchanged to the DataElement constructor
    """
    new_element = DataElement(tag, VR, value)
    # Key on the element's own tag, since DataElement verified it
    verified_tag = new_element.tag
    self[verified_tag] = new_element
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None,
                           encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements.

    fp -- file-like object positioned at the start of a data element
    is_implicit_VR -- True if the data is implicit VR, else explicit VR
    is_little_endian -- True if transfer syntax is little endian; else False
    stop_when -- None, or a callable taking (tag, VR, length); when it
        returns true the file is rewound to the start of that data element
        and the generator ends
    defer_size -- None, or a length above which a defined-length value is
        skipped rather than read (the element is yielded with value None)
    encoding -- encoding handed to read_sequence() for undefined length
        sequences; replaced when a (0008,0005) Specific Character Set
        element is read

    Yields a RawDataElement(tag, VR, length, value, value_tell,
    is_implicit_VR, is_little_endian) for each element, where:
    VR -- None if implicit VR, otherwise the VR read from the file
    length -- the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffff)
    value -- the raw bytes from the DICOM file (not parsed into python types)
    An undefined length sequence is parsed here and yielded as a
    DataElement instead.

    The generator ends when fewer than 8 bytes remain to be read.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    endian_chr = "<" if is_little_endian else ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = dicom.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            # At end of file. A plain return ends the generator; raising
            # StopIteration here turns into RuntimeError under PEP 479.
            return
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp_tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if in_py3:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = " "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (
                        value_tell, bytes2hex(value[:12]), dotdot,
                        value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            # NOTE(review): value is None here if this element was deferred
            # above -- confirm convert_string copes with that
            if tag == (0x08, 0x05):
                from dicom.values import convert_string
                encoding = convert_string(value, is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator for use with
                # future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set, then store it
                if tag == (0x08, 0x05):
                    from dicom.values import convert_string
                    encoding = convert_string(value, is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use with
                    # future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
def testBackslash(self):
    """DataElement: Passing string with '\\' sets multi-valued data_element."""
    # note r" to avoid \ as escape chr
    raw_value = r"42.1\42.2\42.3"
    multi_element = DataElement((1, 2), "DS", raw_value)
    value_multiplicity = multi_element.VM
    self.assertEqual(value_multiplicity, 3, "Did not get a mult-valued value")
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None):
    """Create a generator to efficiently return the raw data elements.

    fp -- file-like object positioned at the start of a data element
    is_implicit_VR -- True if the data is implicit VR, else explicit VR
    is_little_endian -- True if transfer syntax is little endian; else False
    stop_when -- None, or a callable taking (tag, VR, length); when it
        returns true the file is rewound to the start of that data element
        and the generator ends
    defer_size -- None, or a length above which a defined-length value is
        skipped rather than read (the element is yielded with value None)

    Yields a RawDataElement(tag, VR, length, value, value_tell,
    is_implicit_VR, is_little_endian) for each element, where:
    VR -- None if implicit VR, otherwise the VR read from the file
    length -- the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffff)
    value -- the raw bytes from the DICOM file (not parsed into python types)
    An undefined length sequence is parsed here and yielded as a
    DataElement instead.

    The generator ends when fewer than 8 bytes remain to be read.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = dicom.debugging

    # Explicit VRs whose 8-byte header is followed by a 4-byte length
    special_VRs = ('OB', 'OW', 'OF', 'SQ', 'UN', 'UT')

    endian_chr = "<" if is_little_endian else ">"
    if is_implicit_VR:
        # XXX in python >=2.5, can do struct.Struct to save time
        unpack_format = endian_chr + "HHL"
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        unpack_format = endian_chr + "HH2sH"
        extra_length_format = endian_chr + "L"  # for special VRs

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            # At end of file. A plain return ends the generator; raising
            # StopIteration here turns into RuntimeError under PEP 479.
            return
        if debugging:
            debug_msg = "%08x: %s" % (fp_tell() - 8, bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset each time -- may have looked up on last iteration
            # (e.g. SQ)
            VR = None
            group, elem, length = unpack(unpack_format, bytes_read)
        else:  # explicit VR
            group, elem, VR, length = unpack(unpack_format, bytes_read)
            if VR in special_VRs:
                bytes_read = fp_read(4)
                length = unpack(extra_length_format, bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Now are positioned to read the value, but may not want to --
        # check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in special_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred read by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = " "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (
                        value_tell, bytes2hex(value[:12]), dotdot,
                        value[:12], dotdot))
            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # ... unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    pass
            if VR == 'SQ':
                if debugging:
                    logger_debug("%08x: Reading and parsing undefined length sequence" % fp_tell())
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length)
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)