def __call__(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> bool:
    """\
    Replace the UID(s) held by a single DataElement with mapped substitutes.

    Parameters
    ----------
    dataset : pydicom.dataset.Dataset
        The dataset to operate on. It is not consulted by this method.

    data_element : pydicom.dataset.DataElement
        The current element. It is rewritten only when its VR is UI, it
        has a non-empty value, its keyword does not end in "ClassUID" and
        it is not TransferSyntaxUID. For a multi-valued element each item
        is looked up in ``self._ui_map`` independently.

    Returns
    -------
    True if the element was anonymized, or False if not.
    """
    # Guard clauses, evaluated in the same order as a single ``or`` chain.
    if data_element.VR != "UI":
        return False
    if not data_element.value:
        return False
    if pydicom.datadict.keyword_for_tag(data_element.tag).endswith("ClassUID"):
        return False
    if data_element.tag == pydicom.datadict.tag_for_keyword("TransferSyntaxUID"):
        return False

    current = data_element.value
    if isinstance(current, pydicom.multival.MultiValue):
        data_element.value = [self._ui_map[uid] for uid in current]
    else:
        data_element.value = self._ui_map[current]
    return True
def datasetFromJSON(data: dict):
    """Build a ``Dataset`` from a DICOM-JSON style dictionary.

    Parameters
    ----------
    data : dict
        Maps tag strings to entries holding a ``'vr'`` key and, optionally,
        a ``'Value'`` list.

    Returns
    -------
    Dataset
        One element per key of ``data``. Sequence entries (``'vr' == 'SQ'``)
        are converted recursively. For an entry whose first value is a dict
        with an ``'Alphabetic'`` key, only that first alphabetic string is
        kept. A one-item ``'Value'`` list is unwrapped to its single item;
        a missing or empty ``'Value'`` yields an element with value ``''``.

    Raises
    ------
    ValueError
        If an entry cannot be converted. The offending tag is named in the
        message and the underlying error is chained as the cause.
    """
    ds = Dataset()
    for key, entry in data.items():
        tag = Tag(key)
        try:
            vr = entry['vr']
            if 'Value' not in entry:
                ds[tag] = DataElement(tag, vr, '')
                continue

            values = entry['Value']
            if vr == 'SQ':
                items = [datasetFromJSON(item) for item in values]
                ds[tag] = DataElement(tag, vr, Sequence(items))
            elif not values:
                # An empty 'Value' list carries no data; treat it like a
                # missing 'Value' instead of failing on values[0].
                ds[tag] = DataElement(tag, vr, '')
            elif isinstance(values[0], dict) and 'Alphabetic' in values[0]:
                ds[tag] = DataElement(tag, vr, values[0]['Alphabetic'])
            elif len(values) > 1:
                ds[tag] = DataElement(tag, vr, values)
            else:
                ds[tag] = DataElement(tag, vr, values[0])
        except Exception as err:
            # Surface the failure to the caller with enough context to
            # locate the bad entry; never swallow it.
            raise ValueError(
                f"Could not convert JSON entry for tag {key!r}"
            ) from err
    return ds
def create_new_uids(folder, dicom_tag, dry_run=True):
    """Search folder (and subfolders) for DICOM files and create new UIDs for specified DICOM tag

    Files that share the same original UID receive the same replacement
    UID. Files that do not contain the tag get the tag added with a freshly
    generated UID.

    Parameters
    ----------
    folder : Folder to (recursively) search for ``*.dcm`` files
    dicom_tag : DICOM tag specifying which UID to create new UIDs for,
        given as a hexadecimal string (format as "0x0020000D")
    dry_run : Dry run, (True)/False. When True nothing is written; the path
        of each file that would be updated is printed instead.

    Raises
    ------
    ValueError
        If ``dicom_tag`` is not a valid hexadecimal string. Raised before
        any file is read.
    """
    # Parse the tag once: a malformed tag fails before any file is touched
    # and the conversion is not repeated for every file.
    tag = int(dicom_tag, 16)

    # find dcm files
    dcm_files = glob.glob(os.path.join(folder, "**", "*.dcm"), recursive=True)

    # original uid -> replacement uid, shared across all files
    uid_map = {}
    for dcm_file in dcm_files:
        dcm = pydicom.read_file(dcm_file)
        if tag in dcm:
            # reuse the replacement already chosen for this original uid
            old_uid = dcm[tag].value
            if old_uid not in uid_map:
                uid_map[old_uid] = pydicom.uid.generate_uid()
            new_uid = uid_map[old_uid]
        else:
            # tag missing: add it with a brand new uid
            new_uid = pydicom.uid.generate_uid()
        dcm[tag] = DataElement(tag, "UI", new_uid)

        if dry_run:
            print("Uids to update")
            print(dcm_file)
        else:
            # write dcm file to disk
            pydicom.write_file(dcm_file, dcm, write_like_original=False)
def update_uids_cb(ds: Dataset, elem: DataElement) -> None:
    """Callback that regenerates every UI-valued element except `SOPClassUID`.

    Each existing UID is combined with ``add_uid_entropy`` and passed to
    ``generate_uid`` together with ``uid_prefix``; multi-valued elements
    are regenerated item by item.
    """
    if elem.VR != "UI" or elem.keyword == "SOPClassUID":
        return

    if elem.VM > 1:
        regenerated = [
            generate_uid(uid_prefix, [x] + add_uid_entropy)  # type: ignore
            for x in elem.value
        ]
    else:
        regenerated = generate_uid(
            uid_prefix, [elem.value] + add_uid_entropy  # type: ignore
        )
    elem.value = regenerated
def test_add_private_entries(self):
    """dicom_dictionary: add and use a dict of new dictionary entries"""
    creator = 'ACME 3.1'
    add_private_dict_entries(
        creator,
        {
            0x10011001: ('SH', '1', "Test One",),
            0x10011002: ('DS', '3', "Test Two", '', 'TestTwo'),
        },
    )

    # Private creator element first, then the two private elements.
    ds = Dataset()
    for tag, vr, raw in (
        (0x10010010, 'LO', creator),
        (0x10011001, 'SH', 'Test'),
        (0x10011002, 'DS', '1\\2\\3'),
    ):
        ds[tag] = DataElement(tag, vr, raw)

    assert 'Test' == ds[0x10011001].value
    assert [1, 2, 3] == ds[0x10011002].value
def __call__(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> bool:
    """\
    Potentially anonymize a single DataElement, replacing its value with
    one produced by the value factory registered for its tag.

    Parameters
    ----------
    dataset : pydicom.dataset.Dataset
        The dataset to operate on. It is not consulted by this method.

    data_element : pydicom.dataset.DataElement
        The current element. It is handled when ``self._value_factories``
        has an entry for its tag (presumably PatientAddress,
        RegionOfResidence and CountryOfResidence -- confirm against the
        constructor). The value is replaced only when it is non-empty.

    Returns
    -------
    True if a value factory exists for the element's tag (even when the
    element is empty and therefore left unchanged), or False otherwise.
    """
    make_value = self._value_factories.get(data_element.tag, None)
    if make_value:
        if data_element.value:
            data_element.value = make_value(data_element.value)
        return True
    return False
def __call__(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> bool:
    """\
    Potentially anonymize a single ID-bearing DataElement.

    Three cases are tried in order:

    1. the tag is one of ``self.id_tags``: the value is replaced via
       ``self._replace_id``;
    2. ``self._anonymize_mitra_global_patient_id`` reports that it
       handled the element;
    3. the tag equals ``self.issuer_tag`` and the element has a value:
       the value becomes ``"DICOGNITO"``.

    Parameters
    ----------
    dataset : pydicom.dataset.Dataset
        The dataset to operate on.

    data_element : pydicom.dataset.DataElement
        The current element.

    Returns
    -------
    True if the element was anonymized, or False if not.
    """
    tag = data_element.tag
    if tag in self.id_tags:
        self._replace_id(data_element)
        handled = True
    elif self._anonymize_mitra_global_patient_id(dataset, data_element):
        handled = True
    elif tag == self.issuer_tag and data_element.value:
        data_element.value = "DICOGNITO"
        handled = True
    else:
        handled = False
    return handled
def set_dicom_attribute(self, keyword, value):
    """Set one DICOM attribute of ``self.dataset`` from a keyword/value pair.

    A list given for a sequence (VR "SQ") attribute is first converted with
    ``generate_sequence``. An attribute already in the dataset has its value
    replaced; otherwise a new data element is created. An unknown keyword
    only prints a message.

    Parameters
    ----------
    keyword : Name of DICOM tag to set value for
    value : Value to set
    """
    tag = tag_for_keyword(keyword)
    if not tag:
        print("Keyword", keyword, "is an unknown DICOM attribute")
        return

    already_present = keyword in self.dataset
    vr = dictionary_VR(tag)
    if vr == "SQ" and isinstance(value, list):
        value = generate_sequence(keyword, value)

    if already_present:
        self.dataset[tag].value = value
    else:
        self.dataset[tag] = DataElement(tag, vr, value)
def test_default_for_writing_validation_mode(self):
    """Saving a dataset holding this SH value must emit a UserWarning."""
    ds = Dataset()
    ds.is_implicit_VR = True
    ds.is_little_endian = True
    ds.SpecificCharacterSet = "ISO_IR 192"

    accession = DataElement(0x00080050, "SH", "洪^吉洞=홍^길동")
    ds.add(accession)

    with pytest.warns(UserWarning):
        buffer = DicomBytesIO()
        ds.save_as(buffer)
def test_write_invalid_values(self, future_setter):
    """With the ``future_setter`` fixture active, saving this SH value must raise ValueError."""
    ds = Dataset()
    ds.is_implicit_VR = True
    ds.is_little_endian = True
    ds.SpecificCharacterSet = "ISO_IR 192"

    accession = DataElement(0x00080050, "SH", "洪^吉洞=홍^길동")
    ds.add(accession)

    with pytest.raises(ValueError):
        buffer = DicomBytesIO()
        ds.save_as(buffer)
def add_patient_age(dcm: Dicom, age: Optional[int] = None) -> Optional[int]:
    r"""Attempts to add the specified patient age to a DICOM object.

    An age can be determined using the following order of precedence:
        1. Reading an existing age tag in the DICOM metadata
        2. Computing an age using the date of birth and capture date metadata
        3. Adding the age passed as an ``age`` parameter.

    If ``age`` is given and ``age`` doesn't match the age determined via
    DICOM metadata, the age will not be updated and a message is printed.

    An existing age tag that contains no digits is ignored. The date of
    birth is parsed as ``YYYYMMDD`` (the DICOM ``DA`` format) first and as
    ``MMDDYYYY`` second.

    Args:
        dcm (:class:`pydicom.FileDataset`):
            The DICOM object to add an age to

        age (int, optional):
            The age to attempt to add to ``dcm``.

    Returns:
        The determined patient age in years, or ``None`` if no age could be
        determined.
    """
    capture_date = get_date(dcm)
    dcm_age: Optional[int] = None

    if AGE_TAG in dcm:
        # NOTE(review): the digits are read as years whatever the unit
        # suffix of the age string is (e.g. "006M") -- confirm acceptable.
        digits = re.sub("[^0-9]", "", str(dcm[AGE_TAG].value))
        if digits:
            dcm_age = int(digits)

    if dcm_age is None and DOB in dcm and capture_date is not None:
        dob_text = dcm[DOB].value
        # Standard DICOM dates are YYYYMMDD; MMDDYYYY is kept as a fallback
        # for data written in that layout.
        for date_format in ("%Y%m%d", "%m%d%Y"):
            try:
                dob = dt.datetime.strptime(dob_text, date_format)
            except ValueError:
                continue
            dcm_age = round((capture_date - dob).days / 365.25)
            break

    if dcm_age is None:
        # Nothing usable in the metadata: fall back to the caller's value.
        dcm_age = age
    if dcm_age is None:
        return None

    if age is not None and dcm_age != age:
        print(f"Computed age {dcm_age} doesn't match given age {age}. " "Not updating patient age.")
    else:
        dcm[AGE_TAG] = DataElement(AGE_TAG, "AS", f"{dcm_age:03d}Y")
    return dcm_age
def test_from_dicom(self, dicom_object, val, expected):
    """``PhotometricInterpretation.from_dicom`` maps the tag value (or its absence) to ``expected``."""
    if val is None:
        # Exercise the "tag missing" path.
        del dicom_object[Tag.PhotometricInterpretation]
    else:
        element = DataElement(Tag.PhotometricInterpretation, "CS", val)
        dicom_object[Tag.PhotometricInterpretation] = element

    assert PhotometricInterpretation.from_dicom(dicom_object) == expected
def get_sequence_item(self, value: SQValueType) -> "Dataset":
    """Return a sequence item for the JSON dict `value`.

    Parameters
    ----------
    value : dict or None
        The sequence item from the JSON entry. ``None`` produces an
        empty dataset.

    Returns
    -------
    dataset_class
        The decoded dataset item.

    Raises
    ------
    KeyError
        If the "vr" key is missing for a contained element
    """
    from pydicom import DataElement
    from pydicom.dataelem import empty_value_for_VR

    item = self.dataset_class()
    entries = {} if value is None else value
    for key, val in entries.items():
        if 'vr' not in val:
            raise KeyError(f"Data element '{self.tag}' must have key 'vr'")
        vr = val['vr']

        # Value-carrying keys that this entry actually provides.
        unique_value_keys = tuple(set(val.keys()) & set(JSON_VALUE_KEYS))
        if unique_value_keys:
            value_key = unique_value_keys[0]
            elem = DataElement.from_json(
                self.dataset_class,
                key,
                vr,
                val[value_key],
                value_key,
                self.bulk_data_element_handler,
            )
        else:
            # data element with no value
            elem = DataElement(
                tag=int(key, 16), value=empty_value_for_VR(vr), VR=vr
            )
        item.add(elem)
    return item
def _anonymize_mitra_global_patient_id(
    self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement
) -> bool:
    """\
    Anonymize a private group-0x0031 element whose private creator is
    "MITRA LINKED ATTRIBUTES 1.0".

    Parameters
    ----------
    dataset : pydicom.dataset.Dataset
        The dataset holding the element; consulted for the private
        creator element of the element's block.

    data_element : pydicom.dataset.DataElement
        The current element.

    Returns
    -------
    True if the element's ID was replaced, or False if the element is not
    a candidate, its private creator element is absent, or the creator
    is not the MITRA one.
    """
    tag = data_element.tag
    # NOTE(review): `% 0x0020 == 0` accepts every element number that is a
    # multiple of 0x20, not only (0031,xx20) -- confirm this is intended.
    if tag.group != 0x0031 or tag.element % 0x0020 != 0:
        return False

    private_tag_group = tag.element >> 8
    creator_tag = (0x0031 << 16) + private_tag_group
    # Without its private creator element the block cannot be attributed
    # to MITRA; leave the element alone rather than fail on the lookup.
    if creator_tag not in dataset:
        return False
    if dataset[creator_tag].value != "MITRA LINKED ATTRIBUTES 1.0":
        return False

    self._replace_id(data_element)
    # The replacement is stored as bytes (presumably because the private
    # element has a byte-valued VR -- confirm).
    data_element.value = data_element.value.encode()
    return True
def anonymize_institution_name(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> None:
    """Replace an institution name and set a matching InstitutionAddress.

    Region, street address and country are all obtained from
    ``self.address_anonymizer`` using the element's original value. The
    new name is built from the region and the street address with its
    first space-separated token removed.
    """
    original = data_element.value
    anonymizer = self.address_anonymizer

    region = anonymizer.get_region(original)
    street_address = anonymizer.get_street_address(original)
    # Everything after the first space of the street address.
    street = street_address.split(" ", 1)[1]
    country = anonymizer.get_country(original)

    address_parts = [street_address, region, country]
    dataset.InstitutionAddress = ", ".join(address_parts)
    data_element.value = region + "'S " + street + " CLINIC"
def get_sequence_item(self, value):
    """Return a sequence item for the JSON dict `value`.

    Parameters
    ----------
    value : dict or None
        The sequence item from the JSON entry. A falsy value produces
        an empty dataset.

    Returns
    -------
    dataset_class
        The decoded dataset item.

    Raises
    ------
    KeyError
        If the "vr" key is missing for a contained element
    """
    item = self.dataset_class()
    if not value:
        return item

    from pydicom import DataElement
    from pydicom.dataelem import empty_value_for_VR

    for key, val in value.items():
        if 'vr' not in val:
            raise KeyError(
                'Data element "{}" must have key "vr".'.format(self.tag)
            )
        vr = val['vr']

        # Value-carrying keys that this entry actually provides.
        unique_value_keys = tuple(set(val.keys()) & set(JSON_VALUE_KEYS))
        if unique_value_keys:
            value_key = unique_value_keys[0]
            elem = DataElement.from_json(
                self.dataset_class, key, vr, val[value_key], value_key
            )
        else:
            # data element with no value
            elem = DataElement(
                tag=int(key, 16), value=empty_value_for_VR(vr), VR=vr
            )
        item.add(elem)
    return item
def _anonymize_date_and_time(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> None:
    """Shift a date element, and its companion time element, by ``self.offset``.

    The companion element is looked up under the date element's keyword
    with "Date" replaced by "Time". Dates and times are paired by
    position. Only the hour part of a time takes part in the shift; the
    remaining characters of each time are carried over unchanged. Both
    elements are written back as backslash-joined strings.
    """
    raw_dates = data_element.value
    if isinstance(raw_dates, pydicom.multival.MultiValue):
        dates = list(raw_dates)
    else:
        dates = [raw_dates]

    times = []
    time_element = None
    time_keyword = data_element.keyword.replace("Date", "Time")
    if time_keyword in dataset:
        time_element = dataset.data_element(time_keyword)
        raw_times = time_element.value  # type: ignore[union-attr]
        if raw_times:
            if isinstance(raw_times, pydicom.multival.MultiValue):
                times = list(raw_times)
            else:
                times = [raw_times]

    shifted_dates = []
    shifted_times = []
    for index, date_text in enumerate(dates):
        # Trim the format to the precision present in the value
        # (e.g. "%Y%m" for a six-character date).
        date_format = "%Y%m%d"[: len(date_text) - 2]
        day = datetime.datetime.strptime(date_text, date_format).date()

        time_text = times[index] if index < len(times) else ""
        if time_text:
            hour = datetime.datetime.strptime(time_text[:2], "%H").time()
        else:
            hour = datetime.time()

        shifted = datetime.datetime.combine(day, hour) + self.offset
        shifted_dates.append(shifted.strftime(date_format))
        # New hour followed by whatever came after the hour originally.
        shifted_times.append(shifted.strftime("%H") + time_text[2:])

    data_element.value = "\\".join(shifted_dates)
    if times:
        time_element.value = "\\".join(shifted_times)  # type: ignore[union-attr]
def copy_additional_dicom_attributes(self, dataset_to_copy_from,
                                     dataset_to_copy_to,
                                     additional_dicom_attributes):
    """Copy the listed DICOM attributes from one dataset to another.

    An attribute missing from the source is written to the target as an
    empty element, replacing whatever the target held for that tag.

    Parameters
    ----------
    dataset_to_copy_from : Dataset to copy DICOM attributes from
    dataset_to_copy_to : Dataset to copy DICOM attributes to
    additional_dicom_attributes : List of additional DICOM attributes to copy
    """
    for keyword in additional_dicom_attributes:
        tag = tag_for_keyword(keyword)
        if keyword in dataset_to_copy_from:
            element = dataset_to_copy_from[tag]
        else:
            element = DataElement(tag, dictionary_VR(tag), "")
        dataset_to_copy_to[tag] = element
def copy_required_dicom_attributes(self, dataset_to_copy_from,
                                   dataset_to_copy_to):
    """Copy this module's required DICOM attributes between datasets.

    For every keyword in ``self.required_dicom_attributes``: a value
    present in the source is copied; otherwise a value already in the
    target is kept; otherwise an empty element is created in the target.

    Parameters
    ----------
    dataset_to_copy_from : Dataset to copy DICOM attributes from
    dataset_to_copy_to : Dataset to copy DICOM attributes to
    """
    for keyword in self.required_dicom_attributes:
        tag = tag_for_keyword(keyword)
        if keyword in dataset_to_copy_from:
            dataset_to_copy_to[tag] = dataset_to_copy_from[tag]
            continue
        if keyword not in dataset_to_copy_to:
            dataset_to_copy_to[tag] = DataElement(tag, dictionary_VR(tag), "")
def _anonymize_datetime(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> None:
    """Shift every value of a date-time element by ``self.offset``.

    Only the leading characters, up to and including the hour, are parsed
    and shifted; anything after them is appended unchanged. The result is
    written back as a backslash-joined string.
    """
    raw = data_element.value
    if isinstance(raw, pydicom.multival.MultiValue):
        originals = list(raw)
    else:
        originals = [raw]

    shifted_values = []
    for original in originals:
        # Trim the format to the precision present in the value.
        fmt = "%Y%m%d%H"[: len(original) - 2]
        parsed = datetime.datetime.strptime(original[:10], fmt)
        shifted = (parsed + self.offset).strftime(fmt)
        # Keep the untouched tail of the original value.
        shifted_values.append(shifted + original[len(shifted):])

    data_element.value = "\\".join(shifted_values)
def update_and_insert_additional_DICOM_attributes_in_ds(
        ds, keyword_and_value_dict):
    """Apply a keyword -> value mapping to ``ds`` and return ``ds``.

    Unknown keywords are reported with ``print`` and skipped. Values for
    sequence (VR "SQ") attributes are converted with ``generate_sequence``.
    Existing attributes are updated in place; missing ones are created.
    """
    for keyword, raw_value in keyword_and_value_dict.items():
        tag = tag_for_keyword(keyword)
        if tag is None:
            print("Unknown DICOM attribute:", keyword)
            continue

        vr = dictionary_VR(tag)
        if vr == "SQ":
            value = generate_sequence(keyword, raw_value)
        else:
            value = raw_value

        if keyword in ds:
            ds[tag].value = value
        else:
            ds[tag] = DataElement(tag, vr, value)
    return ds
def __call__(self, dataset: pydicom.dataset.Dataset, data_element: pydicom.DataElement) -> bool:
    """\
    Potentially anonymize a single DataElement, replacing its value with
    self.value.

    Parameters
    ----------
    dataset : pydicom.dataset.Dataset
        The dataset to operate on. It is not consulted by this method.

    data_element : pydicom.dataset.DataElement
        The current element. Its value is replaced when its tag equals
        ``self.tag``.

    Returns
    -------
    True if the element was anonymized, or False if not.
    """
    is_target = data_element.tag == self.tag
    if not is_target:
        return False
    data_element.value = self.value
    return True
def create_attribute_list(attr):
    """Return a Dataset holding one element per tag in ``attr``.

    Each element takes its VR from the DICOM dictionary and is given the
    value "UIH".
    """
    result = Dataset()
    for tag in attr:
        result.add(DataElement(tag, dictionary_VR(tag), "UIH"))
    return result
def add_density(dcm: Dicom, density: float) -> str:
    """Store ``density`` under the DENSITY tag unless one is already present.

    Returns the density now recorded in ``dcm`` as a string: the existing
    value when the tag was already there, otherwise ``density``.
    """
    if DENSITY in dcm.keys():
        return str(dcm[DENSITY].value)

    dcm[DENSITY] = DataElement(DENSITY, "FL", density)
    return str(density)
def _replace_id(self, data_element: pydicom.DataElement) -> None:
    """Replace the element's ID(s) with the result of ``self._new_id``.

    Each item of a multi-valued element is replaced separately.
    """
    current = data_element.value
    if not isinstance(current, pydicom.multival.MultiValue):
        data_element.value = self._new_id(current)
        return
    data_element.value = [self._new_id(item) for item in current]
def anonymize_department_name(
    self,
    dataset: pydicom.dataset.Dataset,
    data_element: pydicom.DataElement,
) -> None:
    """Overwrite the element's value with the fixed text "RADIOLOGY".

    ``dataset`` is accepted but not used.
    """
    replacement = "RADIOLOGY"
    data_element.value = replacement
class TestReadDicomImage:
    """Tests for ``read_dicom_image``."""

    def test_shape(self, dicom_object):
        """The image comes back as a C x H x W array of shape 1 x 128 x 128."""
        array = read_dicom_image(dicom_object)
        assert isinstance(array, np.ndarray)
        assert array.ndim == 3, "dims C x H x W"
        assert array.shape[0] == 1, "channel dim size == 1"
        assert array.shape[1] == 128, "height dim size == 128"
        assert array.shape[2] == 128, "width dim size == 128"

    def test_array_dtype(self, dicom_object):
        """The returned array has dtype int16."""
        array = read_dicom_image(dicom_object)
        assert isinstance(array, np.ndarray)
        assert array.dtype == np.int16

    def test_min_max_values(self, dicom_object):
        """Pixel values of the fixture span 128..2191."""
        array = read_dicom_image(dicom_object)
        assert isinstance(array, np.ndarray)
        assert array.min() == 128, "min pixel value 128"
        assert array.max() == 2191, "max pixel value 2191"

    def test_invalid_TransferSyntaxUID_loose_interpretation(self, dicom_object):
        """Without strict interpretation a wrong TransferSyntaxUID is tolerated."""
        dicom_object.file_meta.TransferSyntaxUID = "1.2.840.10008.1.2.4.90"  # Assign random invalid TransferSyntaxUID
        array = read_dicom_image(dicom_object)
        assert isinstance(array, np.ndarray)
        assert array.min() == 128, "min pixel value 128"
        assert array.max() == 2191, "max pixel value 2191"

    def test_invalid_TransferSyntaxUID_exception(self, dicom_object):
        """With ``strict_interp=True`` a wrong TransferSyntaxUID raises ValueError."""
        dicom_object.file_meta.TransferSyntaxUID = "1.2.840.10008.1.2.4.90"  # Assign random invalid TransferSyntaxUID
        with pytest.raises(ValueError) as e:
            read_dicom_image(dicom_object, strict_interp=True)
        # Check the exception's own message, not the ExceptionInfo wrapper.
        assert "does not appear to be correct" in str(e.value), "The expected exception message was not returned."

    def test_invalid_PixelData(self, dicom_object):
        """Empty PixelData raises ValueError with the documented message."""
        dicom_object.PixelData = b""
        with pytest.raises(ValueError) as e:
            read_dicom_image(dicom_object)
        expected_msg = "Unable to parse the pixel array after trying all possible TransferSyntaxUIDs."
        # Check the exception's own message, not the ExceptionInfo wrapper.
        assert expected_msg in str(e.value), "The expected exception message was not returned."

    @pytest.mark.parametrize("shape_override", [None, (32, 32), (32, 32, 32)])
    def test_stop_before_pixels(self, dicom_object, shape_override):
        """``stop_before_pixels=True`` yields an array of the right shape with different content."""
        np.random.seed(42)
        array1 = read_dicom_image(dicom_object)
        array2 = read_dicom_image(dicom_object, stop_before_pixels=True, shape=shape_override)
        assert isinstance(array1, np.ndarray)
        assert isinstance(array2, np.ndarray)

        if shape_override is None:
            assert not (array1 == array2).all()
            assert array1.shape == array2.shape
        else:
            assert array2.shape == (1,) + shape_override

    @pytest.mark.parametrize(
        "handler",
        [
            KeepVolume(),
            SliceAtLocation(4),
            UniformSample(4, method="count"),
        ],
    )
    def test_volume_handling(self, dicom_object_3d, handler, mocker, transfer_syntax):
        """The volume handler is called exactly once, with the DICOM object."""
        spy = mocker.spy(handler, "__call__")
        F = 8
        dcm = dicom_object_3d(num_frames=F, syntax=transfer_syntax)
        array1 = read_dicom_image(dcm, volume_handler=spy, strict_interp=True)
        spy.assert_called_once()
        assert spy.mock_calls[0].args[0] == dcm, "handler should be called with DICOM object"
        assert array1.ndim < 4 or array1.shape[1] != 1, "3D dim should be squeezed when D=1"

    @pytest.mark.parametrize(
        "apply,center,width",
        [
            pytest.param(True, None, None),
            pytest.param(True, DataElement(WINDOW_CENTER, "DS", 512), None),
            pytest.param(True, None, DataElement(WINDOW_WIDTH, "DS", 512)),
            pytest.param(True, DataElement(WINDOW_CENTER, "DS", 512), DataElement(WINDOW_WIDTH, "DS", 256)),
            pytest.param(True, DataElement(WINDOW_CENTER, "DS", 256), DataElement(WINDOW_WIDTH, "DS", 512)),
            pytest.param(
                True,
                DataElement(WINDOW_CENTER, "DS", [100, 200, 300]),
                DataElement(WINDOW_WIDTH, "DS", [200, 300, 400]),
            ),
            pytest.param(False, DataElement(WINDOW_CENTER, "DS", 512), DataElement(WINDOW_WIDTH, "DS", 256)),
            pytest.param(False, DataElement(WINDOW_CENTER, "DS", 256), DataElement(WINDOW_WIDTH, "DS", 512)),
            pytest.param(
                False,
                DataElement(WINDOW_CENTER, "DS", [100, 200, 300]),
                DataElement(WINDOW_WIDTH, "DS", [200, 300, 400]),
            ),
        ],
    )
    def test_apply_window(self, dicom_object, apply, center, width):
        """``apply_window`` clamps pixels to the window, or leaves them untouched when off."""
        # set metadata
        if center is not None:
            dicom_object[WINDOW_CENTER] = center
        if width is not None:
            dicom_object[WINDOW_WIDTH] = width

        window = Window.from_dicom(dicom_object)
        pixels = read_dicom_image(dicom_object, apply_window=False)
        window_pixels = read_dicom_image(dicom_object, apply_window=apply)

        if center is not None and width is not None and apply:
            assert (window_pixels >= 0).all()
            assert (window_pixels <= window.width).all()
            assert (window_pixels[pixels <= window.lower_bound] == 0).all()
            assert (window_pixels[pixels >= window.upper_bound] == window.upper_bound - window.lower_bound).all()
        elif apply:
            pixels = dicom_object.pixel_array
            assert window_pixels.min() == 0
            # tolerance of 1 here for rounding errors
            assert window_pixels.max() - (pixels.max() - pixels.min()) <= 1
        else:
            assert (window_pixels == pixels).all()
class TestWindow:
    """Tests for ``Window``."""

    @pytest.mark.parametrize(
        "center,width",
        [
            pytest.param(None, None),
            pytest.param(DataElement(WINDOW_CENTER, "DS", 512), None),
            pytest.param(None, DataElement(WINDOW_WIDTH, "DS", 512)),
            pytest.param(DataElement(WINDOW_CENTER, "DS", 512), DataElement(WINDOW_WIDTH, "DS", 256)),
            pytest.param(DataElement(WINDOW_CENTER, "DS", 256), DataElement(WINDOW_WIDTH, "DS", 512)),
            pytest.param(
                DataElement(WINDOW_CENTER, "DS", [100, 200, 300]), DataElement(WINDOW_WIDTH, "DS", [200, 300, 400])
            ),
        ],
    )
    def test_from_dicom(self, dicom_object, center, width):
        """``Window.from_dicom`` uses the window tags when both exist, else the pixel range."""
        if center is not None:
            dicom_object[WINDOW_CENTER] = center
        if width is not None:
            dicom_object[WINDOW_WIDTH] = width

        window = Window.from_dicom(dicom_object)

        if center is not None and width is not None:
            # Resolve the expected value first: writing the conditional
            # inline after ``==`` would only compare in the scalar case.
            expected_center = center.value[0] if isinstance(center.value, Iterable) else center.value
            expected_width = width.value[0] if isinstance(width.value, Iterable) else width.value
            assert window.center == expected_center
            assert window.width == expected_width
        else:
            pixels = dicom_object.pixel_array
            assert window.center == (pixels.max() - pixels.min()) // 2 + pixels.min()
            assert window.width == pixels.max() - pixels.min()

    def test_repr(self):
        """Printing a window must not raise."""
        window = Window(100, 300)
        print(window)

    @pytest.mark.parametrize(
        "center,width,expected",
        [
            pytest.param(200, 100, 150),
            pytest.param(100, 200, 0),
            pytest.param(100, 300, 0),
        ],
    )
    def test_lower_bound(self, center, width, expected):
        """``lower_bound`` matches the expected value for each case."""
        window = Window(center, width)
        assert window.lower_bound == expected

    @pytest.mark.parametrize(
        "center,width,expected",
        [
            pytest.param(200, 100, 250),
            pytest.param(100, 200, 200),
            pytest.param(100, 300, 250),
        ],
    )
    def test_upper_bound(self, center, width, expected):
        """``upper_bound`` matches the expected value for each case."""
        window = Window(center, width)
        assert window.upper_bound == expected

    @pytest.mark.parametrize(
        "center,width",
        [
            pytest.param(200, 100),
            pytest.param(100, 200),
            pytest.param(100, 300),
        ],
    )
    def test_apply(self, center, width):
        """``apply`` clamps pixels into ``[0, upper_bound - lower_bound]``."""
        np.random.seed(42)
        pixels = np.random.random(100).reshape(10, 10)
        pixels = (pixels * 1024).astype(np.uint16)
        window = Window(center, width)
        window_pixels = window.apply(pixels)
        assert (window_pixels >= 0).all()
        assert (window_pixels <= window.width).all()
        assert (window_pixels[pixels <= window.lower_bound] == 0).all()
        assert (window_pixels[pixels >= window.upper_bound] == window.upper_bound - window.lower_bound).all()