def test_remove(self):
    """Check that REMOVE deletes a public tag and a private tag.

    RECIPE RULE
    REMOVE InstitutionName
    REMOVE 00190010
    """
    print("Test remove of public and private tags")
    dicom_file = get_file(self.dataset)

    field1name = "InstitutionName"
    field2name = "00190010"

    actions = [
        {"action": "REMOVE", "field": field1name},
        {"action": "REMOVE", "field": field2name},
    ]
    recipe = create_recipe(actions)

    # Parse the untouched file first so we can confirm both fields start
    # out with a value.
    parser = DicomParser(dicom_file)
    parser.parse()

    # The first in the list is the highest level
    field1 = list(parser.find_by_name(field1name).values())[0]
    field2 = list(parser.find_by_name(field2name).values())[0]
    self.assertIsNotNone(field1.element.value)
    self.assertIsNotNone(field2.element.value)

    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )

    # Verify the result count before indexing into it, so an empty result
    # is reported as an assertion failure rather than an IndexError.
    self.assertEqual(1, len(result))

    # Re-parse the returned dataset to look the fields up again.
    parser = DicomParser(result[0])
    parser.parse()

    # Removed means we don't find them. unittest assertions are used
    # (instead of bare ``assert``) so the checks survive ``python -O``.
    self.assertFalse(parser.find_by_name(field1name))
    self.assertFalse(parser.find_by_name(field2name))

    # Direct lookup on the returned dataset must fail as well.
    for name in (field1name, field2name):
        with self.assertRaises(KeyError):
            _ = result[0][name].value
def test_fieldset_remove_private(self):
    """Remove a field set that mixes a private tag and a public keyword.

    %fields field_set2_private
    FIELD 00090010
    FIELD PatientID

    %header

    REMOVE fields:field_set2_private
    """
    print("Test private tag fieldset")
    dicom_file = get_file(self.dataset)

    group_name = "field_set2_private"
    actions = [{"action": "REMOVE", "field": "fields:field_set2_private"}]
    fields = OrderedDict(
        [
            (
                group_name,
                [
                    {"field": "00090010", "action": "FIELD"},
                    {"field": "PatientID", "action": "FIELD"},
                ],
            )
        ]
    )
    recipe = create_recipe(actions, fields)

    # Parsing with the recipe both fills the lookup and applies REMOVE.
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()

    # Both members of the field set must have been collected ...
    for tag_repr in ("(0009, 0010)", "(0010, 0020)"):
        self.assertTrue(tag_repr in parser.lookup["field_set2_private"])

    # ... and neither may still be reachable on the parsed dataset.
    for key in ("00090010", "PatientID"):
        with self.assertRaises(KeyError):
            _ = parser.dicom[key].value
def get_identifiers(
    dicom_files, force=True, config=None, strip_sequences=False, remove_private=False
):
    """Extract the fields of one or more dicom files.

    Returns a lookup keyed by each parser's ``dicom_file``, whose value is
    whatever ``DicomParser.get_fields()`` returns for that file.

    Parameters
    ==========
    dicom_files: a single dicom file or a list of them
    force: forwarded to DicomParser (default True)
    config: path to a config json; when None, "<here>/config.json" is used
    strip_sequences: accepted, but not referenced in this function body
    remove_private: accepted, but not referenced in this function body
    """
    config = ("%s/config.json" % here) if config is None else config

    # NOTE(review): only logs an error; execution continues to read_json.
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))

    # The "get" section is loaded here but not used again below.
    config = read_json(config, ordered_dict=True)["get"]

    # Normalise a single entry into a one-element list.
    targets = dicom_files if isinstance(dicom_files, list) else [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(targets))

    # One parser per file; its fields are stored under the parser's file name.
    lookup = {}
    for target in targets:
        parser = DicomParser(target, force=force)
        extracted = parser.get_fields()
        lookup[parser.dicom_file] = extracted
    return lookup
def test_fieldset_remove(self):
    """Remove every field collected in a public-tag field set.

    RECIPE
    %fields field_set1
    FIELD Manufacturer
    FIELD contains:Collimation

    %header

    REMOVE fields:field_set1
    """
    print("Test public tag fieldset")
    dicom_file = get_file(self.dataset)

    actions = [{"action": "REMOVE", "field": "fields:field_set1"}]
    fields = OrderedDict(
        [
            (
                "field_set1",
                [
                    {"field": "Manufacturer", "action": "FIELD"},
                    {"field": "contains:Collimation", "action": "FIELD"},
                ],
            )
        ]
    )
    recipe = create_recipe(actions, fields)

    # Step 1: use DicomParser to work out how many fields should remain.
    # The size is taken before parse() so it reflects the untouched dataset.
    parser = DicomParser(dicom_file, recipe=recipe)
    fields_before = len(parser.dicom)
    parser.parse()

    # The original author recorded three matches for the test dataset:
    # Manufacturer (0008, 0070), SingleCollimationWidth (0018, 9306) and
    # TotalCollimationWidth (0018, 9307).
    expected_number = fields_before - len(parser.lookup["field_set1"])

    # Step 2: run the same recipe through replace_identifiers.
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    print(len(result[0]))
    self.assertEqual(expected_number, len(result[0]))

    # Each matched keyword must be gone from the returned dataset.
    for keyword in (
        "Manufacturer",
        "TotalCollimationWidth",
        "SingleCollimationWidth",
    ):
        with self.assertRaises(KeyError):
            _ = result[0][keyword].value
def replace_identifiers(
    dicom_files,
    ids=None,
    deid=None,
    save=False,
    overwrite=False,
    output_folder=None,
    force=True,
    config=None,
    strip_sequences=False,
    remove_private=False,
):
    """Apply a deid recipe to one or more dicom files.

    Each file is wrapped in a DicomParser, optionally seeded with a custom
    lookup from ``ids``, and parsed. Returns a list with one entry per input:
    the value returned by ``save_dicom`` when ``save is True``, otherwise the
    parser's ``dicom`` object.

    Parameters
    ==========
    dicom_files: a single dicom file or a list of them
    ids: optional lookup keyed by the parser's dicom_file; a matching entry
         is merged into the parser lookup before parsing
    deid: recipe passed to DicomParser; a warning is logged when it is falsy
    save: only the literal True triggers saving
    overwrite: forwarded to save_dicom
    output_folder: forwarded to save_dicom
    force: forwarded to DicomParser
    config: forwarded to DicomParser
    strip_sequences: forwarded to parser.parse
    remove_private: forwarded to parser.parse
    """
    targets = dicom_files if isinstance(dicom_files, list) else [dicom_files]

    # Warn the user that we use the default deid recipe
    if not deid:
        bot.warning("No deid specification provided, will use defaults.")

    # ids (a lookup) is not required
    ids = ids or {}

    updated_files = []
    for target in targets:
        parser = DicomParser(target, force=force, config=config, recipe=deid)

        # Merge caller-supplied variables for this file, if any were given.
        if parser.dicom_file in ids:
            parser.lookup.update(ids[parser.dicom_file])

        parser.parse(strip_sequences=strip_sequences, remove_private=remove_private)

        # Not saving: hand back the updated in-memory object.
        if save is not True:
            updated_files.append(parser.dicom)
            continue

        # Saving: collect whatever save_dicom returns for this file.
        saved = save_dicom(
            dicom=parser.dicom,
            dicom_file=parser.dicom_file,
            output_folder=output_folder,
            overwrite=overwrite,
        )
        updated_files.append(saved)

    return updated_files
def test_valueset_remove_one_empty(self):
    """Value set in which one of the two source fields has no value.

    ConversionType contains "No Value" in the test dataset, so value_set1
    should end up holding only the Manufacturer value, and only fields
    containing "SIEMENS" should be identified for removal.

    %values value_set1
    FIELD ConversionType
    FIELD Manufacturer

    %header

    REMOVE values:value_set1
    """
    import pydicom

    print("Test one empty value valueset")
    dicom_file = get_file(self.dataset)
    # Untouched copy, used as the baseline for the element count.
    original_dataset = pydicom.dcmread(dicom_file)

    actions = [{"action": "REMOVE", "field": "values:value_set1"}]
    values = OrderedDict(
        [
            (
                "value_set1",
                [
                    {"field": "ConversionType", "action": "FIELD"},
                    {"field": "Manufacturer", "action": "FIELD"},
                ],
            )
        ]
    )
    recipe = create_recipe(actions, values=values)

    # The value set should contain exactly one value, "SIEMENS".
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()
    self.assertEqual(len(parser.lookup["value_set1"]), 1)
    self.assertTrue("SIEMENS" in parser.lookup["value_set1"])

    # Apply the recipe through the public entry point.
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))

    # Something must have been removed ...
    self.assertNotEqual(len(original_dataset), len(result[0]))

    # ... including these two fields.
    for key in ("00090010", "Manufacturer"):
        with self.assertRaises(KeyError):
            _ = result[0][key].value
def test_replace_with_constant(self):
    """Replace a public and a private tag with constant values.

    RECIPE RULE
    REPLACE AccessionNumber 987654321
    REPLACE 00190010 NEWVALUE!
    """
    print("Test replace tags with constant values")
    dicom_file = get_file(self.dataset)

    # (field, replacement value) pairs, in recipe order.
    newfield1, newvalue1 = "AccessionNumber", "987654321"
    newfield2, newvalue2 = "00190010", "NEWVALUE!"

    actions = [
        {"action": "REPLACE", "field": newfield1, "value": newvalue1},
        {"action": "REPLACE", "field": newfield2, "value": newvalue2},
    ]
    recipe = create_recipe(actions)

    # Parse without a recipe to inspect the starting values.
    parser = DicomParser(dicom_file)
    parser.parse()

    # The first in the list is the highest level
    matches1 = parser.find_by_name(newfield1)
    matches2 = parser.find_by_name(newfield2)
    field1 = list(matches1.values())[0]
    field2 = list(matches2.values())[0]

    # The constants must not already be present, or the test proves nothing.
    self.assertNotEqual(newvalue1, field1.element.value)
    self.assertNotEqual(newvalue2, field2.element.value)

    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))

    # Both fields now carry the constants from the recipe.
    self.assertEqual(newvalue1, result[0][newfield1].value)
    self.assertEqual(newvalue2, result[0][newfield2].value)
def test_valueset_remove(self):
    """Remove fields whose values appear in a public-tag value set.

    %values value_set1
    FIELD contains:Manufacturer
    SPLIT contains:Physician by="^";minlength=3

    %header

    REMOVE values:value_set1
    """
    print("Test public tag valueset")
    dicom_file = get_file(self.dataset)

    actions = [{"action": "REMOVE", "field": "values:value_set1"}]
    values = OrderedDict(
        [
            (
                "value_set1",
                [
                    {"field": "contains:Manufacturer", "action": "FIELD"},
                    {
                        "value": 'by="^";minlength=3',
                        "field": "contains:Physician",
                        "action": "SPLIT",
                    },
                ],
            )
        ]
    )
    recipe = create_recipe(actions, values=values)

    # The value set should have picked up one value from each rule.
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()
    for expected_value in ("SIEMENS", "HIBBARD"):
        self.assertTrue(expected_value in parser.lookup["value_set1"])

    # Apply the recipe through the public entry point.
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))

    # Fields holding any of the collected values must be gone.
    for key in ("00090010", "Manufacturer", "PhysiciansOfRecord"):
        with self.assertRaises(KeyError):
            _ = result[0][key].value
def test_valueset_empty_remove(self):
    """Value set that ends up empty must leave the header untouched.

    ConversionType contains "No Value" in the test dataset, so value_set1
    will be empty and the REMOVE rule should have no effect: the output
    header is expected to have as many elements as the input header.

    %values value_set1
    FIELD ConversionType

    %header

    REMOVE values:value_set1
    """
    import pydicom

    print("Test empty value valueset")
    dicom_file = get_file(self.dataset)
    # Untouched copy, used as the baseline for the element count.
    original_dataset = pydicom.dcmread(dicom_file)

    actions = [{"action": "REMOVE", "field": "values:value_set1"}]
    values = OrderedDict(
        [("value_set1", [{"field": "ConversionType", "action": "FIELD"}])]
    )
    recipe = create_recipe(actions, values=values)

    # Nothing should have been collected into the value set.
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()
    self.assertEqual(len(parser.lookup["value_set1"]), 0)

    # Apply the recipe through the public entry point.
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))

    # Same number of elements before and after.
    self.assertEqual(len(original_dataset), len(result[0]))
def test_remove_all_func(self):
    """Remove every field for which a user-defined function returns True.

    %header

    REMOVE ALL func:contains_hibbard
    """
    print("Test tag removal by")
    dicom_file = get_file(self.dataset)

    def contains_hibbard(dicom, value, field, item):
        """Return True when the field's current value mentions "hibbard"."""
        from pydicom.tag import Tag

        tag = Tag(field.element.tag)
        # Fields that are not present on the dataset never match.
        if tag not in dicom:
            return False
        # Case-insensitive substring match on the stringified value.
        return "hibbard" in str(dicom.get(tag).value).lower()

    actions = [
        {"action": "REMOVE", "field": "ALL", "value": "func:contains_hibbard"}
    ]
    recipe = create_recipe(actions)

    # The function must be registered on the parser before parsing so the
    # recipe's "func:contains_hibbard" reference can be resolved.
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.define("contains_hibbard", contains_hibbard)
    parser.parse()

    self.assertEqual(156, len(parser.dicom))

    # None of these fields may remain on the parsed dataset.
    for key in (
        "ReferringPhysicianName",
        "PhysiciansOfRecord",
        "RequestingPhysician",
        "00331019",
    ):
        with self.assertRaises(KeyError):
            _ = parser.dicom[key].value
def test_valueset_private(self):
    """Remove fields whose values appear in a private-tag value set.

    %values value_set2_private
    FIELD 00311020
    SPLIT 00090010 by=" ";minlength=4

    %header

    REMOVE values:value_set2_private
    """
    print("Test private tag valueset")
    dicom_file = get_file(self.dataset)

    actions = [{"action": "REMOVE", "field": "values:value_set2_private"}]
    values = OrderedDict()
    values["value_set2_private"] = [
        {"field": "00311020", "action": "FIELD"},
        {"value": 'by=" ";minlength=4', "field": "00090010", "action": "SPLIT"},
    ]
    recipe = create_recipe(actions, values=values)

    # Parsing with the recipe both fills the lookup and applies REMOVE.
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()

    # assertIn (rather than a bare ``assert``) keeps the check active under
    # ``python -O`` and reports the missing entry on failure.
    for entry in ["SIEMENS", "M1212121", "DUMMY"]:
        self.assertIn(entry, parser.lookup["value_set2_private"])

    # Fields holding any of the collected values must be gone.
    for key in ("OtherPatientIDs", "Manufacturer", "00190010"):
        with self.assertRaises(KeyError):
            _ = parser.dicom[key].value
def test_extract_groups(self):
    """Exercise value/field group extraction and their use in a recipe.

    Covers extract_values_list (SPLIT and FIELD actions) and
    extract_fields_list, then checks that the extracted groups and the
    "id" / "source_id" variables drive the recipe in ``self.deid`` both
    through DicomParser directly and through replace_identifiers.
    """
    print("Test deid.dicom.groups extract_values_list")
    from deid.dicom.groups import extract_values_list, extract_fields_list

    dicom = get_dicom(self.dataset)
    fields = get_fields(dicom)

    # Test split action
    actions = [
        {"action": "SPLIT", "field": "PatientID", "value": 'by="^";minlength=4'}
    ]
    expected_names = dicom.get("PatientID").split("^")
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_names)

    # Test field action
    actions = [{"action": "FIELD", "field": "startswith:Operator"}]
    expected_operator = [
        x.element.value
        for _uid, x in fields.items()
        if x.element.keyword.startswith("Operator")
    ]
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_operator)

    print("Test deid.dicom.groups extract_fields_list")
    actions = [{"action": "FIELD", "field": "contains:Instance"}]
    expected = {
        uid: x for uid, x in fields.items() if "Instance" in x.element.keyword
    }
    actual = extract_fields_list(dicom, actions)
    # Every expected uid must be present (extra entries are tolerated).
    for uid in expected:
        self.assertIn(uid, actual)

    # Get identifiers for file
    ids = get_identifiers(dicom)
    self.assertIsInstance(ids, dict)

    # Add keys to be used for replace to ids - these first are for values
    parser = DicomParser(dicom, recipe=self.deid)
    parser.define("cookie_names", expected_names)
    parser.define("operator_names", expected_operator)

    # This is for fields
    parser.define("instance_fields", expected)
    parser.define("id", "new-cookie-id")
    parser.define("source_id", "new-operator-id")
    parser.parse()

    # Were the changes made?
    self.assertEqual(parser.dicom.get("PatientID"), "new-cookie-id")
    self.assertEqual(parser.dicom.get("OperatorsName"), "new-operator-id")

    # Instance fields should be removed based on recipe
    for _uid, field in parser.lookup["instance_fields"].items():
        self.assertNotIn(field.element.keyword, parser.dicom)

    # Start over
    dicom = get_dicom(self.dataset)

    # We need to provide ids with variables "id" and "source_id"
    ids = {dicom.filename: {"id": "new-cookie-id", "source_id": "new-operator-id"}}

    # Returns list of updated dicom, since save is False
    replaced = replace_identifiers(dicom, save=False, deid=self.deid, ids=ids)
    cleaned = replaced.pop()

    self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
    self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")