def test_tag_expanders_tagelement(self):
    # includes public and private, groups and element numbers
    """
    %header

    REMOVE contains:0010
    """
    print("Test expanding tag by tag number part (matches groups and element numbers)")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "REMOVE", "field": "contains:0010"}]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(135, len(result[0]))
    with self.assertRaises(KeyError):
        check1 = result[0]["00090010"].value
    with self.assertRaises(KeyError):
        check2 = result[0]["PatientID"].value
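# These tests call helpers (get_file, create_recipe, replace_identifiers,
# DicomParser) whose imports are not shown in this excerpt. create_recipe in
# particular is a small test utility; a minimal sketch of what it presumably
# does is below - it packs the action dicts into a DeidRecipe, optionally
# adding %fields and %values groups.

from deid.config import DeidRecipe


def create_recipe(actions, fields=None, values=None):
    """Build a DeidRecipe whose %header section is the given action list."""
    recipe = DeidRecipe()

    # Swap the default header actions for the ones under test
    del recipe.deid["header"][:]
    recipe.deid["header"] = actions

    # Optional %fields groups, e.g. {"field_set1": [{"field": ..., "action": "FIELD"}]}
    if fields is not None:
        recipe.deid["fields"] = fields

    # Optional %values groups, extracted from the dicom at parse time
    if values is not None:
        recipe.deid["values"] = values
    return recipe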
def test_strip_sequences(self):
    """
    Testing strip sequences: Checks to ensure that strip_sequences removes
    all tags of type sequence. Since sequence removal relies on
    dicom.iterall(), nested sequences previously caused exceptions to be
    thrown when child (or duplicate) sequences existed within the header.

    %header

    ADD PatientIdentityRemoved Yeppers!
    """
    print("Test strip_sequences")
    dicom_file = get_file(self.dataset)
    actions = [
        {"action": "ADD", "field": "PatientIdentityRemoved", "value": "Yeppers!"}
    ]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=True,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(152, len(result[0]))
    with self.assertRaises(KeyError):
        check1 = result[0]["00081110"].value
    for tag in result[0]:
        self.assertFalse(isinstance(tag.value, Sequence))
def test_replace_identifiers(self):
    print("Testing deid.dicom replace_identifiers")
    from deid.dicom import replace_identifiers
    from deid.dicom import get_identifiers
    from pydicom import read_file

    dicom_files = get_dicom(self.dataset, return_dir=True)
    ids = get_identifiers(dicom_files)

    # Before blanking, 28 fields don't have blanks
    notblanked = read_file(dicom_files[0])
    notblanked_fields = [x for x in notblanked.dir() if notblanked.get(x) != ""]  # 28
    self.assertTrue(len(notblanked_fields) == 28)

    updated_files = replace_identifiers(dicom_files, ids, output_folder=self.tmpdir)

    # After replacing only 9 don't have blanks
    blanked = read_file(updated_files[0])
    blanked_fields = [x for x in blanked.dir() if blanked.get(x) != ""]
    self.assertTrue(len(blanked_fields) == 9)
def test_jitter_timestamp(self):  # DICOM datatype DT
    """
    RECIPE RULE
    JITTER AcquisitionDateTime 1
    """
    print("Test timestamp jitter")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "JITTER", "field": "AcquisitionDateTime", "value": "1"}]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual("20230102011721.621000", result[0]["AcquisitionDateTime"].value)
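# For reference, JITTER shifts DICOM date/datetime values by N days. Assuming
# the test file's original AcquisitionDateTime is 20230101011721.621000 (an
# assumption - the source file is not shown in this excerpt), the expected
# value above is simply that timestamp plus one day:

from datetime import datetime, timedelta

original = datetime.strptime("20230101011721.621000", "%Y%m%d%H%M%S.%f")
jittered = original + timedelta(days=1)
assert jittered.strftime("%Y%m%d%H%M%S.%f") == "20230102011721.621000"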
def test_expander_except(self):
    # Remove all fields except Manufacturer
    """
    RECIPE RULE
    REMOVE except:Manufacturer
    """
    print("Test except expander")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "REMOVE", "field": "except:Manufacturer"}]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(2, len(result[0]))
    self.assertEqual("SIEMENS", result[0]["Manufacturer"].value)
    with self.assertRaises(KeyError):
        check1 = result[0]["ExposureTime"].value
    with self.assertRaises(KeyError):
        check2 = result[0]["TotalCollimationWidth"].value
    with self.assertRaises(KeyError):
        check3 = result[0]["DataCollectionDiameter"].value
def test_fieldset_remove(self):
    """
    RECIPE

    %fields field_set1
    FIELD Manufacturer
    FIELD contains:Collimation

    %header

    REMOVE fields:field_set1
    """
    print("Test public tag fieldset")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "REMOVE", "field": "fields:field_set1"}]
    fields = OrderedDict()
    fields["field_set1"] = [
        {"field": "Manufacturer", "action": "FIELD"},
        {"field": "contains:Collimation", "action": "FIELD"},
    ]
    recipe = create_recipe(actions, fields)

    # Method 1: use DicomParser
    parser = DicomParser(dicom_file, recipe=recipe)
    number_fields = len(parser.dicom)  # 160
    parser.parse()

    # The number of fields to be removed
    to_remove = len(parser.lookup["field_set1"])
    expected_number = number_fields - to_remove

    # {'field_set1': {'(0008, 0070)': (0008, 0070) Manufacturer LO: 'SIEMENS' [Manufacturer],
    #                 '(0018, 9306)': (0018, 9306) Single Collimation Width FD: 1.2 [SingleCollimationWidth],
    #                 '(0018, 9307)': (0018, 9307) Total Collimation Width FD: 14.399999999999999 [TotalCollimationWidth]}}

    # Method 2: use replace_identifiers
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    print(len(result[0]))
    self.assertEqual(expected_number, len(result[0]))
    with self.assertRaises(KeyError):
        check1 = result[0]["Manufacturer"].value
    with self.assertRaises(KeyError):
        check2 = result[0]["TotalCollimationWidth"].value
    with self.assertRaises(KeyError):
        check3 = result[0]["SingleCollimationWidth"].value
def test_remove(self):
    """
    RECIPE RULE
    REMOVE InstitutionName
    REMOVE 00190010
    """
    print("Test remove of public and private tags")
    dicom_file = get_file(self.dataset)
    field1name = "InstitutionName"
    field2name = "00190010"
    actions = [
        {"action": "REMOVE", "field": field1name},
        {"action": "REMOVE", "field": field2name},
    ]
    recipe = create_recipe(actions)
    dicom = read_file(dicom_file)

    # Create a DicomParser to easily find fields
    parser = DicomParser(dicom_file)
    parser.parse()

    # The first in the list is the highest level
    field1 = list(parser.find_by_name(field1name).values())[0]
    field2 = list(parser.find_by_name(field2name).values())[0]
    self.assertIsNotNone(field1.element.value)
    self.assertIsNotNone(field2.element.value)

    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )

    # Create a DicomParser to easily find fields
    parser = DicomParser(result[0])
    parser.parse()

    # Removed means we don't find them
    assert not parser.find_by_name(field1name)
    assert not parser.find_by_name(field2name)

    self.assertEqual(1, len(result))
    with self.assertRaises(KeyError):
        check1 = result[0][field1name].value
    with self.assertRaises(KeyError):
        check2 = result[0][field2name].value
def test_extract_groups(self):
    print("Test deid.dicom.groups extract_values_list")
    from deid.dicom.groups import extract_values_list, extract_fields_list

    dicom = get_dicom(self.dataset)
    fields = get_fields(dicom)  # removes empty / null

    # Test split action
    actions = [
        {"action": "SPLIT", "field": "PatientID", "value": 'by="^";minlength=4'}
    ]
    expected_names = dicom.get("PatientID").split("^")
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_names)

    # Test field action
    actions = [{"action": "FIELD", "field": "startswith:Operator"}]
    expected_operator = [dicom.get(x) for x in fields if x.startswith("Operator")]
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_operator)

    print("Test deid.dicom.groups extract_fields_list")
    actions = [{"action": "FIELD", "field": "contains:Instance"}]
    expected = [x for x in fields if "Instance" in x]
    actual = extract_fields_list(dicom, actions)
    self.assertEqual(actual, expected)

    # Get identifiers for file
    ids = get_identifiers(dicom)
    self.assertTrue(isinstance(ids, dict))

    # Add keys to be used for replace to ids - these first are for values
    ids[dicom.filename]["cookie_names"] = expected_names
    ids[dicom.filename]["operator_names"] = expected_operator

    # This is for fields
    ids[dicom.filename]["instance_fields"] = expected
    ids[dicom.filename]["id"] = "new-cookie-id"
    ids[dicom.filename]["source_id"] = "new-operator-id"

    replaced = replace_identifiers(dicom, ids=ids, save=False, deid=self.deid)
    cleaned = replaced.pop()
    self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
    self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

    # Currently we don't handle tag types well, so we convert to string
    for field in expected_operator:
        self.assertTrue(str(field) not in cleaned)
def _anonimize(self, image):
    """
    self._anonimize(image) => file path

    Anonymizes the image passed as a parameter according to the rules
    defined in self.recipe, and returns the path of a temporary file
    containing those modifications.
    """
    files = [image.path]
    ids = get_identifiers(files)
    cleaned_files = replace_identifiers(dicom_files=files, deid=self.recipe, ids=ids)
    return cleaned_files[0]
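# A hedged usage sketch for the helper above. The only thing _anonimize
# actually requires of its argument is a .path attribute pointing at a DICOM
# file, so any stand-in object works; Image and handler below are hypothetical.

from collections import namedtuple

Image = namedtuple("Image", "path")

# handler is assumed to be an instance of the class defining _anonimize,
# with self.recipe already loaded (e.g. a DeidRecipe):
#
#   clean_path = handler._anonimize(Image(path="/data/scan.dcm"))
#   print(clean_path)  # path of the de-identified temporary file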
def test_valueset_remove_one_empty(self):
    """
    Testing to ensure correct actions are taken when a defined valueset
    contains a field that has an empty value. Since the ConversionType flag
    contains "No Value", in the test below, value_set1 will only have the
    value from Manufacturer and should only identify the fields which
    contain "SIEMENS".

    %values value_set1
    FIELD ConversionType
    FIELD Manufacturer

    %header

    REMOVE values:value_set1
    """
    import pydicom

    print("Test one empty value valueset")
    dicom_file = get_file(self.dataset)
    original_dataset = pydicom.dcmread(dicom_file)
    actions = [{"action": "REMOVE", "field": "values:value_set1"}]
    values = OrderedDict()
    values["value_set1"] = [
        {"field": "ConversionType", "action": "FIELD"},
        {"field": "Manufacturer", "action": "FIELD"},
    ]
    recipe = create_recipe(actions, values=values)

    # Check that values we want are present using DicomParser
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()
    self.assertEqual(len(parser.lookup["value_set1"]), 1)
    self.assertTrue("SIEMENS" in parser.lookup["value_set1"])

    # Perform action
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertNotEqual(len(original_dataset), len(result[0]))
    with self.assertRaises(KeyError):
        check1 = result[0]["00090010"].value
    with self.assertRaises(KeyError):
        check2 = result[0]["Manufacturer"].value
def test_replace_with_constant(self):
    """
    RECIPE RULE
    REPLACE AccessionNumber 987654321
    REPLACE 00190010 NEWVALUE!
    """
    print("Test replace tags with constant values")
    dicom_file = get_file(self.dataset)
    newfield1 = "AccessionNumber"
    newvalue1 = "987654321"
    newfield2 = "00190010"
    newvalue2 = "NEWVALUE!"
    actions = [
        {"action": "REPLACE", "field": newfield1, "value": newvalue1},
        {"action": "REPLACE", "field": newfield2, "value": newvalue2},
    ]
    recipe = create_recipe(actions)

    # Create a DicomParser to easily find fields
    parser = DicomParser(dicom_file)
    parser.parse()

    # The first in the list is the highest level
    field1 = list(parser.find_by_name(newfield1).values())[0]
    field2 = list(parser.find_by_name(newfield2).values())[0]
    self.assertNotEqual(newvalue1, field1.element.value)
    self.assertNotEqual(newvalue2, field2.element.value)

    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(newvalue1, result[0][newfield1].value)
    self.assertEqual(newvalue2, result[0][newfield2].value)
def test_valueset_remove(self):
    """
    %values value_set1
    FIELD contains:Manufacturer
    SPLIT contains:Physician by="^";minlength=3

    %header

    REMOVE values:value_set1
    """
    print("Test public tag valueset")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "REMOVE", "field": "values:value_set1"}]
    values = OrderedDict()
    values["value_set1"] = [
        {"field": "contains:Manufacturer", "action": "FIELD"},
        {
            "value": 'by="^";minlength=3',
            "field": "contains:Physician",
            "action": "SPLIT",
        },
    ]
    recipe = create_recipe(actions, values=values)

    # Check that values we want are present using DicomParser
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()
    self.assertTrue("SIEMENS" in parser.lookup["value_set1"])
    self.assertTrue("HIBBARD" in parser.lookup["value_set1"])

    # Perform action
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    with self.assertRaises(KeyError):
        check1 = result[0]["00090010"].value
    with self.assertRaises(KeyError):
        check2 = result[0]["Manufacturer"].value
    with self.assertRaises(KeyError):
        check3 = result[0]["PhysiciansOfRecord"].value
def test_add_private_constant(self):
    """
    RECIPE RULE
    ADD 11112221 SIMPSON
    """
    print("Test add private tag constant value")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "ADD", "field": "11112221", "value": "SIMPSON"}]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual("SIMPSON", result[0]["11112221"].value)
def test_valueset_empty_remove(self):
    """
    Testing to ensure correct actions are taken when a defined valueset
    contains no data (the field identified has an empty value). Since the
    ConversionType flag contains "No Value", in the test below, value_set1
    will be empty and as a result this combination of rules should have no
    impact on the header. The input header should be identical to the
    output header.

    %values value_set1
    FIELD ConversionType

    %header

    REMOVE values:value_set1
    """
    import pydicom

    print("Test empty value valueset")
    dicom_file = get_file(self.dataset)
    original_dataset = pydicom.dcmread(dicom_file)
    actions = [{"action": "REMOVE", "field": "values:value_set1"}]
    values = OrderedDict()
    values["value_set1"] = [
        {"field": "ConversionType", "action": "FIELD"},
    ]
    recipe = create_recipe(actions, values=values)

    # Check that values we want are present using DicomParser
    parser = DicomParser(dicom_file, recipe=recipe)
    parser.parse()
    self.assertEqual(len(parser.lookup["value_set1"]), 0)

    # Perform action
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(len(original_dataset), len(result[0]))
def test_expanders(self):
    """
    RECIPE RULES
    REMOVE contains:Collimation
    REMOVE endswith:Diameter
    REMOVE startswith:Exposure
    """
    print("Test contains, endswith, and startswith expanders")
    dicom_file = get_file(self.dataset)
    actions = [
        {"action": "REMOVE", "field": "contains:Collimation"},
        {"action": "REMOVE", "field": "endswith:Diameter"},
        {"action": "REMOVE", "field": "startswith:Exposure"},
    ]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(153, len(result[0]))
    with self.assertRaises(KeyError):
        check1 = result[0]["ExposureTime"].value
    with self.assertRaises(KeyError):
        check2 = result[0]["TotalCollimationWidth"].value
    with self.assertRaises(KeyError):
        check3 = result[0]["DataCollectionDiameter"].value
def test_jitter_date(self):  # DICOM datatype DA
    """
    RECIPE RULE
    JITTER StudyDate 1
    """
    print("Test date jitter")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "JITTER", "field": "StudyDate", "value": "1"}]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual("20230102", result[0]["StudyDate"].value)
def main():
    if len(sys.argv) != 3:
        print("Usage: %s <input_folder> <output_folder>" % sys.argv[0])
        sys.exit(1)

    input_folder = sys.argv[1]
    output_folder = sys.argv[2]

    dicom_files = [
        join(input_folder, dicom_file) for dicom_file in listdir(input_folder)
    ]
    ids = get_identifiers(dicom_files)

    # or use default conf, and then keep AccessionNumber
    # recipe = DeidRecipe('deid.conf')
    recipe = DeidRecipe()
    # recipe.deid['header'].remove({'action': 'REMOVE', 'field': 'AccessionNumber'})
    recipe.deid['header'].append({'action': 'REMOVE', 'field': 'InstitutionName'})

    updated_ids = dict()
    for image, fields in ids.items():
        # fields['id'] = 'cookiemonster'
        # fields['source_id'] = "cookiemonster-image-%s" % (count)
        updated_ids[basename(image)] = fields

    if not exists(output_folder):
        try:
            makedirs(output_folder)
        except OSError as exc:
            # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    cleaned_files = replace_identifiers(
        dicom_files=dicom_files,
        deid=recipe,
        ids=updated_ids,
        output_folder=output_folder,
    )
def test_add_tag_variable_save_true(self):
    """
    RECIPE RULE
    ADD 11112221 var:myVar
    ADD PatientIdentityRemoved var:myVar
    """
    print("Test add tag constant value from variable")
    dicom_file = get_file(self.dataset)
    actions = [
        {"action": "ADD", "field": "11112221", "value": "var:myVar"},
        {"action": "ADD", "field": "PatientIdentityRemoved", "value": "var:myVar"},
    ]
    recipe = create_recipe(actions)

    # Method 1: define ids manually
    ids = {dicom_file: {"myVar": "SIMPSON"}}
    result = replace_identifiers(
        dicom_files=dicom_file,
        ids=ids,
        deid=recipe,
        save=True,
        remove_private=False,
        strip_sequences=False,
        output_folder=self.tmpdir,
    )
    outputfile = read_file(result[0])
    self.assertEqual(1, len(result))
    self.assertEqual("SIMPSON", outputfile["11112221"].value)
    self.assertEqual("SIMPSON", outputfile["PatientIdentityRemoved"].value)
def test_tag_expanders_midtag(self):
    """
    REMOVE contains:8103

    Should remove:
    (0008, 103e) Series Description
    """
    dicom_file = get_file(self.dataset)
    actions = [{"action": "REMOVE", "field": "contains:8103"}]
    recipe = create_recipe(actions)

    # Ensure tag is present before removal
    dicom = read_file(dicom_file)
    assert "0008103e" in dicom

    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    assert "0008103e" not in result[0]
def test_jitter_compounding(self):
    """
    Testing jitter compounding: Checks to ensure that multiple jitter rules
    applied to the same field result in both rules being applied. While in
    practice this may be somewhat of a nonsensical use case, when large
    recipes exist multiple rules may inadvertently be defined. In prior
    versions of pydicom/deid, rules were additive, and recipes are built in
    that manner. This test ensures consistency with prior versions.

    %header

    JITTER StudyDate 1
    JITTER StudyDate 2
    """
    print("Test jitter compounding")
    dicom_file = get_file(self.dataset)
    actions = [
        {"action": "JITTER", "field": "StudyDate", "value": "1"},
        {"action": "JITTER", "field": "StudyDate", "value": "2"},
    ]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=True,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(151, len(result[0]))
    self.assertEqual("20230104", result[0]["StudyDate"].value)
def test_addremove_compounding(self):
    """
    Testing add/remove compounding: Checks to ensure that multiple rules
    applied to the same field result in both rules being applied. While in
    practice this may be somewhat of a nonsensical use case, when large
    recipes exist multiple rules may inadvertently be defined. In prior
    versions of pydicom/deid, rules were additive, and recipes are built in
    that manner. This test ensures consistency with prior versions.

    %header

    ADD PatientIdentityRemoved Yeppers!
    REMOVE PatientIdentityRemoved
    """
    print("Test addremove compounding")
    dicom_file = get_file(self.dataset)
    actions = [
        {"action": "ADD", "field": "PatientIdentityRemoved", "value": "Yeppers!"},
        {"action": "REMOVE", "field": "PatientIdentityRemoved"},
    ]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=True,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(151, len(result[0]))
    with self.assertRaises(KeyError):
        willerror = result[0]["PatientIdentityRemoved"].value
def test_tag_expanders_taggroup(self):
    # This test targets the group portion of a tag identifier - 0009 in (0009, 0001)
    """
    %header

    REMOVE contains:0009
    """
    print("Test expanding tag by tag number part (matches group numbers only)")
    dicom_file = get_file(self.dataset)
    actions = [{"action": "REMOVE", "field": "contains:0009"}]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    with self.assertRaises(KeyError):
        check1 = result[0]["00090010"].value
def test_add_public_constant(self):
    """
    RECIPE RULE
    ADD PatientIdentityRemoved Yeppers!
    """
    print("Test add public tag constant value")
    dicom_file = get_file(self.dataset)
    actions = [
        {"action": "ADD", "field": "PatientIdentityRemoved", "value": "Yeppers!"}
    ]
    recipe = create_recipe(actions)
    result = replace_identifiers(
        dicom_files=dicom_file,
        deid=recipe,
        save=False,
        remove_private=False,
        strip_sequences=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual("Yeppers!", result[0].PatientIdentityRemoved)
def test_nested_replace(self):
    """
    Fields are read into a dictionary lookup that should index back to the
    correct data element. We add this test to ensure this is happening,
    meaning that a replace action for a particular contains: string changes
    both top-level and nested fields.

    %header

    REPLACE contains:StudyInstanceUID var:new_val
    """
    print("Test nested_replace")
    dicom_file = get_file(self.dataset)
    actions = [
        {
            "action": "REPLACE",
            "field": "contains:StudyInstanceUID",
            "value": "var:new_val",
        }
    ]
    recipe = create_recipe(actions)

    items = get_identifiers([dicom_file])
    for item in items:
        items[item]["new_val"] = "modified"

    result = replace_identifiers(
        dicom_files=dicom_file,
        ids=items,
        deid=recipe,
        save=False,
    )
    self.assertEqual(1, len(result))
    self.assertEqual(result[0].StudyInstanceUID, "modified")
    self.assertEqual(
        result[0].RequestAttributesSequence[0].StudyInstanceUID, "modified"
    )
# Let's be lazy and just update the extracted ones
updated_ids = dict()
count = 0
for image, fields in ids.items():
    fields['id'] = 'cookiemonster'
    fields['source_id'] = "cookiemonster-image-%s" % (count)
    updated_ids[image] = fields
    count += 1

# You can look at each of the updated_ids entries and see the added variables
# 'id': 'cookiemonster',
# 'source_id': 'cookiemonster-image-2'}}

# And then use the deid recipe and updated ids to create new files
cleaned_files = replace_identifiers(dicom_files=dicom_files, deid=recipe, ids=updated_ids)

# We can load in a cleaned file to see what was done
from pydicom import read_file

test_file = read_file(cleaned_files[0])

# test_file
# (0008, 0018) SOP Instance UID          UI: cookiemonster-image-1
# (0010, 0020) Patient ID                LO: 'cookiemonster'
# (0012, 0062) Patient Identity Removed  CS: 'Yes'
# (0028, 0002) Samples per Pixel         US: 3
# (0028, 0010) Rows                      US: 1536
# (0028, 0011) Columns                   US: 2048
# (7fe0, 0010) Pixel Data                OB: Array of 738444 bytes
def generate_uid(item, value, field, dicom):
    """Generate a DICOM UID for the field being replaced."""
    import uuid

    # a field can either be just the name string, or a DicomElement
    if hasattr(field, "name"):
        field = field.name

    # Your organization should have its own DICOM ORG ROOT.
    # For the purpose of an example, borrowing PYMEDPHYS_ROOT_UID
    ORG_ROOT = "1.2.826.0.1.3680043.10.188"  # e.g. PYMEDPHYS_ROOT_UID
    prefix = field.lower().replace(" ", "-")
    bigint_uid = str(uuid.uuid4().int)
    full_uid = ORG_ROOT + "." + bigint_uid
    sliced_uid = full_uid[0:64]  # A DICOM UID is limited to 64 characters
    return prefix + "-" + sliced_uid


# Remember, the action is:
# REPLACE StudyInstanceUID func:generate_uid
# so the key needs to be generate_uid
for item in items:
    items[item]["generate_uid"] = generate_uid

# Now let's generate the cleaned files! It will output to a temporary directory
cleaned_files = replace_identifiers(dicom_files=dicom_files, deid=recipe, ids=items)

# Print a cleaned file
print(cleaned_files[0])
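# A quick, hedged sanity check of the function above, outside of deid. The
# field name here is illustrative; inside deid the function receives the
# actual DicomElement (or name) of the field being replaced.
print(generate_uid(item=None, value=None, field="Study Instance UID", dicom=None))
# e.g. study-instance-uid-1.2.826.0.1.3680043.10.188.24818... (UID part is 64 chars)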
from som.api.google.datastore import DataStoreClient as Client
import os

# Start google storage client for pmc-stanford
client = Client(bucket_name='radiology')
collection = client.create_collection(uid='IRB41449')

# Let's load some dummy data from deid
from deid.data import get_dataset
from deid.dicom import get_files

dicom_files = get_files(get_dataset('dicom-cookies'))

# Now de-identify to get clean files
from deid.dicom import get_identifiers, replace_identifiers

ids = get_identifiers(dicom_files)
updated_files = replace_identifiers(dicom_files=dicom_files, ids=ids)

# Define some metadata for the entity
metadata = {
    "source_id": "cookieTumorDatabase",
    "id": "cookie-47",
    "Modality": "cookie",
}

# Upload the dataset
client.upload_dataset(
    images=updated_files,
    collection=collection,
    uid=metadata['id'],
    entity_metadata=metadata,
)

# Now try with adding metadata for an image
images_metadata = {
    updated_files[0]:
# Load in the recipe, we want to REPLACE InstanceCreationDate with a function
recipe = DeidRecipe("deid.dicom")


# Here is our function
def generate_date(item, value, field, dicom):
    """This function will generate a dicom date! You can expect it to be
    passed the dictionary of items extracted from the dicom (and your
    function) and variables, the original value (func:generate_date) and
    the field object you are applying it to.
    """
    return "20200608"


# Add the function to each item to be found
for item in items:
    items[item]["generate_date"] = generate_date

# Clean the files
cleaned_files = replace_identifiers(
    dicom_files=dicom_files,
    deid=recipe,
    strip_sequences=False,
    ids=items,
)

# Print two instances (one in sequence)
print(cleaned_files[0].InstanceCreationDate)
print(cleaned_files[0].ReferencedPerformedProcedureStepSequence[0].InstanceCreationDate)
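# For completeness, the "deid.dicom" recipe loaded above is assumed to contain
# a header rule of this shape (func: names the key we attached to items):
#
#   FORMAT dicom
#
#   %header
#
#   REPLACE InstanceCreationDate func:generate_date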
def main(args, parser):
    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params['format'], args.format)
            )
            sys.exit(1)

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    basename = os.path.basename(base)
    dicom_files = list(get_files(base))  # todo: consider using generator functionality

    do_get = False
    do_put = False
    ids = None

    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.error("To PUT without GET you must provide a json file with ids.")
            sys.exit(1)
        ids = args.ids

    # GET identifiers
    if do_get is True:
        ids = get_identifiers(dicom_files)
        if args.do_print is True:
            print(ids)
        else:
            save_identifiers(ids, output_folder)

    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )
        bot.info("%s %s files at %s" % (len(cleaned_files), args.format, output_folder))