def test_replace_identifiers(self):
    """Check that replace_identifiers blanks the expected number of header fields.

    Reads the first dicom of the test dataset before and after running
    ``replace_identifiers`` and compares the count of non-blank fields
    (28 before, 9 after) against the known values for the fixture data.
    """
    print("Testing deid.dicom replace_identifiers")

    from deid.dicom import replace_identifiers
    from deid.dicom import get_identifiers
    # NOTE(review): read_file is the legacy pydicom name for dcmread --
    # confirm it is still available in the pinned pydicom version.
    from pydicom import read_file

    def non_blank_fields(dataset):
        # Names of all fields whose value is not the empty string.
        return [x for x in dataset.dir() if dataset.get(x) != '']

    dicom_files = get_dicom(self.dataset, return_dir=True)
    ids = get_identifiers(dicom_files)

    # Before blanking, 28 fields don't have blanks
    notblanked = read_file(dicom_files[0])
    # assertEqual reports the actual count on failure, unlike assertTrue(a == b)
    self.assertEqual(len(non_blank_fields(notblanked)), 28)

    updated_files = replace_identifiers(dicom_files,
                                        ids,
                                        output_folder=self.tmpdir)

    # After replacing only 9 don't have blanks
    blanked = read_file(updated_files[0])
    self.assertEqual(len(non_blank_fields(blanked)), 9)
def test_get_identifiers(self):
    """Check the shape of the lookup returned by get_identifiers.

    For the test dataset the result is expected to be a dict with a single
    entry keyed ``'cookie-47'`` that itself holds 7 items.
    """
    print("Testing deid.dicom get_identifiers")

    from deid.dicom import get_identifiers

    dicom_files = get_dicom(self.dataset, return_dir=True)
    ids = get_identifiers(dicom_files)

    # Check the type first so a wrong return type is reported as such,
    # and use the dedicated assertions so failures show the actual values.
    self.assertIsInstance(ids, dict)
    self.assertEqual(len(ids), 1)
    # Fail (rather than error with KeyError) when the entity is missing
    self.assertIn('cookie-47', ids)
    self.assertEqual(len(ids['cookie-47']), 7)
def test_extract_groups(self):
    """Exercise extract_values_list / extract_fields_list and a full replace.

    First verifies that SPLIT and FIELD actions extract the expected values
    and field names from the test dicom, then stores those groups in the
    identifiers lookup and checks that ``replace_identifiers`` applies the
    recipe in ``self.deid`` (new PatientID / OperatorsName, operator values
    no longer present).
    """
    print("Test deid.dicom.groups extract_values_list")
    from deid.dicom.groups import extract_values_list, extract_fields_list

    dicom = get_dicom(self.dataset)
    fields = get_fields(dicom)  # removes empty / null

    # Test split action
    actions = [{
        "action": "SPLIT",
        "field": "PatientID",
        "value": 'by="^";minlength=4'
    }]
    expected_names = dicom.get("PatientID").split("^")
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_names)

    # Test field action
    actions = [{"action": "FIELD", "field": "startswith:Operator"}]
    expected_operator = [
        dicom.get(x) for x in fields if x.startswith("Operator")
    ]
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_operator)

    print("Test deid.dicom.groups extract_fields_list")
    actions = [{"action": "FIELD", "field": "contains:Instance"}]
    expected = [x for x in fields if "Instance" in x]
    actual = extract_fields_list(dicom, actions)
    self.assertEqual(actual, expected)

    # Get identifiers for file
    ids = get_identifiers(dicom)
    self.assertIsInstance(ids, dict)

    # Add keys to be used for replace to ids - these first are for values
    ids[dicom.filename]["cookie_names"] = expected_names
    ids[dicom.filename]["operator_names"] = expected_operator

    # This is for fields
    ids[dicom.filename]["instance_fields"] = expected
    ids[dicom.filename]["id"] = "new-cookie-id"
    ids[dicom.filename]["source_id"] = "new-operator-id"

    # save=False: the updated datasets are returned instead of written out
    replaced = replace_identifiers(dicom,
                                   ids=ids,
                                   save=False,
                                   deid=self.deid)
    cleaned = replaced.pop()
    self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
    self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

    # Currently we don't well handle tag types, so we convert to string
    for field in expected_operator:
        self.assertNotIn(str(field), cleaned)
def _anonimize(self, image):
    """Anonymize ``image`` according to ``self.recipe``.

    self._anonimize(image) => file path

    The file at ``image.path`` is run through ``get_identifiers`` and
    ``replace_identifiers`` with the rules defined in ``self.recipe``.
    Per the original description, the returned value is the path of a
    temporary file that carries those modifications.
    """
    source_paths = [image.path]
    identifiers = get_identifiers(source_paths)
    anonymized = replace_identifiers(
        dicom_files=source_paths,
        deid=self.recipe,
        ids=identifiers,
    )
    # Only one file went in, so the first result is the anonymized copy.
    return anonymized[0]
def main():
    """De-identify every file of an input folder into an output folder.

    Expects exactly two command line arguments: the input folder and the
    output folder. Prints ``argv`` and exits with status 1 otherwise.
    Identifiers are extracted from all files in the input folder, the
    default ``DeidRecipe`` is extended with a REMOVE action for
    ``InstitutionName``, and the files are rewritten into the output folder
    (created when missing).
    """
    # Compare by value: `is not` tests object identity and only works for
    # small ints by accident of the interpreter.
    if len(sys.argv) != 3:
        print("argv")
        sys.exit(1)

    input_folder = sys.argv[1]
    output_folder = sys.argv[2]

    dicom_files = [
        join(input_folder, dicom_file) for dicom_file in listdir(input_folder)
    ]
    ids = get_identifiers(dicom_files)

    # or use default conf, and then keep AccessionNumber
    #recipe = DeidRecipe('deid.conf')
    recipe = DeidRecipe()
    #recipe.deid['header'].remove({'action': 'REMOVE', 'field': 'AccessionNumber'})
    recipe.deid['header'].append({
        'action': 'REMOVE',
        'field': 'InstitutionName'
    })

    # Re-key the identifiers by file name instead of full path.
    # NOTE(review): dicom_files still holds full paths -- confirm that
    # replace_identifiers resolves ids by base name.
    updated_ids = {basename(image): fields for image, fields in ids.items()}

    if not exists(output_folder):
        try:
            makedirs(output_folder)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    replace_identifiers(dicom_files=dicom_files,
                        deid=recipe,
                        ids=updated_ids,
                        output_folder=output_folder)
def test_nested_replace(self):
    """
    A REPLACE action on a ``contains:`` expression must reach nested
    data elements as well as top level ones.

    Fields are read into a dictionary lookup, and each entry should index
    back to the correct data element. This test guards that behavior by
    replacing every field that contains StudyInstanceUID and checking both
    the top level value and the one inside RequestAttributesSequence.

    %header
    REPLACE contains:StudyInstanceUID var:new_val
    """
    print("Test nested_replace")
    dicom_file = get_file(self.dataset)

    replace_action = {
        "action": "REPLACE",
        "field": "contains:StudyInstanceUID",
        "value": "var:new_val",
    }
    recipe = create_recipe([replace_action])

    # Define the variable referenced by the recipe for every entry
    items = get_identifiers([dicom_file])
    for entry in items.values():
        entry["new_val"] = "modified"

    result = replace_identifiers(
        dicom_files=dicom_file,
        ids=items,
        deid=recipe,
        save=False,
    )
    self.assertEqual(1, len(result))

    updated = result[0]
    self.assertEqual(updated.StudyInstanceUID, "modified")
    nested = result[0].RequestAttributesSequence[0]
    self.assertEqual(nested.StudyInstanceUID, "modified")
from deid.utils import get_installdir
from deid.data import get_dataset
import os

# This is a complete example of doing de-identification. For details, see our docs
# https://pydicom.github.io/deid

# This will get a set of example cookie dicoms
base = get_dataset('dicom-cookies')
# NOTE(review): get_files is not imported in the visible part of this
# script -- presumably it is imported above; confirm.
dicom_files = list(
    get_files(base))  # todo : consider using generator functionality

# This is the function to get identifiers
from deid.dicom import get_identifiers
ids = get_identifiers(dicom_files)

#**
# Here you might save them in your special (IRB approved) places
# And then provide replacement anonymous ids to put back in the data
# A cookie tumor example is below
#**

################################################################################
# The Deid Recipe
#
# The process of flagging images comes down to writing a set of filters to
# check if each image meets some criteria of interest. For example, I might
# create a filter called "xray" that is triggered when the Modality is CT or XR.
# We specify these filters in a simple text file called a "deid recipe." When
# you work with the functions, you have the choice to instantiate the object
def main(args, parser):
    """Run the identifiers client: GET and/or PUT identifiers for dicom files.

    Parameters
    ----------
    args : parsed command line arguments. The attributes read here are
        ``outfolder``, ``deid``, ``format``, ``input``, ``action``
        ("all", "get" or "put"), ``ids``, ``do_print`` and ``overwrite``.
    parser : the argument parser; not used by this function.

    Behavior
    --------
    * Output goes to ``args.outfolder`` or, when that is None, to a fresh
      temporary directory.
    * When a deid recipe is given, its format must match ``args.format``;
      otherwise an error is logged and the process exits with status 1.
    * "get" extracts identifiers and prints or saves them; "put" replaces
      identifiers using ``args.ids`` (required); "all" does both.
    """
    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params['format'], args.format))
            # The message announces an exit; bot.error alone does not stop
            # execution (the "put" branch below also exits explicitly).
            sys.exit(1)

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')
    base_name = os.path.basename(base)
    dicom_files = list(
        get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None

    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (base_name))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # Only a GET happens in this branch, so say so in the log
        bot.info("GET identifiers from %s" % (base_name))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (base_name))
        do_put = True
        if args.ids is None:
            bot.error(
                "To PUT without GET you must provide a json file with ids.")
            sys.exit(1)
        ids = args.ids

    # GET identifiers
    if do_get:
        ids = get_identifiers(dicom_files)
        if args.do_print is True:
            print(ids)
        else:
            save_identifiers(ids, output_folder)

    # PUT identifiers
    if do_put:
        cleaned_files = replace_identifiers(dicom_files=dicom_files,
                                            ids=ids,
                                            deid=args.deid,
                                            overwrite=args.overwrite,
                                            output_folder=output_folder)
        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
for rootPatient, directoriesPatient, filenamesPatient in os.walk( os.path.join(src, directory)): directoriesPatient.sort() count_patientDir = 1 for directorySequence in directoriesPatient: for rootImage, directoriesImage, filenamesImage in os.walk( os.path.join(src, directory, directorySequence)): if len(filenamesImage) > 2: for filename in filenamesImage: possibleFilename = os.path.join( rootImage, filename) if filename.startswith('I'): ids = get_identifiers( possibleFilename ) # deid function: gets identifiers from a dicom file print(possibleFilename) for image, fields in ids.items(): series_description = fields[ 'SeriesDescription'] series_description = series_description.lower( ) #count_patientDir = 1 if series_description.startswith( "dif" ) or series_description.startswith(
def test_extract_groups(self):
    """Exercise group extraction, DicomParser variables and a full replace.

    Verifies that SPLIT and FIELD actions extract the expected values and
    fields, that a ``DicomParser`` given those groups as variables applies
    the recipe in ``self.deid``, and that ``replace_identifiers`` with only
    ``id`` / ``source_id`` provided produces the same replacements.
    """
    print("Test deid.dicom.groups extract_values_list")
    from deid.dicom.groups import extract_values_list, extract_fields_list

    dicom = get_dicom(self.dataset)
    fields = get_fields(dicom)

    # Test split action
    actions = [{
        "action": "SPLIT",
        "field": "PatientID",
        "value": 'by="^";minlength=4'
    }]
    expected_names = dicom.get("PatientID").split("^")
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_names)

    # Test field action
    actions = [{"action": "FIELD", "field": "startswith:Operator"}]
    expected_operator = [
        x.element.value for x in fields.values()
        if x.element.keyword.startswith("Operator")
    ]
    actual = extract_values_list(dicom, actions)
    self.assertEqual(actual, expected_operator)

    print("Test deid.dicom.groups extract_fields_list")
    actions = [{"action": "FIELD", "field": "contains:Instance"}]
    expected = {
        uid: x
        for uid, x in fields.items() if "Instance" in x.element.keyword
    }
    actual = extract_fields_list(dicom, actions)
    # unittest assertions are not stripped under -O and report both operands
    for uid in expected:
        self.assertIn(uid, actual)

    # Get identifiers for file
    ids = get_identifiers(dicom)
    self.assertIsInstance(ids, dict)

    # Add keys to be used for replace to ids - these first are for values
    parser = DicomParser(dicom, recipe=self.deid)
    parser.define("cookie_names", expected_names)
    parser.define("operator_names", expected_operator)

    # This is for fields
    parser.define("instance_fields", expected)
    parser.define("id", "new-cookie-id")
    parser.define("source_id", "new-operator-id")
    parser.parse()

    # Were the changes made?
    self.assertEqual(parser.dicom.get("PatientID"), "new-cookie-id")
    self.assertEqual(parser.dicom.get("OperatorsName"), "new-operator-id")

    # Instance fields should be removed based on recipe
    for field in parser.lookup["instance_fields"].values():
        self.assertNotIn(field.element.keyword, parser.dicom)

    # Start over
    dicom = get_dicom(self.dataset)

    # We need to provide ids with variables "id" and "source_id"
    ids = {
        dicom.filename: {
            "id": "new-cookie-id",
            "source_id": "new-operator-id"
        }
    }

    # Returns list of updated dicom, since save is False
    replaced = replace_identifiers(dicom,
                                   save=False,
                                   deid=self.deid,
                                   ids=ids)
    cleaned = replaced.pop()

    self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
    self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")
# Let's use the default dicom_schema
from som.api.google.bigquery.schema import dicom_schema
table = client.get_or_create_table(dataset=dataset,
                                   table_name='dicomCookies',
                                   schema=dicom_schema)

# Let's load some dummy data from deid
from deid.data import get_dataset
from deid.dicom import get_files

# Materialize the file list: it is iterated three times below (identifier
# extraction, replacement, metadata update). Other callers of get_files wrap
# it in list(); if it yields lazily, a bare result would be exhausted after
# the first pass and the later steps would silently see no files.
dicom_files = list(get_files(get_dataset('dicom-cookies')))

# Now de-identify to get clean files
from deid.dicom import get_identifiers, replace_identifiers
metadata = get_identifiers(dicom_files)
updated_files = replace_identifiers(dicom_files=dicom_files, ids=metadata)

# Define some metadata for each entity and item
updates = {
    "item_id": "cookieTumorDatabase",
    "entity_id": "cookie-47",
    "Modality": "cookie"
}

for image_file in dicom_files:
    # setdefault gives files without an entry their own fresh dict, so no
    # two entries (nor `updates` itself) end up sharing one mutable object.
    metadata.setdefault(image_file, {}).update(updates)
for rootPatient, directoriesPatient, filenamesPatient in os.walk( os.path.join(src, directory)): for directorySequence in directoriesPatient: #for each sequence folder createFolder(os.path.join(dst, caseID, directorySequence)) #enters each Sequence folder for rootImage, directoriesImage, filenamesImage in os.walk( os.path.join(src, directory, directorySequence)): for filename in filenamesImage: possibleFilename = os.path.join(rootImage, filename) if filename.startswith('I'): #if 'DIRFILE' not in possibleFilename and 'dirty' not in possibleFilename and '.DS_Store' not in possibleFilename and '.bmp' not in possibleFilename and '._I10' not in possibleFilename and '._I11' not in possibleFilename and '._I00' not in possibleFilename and '._I200' not in possibleFilename and '._I460' not in possibleFilename and '._I880' not in possibleFilename: #print(possibleFilename) ids = get_identifiers( possibleFilename ) #deid function to get identifiers from a dicom file recipe.deid #changing header values #print(recipe.deid) #print(recipe.get_actions()) # check the actions that are defined updated_ids = dict() count = 0 for image, fields in ids.items(): #save these items to put into .csv patientName = fields['PatientName'] study_date = fields['StudyDate'] #institution_name = fields['InstitutionName'] #patient_age = fields['PatientAge']