def test_get_files(self):
    """Check how many dicom files ``get_files`` finds.

    Case 1 expects 7 files for ``self.dataset``; case 2 expects none for
    ``self.tmpdir`` (the "empty folder" case).
    """
    print("Case 1: Test get files from dataset")
    from deid.dicom import get_files

    # len() needs a sized container. Wrapping in list() keeps the test valid
    # whether get_files returns a list or yields paths lazily.
    dicom_files = list(get_files(self.dataset))
    self.assertEqual(len(dicom_files), 7)

    print("Case 2: Ask for files from empty folder")
    dicom_files = list(get_files(self.tmpdir))
    self.assertEqual(len(dicom_files), 0)
def get_dicom(dataset):
    """Helper: read the first dicom file that ``get_files`` yields.

    The first path produced by ``deid.dicom.get_files(dataset)`` is handed
    to ``pydicom.read_file`` and the result is returned.
    """
    from deid.dicom import get_files
    from pydicom import read_file

    first_path = next(get_files(dataset))
    return read_file(first_path)
def get_file(dataset):
    """Helper: return the first path that ``get_files`` yields for ``dataset``."""
    from deid.dicom import get_files

    return next(get_files(dataset))
def test_get_files_as_list(self):
    """Materialize ``get_files`` into a list and check the file counts.

    Expects 7 entries for ``self.dataset`` and 0 for ``self.tmpdir``.
    """
    print("Test test_get_files_as_list")
    print("Case 1: Test get files from dataset")
    from deid.dicom import get_files
    from deid.config import load_deid

    dataset_paths = list(get_files(self.dataset))
    self.assertEqual(len(dataset_paths), 7)

    print("Case 2: Ask for files from empty folder")
    empty_paths = list(get_files(self.tmpdir))
    self.assertEqual(len(empty_paths), 0)
def main(args, parser):
    """Run the GET and/or PUT identifiers action over a set of dicom files.

    args: parsed command line arguments; this function reads ``outfolder``,
          ``deid``, ``format``, ``input``, ``action``, ``ids`` and
          ``overwrite``.
    parser: the argument parser (not used in this function).
    """
    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): the message says "exiting" but nothing here stops
            # execution -- confirm whether bot.error is meant to abort.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format))

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)
    # Materialized because the same files are passed to both the GET and
    # the PUT step below.
    dicom_files = list(get_files(base))

    do_get = False
    do_put = False
    ids = None

    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # Only the GET step runs for this action, so say so.
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit(
                "To PUT without GET you must provide a json file with ids.")

        ids = args.ids

    # GET identifiers
    if do_get:
        ids = get_identifiers(dicom_files)

    # PUT identifiers, writing the cleaned files to the output folder
    if do_put:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )

        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
def main(args, parser):
    """inspect currently serves to inspect the header fields of a set
    of dicom files against a standard, and flag images that don't
    pass the different levels of criteria

    args: parsed command line arguments; this function reads ``deid``,
          ``format``, ``folder``, ``pattern`` and ``save``.
    parser: the argument parser (not used in this function).
    """
    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    dicom_files = list(
        get_files(base, pattern=args.pattern)
    )  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print("\nSUMMARY ================================\n")
    if result["clean"]:
        bot.custom(
            prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN"
        )

    if result["flagged"]:
        for group, files in result["flagged"].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        # base is a single path string when the demo dataset is used;
        # iterating it directly would walk its characters, so wrap it.
        folder_list = [base] if isinstance(base, str) else base
        folders = "-".join([os.path.basename(folder) for folder in folder_list])
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.write("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean in result["clean"]:
                filey.write("%s\tCLEAN\t\t\n" % clean)

            for flagged, details in result["flagged"].items():
                if details["flagged"] is True:
                    # Use a distinct name so the outer `result` dict is not
                    # rebound while its entries are being written.
                    for finding in details["results"]:
                        group = finding["group"]
                        reason = finding["reason"]
                        filey.write(
                            "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason)
                        )

        print("Result written to %s" % outfile)
def test_get_files(self):
    """Count the paths ``get_files`` yields by iterating over it.

    Expects 7 entries for ``self.dataset`` and 0 for ``self.tmpdir``.
    """
    print("Test test_get_files")
    print("Case 1: Test get files from dataset")
    from deid.dicom import get_files
    from deid.config import load_deid

    # Iterate rather than call len(), so no sized container is required.
    found = sum(1 for _ in get_files(self.dataset))
    self.assertEqual(found, 7)

    print("Case 2: Ask for files from empty folder")
    found = sum(1 for _ in get_files(self.tmpdir))
    self.assertEqual(found, 0)
def main(args, parser):
    '''inspect currently serves to inspect the header fields of a set
    of dicom files against a standard, and flag images that don't
    pass the different levels of criteria

    args: parsed command line arguments; this function reads ``deid``,
          ``format``, ``folder``, ``pattern`` and ``save``.
    parser: the argument parser (not used in this function).
    '''
    # Global output folder
    #output_folder = args.outfolder
    #if output_folder is None:
    #    output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params['format'], args.format))

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    dicom_files = list(get_files(
        base,
        pattern=args.pattern))  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print('\nSUMMARY ================================\n')
    if len(result['clean']) > 0:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(result['clean']),
                   color="CYAN")

    if len(result['flagged']) > 0:
        for group, files in result['flagged'].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        # base is a single path string when the demo dataset is used;
        # iterating it directly would walk its characters, so wrap it.
        folder_list = [base] if isinstance(base, str) else base
        folders = '-'.join([os.path.basename(folder) for folder in folder_list])
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))

        with open(outfile, 'w') as filey:
            filey.write('dicom_file\tpixels_flagged\tflag_list\treason\n')

            for clean in result['clean']:
                filey.write('%s\tCLEAN\t\t\n' % clean)

            for flagged, details in result['flagged'].items():
                if details['flagged'] is True:
                    # Use a distinct name so the outer `result` dict is not
                    # rebound while its entries are being written.
                    for finding in details['results']:
                        group = finding['group']
                        reason = finding['reason']
                        filey.write('%s\tFLAGGED\t%s\t%s\n' %
                                    (flagged, group, reason))

        print('Result written to %s' % outfile)
def get_dicom(dataset, return_dir=False):
    '''helper function to load a dicom

    dataset: passed straight to ``deid.dicom.get_files``.
    return_dir: when True, return the list of file paths instead of
                reading one of them.

    Returns the list of paths if ``return_dir`` is set, otherwise the
    result of ``pydicom.read_file`` on the first path.
    '''
    from deid.dicom import get_files
    from pydicom import read_file

    # list() so that indexing below works whether get_files returns a
    # list or yields paths lazily.
    dicom_files = list(get_files(dataset))
    if return_dir:
        return dicom_files
    return read_file(dicom_files[0])
def get_dicom(dataset, return_dir=False):
    """Helper to load a dicom, or to list every file found in ``dataset``.

    With ``return_dir`` set, everything ``get_files`` yields is returned
    as a list; otherwise only the first path is taken and read with
    ``pydicom.read_file``.
    """
    from deid.dicom import get_files
    from pydicom import read_file

    found = get_files(dataset)
    return list(found) if return_dir else read_file(next(found))
#!/usr/bin/env python3 # This is a complete example of inspecting pixels for PHI # based on a deid.dicom specification # https://pydicom.github.io/deid # This will get a set of example cookie dicoms from deid.dicom import get_files, has_burned_pixels from pydicom import read_file from deid.data import get_dataset from deid.logger import bot import os bot.level = 3 base = get_dataset("dicom-cookies") dicom_files = list( get_files(base)) # todo : consider using generator functionality results = has_burned_pixels(dicom_files=dicom_files, deid="examples/deid") # The dictionary has a "clean" list, and a "flagged" list, # Eg: # {'clean': [], # 'flagged': {'/home/vanessa/Documents/Dropbox/Code/dicom/deid/deid/data/dicom-cookies/image1.dcm': {'flagged': True, # 'results': [{'coordinates': [], # 'group': 'blacklist', # 'reason': ' ImageType missing or ImageType empty '}]},
# RADIOLOGY --------------------------------------------------- # This is an example script to upload data (images, text, metadata) to # google cloud storage and datastore. Data MUST be de-identified som.api.google.datastore import DataStoreClient as Client import os # Start google storage client for pmc-stanford client = Client(bucket_name='radiology') collection = client.create_collection(uid='IRB41449') # Let's load some dummy data from deid from deid.data import get_dataset from deid.dicom import get_files dicom_files = get_files(get_dataset('dicom-cookies')) # Now de-identify to get clean files from deid.dicom import get_identifiers, replace_identifiers ids=get_identifiers(dicom_files) updated_files = replace_identifiers(dicom_files=dicom_files, ids=ids) # Define some metadata for the entity metadata = { "source_id" : "cookieTumorDatabase", "id":"cookie-47", "Modality": "cookie"} # Upload the dataset client.upload_dataset(images=updated_files, collection=collection,
def main():
    """Detect and clean burned-in pixels for every dicom file in a folder.

    Reads ``folder``, ``outfolder``, ``deid`` and ``save`` from the parsed
    command line. Each file is run through ``DicomCleaner.detect`` and
    ``DicomCleaner.clean`` and saved according to ``args.save``: "dicom" or
    "png" write one output per image, and any other value falls through to
    a single pdf report. A json summary of the detection results is always
    written, and per-image outputs are moved into ``flagged`` / ``clean``
    sub-folders of the output folder.
    """
    parser = get_parser()

    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse reports both --help and usage errors via SystemExit;
        # this script maps them all to a zero exit status.
        sys.exit(0)

    from deid.dicom import (get_files, DicomCleaner)
    from logger import bot

    if args.folder is None:
        bot.error("Please provide a folder with dicom files with --input.")
        sys.exit(1)

    # get_files is an iterator, so materialize it once: the list gives us
    # the count and is what we loop over, without walking the folder twice.
    dicom_files = list(get_files(args.folder))
    number_files = len(dicom_files)

    # Create a Dicom Cleaner client
    client = DicomCleaner(output_folder=args.outfolder, deid=args.deid)
    bot.info('Processing [images]%s [output-folder]%s' %
             (number_files, client.output_folder))
    outcomes = {True: 'flagged', False: ' clean'}

    # Keep a list of flagged and clean
    flagged = []
    clean = []
    summary = {}

    # The save branch in the loop below treats anything other than "dicom"
    # or "png" as pdf, so decide once and open the report on the same rule.
    save_pdf = args.save not in ("dicom", "png")
    if save_pdf:
        pdf_report = '%s/deid-clean-%s.pdf' % (args.outfolder, number_files)
        pp = PdfPages(pdf_report)

    # Perform detection one at a time
    for dicom_file in dicom_files:

        dicom_name = os.path.basename(dicom_file)

        # detect --> clean
        result = client.detect(dicom_file)
        client.clean()
        summary[dicom_name] = result

        # Generate title/description for result
        title = '%s: %s' % (outcomes[result['flagged']], dicom_name)
        bot.info(title)

        # How does the user want to save data?
        if args.save == "dicom":
            outfile = client.save_dicom()

        elif args.save == "png":
            outfile = client.save_png(title=title)

        # pdf (default)
        else:
            plt = client.get_figure(title=title)
            fig = plt.gcf()
            pp.savefig(fig)
            plt.close(fig)

        # Whether dicom or png, append to respective list
        if not save_pdf:
            if result['flagged']:
                flagged.append(outfile)
            else:
                clean.append(outfile)

    # Save summary json file
    summary_json = '%s/deid-clean-%s.json' % (args.outfolder, number_files)
    write_json(summary, summary_json)
    bot.info('json data written to %s' % summary_json)

    # When we get here, if saving pdf, close it.
    if save_pdf:
        bot.info('pdf report written to %s' % pdf_report)
        pp.close()

    # Otherwise, move files into respective folders
    else:
        move_files(files=flagged, dest='%s/flagged' % args.outfolder)
        move_files(files=clean, dest='%s/clean' % args.outfolder)
# This is a complete example of using the cleaning client to inspect
# and clean pixels
# based on a deid.dicom specification
# https://pydicom.github.io/deid

#########################################
# 1. Get List of Files
#########################################

# This will get a set of example cookie dicoms.
# DicomCleaner is imported here as well, since the client is created below.
from deid.dicom import get_files, DicomCleaner
from deid.data import get_dataset

base = get_dataset('dicom-cookies')
# list() so that indexing works even when get_files yields paths lazily
dicom_files = list(get_files(base))
dicom_file = dicom_files[3]

#########################################
# 2. Create Client
#########################################

client = DicomCleaner()

# You can set the output folder if you want, otherwise tmpdir is used
client = DicomCleaner(output_folder='/home/vanessa/Desktop')

# Steps are to detect, clean, and save in desired format, one image
# at a time.
# client.detect(dicom_file)
# client.clean()
def get_file(dataset, image, tempdir=None):
    """Helper: return the first path ``get_files`` yields for this request.

    ``image`` is forwarded to ``get_files`` as ``pattern`` and ``tempdir``
    is forwarded unchanged.
    """
    from deid.dicom import get_files

    return next(get_files(dataset, pattern=image, tempdir=tempdir))
def main():
    """Scan a folder of dicom files for text and print a summary count.

    Reads ``folder``, ``verbose`` and ``detect`` from the parsed command
    line. Each file is wrapped in ``UserData``, text candidates are
    extracted and classified with two pickled models, and the file is
    counted as clean, detected or skipped. Unless ``args.detect`` is set,
    a preprocessed image (and a cleaned image for flagged files) is also
    written under ``/data``.
    """
    parser = get_parser()

    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse reports both --help and usage errors via SystemExit;
        # this script maps them all to a zero exit status.
        sys.exit(0)

    from deid.dicom import get_files
    from logger import bot

    if args.folder is None:
        bot.error("Please provide a folder with dicom files with --input.")
        sys.exit(1)

    # Materialize once so the count and the loop share a single folder walk.
    dicom_files = list(get_files(args.folder))
    number_files = len(dicom_files)

    ##### the following includes all steps to go from raw images to predictions
    ##### pickle models are passed as argument to select_text_among_candidates
    ##### and classify_text methods are result of a previously implemented pipeline.
    ##### just for the purpose of clarity the previous code is provided.
    ##### The commented code is the one necessary to get the models trained.

    # Keep a record for the user
    result = {'clean': 0, 'detected': 0, 'skipped': 0, 'total': number_files}

    # For each file, determine if PHI, for now just alert user
    for dicom_file in dicom_files:

        dicom_name = os.path.basename(dicom_file)

        # Try isn't a great approach, but if we log the skipped, we can debug
        try:
            dicom = UserData(dicom_file, verbose=args.verbose)

            # plots preprocessed image
            if not args.detect:
                dicom_save_name = '/data/%s_preprocessed.png' % dicom_name
                dicom.save_preprocessed_image(dicom_save_name)

            # detects objects in preprocessed image
            candidates = dicom.get_text_candidates()
            clean = True

            if candidates is not None:

                if args.verbose:
                    number_candidates = len(candidates['coordinates'])
                    bot.debug("%s has %s text candidates" %
                              (dicom_name, number_candidates))

                # plots objects detected
                # dicom.plot_to_check_save(candidates,
                #                          'Total Objects Detected',
                #                          '/data/lao-detect-check.png')

                # selects objects containing text
                saved_model = '/code/data/linearsvc-hog-fulltrain2-90.pickle'
                maybe_text = dicom.select_text_among_candidates(saved_model)

                # plots objects after text detection
                # dicom.plot_to_check_save(maybe_text,
                #                          'Objects Containing Text Detected',
                #                          '/data/lao-detect-candidates.png')

                # classifies single characters
                saved_model = '/code/data/linearsvc-hog-fulltrain36-90.pickle'
                classified = dicom.classify_text(saved_model)

                if args.verbose:
                    number_text = len(classified['coordinates'])
                    bot.debug("%s has %s classified text" %
                              (dicom_name, number_text))

                # NOTE(review): this checks len(classified), while the debug
                # line above counts classified['coordinates'] -- confirm
                # which of the two is the intended "text found" signal.
                if len(classified) > 0:
                    if args.verbose:
                        bot.warning("%s flagged for text content." %
                                    dicom_name)
                    clean = False
                else:
                    bot.info("%s is clean" % dicom_name)

            else:
                bot.info("%s is clean" % dicom_name)

            if clean:
                result['clean'] += 1
            else:
                result['detected'] += 1

            # plots letters after classification
            # dicom.plot_to_check_save(classified,
            #                          'Single Character Recognition',
            #                          '/data/lao-detect-letters.png')

            if not clean and not args.detect:
                dicom.scrape_save('/data/%s_cleaned.png' % dicom_name)

        except Exception:
            # Best effort: log and count the failure, then move on. Catching
            # Exception (not everything) lets Ctrl-C and sys.exit still
            # stop the run.
            bot.error("\nProblem loading %s, skipping" % dicom_name)
            result['skipped'] += 1
        print('============================================================')

    # Final result
    print('\n=======================FINALRESULT==========================')
    print(os.path.basename(args.folder))
    print("DETECTED: %s" % result['detected'])
    print("SKIPPED:  %s" % result['skipped'])
    print("CLEAN:    %s" % result['clean'])
    print("TOTAL:    %s" % result['total'])