def _init_deid(self, deid=None, base=False, default_base="dicom"):
    """Initialize the recipe with one or more deids, optionally adding a base.

    This function is called at init time. If you need to add or work with
    already loaded configurations, use add/remove.

    Parameters
    ==========
    deid: the deid recipe (or recipes) files to use. If more than one is
          provided, should be done in order of preference for load
          (later in the list overrides earlier loaded). A single value
          that is not a list is wrapped into a one-element list.
    base: if True, default_base is added to the list of recipes to load.
    default_base: the name of the base recipe added when base is True.
    """
    if deid is None:
        deid = []
    elif isinstance(deid, list):
        # Work on a copy so appending the base never mutates the caller's list
        deid = list(deid)
    else:
        deid = [deid]

    if base is True:
        # NOTE(review): the base is appended, so it is the LAST entry handed
        # to load_combined_deid. Confirm this matches the intended precedence
        # between the base and user customizations.
        deid.append(default_base)

    self._files = deid

    if not deid:
        bot.info("You can add custom deid files with .load().")
    self.deid = load_combined_deid(deid)
def validate_dicoms(dcm_files, force=False):
    '''validate dicoms will test opening one or more dicom files,
    and return a list of valid files.

    Parameters
    ==========
    dcm_files: one or more dicom files to test. A single value that is
               not a list is wrapped into a one-element list.
    force: passed through to read_file as its "force" argument.

    Returns
    =======
    valids: the subset of dcm_files (same order) that could be opened and
            read without raising an exception.
    '''
    if not isinstance(dcm_files, list):
        dcm_files = [dcm_files]

    valids = []
    bot.debug("Checking %s dicom files for validation." % (len(dcm_files)))

    for dcm_file in dcm_files:
        try:
            with open(dcm_file, 'rb') as filey:
                read_file(filey, force=force)
        except Exception:
            # Best effort: any read failure means "not valid", but do not
            # swallow KeyboardInterrupt / SystemExit like a bare except would
            bot.warning('Cannot read input file {0!s}, skipping.'.format(dcm_file))
        else:
            valids.append(dcm_file)

    bot.info("Found %s valid dicom files" % (len(valids)))
    return valids
def main(args, parser):
    """Entry point for the identifiers action: GET and/or PUT identifiers.

    Parameters
    ==========
    args: parsed arguments. The attributes read here are outfolder, deid,
          format, input, action ("all", "get" or "put"), ids and overwrite.
    parser: the argument parser (not used in this function).
    """
    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): the message says "exiting" but only bot.error is
            # called here; confirm whether the logger terminates the program.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)
    dicom_files = list(get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None

    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # Only a GET is performed for this action, so say so in the log
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit("To PUT without GET you must provide a json file with ids.")
        ids = args.ids

    # GET identifiers
    if do_get is True:
        ids = get_identifiers(dicom_files)

    # PUT identifiers: write cleaned files to the output folder
    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )
        bot.info("%s %s files at %s" % (len(cleaned_files), args.format, output_folder))
def main(args, parser):
    """inspect currently serves to inspect the header fields of a set
    of dicom files against a standard, and flag images that don't
    pass the different levels of criteria

    Parameters
    ==========
    args: parsed arguments. The attributes read here are deid, format,
          folder, pattern and save.
    parser: the argument parser (not used in this function).
    """
    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    dicom_files = list(
        get_files(base, pattern=args.pattern)
    )  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print("\nSUMMARY ================================\n")
    if result["clean"]:
        bot.custom(
            prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN"
        )

    if result["flagged"]:
        for group, files in result["flagged"].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        # base is a single path string when we fell back to the demo dataset
        # (or when one folder was given); iterating a string would yield
        # characters, so normalize to a list of folders first.
        folder_list = base if isinstance(base, (list, tuple)) else [base]
        folders = "-".join(os.path.basename(folder) for folder in folder_list)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.write("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean in result["clean"]:
                filey.write("%s\tCLEAN\t\t\n" % clean)

            for flagged, details in result["flagged"].items():
                if details["flagged"] is True:
                    # Distinct loop name: do not rebind "result" while its
                    # contents are still being iterated
                    for finding in details["results"]:
                        group = finding["group"]
                        reason = finding["reason"]
                        filey.write(
                            "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason)
                        )

            print("Result written to %s" % outfile)
def main(args, parser):
    '''inspect currently serves to inspect the header fields of a set
    of dicom files against a standard, and flag images that don't
    pass the different levels of criteria

    Parameters
    ==========
    args: parsed arguments. The attributes read here are deid, format,
          folder, pattern and save.
    parser: the argument parser (not used in this function).
    '''
    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params['format'], args.format))

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    dicom_files = list(get_files(
        base, pattern=args.pattern))  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print('\nSUMMARY ================================\n')
    if len(result['clean']) > 0:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(result['clean']),
                   color="CYAN")

    if len(result['flagged']) > 0:
        for group, files in result['flagged'].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        # base is a single path string when we fell back to the demo dataset
        # (or when one folder was given); iterating a string would yield
        # characters, so normalize to a list of folders first.
        folder_list = base if isinstance(base, (list, tuple)) else [base]
        folders = '-'.join(os.path.basename(folder) for folder in folder_list)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))

        with open(outfile, 'w') as filey:
            filey.write('dicom_file\tpixels_flagged\tflag_list\treason\n')

            for clean in result['clean']:
                filey.write('%s\tCLEAN\t\t\n' % clean)

            for flagged, details in result['flagged'].items():
                if details['flagged'] is True:
                    # Distinct loop name: do not rebind "result" while its
                    # contents are still being iterated
                    for finding in details['results']:
                        group = finding['group']
                        reason = finding['reason']
                        filey.write('%s\tFLAGGED\t%s\t%s\n' %
                                    (flagged, group, reason))

            print('Result written to %s' % outfile)
def save_animation(self, output_folder=None, image_type="cleaned", title=None):
    """save an original or cleaned animation of a dicom. If there are not
    enough frames, then save_png should be used instead.

    Parameters
    ==========
    output_folder: passed to self._get_clean_name to build the mp4 path.
    image_type: name of the attribute on self holding the image array
                (default "cleaned").
    title: title stored in the movie metadata; "deid-animation" if not set.

    Returns
    =======
    The movie file path when an animation was written, otherwise None
    (a warning is logged instead).
    """
    if not hasattr(self, image_type):
        bot.warning("use detect() --> clean() before saving is possible.")
        return None

    import matplotlib
    from matplotlib import animation

    # Set the writer on the global rcParams. Going through
    # matplotlib.animation.rcParams depends on a re-export that is not
    # available in every matplotlib release.
    matplotlib.rcParams["animation.writer"] = "ffmpeg"

    image = getattr(self, image_type)

    # If we have rgb, choose a channel
    if len(image.shape) == 4:
        channel = random.choice(range(image.shape[3]))
        bot.warning("Selecting channel %s for rendering" % channel)
        image = image[:, :, :, channel]

    # Now we expect 3D, we can animate one dimension over time
    if len(image.shape) != 3:
        # NOTE(review): the message says "4D", but 3D arrays are animated too
        bot.warning("save_animation() is only for 4D data. Use save_png instead.")
        return None

    movie_file = self._get_clean_name(output_folder, "mp4")

    # First set up the figure, the axis, and the plot element we want to animate
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))
    plt.close()
    # NOTE(review): these are plain attribute assignments (not set_xlim /
    # set_ylim), so they do not change the axes limits; kept as in the original
    ax.xlim = (0, image.shape[1])
    ax.ylim = (0, image.shape[2])
    ax.set_xticks([])
    ax.set_yticks([])
    img = ax.imshow(image[0, :, :].T, cmap="gray")
    img.set_interpolation("nearest")

    # The animation function should take an index i
    def animate(i):
        img.set_data(image[i, :, :].T)
        sys.stdout.flush()
        return (img,)

    bot.info("Generating animation...")
    anim = animation.FuncAnimation(
        fig, animate, frames=image.shape[0], interval=50, blit=True
    )
    anim.save(
        movie_file,
        writer="ffmpeg",
        fps=10,
        dpi=100,
        metadata={"title": title or "deid-animation"},
    )
    return movie_file
def save_identifiers(ids, output_folder=None):
    '''save_identifiers will pickle ids to a file named deid-ids.pkl
    in the output folder.

    Parameters
    ==========
    ids: the object to pickle.
    output_folder: folder to write deid-ids.pkl into. If None, a new
                   temporary directory is created with tempfile.mkdtemp().

    Returns
    =======
    The return value of pickle.dump (None). The path of the written file
    is only reported through the log message.
    '''
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    output_file = "%s/deid-ids.pkl" % output_folder
    bot.info("Writing ids to %s" % output_file)

    # Context manager so the handle is flushed and closed deterministically
    with open(output_file, "wb") as filey:
        result = pickle.dump(ids, filey)
    return result
def get_dataset(dataset=None):
    '''Look up the path of a dataset shipped with the application.

    The label is matched case-insensitively after removing any file
    extension. When the label is missing or unknown, the valid labels
    are logged and nothing (None) is returned.
    '''
    install_root = get_installdir()
    known = {'dicom-cookies': '%s/data/dicom-cookies' % install_root}

    if dataset is not None:
        # Tolerate a label given with an extension
        label, _extension = os.path.splitext(dataset)
        label = label.lower()
        if label in known:
            return known[label]

    names = ','.join(list(known.keys()))
    bot.info("Valid datasets include: %s" % (names))
def get_dataset(dataset=None):
    """Look up the path of a dataset shipped with the application.

    The label is matched case-insensitively after removing any file
    extension. When the label is missing or unknown, the valid labels
    are logged and nothing (None) is returned.
    """
    install_root = get_installdir()
    known = {
        label: os.path.join(install_root, "data", label)
        for label in ("dicom-cookies", "animals", "humans")
    }

    if dataset is not None:
        # Tolerate a label given with an extension
        stem, _extension = os.path.splitext(dataset)
        stem = stem.lower()
        if stem in known:
            return known[stem]

    names = ",".join(list(known.keys()))
    bot.info("Valid datasets include: %s" % (names))
def clean(self):
    '''Zero out the detected coordinate regions of the dicom image.

    Reads self.dicom_file, stores its pixel array as self.original and a
    copy with every region from self.results['results'] set to 0 as
    self.cleaned. If there are no results yet, a warning is logged and
    nothing else happens. Nothing is returned.

    Each coordinate set is a comma separated string of four integers; the
    2nd and 4th values bound the first array axis and the 1st and 3rd
    values bound the second array axis.
    '''
    if not self.results:
        bot.warning('Use %s.detect() to find coordinates first.' % self)
        return

    bot.info('Scrubbing %s.' % self.dicom_file)

    # Keep the untouched pixels next to the copy we are going to edit
    dataset = read_file(self.dicom_file, force=True)
    self.original = dataset.pixel_array
    self.cleaned = self.original.copy()

    # Parse every "a,b,c,d" string into [a, b, c, d] before touching pixels
    boxes = [
        [int(value) for value in box.split(',')]
        for entry in self.results['results']
        if len(entry['coordinates']) > 0
        for box in entry['coordinates']
    ]

    for lo_inner, lo_outer, hi_inner, hi_outer in boxes:
        # fill the region with black
        self.cleaned[lo_outer:hi_outer, lo_inner:hi_inner] = 0
def load_identifiers(identifiers_file):
    '''load_identifiers (currently) just loads a pickle file

    Parameters
    ==========
    identifiers_file: path of the pickle file to read.

    Returns
    =======
    The unpickled object.
    '''
    bot.info("Loading %s" % identifiers_file)

    # SECURITY NOTE: unpickling can execute arbitrary code. Only load
    # identifier files that come from a trusted source.
    with open(identifiers_file, "rb") as filey:
        result = pickle.load(filey)
    return result
def clean(self, fix_interpretation=True, pixel_data_attribute="PixelData"):
    """Black out the flagged coordinate regions of the dicom image.

    Reads self.dicom_file, stores its pixel array as self.original, and
    stores a copy with every region listed in self.results["results"]
    set to 0 as self.cleaned. If there are no results yet, a warning is
    logged and nothing else happens. Nothing is returned.

    Parameters
    ==========
    fix_interpretation: fix the photometric interpretation if found off
                        (uncompressed YBR_FULL_422 data that is long enough
                        to be full, non-subsampled data is read as RGB).
    pixel_data_attribute: name of the dataset attribute holding the raw
                          pixel bytes (used to measure their length).
    """
    if not self.results:
        bot.warning("Use %s.detect() to find coordinates first." % self)
        return

    bot.info("Scrubbing %s." % self.dicom_file)

    # Load in dicom file, and image data
    dicom = read_file(self.dicom_file, force=True)
    pixel_data = getattr(dicom, pixel_data_attribute)

    # Get expected and actual length of the pixel data (bytes, expected
    # does not include trailing null byte)
    expected_length = get_expected_length(dicom)
    actual_length = len(pixel_data)
    full_length = expected_length / 2 * 3  # upsampled data is a third larger
    full_length += 1 if full_length % 2 else 0  # pad odd lengths to even

    # If we have YBR_FULL_422, must be RGB to obtain pixel data
    if (
        not dicom.file_meta.TransferSyntaxUID.is_compressed
        and dicom.PhotometricInterpretation == "YBR_FULL_422"
        and fix_interpretation
        and actual_length >= full_length
    ):
        bot.warning(
            "Updating dicom.PhotometricInterpretation to RGB, set fix_interpretation to False to skip."
        )
        photometric_original = dicom.PhotometricInterpretation
        dicom.PhotometricInterpretation = "RGB"
        try:
            self.original = dicom.pixel_array
        finally:
            # Restore the header value even if decoding fails
            dicom.PhotometricInterpretation = photometric_original
    else:
        self.original = dicom.pixel_array

    # Work out which two axes are the image plane. A 3D array is ambiguous:
    # one sample per pixel means (frames, rows, columns), several samples
    # mean (rows, columns, samples).
    shape = self.original.shape
    ndim = len(shape)
    samples = getattr(dicom, "SamplesPerPixel", 1)
    if ndim == 4 or (ndim == 3 and samples == 1):
        row_axis = 1
    elif ndim in (2, 3):
        row_axis = 0
    else:
        bot.warning("Pixel array dimension %s is not recognized." % (str(shape)))
        return
    rows, columns = shape[row_axis], shape[row_axis + 1]

    # Compile coordinates from result
    # Coordinates expected to be list separated by commas
    coordinates = [
        [int(x) for x in coordinate_set.split(",")]
        for item in self.results["results"]
        for coordinate_set in item["coordinates"]
    ]  # [[1,2,3,4],...[1,2,3,4]]

    # Instead of writing directly to data, create a mask: 1 keeps a pixel,
    # 0 turns it black. Start by keeping everything.
    mask = numpy.ones((rows, columns), dtype=numpy.uint8)
    for minr, minc, maxr, maxc in coordinates:
        # Flagged region: values set to 0 to be black
        mask[minc:maxc, minr:maxr] = 0

    # Give the mask singleton axes for frames / samples so broadcasting
    # applies it to every frame and every channel, whatever their number
    broadcast_shape = [1] * ndim
    broadcast_shape[row_axis] = rows
    broadcast_shape[row_axis + 1] = columns
    self.cleaned = mask.reshape(broadcast_shape) * self.original
def check_item(item):
    '''Log every field of item as a "key: value" line via bot.info.
    '''
    for name, value in item.items():
        line = "%s: %s" % (name, value)
        bot.info(line)
def clean(self, fix_interpretation=True, pixel_data_attribute="PixelData"):
    """Apply the keep / remove coordinate regions to the dicom image.

    Reads self.dicom_file, stores its pixel array as self.original, and
    stores the masked array as self.cleaned. Every entry of
    item["coordinates"] in self.results["results"] is a pair
    [value, coordinates]: value 1 keeps the region, value 0 blacks it out,
    and entries are applied in order so later ones override earlier ones.
    Coordinates are either "all" (the whole image) or a comma separated
    string of four integers, or a list of such strings. If there are no
    results yet, a warning is logged and nothing else happens. Nothing is
    returned.

    Parameters
    ==========
    fix_interpretation: fix the photometric interpretation if found off
                        (uncompressed YBR_FULL_422 data that is long enough
                        to be full, non-subsampled data is read as RGB).
    pixel_data_attribute: name of the dataset attribute holding the raw
                          pixel bytes (used to measure their length).
    """
    if not self.results:
        bot.warning("Use %s.detect() to find coordinates first." % self)
        return

    bot.info("Scrubbing %s." % self.dicom_file)

    # Load in dicom file, and image data
    dicom = read_file(self.dicom_file, force=True)
    pixel_data = getattr(dicom, pixel_data_attribute)

    # Get expected and actual length of the pixel data (bytes, expected
    # does not include trailing null byte)
    expected_length = get_expected_length(dicom)
    actual_length = len(pixel_data)
    full_length = expected_length / 2 * 3  # upsampled data is a third larger
    full_length += 1 if full_length % 2 else 0  # pad odd lengths to even

    # If we have YBR_FULL_422, must be RGB to obtain pixel data
    if (
        not dicom.file_meta.TransferSyntaxUID.is_compressed
        and dicom.PhotometricInterpretation == "YBR_FULL_422"
        and fix_interpretation
        and actual_length >= full_length
    ):
        bot.warning(
            "Updating dicom.PhotometricInterpretation to RGB, set fix_interpretation to False to skip."
        )
        photometric_original = dicom.PhotometricInterpretation
        dicom.PhotometricInterpretation = "RGB"
        try:
            self.original = dicom.pixel_array
        finally:
            # Restore the header value even if decoding fails
            dicom.PhotometricInterpretation = photometric_original
    else:
        self.original = dicom.pixel_array

    # Work out which two axes are the image plane. A 3D array is ambiguous:
    # one sample per pixel means (frames, rows, columns), several samples
    # mean (rows, columns, samples).
    shape = self.original.shape
    ndim = len(shape)
    samples = getattr(dicom, "SamplesPerPixel", 1)
    if ndim == 4 or (ndim == 3 and samples == 1):
        row_axis = 1
    elif ndim in (2, 3):
        row_axis = 0
    else:
        bot.warning("Pixel array dimension %s is not recognized." % (str(shape)))
        return
    rows, columns = shape[row_axis], shape[row_axis + 1]

    # Compile coordinates from result, generate list of tuples with
    # coordinate and value. keepcoordinates == 1 (included in mask) and
    # coordinates == 0 (remove).
    coordinates = []
    for item in self.results["results"]:
        # We iterate through coordinates in order specified in file
        for coordinate_set in item.get("coordinates", []):
            # Each is a list with [value, coordinate]
            mask_value, new_coordinates = coordinate_set
            if not isinstance(new_coordinates, list):
                new_coordinates = [new_coordinates]

            for new_coordinate in new_coordinates:
                if new_coordinate.lower() == "all":
                    # An "all" indicates applying to the entire image plane:
                    # minr, minc, maxr, maxc = [0, 0, columns, rows]
                    box = [0, 0, columns, rows]
                else:
                    box = [int(x) for x in new_coordinate.split(",")]
                coordinates.append((mask_value, box))  # [(1, [1,2,3,4]),...]

    # Instead of writing directly to data, create a mask of 1s (start
    # keeping all), then apply the coordinates: 1==keep, 0==clean
    mask = numpy.ones((rows, columns), dtype=numpy.uint8)
    for coordinate_value, (minr, minc, maxr, maxc) in coordinates:
        mask[minc:maxc, minr:maxr] = coordinate_value

    # Give the mask singleton axes for frames / samples so broadcasting
    # applies it to every frame and every channel, whatever their number
    broadcast_shape = [1] * ndim
    broadcast_shape[row_axis] = rows
    broadcast_shape[row_axis + 1] = columns
    self.cleaned = mask.reshape(broadcast_shape) * self.original