Example 1
    def _init_deid(self, deid=None, base=False, default_base="dicom"):
        """initalize the recipe with one or more deids, optionally including 
           the default. This function is called at init time. If you need to add
           or work with already loaded configurations, use add/remove 
    
           Parameters
           ==========
           deid: the deid recipe (or recipes) files to use. If more than one
                 is provided, should be done in order of preference for load
                 (later in the list overrides earlier loaded).
           default_base: load the default base before the user customizations. 

        """
        if deid is None:
            deid = []

        if not isinstance(deid, list):
            deid = [deid]

        if base is True:
            deid.append(default_base)

        self._files = deid

        if len(deid) == 0:
            bot.info("You can add custom deid files with .load().")
        self.deid = load_combined_deid(deid)
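
A minimal usage sketch, assuming this method backs deid's DeidRecipe constructor; the import path and recipe file names below are assumptions for illustration:

from deid.config import DeidRecipe  # assumed import path

# Later recipes override earlier ones; base=True also appends the default "dicom" base.
recipe = DeidRecipe(deid=["site.deid", "study.deid"], base=True)  # hypothetical recipe files
print(recipe.deid)  # the combined configuration returned by load_combined_deid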
Example 2
def validate_dicoms(dcm_files, force=False):
    '''validate dicoms will test opening one or more dicom files, and return a list
       of valid files.

       Parameters
       ==========
       dcm_files: one or more dicom files to test
       force: force reading of files without a valid dicom header

    '''
    if not isinstance(dcm_files, list):
        dcm_files = [dcm_files]

    valids = []

    bot.debug("Checking %s dicom files for validation." % len(dcm_files))
    for dcm_file in dcm_files:

        try:
            with open(dcm_file, 'rb') as filey:
                read_file(filey, force=force)
            valids.append(dcm_file)
        except Exception:
            bot.warning('Cannot read input file {0!s}, skipping.'.format(dcm_file))


    bot.info("Found %s valid dicom files" %(len(valids)))
    return valids
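
A short usage sketch for validate_dicoms; the import path and file names are assumptions for illustration:

from deid.dicom.validate import validate_dicoms  # assumed import path

candidates = ["scan-001.dcm", "notes.txt"]  # hypothetical inputs
valid = validate_dicoms(candidates, force=False)
print(valid)  # only the files pydicom could read are returned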
Example 3
def main(args, parser):

    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params["format"], args.format))
    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)
    dicom_files = list(
        get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None
    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        bot.info("GET and PUT identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit(
                "To PUT without GET you must provide a json file with ids.")

        ids = args.ids

    # GET identifiers

    if do_get is True:
        ids = get_identifiers(dicom_files)

    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )

        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
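
The same GET/PUT flow, sketched as direct function calls mirroring main above; the import path, file list, and output folder are assumptions for illustration:

from deid.dicom import get_identifiers, replace_identifiers  # assumed import path

dicom_files = ["scan-001.dcm"]          # hypothetical input files
ids = get_identifiers(dicom_files)      # GET step
cleaned_files = replace_identifiers(
    dicom_files=dicom_files,
    ids=ids,
    output_folder="/tmp/cleaned",       # hypothetical output folder
)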
Example 4
def main(args, parser):
    """inspect currently serves to inspect the header fields of a set
       of dicom files against a standard, and flag images that don't
       pass the different levels of criteria
    """

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )
    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    dicom_files = list(
        get_files(base, pattern=args.pattern)
    )  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print("\nSUMMARY ================================\n")
    if result["clean"]:
        bot.custom(
            prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN"
        )

    if result["flagged"]:
        for group, files in result["flagged"].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        folders = "-".join([os.path.basename(folder) for folder in base])
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.writelines("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean in result["clean"]:
                filey.writelines("%s\tCLEAN\t\t\n" % clean)

            for flagged, details in result["flagged"].items():
                if details["flagged"] is True:
                    for result in details["results"]:
                        group = result["group"]
                        reason = result["reason"]
                        filey.writelines(
                            "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason)
                        )

            print("Result written to %s" % outfile)
Example 5
def main(args, parser):
    '''inspect currently serves to inspect the header fields of a set
    of dicom files against a standard, and flag images that don't
    pass the different levels of criteria
    '''

    # Global output folder
    #output_folder = args.outfolder
    #if output_folder is None:
    #    output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params['format'], args.format))
    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    dicom_files = list(get_files(
        base,
        pattern=args.pattern))  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print('\nSUMMARY ================================\n')
    if len(result['clean']) > 0:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(result['clean']),
                   color="CYAN")

    if len(result['flagged']) > 0:
        for group, files in result['flagged'].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        folders = os.path.basename(base)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))
        with open(outfile, 'w') as filey:
            filey.writelines('dicom_file\tpixels_flagged\tflag_list\treason\n')
            for clean in result['clean']:
                filey.writelines('%s\tCLEAN\t\t\n' % clean)
            for flagged, details in result['flagged'].items():
                if details['flagged'] is True:
                    for entry in details['results']:
                        group = entry['group']
                        reason = entry['reason']
                        filey.writelines('%s\tFLAGGED\t%s\t%s\n' %
                                         (flagged, group, reason))

            print('Result written to %s' % outfile)
Example 6
    def save_animation(self, output_folder=None, image_type="cleaned", title=None):
        """save an original or cleaned animation of a dicom. If there are not
        enough frames, then save_png should be used instead.
        """
        if hasattr(self, image_type):
            from matplotlib import animation, rc

            animation.rcParams["animation.writer"] = "ffmpeg"

            image = getattr(self, image_type)

            # If we have rgb, choose a channel

            if len(image.shape) == 4:
                channel = random.choice(range(image.shape[3]))
                bot.warning("Selecting channel %s for rendering" % channel)
                image = image[:, :, :, channel]

            # Now we expect 3D, we can animate one dimension over time
            if len(image.shape) == 3:
                movie_file = self._get_clean_name(output_folder, "mp4")

                # First set up the figure, the axis, and the plot element we want to animate
                fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))
                plt.close()
                ax.set_xlim(0, image.shape[1])
                ax.set_ylim(0, image.shape[2])
                ax.set_xticks([])
                ax.set_yticks([])
                img = ax.imshow(image[0, :, :].T, cmap="gray")
                img.set_interpolation("nearest")

                # The animation function should take an index i
                def animate(i):
                    img.set_data(image[i, :, :].T)
                    sys.stdout.flush()
                    return (img,)

                bot.info("Generating animation...")
                anim = animation.FuncAnimation(
                    fig, animate, frames=image.shape[0], interval=50, blit=True
                )
                anim.save(
                    movie_file,
                    writer="ffmpeg",
                    fps=10,
                    dpi=100,
                    metadata={"title": title or "deid-animation"},
                )
                return movie_file
            else:
                bot.warning(
                    "save_animation() requires data with multiple frames (3D or 4D). Use save_png instead."
                )
        else:
            bot.warning("use detect() --> clean() before saving is possible.")
Example 7
def save_identifiers(ids, output_folder=None):
    '''save_identifiers will write the ids data structure to a pickle
    file in the output folder (a temporary directory if none is given)
    '''
    if output_folder is None:
        output_folder = tempfile.mkdtemp()
    output_file = "%s/deid-ids.pkl" % output_folder
    bot.info("Writing ids to %s" % output_file)
    with open(output_file, "wb") as filey:
        pickle.dump(ids, filey)
    return output_file
Example 8
def get_dataset(dataset=None):
    '''get_dataset will return some data provided by the application,
    based on a user-provided label. In the future, we can add https endpoints
    to retrieve online datasets.
    '''
    data_base = get_installdir()
    valid_datasets = {'dicom-cookies': '%s/data/dicom-cookies' % data_base}

    if dataset is not None:
        # In case the user gave an extension
        dataset = os.path.splitext(dataset)[0].lower()
        if dataset in valid_datasets:
            return valid_datasets[dataset]

    bot.info("Valid datasets include: %s" %
             (','.join(list(valid_datasets.keys()))))
Example 9
def get_dataset(dataset=None):
    """get_dataset will return some data provided by the application,
    based on a user-provided label. In the future, we can add https endpoints
    to retrieve online datasets.
    """
    data_base = get_installdir()
    valid_datasets = {
        "dicom-cookies": os.path.join(data_base, "data", "dicom-cookies"),
        "animals": os.path.join(data_base, "data", "animals"),
        "humans": os.path.join(data_base, "data", "humans"),
    }

    if dataset is not None:
        # In case the user gave an extension
        dataset = os.path.splitext(dataset)[0].lower()
        if dataset in valid_datasets:
            return valid_datasets[dataset]

    bot.info("Valid datasets include: %s" %
             (",".join(list(valid_datasets.keys()))))
Example 10
    def clean(self):
        '''
        take the dicom image and the pixel coordinates found by detect(),
        and store a scrubbed copy of the pixel data in self.cleaned
        (the original pixel data is kept in self.original)
        '''

        if not self.results:
            bot.warning('Use %s.detect() to find coordinates first.' % self)

        else:
            bot.info('Scrubbing %s.' % self.dicom_file)

            # Load in dicom file, and image data
            dicom = read_file(self.dicom_file, force=True)

            # We will set original image to image, cleaned to clean
            self.original = dicom.pixel_array
            self.cleaned = self.original.copy()

            # Compile coordinates from result
            coordinates = []
            for item in self.results['results']:
                if len(item['coordinates']) > 0:
                    for coordinate_set in item['coordinates']:
                        # Coordinates expected to be list separated by commas
                        new_coordinates = [
                            int(x) for x in coordinate_set.split(',')
                        ]
                        coordinates.append(
                            new_coordinates)  # [[1,2,3,4],...[1,2,3,4]]

            for coordinate in coordinates:
                minr, minc, maxr, maxc = coordinate
                self.cleaned[minc:maxc,
                             minr:maxr] = 0  # should fill with black
Example 11
def load_identifiers(identifiers_file):
    '''load_identifiers (currently) just loads a pickle file
    '''
    bot.info("Loading %s" % identifiers_file)
    with open(identifiers_file, "rb") as filey:
        result = pickle.load(filey)
    return result
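
A round-trip sketch combining save_identifiers (Example 7) with load_identifiers; the ids dictionary below is a hypothetical placeholder:

ids = {"scan-001.dcm": {"PatientID": "cookie-47"}}  # hypothetical identifiers
save_identifiers(ids, output_folder="/tmp")         # writes /tmp/deid-ids.pkl
restored = load_identifiers("/tmp/deid-ids.pkl")
assert restored == ids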
Example 12
    def clean(self, fix_interpretation=True, pixel_data_attribute="PixelData"):
        """
        take a dicom image and a list of pixel coordinates, and return
        a cleaned file (if output file is specified) or simply plot 
        the cleaned result (if no file is specified)
    
        Parameters
        ==========
            add_padding: add N=margin pixels of padding
            margin: pixels of padding to add, if add_padding True
            fix_interpretation: fix the photometric interpretation if found off
        """

        if not self.results:
            bot.warning("Use %s.detect() to find coordinates first." % self)

        else:
            bot.info("Scrubbing %s." % self.dicom_file)

            # Load in dicom file, and image data
            dicom = read_file(self.dicom_file, force=True)
            pixel_data = getattr(dicom, pixel_data_attribute)

            # Get expected and actual length of the pixel data (bytes, expected does not include trailing null byte)
            expected_length = get_expected_length(dicom)
            actual_length = len(pixel_data)
            padded_expected_length = expected_length + expected_length % 2
            full_length = expected_length / 2 * 3  # upsampled data is a third larger
            full_length += (1 if full_length % 2 else 0
                            )  # trailing padding byte if even length

            # If we have YBR_FULL_422, must be RGB to obtain pixel data
            if (not dicom.file_meta.TransferSyntaxUID.is_compressed
                    and dicom.PhotometricInterpretation == "YBR_FULL_422"
                    and fix_interpretation and actual_length >= full_length):
                bot.warning(
                    "Updating dicom.PhotometricInterpretation to RGB, set fix_interpretation to False to skip."
                )
                photometric_original = dicom.PhotometricInterpretation
                dicom.PhotometricInterpretation = "RGB"
                self.original = dicom.pixel_array
                dicom.PhotometricInterpretation = photometric_original
            else:
                self.original = dicom.pixel_array

            # Compile coordinates from result
            coordinates = []
            for item in self.results["results"]:
                if len(item["coordinates"]) > 0:
                    for coordinate_set in item["coordinates"]:
                        # Coordinates expected to be list separated by commas
                        new_coordinates = [
                            int(x) for x in coordinate_set.split(",")
                        ]
                        coordinates.append(
                            new_coordinates)  # [[1,2,3,4],...[1,2,3,4]]

            # Instead of writing directly to data, create a mask
            # For 4D, (frames, X, Y, channel)
            if len(self.original.shape) == 4:
                mask = numpy.zeros(self.original.shape[1:3], dtype=numpy.uint8)

            # For 3D, (X, Y, channel)
            else:
                mask = numpy.zeros(self.original.shape[0:2], dtype=numpy.uint8)

            for coordinate in coordinates:
                minr, minc, maxr, maxc = coordinate

                # Update the mask: values set to 0 to be black
                mask[minc:maxc, minr:maxr] = 1

            # Now apply finished mask to the data
            if len(self.original.shape) == 4:

                # np.tile does the copying and stacking of masks into the channel dim to produce 3D masks
                # transposition to convert tile output (channel, X, Y)  into (X, Y, channel)
                # see: https://github.com/nquach/anonymize/blob/master/anonymize.py#L154
                channel3mask = numpy.transpose(numpy.tile(mask, (3, 1, 1)),
                                               (1, 2, 0))

                # use numpy.tile to copy and stack the 3D masks into 4D array to apply to 4D pixel data
                # tile converts (X, Y, channels) -> (frames, X, Y, channels), presumed ordering for 4D pixel data
                final_mask = numpy.tile(channel3mask,
                                        (self.original.shape[0], 1, 1, 1))

                # apply final 4D mask to 4D pixel data
                self.cleaned = final_mask * self.original

            # greyscale: no need to stack into the channel dim since it doesn't exist
            elif len(self.original.shape) == 3:

                # numpy.tile converts (X, Y) -> (frames, X, Y)
                final_mask = numpy.tile(mask, (self.original.shape[0], 1, 1))
                self.cleaned = final_mask * self.original

            else:
                bot.warning("Pixel array dimension %s is not recognized." %
                            (self.original.shape))
Example 13
def check_item(item):
    '''print item fields and values to screen
    '''
    for key, val in item.items():
        bot.info("%s: %s" % (key, val))
Example 14
    def clean(self, fix_interpretation=True, pixel_data_attribute="PixelData"):
        """
        take a dicom image and a list of pixel coordinates, and return
        a cleaned file (if output file is specified) or simply plot
        the cleaned result (if no file is specified)

        Parameters
        ==========
            add_padding: add N=margin pixels of padding
            margin: pixels of padding to add, if add_padding True
            fix_interpretation: fix the photometric interpretation if found off
        """

        if not self.results:
            bot.warning("Use %s.detect() to find coordinates first." % self)

        else:
            bot.info("Scrubbing %s." % self.dicom_file)

            # Load in dicom file, and image data
            dicom = read_file(self.dicom_file, force=True)
            pixel_data = getattr(dicom, pixel_data_attribute)

            # Get expected and actual length of the pixel data (bytes, expected does not include trailing null byte)
            expected_length = get_expected_length(dicom)
            actual_length = len(pixel_data)
            padded_expected_length = expected_length + expected_length % 2
            full_length = expected_length / 2 * 3  # upsampled data is a third larger
            full_length += (
                1 if full_length % 2 else 0
            )  # trailing padding byte if even length

            # If we have YBR_FULL_422, must be RGB to obtain pixel data
            if (
                not dicom.file_meta.TransferSyntaxUID.is_compressed
                and dicom.PhotometricInterpretation == "YBR_FULL_422"
                and fix_interpretation
                and actual_length >= full_length
            ):
                bot.warning(
                    "Updating dicom.PhotometricInterpretation to RGB, set fix_interpretation to False to skip."
                )
                photometric_original = dicom.PhotometricInterpretation
                dicom.PhotometricInterpretation = "RGB"
                self.original = dicom.pixel_array
                dicom.PhotometricInterpretation = photometric_original
            else:
                self.original = dicom.pixel_array

            # Compile coordinates from result, generating a list of (value, coordinate)
            # tuples: a value of 1 keeps the region in the mask, 0 removes (blanks) it.
            coordinates = []

            for item in self.results["results"]:

                # We iterate through coordinates in order specified in file
                for coordinate_set in item.get("coordinates", []):

                    # Each is a list with [value, coordinate]
                    mask_value, new_coordinates = coordinate_set

                    if not isinstance(new_coordinates, list):
                        new_coordinates = [new_coordinates]

                    for new_coordinate in new_coordinates:

                        # Case 1: an "all" indicates applying to entire image
                        if new_coordinate.lower() == "all":

                            # no frames, just X, Y
                            if len(self.original.shape) == 2:
                                # minr, minc, maxr, maxc = [0, 0, Y, X]
                                new_coordinate = [
                                    0,
                                    0,
                                    self.original.shape[1],
                                    self.original.shape[0],
                                ]

                            # (frames, X, Y, channel) OR (frames, X,Y)
                            if len(self.original.shape) >= 3:
                                new_coordinate = [
                                    0,
                                    0,
                                    self.original.shape[2],
                                    self.original.shape[1],
                                ]
                        else:
                            new_coordinate = [int(x) for x in new_coordinate.split(",")]
                        coordinates.append(
                            (mask_value, new_coordinate)
                        )  # [(1, [1,2,3,4]),...(0, [1,2,3,4])]

            # Instead of writing directly to data, create a mask of 1s (start keeping all)
            # For 4D, (frames, X, Y, channel)
            if len(self.original.shape) == 4:
                mask = numpy.ones(self.original.shape[1:3], dtype=numpy.uint8)

            # For 2D, (X, Y) or 3D (X, Y, channel)
            else:
                mask = numpy.ones(self.original.shape[0:2], dtype=numpy.uint8)

            # Here we apply the coordinates to the mask, 1==keep, 0==clean
            for coordinate_value, coordinate in coordinates:
                minr, minc, maxr, maxc = coordinate

                # Update the mask region with the coordinate value (0 blanks it, 1 keeps it)
                mask[minc:maxc, minr:maxr] = coordinate_value

            # Now apply finished mask to the data
            if len(self.original.shape) == 4:

                # np.tile does the copying and stacking of masks into the channel dim to produce 3D masks
                # transposition to convert tile output (channel, X, Y)  into (X, Y, channel)
                # see: https://github.com/nquach/anonymize/blob/master/anonymize.py#L154
                channel3mask = numpy.transpose(numpy.tile(mask, (3, 1, 1)), (1, 2, 0))

                # use numpy.tile to copy and stack the 3D masks into 4D array to apply to 4D pixel data
                # tile converts (X, Y, channels) -> (frames, X, Y, channels), presumed ordering for 4D pixel data
                final_mask = numpy.tile(channel3mask, (self.original.shape[0], 1, 1, 1))

                # apply final 4D mask to 4D pixel data
                self.cleaned = final_mask * self.original

            # greyscale: no need to stack into the channel dim since it doesn't exist
            elif len(self.original.shape) == 3:

                # numpy.tile converts (X, Y) -> (frames, X, Y)
                final_mask = numpy.tile(mask, (self.original.shape[0], 1, 1))
                self.cleaned = final_mask * self.original

            elif len(self.original.shape) == 2:
                self.cleaned = mask * self.original

            else:
                bot.warning(
                    "Pixel array dimension %s is not recognized."
                    % (str(self.original.shape))
                )
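
A sketch of the results structure this version of clean() expects before it is called; the coordinate values are hypothetical. Each entry pairs a mask value (1 keep, 0 remove) with either "all" or a "minr,minc,maxr,maxc" string:

results = {
    "results": [
        {"coordinates": [[0, "10,10,80,40"]]},  # blank a rectangular region
        {"coordinates": [[0, "all"]]},          # blank the entire image
    ]
}
# self.results would hold a structure like this, typically produced by detect().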