예제 #1
0
    def test_get_files(self):
        """Check how many dicom files get_files reports for two folders.

        Case 1 expects 7 files for ``self.dataset``; case 2 expects no files
        for ``self.tmpdir``.
        """
        print("Case 1: Test get files from dataset")
        from deid.dicom import get_files

        # Collect the result first so the count works whether get_files
        # hands back a list or a lazy iterator.
        dicom_files = list(get_files(self.dataset))
        self.assertEqual(len(dicom_files), 7)

        print("Case 2: Ask for files from empty folder")
        dicom_files = list(get_files(self.tmpdir))
        self.assertEqual(len(dicom_files), 0)
예제 #2
0
def get_dicom(dataset):
    """Read the first dicom that get_files produces for a dataset.

    ``dataset`` is handed to ``deid.dicom.get_files`` unchanged; the first
    item it yields is loaded with pydicom's ``read_file`` and returned.
    """
    from deid.dicom import get_files
    from pydicom import read_file

    first_file = next(get_files(dataset))
    return read_file(first_file)
예제 #3
0
def get_file(dataset):
    """Return the first item that get_files yields for the given dataset."""
    from deid.dicom import get_files

    return next(get_files(dataset))
예제 #4
0
    def test_get_files_as_list(self):
        """Verify file counts after collecting get_files output into a list.

        ``self.dataset`` should produce 7 entries and ``self.tmpdir`` should
        produce none.
        """
        print("Test test_get_files_as_list")
        print("Case 1: Test get files from dataset")
        from deid.dicom import get_files
        from deid.config import load_deid

        dataset_count = len(list(get_files(self.dataset)))
        self.assertEqual(dataset_count, 7)

        print("Case 2: Ask for files from empty folder")
        tmpdir_count = len(list(get_files(self.tmpdir)))
        self.assertEqual(tmpdir_count, 0)
예제 #5
0
def main(args, parser):
    """Get and/or replace identifiers for a folder of dicom files.

    Parameters
    ----------
    args: parsed command line arguments. The attributes read here are
        outfolder, deid, format, input, action, ids and overwrite.
    parser: the argument parser (not used in this function).

    The action decides what runs: "all" extracts identifiers and then
    replaces them, "get" only extracts, and "put" only replaces, using the
    identifiers supplied in args.ids.
    """
    # Global output folder; fall back to a fresh temporary directory
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): the message announces an exit, but nothing in
            # this function stops the run afterwards -- confirm bot.error
            # terminates, or whether bot.exit was intended.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params["format"], args.format))

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)

    # A list is needed because the files may be walked twice (get, then put)
    dicom_files = list(get_files(base))

    do_get = False
    do_put = False
    ids = None
    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        # Only extraction happens for this action, so say exactly that
        bot.info("GET identifiers from %s" % (basename))
        do_get = True

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit(
                "To PUT without GET you must provide a json file with ids.")

        ids = args.ids

    # GET identifiers
    if do_get:
        ids = get_identifiers(dicom_files)

    # PUT (replace) identifiers and report where the results were written
    if do_put:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )

        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
예제 #6
0
def main(args, parser):
    """Inspect dicom files for burned-in pixels and summarize the outcome.

    Parameters
    ----------
    args: parsed command line arguments. The attributes read here are
        deid, format, folder, pattern and save.
    parser: the argument parser (not used in this function).

    A summary is always printed. When args.save is truthy, a tab-separated
    report named pixel-flag-results-<folders>-<yy-mm-dd>.tsv is also written
    relative to the current working directory.
    """

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    # todo : consider using generator functionality
    dicom_files = list(get_files(base, pattern=args.pattern))
    result = has_burned_pixels(dicom_files, deid=deid)

    print("\nSUMMARY ================================\n")
    if result["clean"]:
        bot.custom(
            prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN"
        )

    if result["flagged"]:
        # NOTE(review): this loop reads the mapping as group -> files, while
        # the report below reads the same mapping as file -> details --
        # confirm which shape has_burned_pixels returns.
        for group, files in result["flagged"].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        # base can be one folder path or a collection of them. Iterating a
        # bare string would walk it character by character and mangle the
        # report name, so wrap a single path in a list first.
        folder_list = [base] if isinstance(base, str) else base
        folders = "-".join(os.path.basename(folder) for folder in folder_list)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.write("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean in result["clean"]:
                filey.write("%s\tCLEAN\t\t\n" % clean)

            for flagged, details in result["flagged"].items():
                if details["flagged"] is True:
                    # One row per finding; a separate name keeps the outer
                    # result dictionary intact.
                    for finding in details["results"]:
                        group = finding["group"]
                        reason = finding["reason"]
                        filey.write(
                            "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason)
                        )

            print("Result written to %s" % outfile)
예제 #7
0
    def test_get_files(self):
        """Count the items get_files yields by iterating over them.

        ``self.dataset`` should yield 7 items and ``self.tmpdir`` none.
        """
        print("Test test_get_files")
        print("Case 1: Test get files from dataset")
        from deid.dicom import get_files
        from deid.config import load_deid

        found = sum(1 for _ in get_files(self.dataset))
        self.assertEqual(found, 7)

        print("Case 2: Ask for files from empty folder")
        found = sum(1 for _ in get_files(self.tmpdir))
        self.assertEqual(found, 0)
예제 #8
0
파일: inspect.py 프로젝트: rjosest/deid
def main(args, parser):
    """Inspect dicom files for burned-in pixels and summarize the outcome.

    Parameters
    ----------
    args: parsed command line arguments. The attributes read here are
        deid, format, folder, pattern and save.
    parser: the argument parser (not used in this function).

    A summary is always printed. When args.save is True, a tab-separated
    report named pixel-flag-results-<folders>-<yy-mm-dd>.tsv is also written
    relative to the current working directory.
    """

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params['format'], args.format))

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    # todo : consider using generator functionality
    dicom_files = list(get_files(base, pattern=args.pattern))
    result = has_burned_pixels(dicom_files, deid=deid)

    print('\nSUMMARY ================================\n')
    if len(result['clean']) > 0:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(result['clean']),
                   color="CYAN")

    if len(result['flagged']) > 0:
        # NOTE(review): this loop reads the mapping as group -> files, while
        # the report below reads the same mapping as file -> details --
        # confirm which shape has_burned_pixels returns.
        for group, files in result['flagged'].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        # base can be one folder path or a collection of them. Iterating a
        # bare string would walk it character by character and mangle the
        # report name, so wrap a single path in a list first.
        folder_list = [base] if isinstance(base, str) else base
        folders = '-'.join(os.path.basename(folder) for folder in folder_list)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))
        with open(outfile, 'w') as filey:
            filey.write('dicom_file\tpixels_flagged\tflag_list\treason\n')
            for clean in result['clean']:
                filey.write('%s\tCLEAN\t\t\n' % clean)
            for flagged, details in result['flagged'].items():
                if details['flagged'] is True:
                    # One row per finding; a separate name keeps the outer
                    # result dictionary intact.
                    for finding in details['results']:
                        group = finding['group']
                        reason = finding['reason']
                        filey.write('%s\tFLAGGED\t%s\t%s\n' %
                                    (flagged, group, reason))

            print('Result written to %s' % outfile)
예제 #9
0
def get_dicom(dataset, return_dir=False):
    """Load a dicom from a dataset, or list the dataset's dicom files.

    Parameters
    ----------
    dataset: handed to deid.dicom.get_files to locate the files
    return_dir: when True, return the list of files instead of reading one

    Returns
    -------
    The list of files when return_dir is True, otherwise the result of
    pydicom's read_file on the first file.

    Raises
    ------
    IndexError: if no files are found and return_dir is False
    """
    from deid.dicom import get_files
    from pydicom import read_file

    # get_files may hand back a lazy iterator rather than a list; collecting
    # it keeps both the indexing below and the return_dir result usable.
    dicom_files = list(get_files(dataset))
    if return_dir:
        return dicom_files
    return read_file(dicom_files[0])
예제 #10
0
def get_dicom(dataset, return_dir=False):
    """Read the first dicom of a dataset, or list every file in it.

    With ``return_dir`` falsy, the first item from ``get_files`` is loaded
    with pydicom's ``read_file``; otherwise everything ``get_files`` yields
    is returned as a list.
    """
    from deid.dicom import get_files
    from pydicom import read_file

    found = get_files(dataset)
    if not return_dir:
        return read_file(next(found))
    return list(found)
예제 #11
0
#!/usr/bin/env python3

# This is a complete example of inspecting pixels for PHI
# based on a deid.dicom specification
# https://pydicom.github.io/deid

# This will get a set of example cookie dicoms
from deid.dicom import get_files, has_burned_pixels
from pydicom import read_file
from deid.data import get_dataset
from deid.logger import bot
import os

# Set the logger level used for this run
bot.level = 3

# Locate the example "dicom-cookies" dataset and collect its files up front
# todo : consider using generator functionality
base = get_dataset("dicom-cookies")
dicom_files = [*get_files(base)]

# Check the collected files against the deid recipe at examples/deid
results = has_burned_pixels(deid="examples/deid", dicom_files=dicom_files)

# The dictionary has a "clean" list, and a "flagged" list,
# Eg:

# {'clean': [],
#  'flagged': {'/home/vanessa/Documents/Dropbox/Code/dicom/deid/deid/data/dicom-cookies/image1.dcm': {'flagged': True,
#  'results': [{'coordinates': [],
#               'group': 'blacklist',
#               'reason': ' ImageType missing  or ImageType empty '}]},
예제 #12
0
# RADIOLOGY ---------------------------------------------------
# This is an example script to upload data (images, text, metadata) to
# google cloud storage and datastore. Data MUST be de-identified

from som.api.google.datastore import DataStoreClient as Client
import os

# Start google storage client for pmc-stanford
client = Client(bucket_name='radiology')
collection = client.create_collection(uid='IRB41449')

# Let's load some dummy data from deid
from deid.data import get_dataset
from deid.dicom import get_files

# Collect the files into a list: they are walked twice below (once to get
# identifiers, once to replace them), which a lazy iterator would not survive.
dicom_files = list(get_files(get_dataset('dicom-cookies')))

# Now de-identify to get clean files
from deid.dicom import get_identifiers, replace_identifiers
ids = get_identifiers(dicom_files)
updated_files = replace_identifiers(dicom_files=dicom_files,
                                    ids=ids)

# Define some metadata for the entity
metadata = {"source_id": "cookieTumorDatabase",
            "id": "cookie-47",
            "Modality": "cookie"}

# Upload the dataset
client.upload_dataset(images=updated_files,
                      collection=collection,
예제 #13
0
def main():
    """Detect and clean burned-in pixels for every dicom file in a folder.

    Arguments come from get_parser(). Each file is run through a
    DicomCleaner (detect, then clean) and saved according to args.save:
    "dicom" or "png" write one output per file, and anything else adds a
    page to a single pdf report. A json summary of the detection results is
    always written under args.outfolder.
    """
    parser = get_parser()

    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse reports help/usage problems by raising SystemExit; keep
        # the original exit status of 0 without hiding unrelated errors.
        sys.exit(0)

    from deid.dicom import (get_files, DicomCleaner)
    from logger import bot

    if args.folder is None:
        bot.error("Please provide a folder with dicom files with --input.")
        sys.exit(1)

    # get_files is an iterator, so collect it once: the list supplies both
    # the count and the files to process.
    dicom_files = list(get_files(args.folder))
    number_files = len(dicom_files)

    # Create a Dicom Cleaner client
    client = DicomCleaner(output_folder=args.outfolder, deid=args.deid)
    bot.info('Processing [images]%s [output-folder]%s' %
             (number_files, client.output_folder))
    outcomes = {True: 'flagged', False: '  clean'}

    # Keep a list of flagged and clean
    flagged = []
    clean = []
    summary = dict()

    # pdf is the fallback for any save mode other than dicom/png, matching
    # the else branch in the loop below, so the report is opened for every
    # such mode.
    save_as_pdf = args.save not in ("dicom", "png")
    if save_as_pdf:
        pdf_report = '%s/deid-clean-%s.pdf' % (args.outfolder, number_files)
        pp = PdfPages(pdf_report)

    # Perform detection one at a time
    for dicom_file in dicom_files:

        dicom_name = os.path.basename(dicom_file)

        # detect --> clean
        result = client.detect(dicom_file)
        client.clean()
        summary[dicom_name] = result

        # Generate title/description for result
        title = '%s: %s' % (outcomes[result['flagged']], dicom_name)
        bot.info(title)

        # How does the user want to save data?
        if args.save == "dicom":
            outfile = client.save_dicom()

        elif args.save == "png":
            outfile = client.save_png(title=title)

        # pdf (default)
        else:
            plt = client.get_figure(title=title)
            fig = plt.gcf()
            pp.savefig(fig)
            plt.close(fig)

        # Whether dicom or png, append to respective list
        if not save_as_pdf:
            if result['flagged']:
                flagged.append(outfile)
            else:
                clean.append(outfile)

    # Save summary json file
    summary_json = '%s/deid-clean-%s.json' % (args.outfolder, number_files)
    write_json(summary, summary_json)
    bot.info('json data written to %s' % summary_json)

    # When we get here, if saving pdf, close it.
    if save_as_pdf:
        bot.info('pdf report written to %s' % pdf_report)
        pp.close()

    # Otherwise, move files into respective folders
    else:
        move_files(files=flagged, dest='%s/flagged' % args.outfolder)
        move_files(files=clean, dest='%s/clean' % args.outfolder)
예제 #14
0
# This is a complete example of using the cleaning client to inspect
# and clean pixels
# based on a deid.dicom specification
# https://pydicom.github.io/deid

#########################################
# 1. Get List of Files
#########################################

# This will get a set of example cookie dicoms
from deid.dicom import get_files, DicomCleaner
from deid.data import get_dataset

base = get_dataset('dicom-cookies')

# get_files may yield lazily, so collect the files before picking one by index
dicom_files = list(get_files(base))
dicom_file = dicom_files[3]

#########################################
# 2. Create Client
#########################################

client = DicomCleaner()

# You can set the output folder if you want, otherwise tmpdir is used
client = DicomCleaner(output_folder='/home/vanessa/Desktop')

# Steps are to detect, clean, and save in desired format, one image
# at a time.
# client.detect(dicom_file)
# client.clean()
def get_file(dataset, image, tempdir=None):
    """Return the first item get_files yields for dataset.

    ``image`` is forwarded as the ``pattern`` argument and ``tempdir`` is
    passed through unchanged.
    """
    from deid.dicom import get_files

    return next(get_files(dataset, tempdir=tempdir, pattern=image))
예제 #16
0
def main():
    """Scan a folder of dicom files for text and report a tally.

    Arguments come from get_parser(). Each file is wrapped in UserData,
    checked for text candidates and classified; files that fail along the
    way are counted as skipped. Unless args.detect is set, a preprocessed
    png is saved for every file and a cleaned png for every flagged file,
    both under /data. Totals are printed at the end.
    """
    parser = get_parser()

    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse reports help/usage problems by raising SystemExit; keep
        # the original exit status of 0 without hiding unrelated errors.
        sys.exit(0)

    from deid.dicom import get_files
    from logger import bot

    if args.folder is None:
        bot.error("Please provide a folder with dicom files with --input.")
        sys.exit(1)

    # Collect once: the list supplies both the total and the files to process
    dicom_files = list(get_files(args.folder))
    number_files = len(dicom_files)

    ##### the following includes all steps to go from raw images to predictions
    ##### pickle models are passed as argument to select_text_among_candidates
    ##### and classify_text methods are result of a previously implemented pipeline.
    ##### just for the purpose of clarity the previous code is provided.
    ##### The commented code is the one necessary to get the models trained.

    # Keep a record for the user
    result = {'clean': 0, 'detected': 0, 'skipped': 0, 'total': number_files}

    # For each file, determine if PHI, for now just alert user
    for dicom_file in dicom_files:

        dicom_name = os.path.basename(dicom_file)

        # Try isn't a great approach, but if we log the skipped, we can debug
        try:
            dicom = UserData(dicom_file, verbose=args.verbose)

            # plots preprocessed image
            if not args.detect:
                dicom_save_name = '/data/%s_preprocessed.png' % dicom_name
                dicom.save_preprocessed_image(dicom_save_name)

            # detects objects in preprocessed image
            candidates = dicom.get_text_candidates()
            clean = True

            if candidates is not None:

                if args.verbose:
                    number_candidates = len(candidates['coordinates'])
                    bot.debug("%s has %s text candidates" %
                              (dicom_name, number_candidates))
                # plots objects detected
                # dicom.plot_to_check_save(candidates,
                #                          'Total Objects Detected',
                #                          '/data/lao-detect-check.png')

                # selects objects containing text
                saved_model = '/code/data/linearsvc-hog-fulltrain2-90.pickle'
                maybe_text = dicom.select_text_among_candidates(saved_model)

                # plots objects after text detection
                # dicom.plot_to_check_save(maybe_text,
                #                          'Objects Containing Text Detected',
                #                          '/data/lao-detect-candidates.png')

                # classifies single characters
                saved_model = '/code/data/linearsvc-hog-fulltrain36-90.pickle'
                classified = dicom.classify_text(saved_model)
                if args.verbose:
                    number_text = len(classified['coordinates'])
                    bot.debug("%s has %s classified text" %
                              (dicom_name, number_text))

                # NOTE(review): classified is indexed by 'coordinates' above,
                # so len(classified) may count keys rather than detections --
                # confirm this is the intended emptiness check.
                if len(classified) > 0:
                    if args.verbose:
                        bot.warning("%s flagged for text content." %
                                    dicom_name)
                    clean = False
                else:
                    bot.info("%s is clean" % dicom_name)

            else:
                bot.info("%s is clean" % dicom_name)

            if clean:
                result['clean'] += 1
            else:
                result['detected'] += 1

            # plots letters after classification
            # dicom.plot_to_check_save(classified,
            #                          'Single Character Recognition',
            #                           '/data/lao-detect-letters.png')

            if not clean and not args.detect:
                dicom.scrape_save('/data/%s_cleaned.png' % dicom_name)

        except Exception:
            # Count ordinary failures as skipped, but let Ctrl-C and
            # interpreter exits stop the run instead of being swallowed.
            bot.error("\nProblem loading %s, skipping" % dicom_name)
            result['skipped'] += 1
        print('============================================================')

    # Final result
    print('\n=======================FINALRESULT==========================')
    print(os.path.basename(args.folder))
    print("DETECTED: %s" % result['detected'])
    print("SKIPPED:  %s" % result['skipped'])
    print("CLEAN:    %s" % result['clean'])
    print("TOTAL:    %s" % result['total'])