Esempio n. 1
0
def main(args, parser):
    """Inspect dicom files for burned-in pixels and summarize what was flagged.

    The files found under ``args.folder`` (or the demo "dicom-cookies" dataset
    when no folder is given) are passed to ``has_burned_pixels``. A summary is
    printed, and when ``args.save`` is set a tab-separated report is written to
    the current working directory.

    Parameters
    ----------
    args : parsed command line arguments; the attributes read here are
        ``deid``, ``format``, ``folder``, ``pattern`` and ``save``.
    parser : the argument parser (unused, kept for the shared entry-point
        signature).
    """

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            # NOTE(review): the message announces an exit, but no exit is
            # requested here; whether execution stops depends on bot.error.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    dicom_files = list(
        get_files(base, pattern=args.pattern)
    )  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)
    clean = result["clean"]
    flagged = result["flagged"]

    print("\nSUMMARY ================================\n")
    if clean:
        bot.custom(prefix="CLEAN", message="%s files" % len(clean), color="CYAN")

    # "flagged" maps each dicom file to its details (the same shape the report
    # below relies on), so files are regrouped by flag group before counting.
    files_by_group = {}
    for dicom_file, details in flagged.items():
        if details["flagged"] is True:
            for item in details["results"]:
                files_by_group.setdefault(item["group"], set()).add(dicom_file)

    for group, files in files_by_group.items():
        bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        # A single folder given as a string must not be iterated character by
        # character; normpath keeps the basename non-empty with a trailing slash.
        sources = base if isinstance(base, (list, tuple)) else [base]
        folders = "-".join(
            os.path.basename(os.path.normpath(folder)) for folder in sources
        )
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.write("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean_file in clean:
                filey.write("%s\tCLEAN\t\t\n" % clean_file)

            # One row per (file, flag result) pair
            for dicom_file, details in flagged.items():
                if details["flagged"] is True:
                    for item in details["results"]:
                        filey.write(
                            "%s\tFLAGGED\t%s\t%s\n"
                            % (dicom_file, item["group"], item["reason"])
                        )

        print("Result written to %s" % outfile)
Esempio n. 2
0
def main(args, parser):
    '''Inspect dicom files for burned-in pixels and summarize what was flagged.

    The files found under args.folder (or the demo "dicom-cookies" dataset
    when no folder is given) are passed to has_burned_pixels. A summary is
    printed, and when args.save is True a tab-separated report is written to
    the current working directory.

    Parameters
    ==========
    args: parsed command line arguments; the attributes read here are
          deid, format, folder, pattern and save.
    parser: the argument parser (unused, kept for the shared entry-point
            signature).
    '''

    # Global output folder
    #output_folder = args.outfolder
    #if output_folder is None:
    #    output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            # NOTE(review): the message announces an exit, but no exit is
            # requested here; whether execution stops depends on bot.error.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params['format'], args.format))

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    dicom_files = list(get_files(
        base,
        pattern=args.pattern))  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)
    clean = result['clean']
    flagged = result['flagged']

    print('\nSUMMARY ================================\n')
    if len(clean) > 0:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(clean),
                   color="CYAN")

    # 'flagged' maps each dicom file to its details (the same shape the report
    # below relies on), so files are regrouped by flag group before counting.
    files_by_group = {}
    for dicom_file, details in flagged.items():
        if details['flagged'] is True:
            for item in details['results']:
                files_by_group.setdefault(item['group'], set()).add(dicom_file)

    for group, files in files_by_group.items():
        bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        # A single folder given as a string must not be iterated character by
        # character; normpath keeps the basename non-empty with a trailing slash.
        sources = base if isinstance(base, (list, tuple)) else [base]
        folders = '-'.join(
            os.path.basename(os.path.normpath(folder)) for folder in sources)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))

        with open(outfile, 'w') as filey:
            filey.write('dicom_file\tpixels_flagged\tflag_list\treason\n')
            for clean_file in clean:
                filey.write('%s\tCLEAN\t\t\n' % clean_file)

            # One row per (file, flag result) pair
            for dicom_file, details in flagged.items():
                if details['flagged'] is True:
                    for item in details['results']:
                        filey.write('%s\tFLAGGED\t%s\t%s\n' %
                                    (dicom_file, item['group'], item['reason']))

        print('Result written to %s' % outfile)
Esempio n. 3
0
#!/usr/bin/env python3

# This is a complete example of inspecting pixels for PHI
# based on a deid.dicom specification
# https://pydicom.github.io/deid

# This will get a set of example cookie dicoms
from deid.dicom import get_files, has_burned_pixels
from pydicom import read_file
from deid.data import get_dataset
from deid.logger import bot
import os

# Set the logger level (what level 3 means is defined by deid.logger —
# TODO confirm the intended verbosity)
bot.level = 3

# Look up the "dicom-cookies" example dataset and collect its files into a list
base = get_dataset("dicom-cookies")
dicom_files = list(
    get_files(base))  # todo : consider using generator functionality

# Check all files against the "examples/deid" recipe. The path is relative, so
# presumably the script is meant to be run from the repository root — verify.
results = has_burned_pixels(dicom_files=dicom_files, deid="examples/deid")

# The dictionary has a "clean" list, and a "flagged" list,
# Eg:

# {'clean': [],
#  'flagged': {'/home/vanessa/Documents/Dropbox/Code/dicom/deid/deid/data/dicom-cookies/image1.dcm': {'flagged': True,
#  'results': [{'coordinates': [],
#               'group': 'blacklist',
#               'reason': ' ImageType missing  or ImageType empty '}]},
Esempio n. 4
0
def import_dicomdir(dicom_dir, run_get_identifiers=True):
    '''Import the dicom files of one directory into a Batch, leaving out
    images whose header flags them for pixel identifiers.

    The batch is looked up (or created) by the directory's basename. Each
    file is read, its StudyDate tallied, and checked with has_burned_pixels;
    files flagged in any group other than "whitelist" are recorded as a batch
    warning and not imported. Files that fail to import are recorded as
    batch errors and left on disk.

    Parameters
    ==========
    dicom_dir: path to the folder of dicom files for one request
    run_get_identifiers: if True (default), hand the batch to get_identifiers
                         once the import is done

    Returns
    =======
    The result of get_identifiers(bid=batch.id) when run_get_identifiers is
    True, otherwise the batch. None is returned when dicom_dir does not
    exist, is not a directory, or when no image passed the filter (the batch
    is then saved with status "EMPTY").
    '''
    start_time = time.time()

    if not os.path.exists(dicom_dir):
        bot.warning('Cannot find %s' % dicom_dir)
        return

    try:
        dicom_files = ls_fullpath(dicom_dir)
    except NotADirectoryError:
        bot.error('%s is not a directory, skipping.' % dicom_dir)
        return

    bot.debug("Importing %s, found %s .dcm files" %
              (dicom_dir, len(dicom_files)))

    # The batch --> the folder with a set of dicoms tied to one request
    dcm_folder = os.path.basename(dicom_dir)
    batch, created = Batch.objects.get_or_create(uid=dcm_folder)
    batch.logs['STARTING_IMAGE_COUNT'] = len(dicom_files)

    # Data quality check: keep a record of study dates
    study_dates = dict()
    size_bytes = sum(os.path.getsize(f) for f in dicom_files)
    messages = []  # print all unique messages / warnings at end

    # Add in each dicom file to the series
    for dcm_file in dicom_files:
        try:
            # The dicom folder will be named based on the accession#
            dcm = read_file(dcm_file, force=True)
            dicom_uid = os.path.basename(dcm_file)

            # Keep track of studyDate
            study_date = dcm.get('StudyDate')
            study_dates[study_date] = study_dates.get(study_date, 0) + 1

            flag, flag_group, reason = has_burned_pixels(
                dicom_file=dcm_file, quiet=True, deid=STUDY_DEID)

            # If the image is flagged (and not whitelisted), record a
            # warning on the batch and move on without importing it
            if flag is True and flag_group not in ["whitelist"]:
                message = "%s is flagged in %s: %s, skipping" % (
                    dicom_uid, flag_group, reason)
                batch = add_batch_warning(message, batch, quiet=True)

                message = "BurnedInAnnotation found for batch %s" % batch.uid
                if message not in messages:
                    messages.append(message)
                continue

            # Create the Image object in the database
            # A dicom instance number must be unique for its batch
            dicom = Image.objects.create(batch=batch, uid=dicom_uid)

            # Save the dicom file to storage
            # basename = "%s/%s" %(batch.id,os.path.basename(dcm_file))
            dicom = save_image_dicom(dicom=dicom,
                                     dicom_file=dcm_file)  # Also saves

            # Generate image name based on [SUID] added later
            # accessionnumberSUID.seriesnumber.imagenumber,
            dicom.name = "%s_%s.dcm" % (dcm.get('SeriesNumber'),
                                        dcm.get('InstanceNumber'))
            dicom.save()
            # Only remove files successfully imported
            #os.remove(dcm_file)

        # Note that on error we don't remove files
        except InvalidDicomError:
            message = "InvalidDicomError: %s skipping." % (dcm_file)
            batch = add_batch_error(message, batch)
        except KeyError:
            message = "KeyError: %s is possibly invalid, skipping." % (
                dcm_file)
            batch = add_batch_error(message, batch)
        except Exception as e:
            # Record unexpected failures on the batch like the specific
            # handlers above, instead of building a message and dropping it
            message = "Exception: %s, for %s, skipping." % (e, dcm_file)
            batch = add_batch_error(message, batch)

    # Print summary messages all at once
    for message in messages:
        bot.warning(message)

    if len(study_dates) > 1:
        # The count spans the whole directory, so report the directory
        # rather than whichever file the loop happened to end on
        message = "%s study dates found for %s" % (len(study_dates),
                                                   dicom_dir)
        batch = add_batch_error(message, batch)

    # Save batch thus far
    batch.qa['StudyDate'] = study_dates
    batch.qa['StartTime'] = start_time
    batch.qa['SizeBytes'] = size_bytes
    batch.save()

    # If there were no errors on import, we should remove the directory
    #if not batch.has_error:

    # Should only be called given no error, and should trigger error if not empty
    #os.rmdir(dicom_dir)

    # At the end, submit the dicoms to be anonymized as a batch
    count = batch.image_set.count()
    if count == 0:
        # No images for further processing
        batch.status = "EMPTY"
        batch.qa['FinishTime'] = time.time()
        message = "%s is flagged EMPTY, no images pass filter" % (batch.id)
        batch = add_batch_warning(message, batch)
        batch.save()
        return

    if ANONYMIZE_PIXELS is True:
        bot.warning(
            "Anonimization of pixels is not yet implemented. Images were skipped."
        )
        # When this is implemented, the function will be modified to add these images
        # to the batch, which will then be first sent through a function to
        # scrub pixels before header data is looked at.
        # scrub_pixels(bid=batch.id)

    if run_get_identifiers is True:
        bot.debug("get_identifiers submit batch %s with %s dicoms." %
                  (batch.uid, count))
        return get_identifiers(bid=batch.id)

    bot.debug("Finished batch %s with %s dicoms" % (batch.uid, count))
    return batch