def main(args, parser):
    """Inspect dicom files for burned-in pixels and print a summary.

    The dicom files found under ``args.folder`` are passed to
    ``has_burned_pixels`` (optionally with a deid recipe), and the clean and
    flagged results are summarized through ``bot``. When ``args.save`` is
    truthy, a tab separated results file is also written to the current
    working directory.

    Parameters
    ==========
    args: the parsed command line arguments. This function reads the
          attributes ``deid``, ``format``, ``folder``, ``pattern`` and ``save``.
    parser: the argument parser (not used by this function).

    Raises
    ======
    SystemExit: if a deid is given and its format does not match
                ``args.format``.
    """
    # Imported locally so this block does not depend on a module-level import
    import sys

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )
            # The message announces an exit, so actually stop here instead of
            # continuing the inspection with a mismatched recipe.
            sys.exit(1)

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    dicom_files = list(
        get_files(base, pattern=args.pattern)
    )  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print("\nSUMMARY ================================\n")
    if result["clean"]:
        bot.custom(
            prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN"
        )

    if result["flagged"]:
        # NOTE(review): this loop reads each key of result["flagged"] as a
        # group name and len() of its value as a file count, while the save
        # branch below reads the keys as file paths mapping to a details
        # dict. Confirm which shape has_burned_pixels returns.
        for group, files in result["flagged"].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        # base may be one path string rather than a list of folders
        # (presumably what get_dataset returns - confirm). Iterating a string
        # would walk it character by character and produce a garbage name.
        folder_list = [base] if isinstance(base, str) else base
        folders = "-".join(os.path.basename(folder) for folder in folder_list)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.write("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean in result["clean"]:
                filey.write("%s\tCLEAN\t\t\n" % clean)

            for flagged, details in result["flagged"].items():
                if details["flagged"] is True:
                    # One row per finding; a distinct name avoids rebinding
                    # the outer ``result`` dict while it is being iterated.
                    for finding in details["results"]:
                        filey.write(
                            "%s\tFLAGGED\t%s\t%s\n"
                            % (flagged, finding["group"], finding["reason"])
                        )

        print("Result written to %s" % outfile)
def main(args, parser):
    '''Inspect dicom files for burned-in pixels and print a summary.

    The dicom files found under args.folder are passed to has_burned_pixels
    (optionally with a deid recipe), and the clean and flagged results are
    summarized through bot. When args.save is True, a tab separated results
    file is also written to the current working directory.

    Parameters
    ==========
    args: the parsed command line arguments. This function reads the
          attributes deid, format, folder, pattern and save.
    parser: the argument parser (not used by this function).

    Raises
    ======
    SystemExit: if a deid is given and its format does not match args.format.
    '''
    # Imported locally so this block does not depend on a module-level import
    import sys

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params['format'], args.format))
            # The message announces an exit, so actually stop here instead of
            # continuing the inspection with a mismatched recipe.
            sys.exit(1)

    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    dicom_files = list(get_files(
        base,
        pattern=args.pattern))  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print('\nSUMMARY ================================\n')
    if result['clean']:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(result['clean']),
                   color="CYAN")

    if result['flagged']:
        # NOTE(review): this loop reads each key of result['flagged'] as a
        # group name and len() of its value as a file count, while the save
        # branch below reads the keys as file paths mapping to a details
        # dict. Confirm which shape has_burned_pixels returns.
        for group, files in result['flagged'].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        # base may be one path string rather than a list of folders
        # (presumably what get_dataset returns - confirm). Iterating a string
        # would walk it character by character and produce a garbage name.
        folder_list = [base] if isinstance(base, str) else base
        folders = '-'.join(os.path.basename(folder) for folder in folder_list)
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))

        with open(outfile, 'w') as filey:
            filey.write('dicom_file\tpixels_flagged\tflag_list\treason\n')

            for clean in result['clean']:
                filey.write('%s\tCLEAN\t\t\n' % clean)

            for flagged, details in result['flagged'].items():
                if details['flagged'] is True:
                    # One row per finding; a distinct name avoids rebinding
                    # the outer result dict while it is being iterated.
                    for finding in details['results']:
                        filey.write('%s\tFLAGGED\t%s\t%s\n' %
                                    (flagged, finding['group'],
                                     finding['reason']))

        print('Result written to %s' % outfile)
#!/usr/bin/env python3

# Complete example: inspect dicom pixels for PHI, driven by a deid.dicom
# specification.
# https://pydicom.github.io/deid

import os

from pydicom import read_file

from deid.data import get_dataset
from deid.dicom import get_files, has_burned_pixels
from deid.logger import bot

bot.level = 3

# Retrieve the example "dicom-cookies" dataset and list its dicom files
base = get_dataset("dicom-cookies")
dicom_files = list(
    get_files(base)
)  # todo : consider using generator functionality

# Check every file against the recipe found at examples/deid
results = has_burned_pixels(dicom_files=dicom_files, deid="examples/deid")

# The returned dictionary holds a "clean" list and a "flagged" lookup.
# For example (output truncated):
# {'clean': [],
#  'flagged': {'/home/vanessa/Documents/Dropbox/Code/dicom/deid/deid/data/dicom-cookies/image1.dcm': {'flagged': True,
#    'results': [{'coordinates': [],
#      'group': 'blacklist',
#      'reason': ' ImageType missing or ImageType empty '}]},
def import_dicomdir(dicom_dir, run_get_identifiers=True):
    '''Import a folder of dicom files into the application as one Batch.

    Per the original notes this runs as a celery job triggered by the
    watcher. Every file in dicom_dir is read and checked with
    has_burned_pixels; images that are flagged in any group other than
    "whitelist" are excluded, and a warning is recorded on the batch.
    All other images are stored as Image objects attached to the batch.

    Parameters
    ==========
    dicom_dir: path of the folder to import. Its basename is used as the
               Batch uid.
    run_get_identifiers: if True (default), hand the batch to
               get_identifiers once the import is done.

    Returns
    =======
    The result of get_identifiers(bid=batch.id) when images were imported
    and run_get_identifiers is True, the batch itself when images were
    imported and run_get_identifiers is not True, and None when dicom_dir
    does not exist, is not a directory, or no image passed the filter.
    '''
    start_time = time.time()

    if not os.path.exists(dicom_dir):
        bot.warning('Cannot find %s' % dicom_dir)
        return

    try:
        dicom_files = ls_fullpath(dicom_dir)
    except NotADirectoryError:
        bot.error('%s is not a directory, skipping.' % dicom_dir)
        return

    bot.debug("Importing %s, found %s .dcm files" %
              (dicom_dir, len(dicom_files)))

    # The batch --> the folder with a set of dicoms tied to one request
    dcm_folder = os.path.basename(dicom_dir)
    batch, _ = Batch.objects.get_or_create(uid=dcm_folder)
    batch.logs['STARTING_IMAGE_COUNT'] = len(dicom_files)

    # Data quality check: keep a record of study dates
    study_dates = dict()
    size_bytes = sum(os.path.getsize(f) for f in dicom_files)
    messages = []  # print all unique messages / warnings at end

    # Add in each dicom file to the series
    for dcm_file in dicom_files:
        try:
            # The dicom folder will be named based on the accession#
            dcm = read_file(dcm_file, force=True)
            dicom_uid = os.path.basename(dcm_file)

            # Keep track of studyDate
            study_date = dcm.get('StudyDate')
            study_dates[study_date] = study_dates.get(study_date, 0) + 1

            flag, flag_group, reason = has_burned_pixels(dicom_file=dcm_file,
                                                         quiet=True,
                                                         deid=STUDY_DEID)

            # If the image is flagged (outside of the whitelist), we don't
            # include it and move on to the next file
            if flag is True and flag_group not in ["whitelist"]:
                message = "%s is flagged in %s: %s, skipping" % (
                    dicom_uid, flag_group, reason)
                batch = add_batch_warning(message, batch, quiet=True)
                message = "BurnedInAnnotation found for batch %s" % batch.uid
                if message not in messages:
                    messages.append(message)
                continue

            # Create the Image object in the database
            # A dicom instance number must be unique for its batch
            dicom = Image.objects.create(batch=batch, uid=dicom_uid)

            # Save the dicom file to storage (this also saves the object)
            dicom = save_image_dicom(dicom=dicom, dicom_file=dcm_file)

            # Generate image name based on [SUID] added later
            # accessionnumberSUID.seriesnumber.imagenumber,
            dicom.name = "%s_%s.dcm" % (dcm.get('SeriesNumber'),
                                        dcm.get('InstanceNumber'))
            dicom.save()

            # The source file is left in place: removal after a successful
            # import is currently disabled.

        except InvalidDicomError:
            message = "InvalidDicomError: %s skipping." % (dcm_file)
            batch = add_batch_error(message, batch)
        except KeyError:
            message = "KeyError: %s is possibly invalid, skipping." % (
                dcm_file)
            batch = add_batch_error(message, batch)
        except Exception as e:
            # Record unexpected failures on the batch too, instead of
            # building the message and silently dropping it.
            message = "Exception: %s, for %s, skipping." % (e, dcm_file)
            batch = add_batch_error(message, batch)

    # Print summary messages all at once
    for message in messages:
        bot.warning(message)

    if len(study_dates) > 1:
        # More than one study date is a property of the whole folder, so the
        # folder is reported (not whichever file happened to be read last).
        message = "%s study dates found for %s" % (len(study_dates),
                                                   dicom_dir)
        batch = add_batch_error(message, batch)

    # Save batch thus far
    batch.qa['StudyDate'] = study_dates
    batch.qa['StartTime'] = start_time
    batch.qa['SizeBytes'] = size_bytes
    batch.save()

    # The imported directory is not removed here, even when there were no
    # errors on import (removal is currently disabled).

    # At the end, submit the dicoms to be anonymized as a batch
    count = batch.image_set.count()
    if count == 0:
        # No images for further processing
        batch.status = "EMPTY"
        batch.qa['FinishTime'] = time.time()
        message = "%s is flagged EMPTY, no images pass filter" % (batch.id)
        batch = add_batch_warning(message, batch)
        batch.save()
        return

    if ANONYMIZE_PIXELS is True:
        bot.warning(
            "Anonimization of pixels is not yet implemented. Images were skipped."
        )
        # When this is implemented, the function will be modified to add these
        # images to the batch, which will then be first sent through a
        # function to scrub pixels before header data is looked at.

    if run_get_identifiers is True:
        bot.debug("get_identifiers submit batch %s with %s dicoms." %
                  (batch.uid, count))
        return get_identifiers(bid=batch.id)

    bot.debug("Finished batch %s with %s dicoms" % (batch.uid, count))
    return batch