def change_tag(dicom, field, value):
    """Shared implementation behind update_tag and add_tag.

    The two callers differ only in their printed output and in the
    conditions under which they invoke this helper.

    Parameters
    ==========
    dicom: the pydicom.dataset Dataset (pydicom.read_file)
    field: the name of the field to add
    value: the value to set, if name is a valid tag
    """
    # A string means we must look the tag up by field name
    if isinstance(field, str):
        lookup = get_tag(field)
        if field not in lookup:
            bot.error("%s is not a valid field to add. Skipping." % (field))
        else:
            entry = lookup[field]
            dicom.add_new(entry["tag"], entry["VR"], value)
    # Otherwise the field is already a tag (type BaseTag)
    else:
        existing = dicom.get(field)
        dicom.add_new(field, existing.VR, value)
    return dicom
def load_combined_deid(deids):
    '''load one or more deids, either based on a path or a tag, and merge
    them into a single recipe. The first deid in the list takes preference:
    later filters/headers are appended after earlier ones.

    Parameters
    ==========
    deids: should be a custom list of deids (paths or tags)
    '''
    if not isinstance(deids, list):
        bot.warning("load_combined_deids expects a list.")
        sys.exit(1)

    found_format = None
    deid = None

    for single_deid in deids:

        # If not a tag or path, returns None
        next_deid = get_deid(tag=single_deid,
                             exit_on_fail=False,
                             quiet=True,
                             load=True)

        if next_deid is not None:

            # Formats must match across all combined deids
            if found_format is None:
                found_format = next_deid['format']
            else:
                if found_format != next_deid['format']:
                    bot.error('Mismatch in deid formats, %s and %s'
                              % (found_format, next_deid['format']))
                    sys.exit(1)

            # If it's the first one, use as starter template
            if deid is None:
                deid = next_deid
            else:

                # Update filter, appending to end to give first preference
                if "filter" in next_deid:
                    if "filter" not in deid:
                        deid['filter'] = next_deid['filter']
                    else:
                        # Filters are grouped by name; same-named groups are concatenated
                        for name, group in next_deid['filter'].items():
                            if name in deid['filter']:
                                deid['filter'][name] = deid['filter'][name] + group
                            else:
                                deid['filter'][name] = group

                # Header actions are a flat list, simply concatenated
                if "header" in next_deid:
                    if "header" not in deid:
                        deid['header'] = next_deid['header']
                    else:
                        deid['header'] = deid['header'] + next_deid['header']

        else:
            bot.warning('Problem loading %s, skipping.' % single_deid)

    # May be None if nothing loaded successfully
    return deid
def find_deid(path=None):
    '''Locate a deid settings file.

    If path is a directory (or None, meaning the present working directory),
    search it for files starting with "deid" and take the first one found.
    If path is already a file, it is validated and returned as-is.

    Parameters
    ==========
    path: a path on the filesystem. If not provided, will assume PWD.
    '''
    path = os.getcwd() if path is None else path

    # The user has provided a directory: search it for deid* files
    if os.path.isdir(path):
        candidates = ["%s/%s" % (path, name)
                      for name in os.listdir(path)
                      if name.startswith('deid')]
        if not candidates:
            bot.error("No deid settings files found in %s, exiting." % (path))
            sys.exit(1)
        if len(candidates) > 1:
            bot.warning("Multiple deid files found in %s, will use first." % (path))
        path = candidates[0]

    # We have a file path at this point; it must exist
    if not os.path.exists(path):
        bot.error("Cannot find deid file %s, exiting." % (path))
        sys.exit(1)
    return path
def main(args, parser):
    """Entry point to de-identify a folder of dicom files.

    Depending on args.action ("all", "get", or "put") this extracts
    identifiers (GET), replaces them (PUT), or both, writing cleaned
    files to args.outfolder (or a temporary directory).
    """

    # Global output folder (temporary directory when not provided)
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): bot.error does not appear to terminate here, so
            # execution continues despite the "exiting" message -- confirm
            # whether a sys.exit is intended.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format))

    # Get list of dicom files (fall back to the demo dataset)
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    basename = os.path.basename(base)
    dicom_files = list(
        get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None

    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        bot.info("GET and PUT identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        # PUT without GET requires a previously saved ids file
        if args.ids is None:
            bot.exit(
                "To PUT without GET you must provide a json file with ids.")
        ids = args.ids

    # GET identifiers (overrides any ids file when action is "all")
    if do_get is True:
        ids = get_identifiers(dicom_files)

    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )
        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
def get_identifiers(dicom_files, force=True, config=None, strip_sequences=False, remove_private=False):
    """Extract all identifiers from one or more dicom images.

    Returns a lookup keyed by file name, where each value is a dictionary
    of fields (nested fields indexed by nested tag).

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    strip_sequences: if True, remove all sequences
    remove_private: remove private tags
    """
    # Resolve and validate the configuration file, then keep its "get" section
    if config is None:
        config = "%s/config.json" % here
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    # Normalize a single path into a list
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))

    # NOTE(review): strip_sequences and remove_private are accepted but not
    # used here -- presumably handled inside DicomParser; confirm.
    lookup = {}
    for path in dicom_files:
        parser = DicomParser(path, force=force)
        lookup[parser.dicom_file] = parser.get_fields()
    return lookup
def update_tag(dicom, field, value):
    """Update a header value only when the field already exists.

    This existence check is the only difference between this function and
    change_tag. To add a value that might not exist, use add_tag with a
    private identifier as a string.

    Parameters
    ==========
    dicom: the pydicom.dataset Dataset (pydicom.read_file)
    field: the name of the field to update
    value: the value to set, if name is a valid tag
    """
    # Nothing to do when the field is absent
    if field not in dicom:
        return dicom

    # String field names are resolved through the tag lookup
    if isinstance(field, str):
        found = get_tag(field)
        if not found:
            bot.error("%s is not a valid field to add. Skipping." % (field))
        else:
            dicom.add_new(found["tag"], found["VR"], value)
    # Otherwise we already have a tag (type BaseTag)
    else:
        current = dicom.get(field)
        dicom.add_new(field, current.VR, value)
    return dicom
def _prepare_replace_config(dicom_files, deid=None, config=None):
    """Normalize the inputs for replace_identifiers.

    Ensures the config file exists and is parsed, wraps a raw deid
    (path or None) into a DeidRecipe, and coerces a single dicom file
    into a list.

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    deid: a DeidRecipe, a path to a deid file, or None (default recipe)
    config: if None, uses default in provided module folder

    Returns
    =======
    (dicom_files list, DeidRecipe, parsed config dict)
    """
    if config is None:
        config = "%s/config.json" % (here)
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))

    # Wrap a raw deid (path or None) into a recipe object
    if not isinstance(deid, DeidRecipe):
        deid = DeidRecipe(deid)

    settings = read_json(config, ordered_dict=True)

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    return dicom_files, deid, settings
def save_dicom(dicom, dicom_file, output_folder=None, overwrite=False):
    '''Write a dicom Dataset into an output folder, refusing to clobber an
    existing file unless overwrite is enforced.

    Parameters
    ==========
    dicom: the pydicom Dataset to save
    dicom_file: the path to the dicom file to save (we only use basename)
    output_folder: the folder to save the file to
    overwrite: overwrite any existing file? (default is False)
    '''
    # Default destination: a fresh temp dir, or (when overwriting) the
    # source file's own directory
    if output_folder is None:
        if overwrite is False:
            output_folder = tempfile.mkdtemp()
        else:
            output_folder = os.path.dirname(dicom_file)

    dicom_name = os.path.basename(dicom_file)
    output_dicom = os.path.join(output_folder, dicom_name)

    if overwrite is False and os.path.exists(output_dicom):
        bot.error("%s already exists, overwrite set to False. Not writing." % dicom_name)
    else:
        dicom.save_as(output_dicom)

    # The target path is returned even when nothing was written
    return output_dicom
def main(args, parser): """inspect currently serves to inspect the header fields of a set of dicom files against a standard, and flag images that don't pass the different levels of criteria """ # If a deid is given, check against format deid = args.deid if deid is not None: params = load_deid(deid) if params["format"] != args.format: bot.error( "Format in deid (%s) doesn't match choice here (%s) exiting." % (params["format"], args.format) ) # Get list of dicom files base = args.folder if base is None: bot.info("No input folder specified, will use demo dicom-cookies.") base = get_dataset("dicom-cookies") dicom_files = list( get_files(base, pattern=args.pattern) ) # todo : consider using generator functionality result = has_burned_pixels(dicom_files, deid=deid) print("\nSUMMARY ================================\n") if result["clean"]: bot.custom( prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN" ) if result["flagged"]: for group, files in result["flagged"].items(): bot.flag("%s %s files" % (group, len(files))) if args.save: folders = "-".join([os.path.basename(folder) for folder in base]) outfile = "pixel-flag-results-%s-%s.tsv" % ( folders, datetime.datetime.now().strftime("%y-%m-%d"), ) with open(outfile, "w") as filey: filey.writelines("dicom_file\tpixels_flagged\tflag_list\treason\n") for clean in result["clean"]: filey.writelines("%s\tCLEAN\t\t\n" % clean) for flagged, details in result["flagged"].items(): if details["flagged"] is True: for result in details["results"]: group = result["group"] reason = result["reason"] filey.writelines( "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason) ) print("Result written to %s" % outfile)
def main(args, parser): '''inspect currently serves to inspect the header fields of a set of dicom files against a standard, and flag images that don't pass the different levels of criteria ''' # Global output folder #output_folder = args.outfolder #if output_folder is None: # output_folder = tempfile.mkdtemp() # If a deid is given, check against format deid = args.deid if deid is not None: params = load_deid(deid) if params['format'] != args.format: bot.error( "Format in deid (%s) doesn't match choice here (%s) exiting." % (params['format'], args.format)) # Get list of dicom files base = args.folder if base is None: bot.info("No input folder specified, will use demo dicom-cookies.") base = get_dataset('dicom-cookies') dicom_files = list(get_files( base, pattern=args.pattern)) # todo : consider using generator functionality result = has_burned_pixels(dicom_files, deid=deid) print('\nSUMMARY ================================\n') if len(result['clean']) > 0: bot.custom(prefix='CLEAN', message="%s files" % len(result['clean']), color="CYAN") if len(result['flagged']) > 0: for group, files in result['flagged'].items(): bot.flag("%s %s files" % (group, len(files))) if args.save is True: folders = '-'.join([os.path.basename(folder) for folder in base]) outfile = "pixel-flag-results-%s-%s.tsv" % ( folders, datetime.datetime.now().strftime('%y-%m-%d')) with open(outfile, 'w') as filey: filey.writelines('dicom_file\tpixels_flagged\tflag_list\treason\n') for clean in result['clean']: filey.writelines('%s\tCLEAN\t\t\n' % clean) for flagged, details in result['flagged'].items(): if details['flagged'] is True: for result in details['results']: group = result['group'] reason = result['reason'] filey.writelines('%s\tFLAGGED\t%s\t%s\n' % (flagged, group, reason)) print('Result written to %s' % outfile)
def remove_private(self):
    """Remove private tags from the loaded dicom.

    Tries pydicom's remove_private_tags first; when that fails (usually due
    to an invalid data type in a private element), falls back to deleting
    each private tag individually.
    """
    try:
        self.dicom.remove_private_tags()
    # Catch Exception instead of a bare except so that SystemExit and
    # KeyboardInterrupt are not swallowed here.
    except Exception:
        bot.error(
            """Private tags for %s could not be completely removed, usually
               this is due to invalid data type. Removing others."""
            % self.dicom_name)
        for ptag in get_private(self.dicom):
            del self.dicom[ptag.tag]
def get_identifiers(dicom_files, force=True, config=None, expand_sequences=True, skip_fields=None):
    """Extract identifiers (header fields) from dicom files.

    Returns a lookup keyed by file name; private tags are not included.

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    expand_sequences: if True, expand sequences. Otherwise, skips
    skip_fields: if not None, added fields to skip
    """
    # Load the "get" portion of the (default or user provided) config
    if config is None:
        config = "%s/config.json" % here
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))

    # Fields to skip: always includes PixelData, plus any user additions
    skip = config["skip"]
    if skip_fields is not None:
        extra = skip_fields if isinstance(skip_fields, list) else [skip_fields]
        skip = skip + extra

    identifiers = {}
    for dicom_file in dicom_files:
        # Accept either an already-loaded Dataset or a path on disk
        if isinstance(dicom_file, Dataset):
            dataset = dicom_file
            dicom_file = dataset.filename
        else:
            dataset = read_file(dicom_file, force=force)
        identifiers[dicom_file] = get_fields(dataset,
                                             skip=skip,
                                             expand_sequences=expand_sequences)
    return identifiers
def change_tag(dicom, field, value):
    '''change tag: shared implementation behind update_tag and add_tag.
    The callers differ only in their printed output and in when they
    decide to invoke this helper.'''
    known = get_tag(field)
    if field not in known:
        bot.error("%s is not a valid field to add. Skipping." % (field))
    else:
        dicom.add_new(known[field]['tag'], known[field]['VR'], value)
    return dicom
def get_identifiers(dicom_files, force=True, config=None, expand_sequences=True, skip_fields=None):
    '''extract all identifiers from a dicom image. This function
    returns a lookup by file name

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    expand_sequences: if True, expand sequences. otherwise, skips
    skip_fields: if not None, added fields to skip
    '''
    if config is None:
        config = "%s/config.json" % (here)

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))

    config = read_json(config, ordered_dict=True)['get']

    # Coerce a single file into a list BEFORE counting, so the debug
    # message reports files and not characters of a path string
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug('Extracting identifiers for %s dicom' % (len(dicom_files)))

    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config['skip']
    if skip_fields is not None:
        if not isinstance(skip_fields, list):
            skip_fields = [skip_fields]
        skip = skip + skip_fields

    for dicom_file in dicom_files:
        # BUGFIX: honor the caller's force parameter (was hard-coded True)
        dicom = read_file(dicom_file, force=force)
        ids[dicom_file] = get_fields(dicom,
                                     skip=skip,
                                     expand_sequences=expand_sequences)

    return ids
def __init__(self, dicom_file, recipe=None, config=None, force=True):
    """Initialize the parser with a dicom file and an optional recipe.

    Parameters
    ==========
    dicom_file: path to the dicom file to load
    recipe: a DeidRecipe or a path/tag to one (wrapped if needed)
    config: path to a config.json; defaults to the module's config
    force: passed through to the dicom load (force reading the file)
    """
    # Lookup for the dicom
    self.lookup = {}

    # Will be a list of DicomField
    self.fields = {}

    # Load default configuration, or a custom one
    config = config or os.path.join(here, "config.json")
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    self.config = read_json(config, ordered_dict=True)

    # Deid can be a recipe or filename
    if not isinstance(recipe, DeidRecipe):
        recipe = DeidRecipe(recipe)

    self.load(dicom_file, force=force)
    self.recipe = recipe
def change_tag(dicom, field, value):
    '''General helper used by update_tag and add_tag: writes value into
    field when the field name resolves to a known tag. The two callers
    differ only in printed output and calling conditions.

    Parameters
    ==========
    dicom: the pydicom.dataset Dataset (pydicom.read_file)
    field: the name of the field to add
    value: the value to set, if name is a valid tag
    '''
    lookup = get_tag(field)
    if field in lookup:
        entry = lookup[field]
        dicom.add_new(entry['tag'], entry['VR'], value)
    else:
        bot.error("%s is not a valid field to add. Skipping." % (field))
    return dicom
def get_deid(tag=None, exit_on_fail=True, quiet=False, load=False):
    '''Resolve a deid by tag or path, optionally loading it.

    For a file provided with the software under deid/data named e.g.
    "deid.dicom", the tag would be "dicom".

    Parameters
    ==========
    tag: the text that comes after deid to indicate the tag of the file in deid/data
    exit_on_fail: if None is an acceptable return value, this should be set to False
                  (default is True).
    quiet: Default False. If None is acceptable, quiet can be set to True
    load: also load the deid, if resulting path (from path or tag) is not None
    '''
    # No tag/path means load the default
    if tag is None:
        tag = 'dicom'

    # Already a parsed deid? Nothing to resolve.
    if isinstance(tag, dict):
        bot.debug('deid is already loaded.')
        return tag

    # A real path wins over a data-folder tag
    if os.path.exists(tag):
        deid = os.path.abspath(tag)
    else:
        deid = "%s/deid.%s" % (data_base, tag)

    if not os.path.exists(deid):
        if quiet is False:
            bot.error("Cannot find %s" % (deid))
        if exit_on_fail is True:
            sys.exit(1)
        return None

    if load is True:
        return load_deid(deid)
    return deid
def save_dicom(dicom, dicom_file, output_folder=None, overwrite=False):
    '''Write a dicom Dataset to an output folder, making sure not to
    overwrite an existing file unless the user has enforced it.

    Parameters
    ==========
    dicom: the pydicom Dataset to save
    dicom_file: path whose basename names the output file
    output_folder: destination folder (temp dir / source dir when None)
    overwrite: overwrite any existing file? (default is False)
    '''
    if output_folder is None:
        output_folder = (tempfile.mkdtemp() if overwrite is False
                         else os.path.dirname(dicom_file))

    dicom_name = os.path.basename(dicom_file)
    output_dicom = os.path.join(output_folder, dicom_name)

    dowrite = True
    if overwrite is False and os.path.exists(output_dicom):
        bot.error("%s already exists, overwrite set to False. Not writing." % dicom_name)
        dowrite = False

    if dowrite:
        dicom.save_as(output_dicom)

    # The target path is returned even when nothing was written
    return output_dicom
def parse_action(section, line, config, section_name=None):
    '''add action will take a line from a deid config file, a config
    (dictionary), and an active section name (eg header) and add an entry
    to the config file to perform the action.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: optionally, a section name
    '''
    # NOTE(review): section_name is accepted but unused in this version;
    # confirm whether nested (named) sections should record actions here.
    if not line.upper().startswith(actions):
        bot.error("%s is not a valid action line." % line)
        sys.exit(1)

    # We may have to deal with cases of spaces
    parts = line.split(' ')
    action = parts.pop(0).replace(' ', '')

    # What field is the action for?
    if len(parts) < 1:
        bot.error("%s requires a FIELD value, but not found." % (action))
        sys.exit(1)

    field = parts.pop(0)

    # Actions that require a value
    if action in ["ADD", "REPLACE", "JITTER"]:
        if len(parts) == 0:
            bot.error("%s requires a VALUE, but not found" % (action))
            sys.exit(1)
        value = ' '.join(parts[0:])  # get remainder of line
        value = value.split('#')[0]  # remove comments
        bot.debug("Adding %s" % line)
        config[section].append({
            "action": action,
            "field": field,
            "value": value
        })

    # Actions that don't require a value
    elif action in ["BLANK", "KEEP", "REMOVE"]:
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field})

    return config
def add_section(config, section, section_name=None):
    '''Register a section (and optionally a named sub-section) in a config.

    Parameters
    ==========
    config: the config (dict) parsed thus far
    section: the section name to add
    section_name: an optional name, added as a level
    '''
    # Local helper: report and abort on an invalid section declaration
    def fail(message):
        bot.error(message)
        sys.exit(1)

    if section is None:
        fail('You must define a section (e.g. %header) before any action.')
    if section == 'filter' and section_name is None:
        fail("You must provide a name for a filter section.")
    if section not in sections:
        fail("%s is not a valid section." % section)

    if section not in config:
        # A named section gets one extra level of nesting
        if section_name is not None:
            config[section] = OrderedDict()
            config[section][section_name] = []
            bot.debug("Adding section %s %s" % (section, section_name))
        else:
            config[section] = []
            bot.debug("Adding section %s" % section)
        return config

    # Section already present; ensure the named sub-section exists
    if section_name is not None and section_name not in config[section]:
        config[section][section_name] = []

    return config
def replace_identifiers(dicom_files, ids, deid=None, save=True, overwrite=False, output_folder=None, force=True, config=None, strip_sequences=True, remove_private=True):
    '''replace identifiers using pydicom, can be slow when writing and
    saving new files'''
    dicom_files, deid, config = _prepare_replace_config(dicom_files,
                                                        deid=deid,
                                                        config=config)

    # Parse through dicom files, update headers, and save
    updated_files = []
    for d in range(len(dicom_files)):
        dicom_file = dicom_files[d]
        dicom = read_file(dicom_file, force=force)

        # File basename is the key into the ids lookup
        idx = os.path.basename(dicom_file)
        fields = dicom.dir()

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        # Apply the user deid actions, looked up by file basename
        if deid is not None:
            if idx in ids:
                for action in deid['header']:
                    dicom = perform_action(dicom=dicom,
                                           item=ids[idx],
                                           action=action)
            else:
                # Files with no identifiers entry are skipped entirely
                bot.warning("%s is not in identifiers." % idx)
                continue

        # Next perform actions in default config, only if not done
        for action in config['put']['actions']:
            if action['field'] in fields:
                dicom = perform_action(dicom=dicom, action=action)

        if remove_private is True:
            try:
                dicom.remove_private_tags()
            except:
                bot.error('''Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others.''' % idx)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this continue drops the file from
                # updated_files whenever the fallback removal runs --
                # confirm that skipping the rebuild/save is intended.
                continue
        else:
            bot.warning("Private tags were not removed!")

        # Rebuild a fresh Dataset from the cleaned fields; elements that
        # fail to copy are silently skipped
        ds = Dataset()
        for field in dicom.dir():
            try:
                ds.add(dicom.data_element(field))
            except:
                pass

        # Copy original data types
        attributes = [
            'is_little_endian', 'is_implicit_VR', 'preamble',
            '_parent_encoding'
        ]
        for attribute in attributes:
            ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Retain required meta data
        file_metas = getattr(dicom, 'file_meta', Dataset())

        # Retain required meta data - not identifying
        # file_metas.MediaStorageSOPClassUID
        # file_metas.MediaStorageSOPInstanceUID
        # file_metas.ImplementationVersionName
        # file_metas.ImplementationClassUID

        # File attributes for meta
        attributes = [
            'TransferSyntaxUID', 'FileMetaInformationGroupLength',
            'FileMetaInformationVersion'
        ]
        for attribute in attributes:
            file_metas.add(dicom.file_meta.data_element(attribute))

        # Preamble is required
        ds.file_meta = file_metas
        ds.preamble = vars(dicom)['preamble']

        # Save to file? When saved, the returned value is the output path,
        # not a Dataset -- updated_files mixes types depending on save.
        if save is True:
            ds = save_dicom(dicom=ds,
                            dicom_file=dicom_file,
                            output_folder=output_folder,
                            overwrite=overwrite)

        updated_files.append(ds)

    return updated_files
def get_shared_identifiers(dicom_files, force=True, config=None, aggregate=None, expand_sequences=True):
    """Extract shared identifiers across a set of dicom files.

    Intended for cases when a set of images (dicom) are being compressed
    into one file and the file (still) should have some searchable
    metadata. By default, fields that differ between files are removed.
    Fields named in aggregate instead collect their unique values into a
    list.

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    aggregate: list of field names whose unique values are collected
    expand_sequences: if True, expanded sequences are flattened

    Returns
    =======
    dict mapping field name to the shared value (or list for aggregates)
    """
    if aggregate is None:
        aggregate = []

    if config is None:
        config = "%s/config.json" % (here)

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))

    config = read_json(config, ordered_dict=True)["get"]

    # Coerce a single file into a list BEFORE counting, so the debug
    # message reports files and not characters of a path string
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting shared identifiers for %s dicom" % (len(dicom_files)))

    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config["skip"]

    for dicom_file in dicom_files:
        # BUGFIX: honor the caller's force parameter (was hard-coded True)
        dicom = read_file(dicom_file, force=force)

        # Get list of fields, expanded sequences are flattened
        fields = get_fields(dicom, skip=skip, expand_sequences=expand_sequences)

        for key, val in fields.items():
            # If it's there, only keep if the same
            if key in ids:
                # Items to aggregate are appended (unique), not removed
                if key in aggregate:
                    if val not in ids[key]:
                        ids[key].append(val)
                else:
                    # Differing values are dropped and skipped thereafter
                    if ids[key] != val:
                        del ids[key]
                        skip.append(key)
            else:
                if key in aggregate:
                    val = [val]
                ids[key] = val

    # For any aggregates that are one item, unwrap again
    for field in aggregate:
        if field in ids and len(ids[field]) == 1:
            ids[field] = ids[field][0]

    return ids
def replace_identifiers(dicom_files, ids=None, deid=None, save=True, overwrite=False, output_folder=None, force=True, config=None, strip_sequences=True, remove_private=True):
    """replace identifiers using pydicom, can be slow when writing
    and saving new files. If you want to replace sequences, they need
    to be extracted with get_identifiers and expand_sequences to True.
    """
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # ids (a lookup) is not required
    ids = ids or {}

    # Parse through dicom files, update headers, and save
    updated_files = []
    for _, dicom_file in enumerate(dicom_files):

        # Accept either an already-loaded Dataset or a path on disk
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        # Remove private tags at the onset, if requested
        if remove_private:
            try:
                dicom.remove_private_tags()
            except:
                bot.error(
                    """Private tags for %s could not be completely removed, usually
                       this is due to invalid data type. Removing others."""
                    % dicom_name)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this continue drops the file from
                # updated_files whenever the fallback removal runs --
                # confirm that skipping the rebuild/save is intended.
                continue

        # Include private tags (if not removed) plus dicom.dir
        fields = dicom_dir(dicom)

        if recipe.deid is not None:
            if dicom_file not in ids:
                ids[dicom_file] = {}

            # Prepare additional lists of values and fields (updates item)
            if recipe.has_values_lists():
                for group, actions in recipe.get_values_lists().items():
                    ids[dicom_file][group] = extract_values_list(
                        dicom=dicom, actions=actions)

            if recipe.has_fields_lists():
                for group, actions in recipe.get_fields_lists().items():
                    ids[dicom_file][group] = extract_fields_list(
                        dicom=dicom, actions=actions)

            # Apply each recipe action against this file's identifiers
            for action in recipe.get_actions():
                dicom = perform_action(dicom=dicom,
                                       item=ids[dicom_file],
                                       action=action)

        # Next perform actions in default config, only if not done
        for action in config["put"]["actions"]:
            if action["field"] in fields:
                dicom = perform_action(dicom=dicom, action=action)

        # Assemble a new dataset, again accounting for private tags;
        # elements that fail to copy are silently skipped
        ds = Dataset()
        for field in dicom_dir(dicom):
            try:
                # Most fields are strings
                if isinstance(field, str):
                    ds.add(dicom.data_element(field))
                # Remainder are tags
                else:
                    ds.add(dicom.get(field))
            except:
                pass

        # Copy original data attributes
        attributes = [
            "is_little_endian",
            "is_implicit_VR",
            "is_decompressed",
            "read_encoding",
            "read_implicit_vr",
            "read_little_endian",
            "_parent_encoding",
        ]

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128

        for attribute in attributes:
            if hasattr(dicom, attribute):
                ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Original meta data
        # or default empty dataset
        file_metas = getattr(dicom, "file_meta", Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, "MediaStorageSOPInstanceUID"):
            file_metas.MediaStorageSOPInstanceUID = ""

        # Save meta data
        ds.file_meta = file_metas

        # Save to file? When saved, the returned value is the output path,
        # not a Dataset -- updated_files mixes types depending on save.
        if save is True:
            ds = save_dicom(
                dicom=ds,
                dicom_file=dicom_file,
                output_folder=output_folder,
                overwrite=overwrite,
            )
        updated_files.append(ds)

    return updated_files
def load_deid(path=None):
    '''load_deid will return a loaded in (user) deid configuration file
    that can be used to update a default config.json. If a file path is
    specified, it is loaded directly. If a folder is specified, we look
    for a deid file in the folder. If nothing is specified, we assume
    the user wants to load a deid file in the present working directory.
    If the user wants to have multiple deid files in a directory, this
    can be done with an extension that specifies the module, eg;

         deid.dicom
         deid.nifti

    Parameters
    ==========
    path: a path to a deid file

    Returns
    =======
    config: a parsed deid (dictionary) with valid sections
    '''
    path = find_deid(path)

    # Read in spec, clean up extra spaces and newlines
    spec = [
        x.strip('\n').strip(' ') for x in read_file(path)
        if x.strip('\n').strip(' ') not in ['']
    ]
    spec = [x for x in spec if x not in ['', None]]

    config = OrderedDict()
    section = None

    # Consume the spec line by line
    while len(spec) > 0:

        # Clean up white trailing/leading space
        line = spec.pop(0).strip()

        # Comment
        if line.startswith("#"):
            continue

        # Starts with Format?
        elif bool(re.match('format', line, re.I)):
            fmt = re.sub('FORMAT|(\s+)', '', line).lower()
            if fmt not in formats:
                bot.error("%s is not a valid format." % fmt)
                sys.exit(1)

            # Set format
            config['format'] = fmt
            bot.debug("FORMAT set to %s" % fmt)

        # A new section?
        elif line.startswith('%'):

            # Remove any comments
            line = line.split('#', 1)[0].strip()

            # Is there a section name?
            section_name = None
            parts = line.split(' ')
            if len(parts) > 1:
                section_name = ' '.join(parts[1:])

            section = re.sub('[%]|(\s+)', '', parts[0]).lower()
            if section not in sections:
                bot.error("%s is not a valid section." % section)
                sys.exit(1)

            config = add_section(config=config,
                                 section=section,
                                 section_name=section_name)

        # An action (replace, blank, remove, keep, jitter)
        elif line.upper().startswith(actions):

            # Start of a filter group: gather member lines until the next
            # LABEL or section marker (or the end of the spec)
            if line.upper().startswith('LABEL') and section == "filter":
                members = []
                keep_going = True
                while keep_going is True:
                    # NOTE(review): if a LABEL is the last line of the spec,
                    # spec[0] raises IndexError -- confirm inputs always
                    # have at least one member line.
                    next_line = spec[0]
                    if next_line.upper().strip().startswith('LABEL'):
                        keep_going = False
                    elif next_line.upper().strip().startswith("%"):
                        keep_going = False
                    else:
                        new_member = spec.pop(0)
                        members.append(new_member)
                    if len(spec) == 0:
                        keep_going = False

                # Add the filter label to the config
                # NOTE(review): section_name is unbound if an action appears
                # before any % section header -- confirm ordering guarantees.
                config = parse_label(config=config,
                                     section=section,
                                     label=line,
                                     section_name=section_name,
                                     members=members)
            # Parse the action
            else:
                config = parse_action(section=section,
                                      section_name=section_name,
                                      line=line,
                                      config=config)
        else:
            bot.debug("%s not recognized to be in valid format, skipping." % line)

    return config
def parse_member(members, operator=None):
    """Parse one filter criteria line into its actions, fields, values and
    inner operators.

    A line may chain several statements with "||" (or) and "+" (and), e.g.
    "contains Field Value || missing OtherField". The earliest operator in
    the string is split off first and the remainder is re-queued.

    Parameters
    ==========
    members: the raw criteria string to parse
    operator: the operator joining this member to the previous one, if any

    Returns
    =======
    dict with keys action, field, operator, InnerOperators, value
    """
    main_operator = operator

    actions = []
    values = []
    fields = []
    operators = []

    # Work queue of statements still to parse
    members = [members]
    while len(members) > 0:

        operator = None
        value = None
        member = members.pop(0).strip()

        # Find the first || or +
        match_or = re.search('\|\|', member)
        match_and = re.search('\+', member)

        if match_or is not None:
            operator = "||"
        if match_and is not None:
            if match_or is not None:
                # Both present: take whichever occurs first in the string
                if match_or.start() >= match_and.start():
                    operator = "+"
            else:
                operator = "+"

        if operator is not None:
            member, rest = member.split(operator, 1)

            # The rest is only valid if contains a filter statement
            if any(word in rest for word in filters):
                members.append(rest.strip())

                # Split the statement based on found operator
                operator = (operator.replace('||', 'or').replace('+', 'and'))
                operators.append(operator)
            else:
                # Not a filter statement: the operator was literal text,
                # so rejoin and treat the whole thing as one member
                member = operator.join([member, rest])

        # Parse the member: first token is the action name
        action, member = member.split(' ', 1)
        action = action.lower().strip()

        # Contains, equals, not equals expects FieldName Values
        if action in ['contains', 'equals', 'notequals']:
            try:
                field, value = member.split(' ', 1)
            except ValueError:
                bot.error(
                    '%s for line %s must have field and values, exiting.'
                    % (action, member))
                sys.exit(1)

        # Missing, empty, notcontains expect only a field
        elif action in ['missing', 'empty', 'notcontains', 'present']:
            field = member.strip()

        else:
            bot.error('%s is not a valid filter action.' % action)
            sys.exit(1)

        actions.append(action)
        fields.append(field.strip())
        if value is not None:
            values.append(value.strip())

    entry = {
        'action': actions,
        'field': fields,
        'operator': main_operator,
        'InnerOperators': operators,
        'value': values
    }
    return entry
def replace_identifiers(dicom_files,
                        ids,
                        deid=None,
                        save=True,
                        overwrite=False,
                        output_folder=None,
                        force=True,
                        config=None,
                        strip_sequences=True,
                        remove_private=True):
    '''replace identifiers using pydicom, can be slow when writing
       and saving new files. If you want to replace sequences, they need
       to be extracted with get_identifiers and expand_sequences to True.

       Parameters
       ==========
       dicom_files: one or more dicom files to de-identify
       ids: lookup of identifiers, keyed by dicom file path
       deid: deid recipe (path, tag, or object) passed to the loader
       save: if True, write each cleaned dataset to disk via save_dicom
       overwrite: passed through to save_dicom
       output_folder: passed through to save_dicom
       force: force reading of potentially erroneous dicom files
       config: optional config (defaults loaded by _prepare_replace_config)
       strip_sequences: remove sequence elements before processing
       remove_private: attempt removal of private tags

       Returns
       =======
       list of updated pydicom Datasets (or saved file paths when save=True)
    '''
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # Parse through dicom files, update headers, and save
    updated_files = []
    for dicom_file in dicom_files:
        dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)
        fields = dicom.dir()

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        if recipe.deid is not None:
            if dicom_file in ids:
                # BUGFIX: take actions from the loaded recipe, not the raw
                # `deid` argument, which may be a path string or None (the
                # surrounding guard already checks recipe.deid).
                for action in recipe.get_actions():
                    dicom = perform_action(dicom=dicom,
                                           item=ids[dicom_file],
                                           action=action)
            else:
                bot.warning("%s is not in identifiers." % dicom_name)
                continue

        # Next perform actions in default config, only if not done
        for action in config['put']['actions']:
            if action['field'] in fields:
                dicom = perform_action(dicom=dicom, action=action)

        if remove_private is True:
            try:
                dicom.remove_private_tags()
            except Exception:
                bot.error('''Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others.''' % dicom_name)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): skips rebuilding/saving this file after manual
                # private-tag removal — confirm this is intended behavior.
                continue
        else:
            bot.warning("Private tags were not removed!")

        # Rebuild a clean dataset, skipping elements that fail to copy
        ds = Dataset()
        for field in dicom.dir():
            try:
                ds.add(dicom.data_element(field))
            except Exception:
                pass

        # Copy original data attributes
        attributes = [
            'is_little_endian', 'is_implicit_VR', 'is_decompressed',
            'read_encoding', 'read_implicit_vr', 'read_little_endian',
            '_parent_encoding'
        ]

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128
        for attribute in attributes:
            if hasattr(dicom, attribute):
                setattr(ds, attribute, getattr(dicom, attribute))

        # Original meta data, or default empty dataset
        file_metas = getattr(dicom, 'file_meta', Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, 'MediaStorageSOPInstanceUID'):
            file_metas.MediaStorageSOPInstanceUID = ''

        # Save meta data
        ds.file_meta = file_metas

        # Save to file?
        if save is True:
            ds = save_dicom(dicom=ds,
                            dicom_file=dicom_file,
                            output_folder=output_folder,
                            overwrite=overwrite)

        updated_files.append(ds)

    return updated_files
def _has_burned_pixels_single(dicom_file, force=True, deid=None):
    '''has burned pixels single will evaluate one dicom file for burned in
    pixels based on 'filter' criteria in a deid. If deid is not provided,
    will use application default. The method proceeds as follows:

    1. deid is loaded, with criteria groups ordered from specific --> general
    2. image is run down the criteria, stops when hits and reports FLAG
    3. passing through the entire list gives status of pass

    The default deid has a greylist, whitelist, then blacklist

    Parameters
    =========
    dicom_file: the fullpath to the file to evaluate
    force: force reading of a potentially erroneous file
    deid: the full path to a deid specification. if not defined, only default used

    deid['filter']['dangerouscookie'] <-- filter list "dangerouscookie"

    --> This is what an item in the criteria looks like
        [{'coordinates': ['0,0,512,110'],
          'filters': [{'InnerOperators': [],
          'action': ['notequals'],
          'field': ['OperatorsName'],
          'operator': 'and',
          'value': ['bold bread']}],
          'name': 'criteria for dangerous cookie'}]

    Returns
    =======
    --> This is what a clean image looks like:
        {'flagged': False, 'results': []}

    --> This is what a flagged image looks like:
       {'flagged': True,
        'results': [
                      {'reason': ' ImageType missing  or ImageType empty ',
                       'group': 'blacklist',
                       'coordinates': []}
                   ]
       }
    '''
    dicom = read_file(dicom_file, force=force)
    dicom_name = os.path.basename(dicom_file)

    # Load criteria (actions) for flagging
    # NOTE(review): despite the docstring, a deid of None (the default) would
    # raise TypeError here rather than falling back to an application default
    # — presumably callers always pass a loaded deid dict; confirm upstream.
    if 'filter' not in deid:
        bot.error('Deid provided does not have %filter, exiting.')
        sys.exit(1)

    # Return list with lookup as dicom_file
    results = []
    global_flagged = False

    # Each named filter list (e.g. greylist/whitelist/blacklist) holds items;
    # each item holds one or more filter groups plus coordinates to scrub.
    for name, items in deid['filter'].items():
        for item in items:
            flags = []
            descriptions = []  # description for each group across items

            for group in item['filters']:
                group_flags = []         # evaluation for a single line
                group_descriptions = []

                # You cannot pop from the list: iterate by index so that the
                # parallel 'action'/'field'/'value'/'InnerOperators' lists
                # stay aligned ('value' and 'InnerOperators' may be shorter).
                for a in range(len(group['action'])):
                    action = group['action'][a]
                    field = group['field'][a]
                    value = ''
                    if len(group['value']) > a:
                        value = group['value'][a]

                    # value or None: empty string means "no value given"
                    flag = apply_filter(dicom=dicom,
                                        field=field,
                                        filter_name=action,
                                        value=value or None)
                    group_flags.append(flag)
                    description = "%s %s %s" % (field, action, value)

                    # Interleave the chaining operator ('and'/'or') between
                    # flags so evaluate_group can fold the sequence in order
                    if len(group['InnerOperators']) > a:
                        inner_operator = group['InnerOperators'][a]
                        group_flags.append(inner_operator)
                        description = "%s %s" % (description, inner_operator)

                    group_descriptions.append(description)

                # At the end of a group, evaluate the inner group
                flag = evaluate_group(group_flags)

                # "Operator" is relevant for the outcome of the list of actions
                operator = ''
                if 'operator' in group:
                    if group['operator'] is not None:
                        operator = group['operator']
                        flags.append(operator)

                flags.append(flag)
                # Newlines stripped so the reason reads as a single line
                reason = ('%s %s' % (operator,
                                     ' '.join(group_descriptions))).replace('\n', ' ')
                descriptions.append(reason)

            group_name = ''  # NOTE(review): assigned but unused below
            if "name" in item:
                group_name = item['name']

            # When we parse through a group, we evaluate based on all flags
            flagged = evaluate_group(flags=flags)
            if flagged is True:
                global_flagged = True
                reason = ' '.join(descriptions)
                result = {'reason': reason,
                          'group': name,
                          'coordinates': item['coordinates']}
                results.append(result)

    results = {'flagged': global_flagged,
               'results': results}
    return results