def main(args, parser):
    """Entry point for the identifiers client: GET and/or PUT identifiers.

    Parameters
    ==========
    args: the parsed command line arguments. The attributes read here are
          outfolder, deid, format, input, action, ids and overwrite.
    parser: the argument parser (not used in this function)
    """
    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): the message says "exiting" but bot.error is used
            # rather than bot.exit - confirm whether a hard stop is intended.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format))

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)
    dicom_files = list(
        get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None

    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # Only GET is performed for this action, so say exactly that
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit(
                "To PUT without GET you must provide a json file with ids.")
        ids = args.ids

    # GET identifiers
    if do_get is True:
        ids = get_identifiers(dicom_files)

    # PUT identifiers, and report where the cleaned files were written
    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )
        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
def find_deid(path=None):
    '''find_deid is a helper function to load_deid to find a deid file in
       a folder, or return the path provided if it is the file.

       Parameters
       ==========
       path: a path on the filesystem. If not provided, will assume PWD.

       Returns
       =======
       path: the path to the deid file. If a folder holds several candidates,
             the first in sorted (alphabetical) order is used.
    '''
    if path is None:
        path = os.getcwd()

    # The user has provided a directory
    if os.path.isdir(path):
        # os.listdir returns entries in arbitrary order, so sort to make
        # "will use first" deterministic across filesystems
        contenders = sorted(
            os.path.join(path, x)
            for x in os.listdir(path)
            if x.startswith('deid')
        )

        if len(contenders) == 0:
            bot.exit("No deid settings files found in %s, exiting." % (path))

        elif len(contenders) > 1:
            bot.warning("Multiple deid files found in %s, will use first." %
                        (path))

        path = contenders[0]

    # We have a file path at this point
    if not os.path.exists(path):
        bot.exit("Cannot find deid file %s, exiting." % (path))

    return path
def load_combined_deid(deids):
    '''load one or more deids, either based on a path or a tag, and merge
       them into a single deid. The first deid that loads is the template,
       and the filter groups and header actions of later ones are appended
       to it (so the first is given preference).

       Parameters
       ==========
       deids: should be a custom list of deids

       Returns
       =======
       deid: the combined deid, or None if nothing could be loaded
    '''
    if not isinstance(deids, list):
        bot.exit("load_combined_deids expects a list.")

    found_format = None
    deid = None

    for single_deid in deids:

        # If not a tag or path, returns None
        next_deid = get_deid(tag=single_deid,
                             exit_on_fail=False,
                             quiet=True,
                             load=True)

        if next_deid is None:
            bot.warning('Problem loading %s, skipping.' % single_deid)
            continue

        # Formats must match
        if found_format is None:
            found_format = next_deid['format']
        elif found_format != next_deid['format']:
            bot.exit('Mismatch in deid formats, %s and %s' %
                     (found_format, next_deid['format']))

        # If it's the first one, use as starter template
        if deid is None:
            deid = next_deid
            continue

        # Update filter, appending to end to give first preference
        if "filter" in next_deid:
            if "filter" not in deid:
                deid['filter'] = next_deid['filter']
            else:
                for name, group in next_deid['filter'].items():
                    if name in deid['filter']:
                        deid['filter'][name] = deid['filter'][name] + group
                    else:
                        deid['filter'][name] = group

        # Header actions are appended after the ones already present
        if "header" in next_deid:
            if "header" not in deid:
                deid['header'] = next_deid['header']
            else:
                deid['header'] = deid['header'] + next_deid['header']

    return deid
def _remove_tag(dicom, item, field, value=None):
    """A wrapper to handle removal of a tag by calling tags.remove_tag.
    The user can optionally provide a value with a function (func:name)
    or a filter (e.g. contains:text) to determine if a tag should be removed.

    Parameters
    ==========
    dicom: the dicom to remove the field from
    item: a dictionary that must hold the function when value is func:<name>
    field: the field to remove
    value: optional "<type>:<option>" string. Without it the tag is always
           removed.

    Returns
    =======
    dicom: the dicom returned by remove_tag, or the input when nothing is
           removed
    """
    value = value or ""
    do_removal = True

    # The user can optionally provide a function to return a boolean
    if ":" in value:
        value_type, value_option = value.split(":", 1)

        if value_type.lower() == "func":

            # An item must be provided. Without it the function cannot be
            # looked up, so stop with a message rather than a TypeError.
            if item is None:
                bot.exit(
                    "The item parameter (dict) with values must be provided for a REMOVE func:%s"
                    % value_option)

            # The function must be included in the item, same reasoning
            if value_option not in item:
                bot.exit("%s not found as key included with item." %
                         value_option)

            # To the removal, this should return True/False
            # The calling function (currently) is required to handle parsing
            # fields that are tags.
            do_removal = item[value_option](dicom, value, field)
            if not isinstance(do_removal, bool):
                bot.warning(
                    "function %s returned an invalid type %s. Must be bool." %
                    (value_option, type(do_removal)))

        # A filter such as contains, notcontains, equals, etc.
        elif value_type.lower() in value_filters:

            # These functions are known to return boolean
            do_removal = apply_filter(
                dicom=dicom,
                field=field,
                filter_name=value_type,
                value=value_option or None,
            )

        else:
            bot.exit("%s is an invalid variable type for REMOVE." % value_type)

    if do_removal:
        dicom = remove_tag(dicom, field)
    return dicom
def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
    file and check that it is supported. If not, exit on error. If yes,
    return the format.

    Parameters
    ==========
    line: the line that starts with format (matched in any letter case).

    Returns
    =======
    fmt: the lowercase format name
    """
    # Drop the leading keyword in any case (the caller detects it
    # case-insensitively) along with all whitespace
    fmt = re.sub(r"^\s*format|\s+", "", line, flags=re.IGNORECASE).lower()
    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt
def load(self, dicom_file, force=True):
    """Ensure that the dicom file exists, and use full path. Here
    we load the file, and save the dicom, dicom_file, and dicom_name.

    Parameters
    ==========
    dicom_file: a path to a dicom file, or an already loaded Dataset
    force: passed on to read_file when the file must be read
    """
    # Reset seen, which is generated when we parse
    self.seen = []

    # The user might already have provided a dataset
    if isinstance(dicom_file, Dataset):
        self.dicom = dicom_file
    else:

        # If we must read the file, the path must exist
        if not os.path.exists(dicom_file):
            bot.exit("%s does not exist." % dicom_file)
        self.dicom = read_file(dicom_file, force=force)

    # Set class variables that might be helpful later. A dataset provided
    # directly may have no filename, in which case both are set to None
    # instead of raising an AttributeError.
    filename = getattr(self.dicom, "filename", None)
    if filename:
        self.dicom_file = os.path.abspath(filename)
        self.dicom_name = os.path.basename(self.dicom_file)
    else:
        self.dicom_file = None
        self.dicom_name = None
def find_deid(path=None):
    """find_deid is a helper function to load_deid to find a deid file in
    a folder, or return the path provided if it is the file. If a folder
    has no deid file, the default installed with the module is used.

    Parameters
    ==========
    path: a path on the filesystem. If not provided, will assume PWD.

    Returns
    =======
    path: the path to the deid file. If a folder holds several candidates,
          the first in sorted (alphabetical) order is used.
    """
    # A default deid will be loaded if all else fails
    default_deid = os.path.join(get_installdir(), "data", "deid.dicom")

    if path is None:
        path = os.getcwd()

    # The user has provided a directory
    if os.path.isdir(path):
        # os.listdir returns entries in arbitrary order, so sort to make
        # "will use first" deterministic across filesystems
        contenders = sorted(
            os.path.join(path, x)
            for x in os.listdir(path)
            if x.startswith("deid")
        )

        if len(contenders) == 0:
            # The default file is named deid.dicom (see default_deid above)
            bot.warning(
                "No deid settings files found in %s, will use default deid.dicom."
                % path)
            contenders.append(default_deid)

        elif len(contenders) > 1:
            bot.warning("Multiple deid files found in %s, will use first." %
                        (path))

        path = contenders[0]

    # We have a file path at this point
    if not os.path.exists(path):
        bot.exit("Cannot find deid file %s, exiting." % (path))

    return path
def parse_group_action(section, line, config, section_name):
    """Parse one line of a %fields or %values group and record it under
    config[section][section_name]. A fields group only accepts FIELD, a
    values group accepts any group action and an optional value.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: the name of the group the entry is appended to

    Returns
    =======
    config: the same config dictionary, with the entry appended
    """
    upper_line = line.upper()

    if not upper_line.startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if section == "fields" and not upper_line.startswith("FIELD"):
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))

    # First token is the action; tokens from a split on " " hold no spaces
    tokens = line.split(" ")
    action = tokens.pop(0)

    # Every group action needs at least a field after it
    if not tokens:
        bot.exit("%s action %s requires additional arguments" %
                 (section, action))

    # The second token is always a field or field expander
    entry = {"action": action, "field": tokens.pop(0)}

    # Fields never carry a value (no third arguments)
    if section == "fields":
        config[section][section_name].append(entry)

    # Values may carry one, taken from whatever remains on the line
    elif section == "values":
        if tokens:
            entry["value"] = _remove_comments(tokens)
        config[section][section_name].append(entry)

    return config
def parse_config_action(section, line, config, section_name=None):
    """add action will take a line from a deid config file, a config
    (dictionary), and an active section name (eg header) and add an entry
    to the config file to perform the action.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: optionally, a section name (not used here)

    Returns
    =======
    config: the same config dictionary, with the action appended to
            config[section]
    """
    if not line.upper().startswith(actions):
        bot.exit("%s is not a valid action line." % line)

    # The line is validated case-insensitively above, so normalize the
    # action the same way. Otherwise "replace ..." passes validation and is
    # then silently dropped by the comparisons below.
    parts = line.split(" ")
    action = parts.pop(0).upper()

    # What field is the action for?
    if len(parts) < 1:
        bot.exit("%s requires a FIELD value, but not found." % action)

    field = parts.pop(0)

    # Actions that require a value
    if action in ("ADD", "REPLACE", "JITTER"):
        if not parts:
            bot.exit("%s requires a VALUE, but not found" % action)

        value = _remove_comments(parts)
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({
            "action": action,
            "field": field,
            "value": value
        })

    # Actions that can optionally have a value
    elif action == "REMOVE":
        bot.debug("%s: adding %s" % (section, line))
        entry = {"action": action, "field": field}

        # REMOVE can have a func:is_thing (or filter) to return boolean,
        # without one the removal has no criteria
        if parts:
            entry["value"] = _remove_comments(parts)
        config[section].append(entry)

    # Actions that don't require a value
    elif action in ("BLANK", "KEEP"):
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field})

    # Anything else passed the prefix check but is not handled here
    else:
        bot.warning("%s is not a supported action in section %s, skipping." %
                    (action, section))

    return config
def parse_action(section, line, config, section_name=None):
    '''add action will take a line from a deid config file, a config
       (dictionary), and an active section name (eg header) and add an entry
       to the config file to perform the action.

       Parameters
       =========
       section: a valid section name from the deid config file
       line: the line content to parse for the section/action
       config: the growing/current config dictionary
       section_name: optionally, a section name (not used here)

       Returns
       =======
       config: the same config dictionary, with the action appended to
               config[section]
    '''
    if not line.upper().startswith(actions):
        bot.exit("%s is not a valid action line." % line)

    # The line is validated case-insensitively above, so normalize the
    # action the same way. Otherwise "replace ..." passes validation and is
    # then silently dropped by the comparisons below.
    parts = line.split(' ')
    action = parts.pop(0).upper()

    # What field is the action for?
    if len(parts) < 1:
        bot.exit("%s requires a FIELD value, but not found." % action)

    field = parts.pop(0)

    # Actions that require a value
    if action in ("ADD", "REPLACE", "JITTER"):
        if not parts:
            bot.exit("%s requires a VALUE, but not found" % action)

        value = ' '.join(parts)  # get remainder of line
        # remove comments, and the whitespace that separated them from
        # the value
        value = value.split('#')[0].strip()
        bot.debug("Adding %s" % line)
        config[section].append({
            "action": action,
            "field": field,
            "value": value
        })

    # Actions that don't require a value
    elif action in ("BLANK", "KEEP", "REMOVE"):
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field})

    # Anything else passed the prefix check but is not handled here
    else:
        bot.warning("%s is not a supported action in section %s, skipping." %
                    (action, section))

    return config
def add_section(config, section, section_name=None):
    '''Make sure a section (and, when named, its sub-level) exists in the
       config. Named sections are stored as an OrderedDict of lists, unnamed
       sections as a plain list.

       Parameters
       ==========
       config: the config (dict) parsed thus far
       section: the section name to add
       section_name: an optional name, added as a level

       Returns
       =======
       config: the same config, with the section present
    '''
    # Validate before touching the config
    if section is None:
        bot.exit('You must define a section (e.g. %header) before any action.')

    if section == 'filter' and section_name is None:
        bot.exit("You must provide a name for a filter section.")

    if section not in sections:
        bot.exit("%s is not a valid section." % section)

    is_named = section_name is not None

    # Known section: at most a new named level is needed
    if section in config:
        if is_named and section_name not in config[section]:
            config[section][section_name] = []
        return config

    # New section: a name means one more level (dict), otherwise a list
    if is_named:
        named_level = OrderedDict()
        named_level[section_name] = []
        config[section] = named_level
        bot.debug("Adding section %s %s" % (section, section_name))
    else:
        config[section] = []
        bot.debug("Adding section %s" % section)

    return config
def parse_member(members, operator=None):
    '''parse a single filter member line (e.g. "contains PatientID text")
       that may chain several statements with || (or) and + (and).

       Parameters
       ==========
       members: the member line (string) to parse
       operator: the operator that joins this line to the previous one,
                 stored unchanged under "operator"

       Returns
       =======
       entry: a dictionary with lists under action, field, value and
              InnerOperators, plus the given operator
    '''
    main_operator = operator

    actions = []
    values = []
    fields = []
    operators = []
    members = [members]

    while members:

        operator = None
        value = None
        member = members.pop(0).strip()

        # Find the first || or + (plain substring search, no regex needed)
        index_or = member.find('||')
        index_and = member.find('+')

        if index_or != -1:
            operator = '||'
        if index_and != -1 and (index_or == -1 or index_or >= index_and):
            operator = '+'

        if operator is not None:

            member, rest = member.split(operator, 1)

            # The rest is only valid if contains a filter statement
            if any(word in rest for word in filters):
                members.append(rest.strip())

                # Record the operator in its word form
                operators.append('or' if operator == '||' else 'and')
            else:
                # The symbol was part of a value, so put the line back
                member = operator.join([member, rest])

        # Parse the member. partition never fails, so a line with only an
        # action reaches the messages below instead of raising ValueError.
        action, _, member = member.partition(' ')
        action = action.lower().strip()

        # Contains, notcontains, equals, not equals expects FieldName Values
        if action in ['contains', 'notcontains', 'equals', 'notequals']:
            try:
                field, value = member.split(' ', 1)
            except ValueError:
                bot.exit(
                    '%s for line %s must have field and values, exiting.' %
                    (action, member))

        # Missing, empty, expect only a field
        elif action in ['missing', 'empty', 'present']:
            field = member.strip()
            if not field:
                bot.exit('%s must be followed by a field, exiting.' % action)
        else:
            bot.exit('%s is not a valid filter action.' % action)

        actions.append(action)
        fields.append(field.strip())

        if value is not None:
            values.append(value.strip())

    entry = {
        'action': actions,
        'field': fields,
        'operator': main_operator,
        'InnerOperators': operators,
        'value': values
    }
    return entry
def load_deid(path=None):
    '''load_deid will return a loaded in (user) deid configuration file
       that can be used to update a default config.json. If a file path is
       specified, it is loaded directly. If a folder is specified, we look
       for a deid file in the folder. If nothing is specified, we assume
       the user wants to load a deid file in the present working directory.
       If the user wants to have multiple deid files in a directory, this
       can be done with an extension that specifies the module, eg;

             deid.dicom
             deid.nifti

       Parameters
       ==========
       path: a path to a deid file

       Returns
       =======
       config: a parsed deid (dictionary) with valid sections
    '''
    path = find_deid(path)

    # Read in spec, clean up extra spaces and newlines, drop empty lines
    stripped = (x.strip('\n').strip(' ') for x in read_file(path))
    spec = [x for x in stripped if x]

    config = OrderedDict()
    section = None
    # Defined up front so an action before any section cannot hit an
    # unbound name
    section_name = None

    while spec:

        # Clean up white trailing/leading space
        line = spec.pop(0).strip()

        # Comment
        if line.startswith("#"):
            continue

        # Starts with Format?
        elif re.match('format', line, re.I):
            # Strip the keyword in any case, to agree with the match above
            fmt = re.sub(r'^format|\s+', '', line, flags=re.I).lower()
            if fmt not in formats:
                bot.exit("%s is not a valid format." % fmt)

            # Set format
            config['format'] = fmt
            bot.debug("FORMAT set to %s" % fmt)

        # A new section?
        elif line.startswith('%'):

            # Remove any comments
            line = line.split('#', 1)[0].strip()

            # Is there a section name?
            section_name = None
            parts = line.split(' ')
            if len(parts) > 1:
                section_name = ' '.join(parts[1:])
            section = re.sub(r'[%]|(\s+)', '', parts[0]).lower()
            if section not in sections:
                bot.exit("%s is not a valid section." % section)

            config = add_section(config=config,
                                 section=section,
                                 section_name=section_name)

        # An action (replace, blank, remove, keep, jitter)
        elif line.upper().startswith(actions):

            # An action has nowhere to go without a section
            if section is None:
                bot.exit(
                    'You must define a section (e.g. %header) before any action.'
                )

            # Start of a filter group
            if line.upper().startswith('LABEL') and section == "filter":

                # Collect member lines up to the next LABEL or section. The
                # loop condition also covers a LABEL on the last line, where
                # no lines remain to look at.
                members = []
                while spec:
                    next_line = spec[0].upper().strip()
                    if next_line.startswith(('LABEL', '%')):
                        break
                    members.append(spec.pop(0))

                # Add the filter label to the config
                config = parse_label(config=config,
                                     section=section,
                                     label=line,
                                     section_name=section_name,
                                     members=members)
            # Parse the action
            else:
                config = parse_action(section=section,
                                      section_name=section_name,
                                      line=line,
                                      config=config)
        else:
            bot.debug("%s not recognized to be in valid format, skipping." %
                      line)
    return config
def parse_member(members, operator=None):
    """parse a single filter member line (e.g. "contains PatientID text")
    that may chain several statements with || (or) and + (and).

    Parameters
    ==========
    members: the member line (string) to parse
    operator: the operator that joins this line to the previous one,
              stored unchanged under "operator"

    Returns
    =======
    entry: a dictionary with lists under action, field, value and
           InnerOperators, plus the given operator
    """
    main_operator = operator

    actions = []
    values = []
    fields = []
    operators = []
    members = [members]

    while members:

        operator = None
        value = None
        member = members.pop(0).strip()

        # Find the first || or + (plain substring search, no regex needed)
        position_or = member.find("||")
        position_and = member.find("+")

        if position_or != -1:
            operator = "||"
        if position_and != -1 and (position_or == -1
                                   or position_or >= position_and):
            operator = "+"

        if operator is not None:

            member, rest = member.split(operator, 1)

            # The rest is only valid if contains a filter statement
            if any(word in rest for word in filters):
                members.append(rest.strip())

                # Record the operator in its word form
                operators.append("or" if operator == "||" else "and")
            else:
                # The symbol was part of a value, so put the line back
                member = operator.join([member, rest])

        # Parse the member. partition never fails, so a line with only an
        # action reaches the messages below instead of raising ValueError.
        action, _, member = member.partition(" ")
        action = action.lower().strip()

        # Contains, notcontains, equals, not equals expects FieldName Values
        if action in ["contains", "notcontains", "equals", "notequals"]:
            try:
                field, value = member.split(" ", 1)
            except ValueError:
                bot.exit(
                    "%s for line %s must have field and values, exiting." %
                    (action, member))

        # Missing, empty, expect only a field
        elif action in ["missing", "empty", "present"]:
            field = member.strip()
            if not field:
                bot.exit("%s must be followed by a field, exiting." % action)
        else:
            bot.exit("%s is not a valid filter action." % action)

        actions.append(action)
        fields.append(field.strip())

        if value is not None:
            values.append(value.strip())

    entry = {
        "action": actions,
        "field": fields,
        "operator": main_operator,
        "InnerOperators": operators,
        "value": values,
    }
    return entry
def load_deid(path=None):
    """load_deid will return a loaded in (user) deid configuration file
    that can be used to update a default config.json. If a file path is
    specified, it is loaded directly. If a folder is specified, we look
    for a deid file in the folder. If nothing is specified, we assume
    the user wants to load a deid file in the present working directory.
    If the user wants to have multiple deid files in a directory, this
    can be done with an extension that specifies the module, eg;

          deid.dicom
          deid.nifti

    Parameters
    ==========
    path: a path to a deid file

    Returns
    =======
    config: a parsed deid (dictionary) with valid sections
    """
    path = find_deid(path)

    # Read in spec, clean up extra spaces and newlines, drop empty lines
    stripped = (x.strip("\n").strip(" ") for x in read_file(path))
    spec = [x for x in stripped if x]

    config = OrderedDict()
    section = None
    # Defined up front so an action before any section cannot hit an
    # unbound name
    section_name = None

    while spec:

        # Clean up white trailing/leading space
        line = spec.pop(0).strip()

        # Comment
        if line.startswith("#"):
            continue

        # Set format
        elif re.match("^format", line, re.I):
            config["format"] = parse_format(line)

        # A new section?
        elif line.startswith("%"):

            # Remove any comments
            line = line.split("#", 1)[0].strip()

            # Is there a section name?
            section_name = None
            parts = line.split(" ")
            if len(parts) > 1:
                section_name = " ".join(parts[1:])
            section = re.sub(r"[%]|(\s+)", "", parts[0]).lower()
            if section not in sections:
                bot.exit("%s is not a valid section." % section)

            config = add_section(config=config,
                                 section=section,
                                 section_name=section_name)

        # A %fields action (only field allowed), %values allows split
        elif line.upper().startswith(group_actions) and section in groups:
            config = parse_group_action(section=section,
                                        section_name=section_name,
                                        line=line,
                                        config=config)

        # An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
        elif line.upper().startswith(actions):

            # An action has nowhere to go without a section
            if section is None:
                bot.exit(
                    "You must define a section (e.g. %header) before any action."
                )

            # Start of a filter group
            if line.upper().startswith("LABEL") and section == "filter":
                members = parse_filter_group(spec)

                # Add the filter label to the config
                config = parse_label(
                    config=config,
                    section=section,
                    label=line,
                    section_name=section_name,
                    members=members,
                )
            # Parse the action
            else:
                config = parse_config_action(section=section,
                                             section_name=section_name,
                                             line=line,
                                             config=config)
        else:
            bot.warning("%s not recognized to be in valid format, skipping." %
                        line)
    return config
def _has_burned_pixels_single(dicom_file, force, deid):
    """has burned pixels single will evaluate one dicom file for burned in
    pixels based on 'filter' criteria in a deid. If deid is not provided,
    will use application default. The method proceeds as follows:

    1. deid is loaded, with criteria groups ordered from specific --> general
    2. image is run down the criteria, stops when hits and reports FLAG
    3. passing through the entire list gives status of pass

    The default deid has a greylist, whitelist, then blacklist

    Parameters
    =========
    dicom_file: the fullpath to the file to evaluate
    force: force reading of a potentially erroneous file
    deid: the deid object to get the filters from (get_filters is called
          on it)

    deid['filter']['dangerouscookie'] <-- filter list "dangerouscookie"

    --> This is what an item in the criteria looks like
        [{'coordinates': ['0,0,512,110'],
          'filters': [{'InnerOperators': [],
                       'action': ['notequals'],
                       'field': ['OperatorsName'],
                       'operator': 'and',
                       'value': ['bold bread']}],
          'name': 'criteria for dangerous cookie'}]

    Returns
    =======
    --> This is what a clean image looks like:
        {'flagged': False, 'results': []}

    --> This is what a flagged image looks like:
        {'flagged': True,
         'results': [
                {'reason': ' ImageType missing  or ImageType empty ',
                 'group': 'blacklist',
                 'coordinates': []}
             ]
        }
    """
    dicom = read_file(dicom_file, force=force)

    # Load criteria (actions) for flagging
    filters = deid.get_filters()
    if not filters:
        bot.exit("Deid provided does not have %filter, exiting.")

    # Return list with lookup as dicom_file
    results = []
    global_flagged = False

    for name, items in filters.items():
        for item in items:
            flags = []

            descriptions = []  # description for each group across items

            for group in item["filters"]:
                group_flags = []  # evaluation for a single line
                group_descriptions = []

                # field, value and inner operator are looked up by the
                # position of the action
                for a, action in enumerate(group["action"]):
                    field = group["field"][a]
                    value = ""
                    if len(group["value"]) > a:
                        value = group["value"][a]

                    flag = apply_filter(
                        dicom=dicom,
                        field=field,
                        filter_name=action,
                        value=value or None,
                    )
                    group_flags.append(flag)
                    description = "%s %s %s" % (field, action, value)

                    # Flags and operators are interleaved for evaluate_group
                    if len(group["InnerOperators"]) > a:
                        inner_operator = group["InnerOperators"][a]
                        group_flags.append(inner_operator)
                        description = "%s %s" % (description, inner_operator)

                    group_descriptions.append(description)

                # At the end of a group, evaluate the inner group
                flag = evaluate_group(group_flags)

                # "Operator" is relevant for the outcome of the list of actions
                operator = ""
                if "operator" in group:
                    if group["operator"] is not None:
                        operator = group["operator"]
                        flags.append(operator)

                flags.append(flag)
                reason = ("%s %s" %
                          (operator, " ".join(group_descriptions))).replace(
                              "\n", " ")
                descriptions.append(reason)

            # When we parse through a group, we evaluate based on all flags
            flagged = evaluate_group(flags=flags)

            if flagged is True:
                global_flagged = True
                reason = " ".join(descriptions)

                # A "from:" expression is resolved against this dicom. The
                # result is kept in a local so the filter item is left as
                # is, and the expression is not replaced by the values of
                # this particular file.
                coordinates = item["coordinates"]
                if coordinates and "from:" in coordinates[0]:
                    coordinates = extract_coordinates(dicom, coordinates[0])

                result = {
                    "reason": reason,
                    "group": name,
                    "coordinates": coordinates,
                }

                results.append(result)

    results = {"flagged": global_flagged, "results": results}
    return results
def perform_action(dicom, action, item=None, fields=None, return_seen=False):
    """Apply one parsed deid action to a dicom: resolve the action's field
    expression to a list of fields, run the action on each of them, then run
    it on the keys of item that end with "__<field expression>".

    Parameters
    ==========
    dicom: a loaded dicom file (pydicom read_file)
    action: the action from the parsed deid to take
       "field" (eg, PatientID) the header field to process
       "action" (eg, REPLACE) what to do with the field
       "value": if needed, the field from the response to replace with
    item: a dictionary with keys as fields, values as values
    fields: if provided, a filtered list of fields for expand
    return_seen: if True, also return the list of fields acted on

    Returns
    =======
    dicom, or the tuple (dicom, seen) when return_seen is True
    """
    # e.g: PatientID, endswith:ID, values:name, fields:name
    field = action.get("field")
    value = action.get("value")  # "suid" or "var:field"
    action = action.get("action")  # "REPLACE"

    # An unknown action falls back to blanking the field
    if action not in valid_actions:
        bot.warning("%s in not a valid choice. Defaulting to blanked." % action)
        action = "BLANK"

    # A values: or fields: reference is looked up in item, so it is required
    if re.search("^(values|fields)", field):
        if not item:
            bot.exit(
                "An item lookup must be provided to reference a list of values or fields."
            )

        # A values list returns fields with the value (can be private tags
        # if not removed)
        if re.search("^values", field):
            lookup_values = item.get(re.sub("^values:", "", field), [])
            fields = find_by_values(values=lookup_values, dicom=dicom)

        # Otherwise the prefix is fields: each listed expression is expanded
        else:
            expanded = []
            for contender in item.get(re.sub("^fields:", "", field), []):
                expanded.extend(
                    expand_field_expression(field=contender,
                                            dicom=dicom,
                                            contenders=fields))
            fields = expanded

    # A plain field, possibly with an expander applied to it
    else:
        fields = expand_field_expression(field=field,
                                         dicom=dicom,
                                         contenders=fields)

    # An expanded field must END with that field (built from the original
    # field expression, before the loop below)
    expanded_regexp = "__%s$" % field

    # Keep track of fields we have seen
    seen = []

    # Handle top level fields, each can be a key (string) or tag
    for current_field in fields:
        seen.append(current_field)
        _perform_action(dicom=dicom,
                        field=current_field,
                        item=item,
                        action=action,
                        value=value)

    # Expand sequences, keys of the form FieldA__FieldB
    if item:
        matching_keys = [
            key for key in item if re.search(expanded_regexp, str(key))
        ]
        for expanded_field in matching_keys:
            _perform_expanded_action(
                dicom=dicom,
                expanded_field=expanded_field,
                item=item,
                action=action,
                value=value,
            )

    if return_seen:
        return dicom, seen
    return dicom