def test_field_expansion(self):
    """Exercise deid.dicom.fields.expand_field_expression.

    Covers endswith: and contains: expansions, including matching private
    tags by their numeric group/element string.
    """
    print("Test deid.dicom.fields expand_field_expression")
    from deid.dicom.fields import expand_field_expression

    dicom = get_dicom(self.dataset)
    contenders = get_fields(dicom)

    print("Testing that field expansion works for basic tags")
    fields = expand_field_expression(
        dicom=dicom, field="endswith:Time", contenders=contenders
    )

    # The fields returned should end in time
    for uid, field in fields.items():
        assert field.name.endswith("Time")

    print("Testing that we can also search private tags based on numbers.")
    fields = expand_field_expression(
        dicom=dicom, field="contains:0019", contenders=contenders
    )

    # The fields returned should include tag group or element 0019
    for uid, field in fields.items():
        assert "0019" in uid

    print("Testing nested private tags")
    dataset = get_dataset("animals")  # includes nested private tags
    dicom = get_dicom(dataset)
    # NOTE(review): no assertions follow the nested private tag setup —
    # this block may be truncated; confirm against the upstream test file.
def test_extract_groups(self):
    """Verify extract_values_list / extract_fields_list, then check that
    the extracted groups drive identifier replacement end-to-end."""
    print("Test deid.dicom.groups extract_values_list")
    from deid.dicom.groups import extract_values_list, extract_fields_list

    dicom = get_dicom(self.dataset)
    fields = get_fields(dicom)  # removes empty / null

    # SPLIT action: PatientID broken apart on "^"
    split_action = [
        {"action": "SPLIT", "field": "PatientID", "value": 'by="^";minlength=4'}
    ]
    name_parts = dicom.get("PatientID").split("^")
    self.assertEqual(extract_values_list(dicom, split_action), name_parts)

    # FIELD action: values for every field starting with "Operator"
    field_action = [{"action": "FIELD", "field": "startswith:Operator"}]
    operator_values = []
    for field_name in fields:
        if field_name.startswith("Operator"):
            operator_values.append(dicom.get(field_name))
    self.assertEqual(extract_values_list(dicom, field_action), operator_values)

    print("Test deid.dicom.groups extract_fields_list")
    contains_action = [{"action": "FIELD", "field": "contains:Instance"}]
    instance_fields = [name for name in fields if "Instance" in name]
    self.assertEqual(extract_fields_list(dicom, contains_action), instance_fields)

    # Get identifiers for file
    ids = get_identifiers(dicom)
    self.assertTrue(isinstance(ids, dict))

    # Register value groups first, then field groups, then plain variables
    lookup = ids[dicom.filename]
    lookup["cookie_names"] = name_parts
    lookup["operator_names"] = operator_values
    lookup["instance_fields"] = instance_fields
    lookup["id"] = "new-cookie-id"
    lookup["source_id"] = "new-operator-id"

    # Returns a list of updated dicom since save is False
    cleaned = replace_identifiers(dicom, ids=ids, save=False, deid=self.deid).pop()
    self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
    self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

    # Currently we don't well handle tag types, so we convert to string
    for value in operator_values:
        self.assertTrue(str(value) not in cleaned)
def get_identifiers(
    dicom_files, force=True, config=None, expand_sequences=True, skip_fields=None
):
    """Extract all identifiers from one or more dicom images.

    Returns a lookup (dict) keyed by file name; private tags are not
    included.

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from (path, Dataset, or list)
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    expand_sequences: if True, expand sequences. Otherwise, skips
    skip_fields: if not None, added fields to skip
    """
    if config is None:
        config = "%s/config.json" % here

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))
    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config["skip"]
    if skip_fields is not None:
        if not isinstance(skip_fields, list):
            skip_fields = [skip_fields]
        # Concatenate (not extend) so the config's skip list is not mutated
        skip = skip + skip_fields

    for dicom_file in dicom_files:
        # Accept an already-read Dataset as well as a file path
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)

        # get_fields returns the complete lookup for this file, so no
        # need to pre-seed ids[dicom_file] with an empty dict first
        ids[dicom_file] = get_fields(
            dicom, skip=skip, expand_sequences=expand_sequences
        )
    return ids
def get_fields(self, expand_sequences=True):
    """Expand all dicom fields into a lookup of DicomField entries.

    Sequences are unwrapped, with a nested entry's location encoded in
    its name (e.g., Sequence__Child). The expansion is cached on the
    instance, so the work is only done on the first call.
    """
    if self.fields:
        return self.fields
    self.fields = get_fields(
        dicom=self.dicom,
        expand_sequences=expand_sequences,
        seen=self.seen,
    )
    return self.fields
def get_shared_identifiers(
    dicom_files, force=True, config=None, aggregate=None, expand_sequences=True
):
    """Extract shared identifiers across a set of dicom files.

    Intended for cases when a set of images (dicom) are being compressed
    into one file and the file (still) should have some searchable
    metadata. By default, we remove fields that differ between files.
    To aggregate unique values instead, define a list of field names
    (aggregate) whose values are collected into lists.

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading the file (default True)
    config: if None, uses default in provided module folder
    aggregate: list of field names to collect values for across files
    expand_sequences: if True, expand sequences. Otherwise, skips
    """
    if aggregate is None:
        aggregate = []

    if config is None:
        config = "%s/config.json" % (here)

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    # Normalize to a list before logging so the count is correct
    # (consistent with get_identifiers)
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting shared identifiers for %s dicom" % (len(dicom_files)))

    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config["skip"]

    for dicom_file in dicom_files:
        # BUGFIX: honor the caller's force parameter (was hard-coded True)
        dicom = read_file(dicom_file, force=force)

        # Get list of fields, expanded sequences are flattened
        fields = get_fields(dicom, skip=skip, expand_sequences=expand_sequences)

        for key, val in fields.items():
            # If it's there, only keep if the same
            if key in ids:
                # Items to aggregate are appended, not removed
                if key in aggregate:
                    if val not in ids[key]:
                        ids[key].append(val)
                else:
                    # Differing value: drop the field and skip it for
                    # all remaining files
                    if ids[key] != val:
                        del ids[key]
                        skip.append(key)
            else:
                if key in aggregate:
                    val = [val]
                ids[key] = val

    # For any aggregates that are one item, unwrap again
    for field in aggregate:
        if field in ids and len(ids[field]) == 1:
            ids[field] = ids[field][0]

    return ids
def test_extract_groups(self):
    """Verify group extraction and that a DicomParser applies the
    recipe using defined value and field groups."""
    print("Test deid.dicom.groups extract_values_list")
    from deid.dicom.groups import extract_values_list, extract_fields_list

    dicom = get_dicom(self.dataset)
    fields = get_fields(dicom)

    # SPLIT action: PatientID broken apart on "^"
    split_action = [
        {"action": "SPLIT", "field": "PatientID", "value": 'by="^";minlength=4'}
    ]
    name_parts = dicom.get("PatientID").split("^")
    self.assertEqual(extract_values_list(dicom, split_action), name_parts)

    # FIELD action: values whose keyword starts with "Operator"
    field_action = [{"action": "FIELD", "field": "startswith:Operator"}]
    operator_values = []
    for uid, entry in fields.items():
        if entry.element.keyword.startswith("Operator"):
            operator_values.append(entry.element.value)
    self.assertEqual(extract_values_list(dicom, field_action), operator_values)

    print("Test deid.dicom.groups extract_fields_list")
    contains_action = [{"action": "FIELD", "field": "contains:Instance"}]
    instance_fields = {}
    for uid, entry in fields.items():
        if "Instance" in entry.element.keyword:
            instance_fields[uid] = entry
    extracted = extract_fields_list(dicom, contains_action)
    for uid in instance_fields:
        assert uid in extracted

    # Get identifiers for file
    ids = get_identifiers(dicom)
    self.assertTrue(isinstance(ids, dict))

    # Define value groups, the field group, and plain replace variables
    parser = DicomParser(dicom, recipe=self.deid)
    parser.define("cookie_names", name_parts)
    parser.define("operator_names", operator_values)
    parser.define("instance_fields", instance_fields)
    parser.define("id", "new-cookie-id")
    parser.define("source_id", "new-operator-id")
    parser.parse()

    # Were the changes made?
    assert parser.dicom.get("PatientID") == "new-cookie-id"
    assert parser.dicom.get("OperatorsName") == "new-operator-id"

    # Instance fields should be removed based on recipe
    for uid, field in parser.lookup["instance_fields"].items():
        self.assertTrue(field.element.keyword not in parser.dicom)

    # Start over with a freshly loaded dataset
    dicom = get_dicom(self.dataset)

    # We need to provide ids with variables "id" and "source_id"
    ids = {
        dicom.filename: {"id": "new-cookie-id", "source_id": "new-operator-id"}
    }

    # Returns list of updated dicom, since save is False
    cleaned = replace_identifiers(dicom, save=False, deid=self.deid, ids=ids).pop()
    self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
    self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")