def test_profile_flatten(some_pid_rules):
    """Flattening a multi-set profile should yield exactly one rule per DICOM tag."""
    name_rule = Rule(SingleTag("PatientName"), Hash())
    first_set = RuleSet(rules=[some_pid_rules[0], name_rule])
    # a second set that disagrees about PatientID
    second_set = RuleSet(
        rules=[some_pid_rules[1], Rule(SingleTag("Modality"), Remove())]
    )
    profile = Profile(rule_sets=[first_set, second_set])

    # later sets take precedence: set two's PatientID rule wins over set one's
    flat = profile.flatten()
    assert some_pid_rules[1] in flat.rules
    assert some_pid_rules[0] not in flat.rules

    # rule sets passed at flatten time overrule everything in the profile
    extra_set = RuleSet(name="another set", rules=[some_pid_rules[2]])
    flat_with_extra = profile.flatten(additional_rule_sets=[extra_set])
    assert some_pid_rules[2] in flat_with_extra.rules
    # rules that were never overwritten survive the merge untouched
    assert name_rule in flat_with_extra.rules
def compile(self, action_mapping: Dict[ActionCode, Operator]) -> RuleSet:
    """Replace each action code (string) with an actual operator (function).

    Parameters
    ----------
    action_mapping: Dict[ActionCode, Operator]
        Maps each action code to the operator that implements it

    Returns
    -------
    RuleSet
        A rule set with the same name, holding one compiled Rule per
        (identifier, action code) pair in self.rules

    Raises
    ------
    IDISCoreException
        When an action code has no entry in action_mapping
    """
    compiled = []
    # self.rules holds (identifier, actioncode) pairs here, not Rule objects
    for identifier, actioncode in self.rules:
        try:
            operation = action_mapping[actioncode]
        except KeyError as e:
            # chain the KeyError so the original cause stays visible
            raise IDISCoreException(
                f'Unknown actioncode "{actioncode}" I do'
                f" not know which operation to add here"
            ) from e
        compiled.append(Rule(identifier=identifier, operation=operation))
    return RuleSet(rules=compiled, name=self.name)
def apply_rules(self, rules: RuleSet, dataset: Dataset) -> Dataset:
    """Apply rules to each element in dataset, recursing into sequence elements.

    This creates a deep copy of the input dataset. Except for PixelData,
    which will be a reference. PixelData is not copied because it can take
    up a lot of memory.

    Parameters
    ----------
    rules: RuleSet
        The rules to look up per element
    dataset: Dataset
        The dataset to deidentify

    Returns
    -------
    Dataset
        A new dataset with each rule's operation applied
    """
    deidentified = Dataset()
    pixel_data_tag = 0x7FE00010
    for element in dataset:
        if element.tag == pixel_data_tag:
            # add pixel data as reference to save memory
            deidentified.add(element)
        elif element.VR == VRs.Sequence.short_name:
            # recurse into sequences
            deidentified.add(
                DataElement(
                    tag=element.tag,
                    VR=element.VR,
                    value=Sequence(
                        [
                            self.apply_rules(rules, sub_dataset)
                            for sub_dataset in element
                        ]
                    ),
                )
            )
        elif rule := rules.get_rule(element):
            # special handling: element should be removed, do not add
            if isinstance(rule.operation, Remove):
                continue
            try:
                new = rule.operation.apply(element, dataset)
                deidentified.add(new)
            except ElementShouldBeRemoved:
                # operator signals removal
                continue
        else:
            # no rule found. Just copy the element over
            deidentified.add(
                DataElement(tag=element.tag, VR=element.VR, value=element.value)
            )
    # BUG FIX: the original built the dataset but never returned it
    return deidentified
def flatten(self, additional_rule_sets: List[RuleSet] = None) -> RuleSet:
    """Collapse all rule sets into one, ensuring only one rule per DICOM tag.

    If sets disagree, later sets (higher index in the list) take precedence.

    Parameters
    ----------
    additional_rule_sets: List[RuleSet], optional
        Append these to the existing rule sets, so they overrule them. Useful
        for one-time additions without changing the profile itself. For
        example when adding dataset-specific safe private rules.

    Returns
    -------
    RuleSet
        A single rule set named "flattened" with one rule per identifier
    """
    if not additional_rule_sets:
        additional_rule_sets = []
    # dict keyed on identifier: a later rule for the same tag replaces an
    # earlier one, while insertion order records the remaining precedence
    output = {}
    for rule_set in self.rule_sets + additional_rule_sets:
        output.update({x.identifier: x for x in rule_set.rules})
    # BUG FIX: use list(), not set() — set() would scramble the insertion
    # order that encodes which rule wins for equally-general identifiers
    return RuleSet(name="flattened", rules=list(output.values()))
def test_core_deidentify_safe_private(a_dataset, a_safe_private_definition):
    """Private elements marked as safe should not be removed by Clean()"""
    assert Tag("00b10010") in a_dataset  # a private creator tag
    assert Tag("00b11001") in a_dataset  # and a private tag

    # a core that clean()s private tags, with one tag deemed safe
    clean_rule = Rule(PrivateTags(), Clean(safe_private=a_safe_private_definition))
    core = Core(profile=Profile([RuleSet([clean_rule])]))

    def tag_statuses():
        deltas = extract_signature(deidentifier=core, dataset=a_dataset)
        return {x.tag: x.status for x in deltas}

    # the safe tag should be kept, the creator removed
    statuses = tag_statuses()
    assert statuses[Tag("00b10010")] == "REMOVED"
    assert statuses[Tag("00b11001")] == "UNCHANGED"

    # but safety only holds while the dataset has modality CT
    a_dataset.Modality = "US"
    statuses = tag_statuses()
    assert statuses[Tag("00b10010")] == "REMOVED"
    assert statuses[Tag("00b11001")] == "REMOVED"
def test_rule_set():
    """Rule set should be able to find the proper rules for tags"""
    name_rule = Rule(SingleTag("PatientName"), Hash())
    curve_rule = Rule(RepeatingGroup("50xx,xxxx"), Remove())
    private_rule = Rule(PrivateTags(), Remove())
    ruleset = RuleSet(rules=[name_rule, curve_rule, private_rule])

    assert ruleset.get_rule(DatEF(tag="PatientName")) == name_rule
    # no rule was defined for Modality
    assert ruleset.get_rule(DatEF(tag="Modality")) is None
    # a tag inside the repeating group matches the repeating rule
    assert ruleset.get_rule(DatEF(tag=(0x5000, 0x0001))) == curve_rule
    # an odd-group (private) tag matches the private-tags rule
    assert ruleset.get_rule(DatEF(tag=(0x1301, 0x0001))) == private_rule
"""You can set your own rules for specific DICOM tags. Be aware that this might mean the deidentification is no longer DICOM-complient """ import pydicom from idiscore.core import Core, Profile from idiscore.defaults import get_dicom_rule_sets from idiscore.identifiers import RepeatingGroup, SingleTag from idiscore.operators import Hash, Remove from idiscore.rules import Rule, RuleSet # Custom rules that will hash the patient name and remove all curve data my_ruleset = RuleSet( rules=[ Rule(SingleTag("PatientName"), Hash()), Rule(RepeatingGroup("50xx,xxxx"), Remove()), ], name="My Custom RuleSet", ) sets = get_dicom_rule_sets() # Contains official DICOM deidentification rules profile = Profile( # add custom rules to basic profile rule_sets=[sets.basic_profile, my_ruleset]) core = Core(profile) # Create an deidentification core # read a DICOM dataset from file and write to another core.deidentify(pydicom.read("my_file.dcm")).save_as("deidentified.dcm")
def test_rule_precedence():
    """Rules are applied in order of generality - most specific first. Verify"""
    # some rules with a potentially ambivalent order
    rule_a = Rule(PrivateTags(), Remove())  # remove all private tags
    rule_b = Rule(Tag(0x1301, 0x0000), Keep())  # but keep this private tag
    rule_c = Rule(RepeatingGroup("50xx,xxxx"), Hash())  # match all of these
    rule_d = Rule(Tag(0x5002, 0x0002), Keep())  # but keep this specific one
    rule_e = Rule(SingleTag("PatientName"), Hash())  # and one regular rule
    rules = RuleSet(rules=[rule_a, rule_b, rule_c, rule_d, rule_e])

    # in every case below the most specific matching rule should win
    expectations = [
        ((0x1301, 0x0000), rule_b),  # also matches a
        ((0x5002, 0x0002), rule_d),  # also matches c
        ((0x5002, 0x0001), rule_c),
        ((0x5001, 0x0001), rule_c),  # also matches a
        ((0x0010, 0x0010), rule_e),
    ]
    for tag, expected in expectations:
        assert rules.get_rule(DatEF(tag=tag)) == expected
    assert rules.get_rule(DatEF(tag="Modality")) is None

    # for rules of identical generality, input order decides
    rule_1 = Rule(RepeatingGroup("50xx,xxxx"), Hash())
    rule_2 = Rule(RepeatingGroup("xx10,xxxx"), Hash())
    rules = RuleSet(rules=[rule_1, rule_2])
    assert rules.get_rule(DatEF(tag=(0x5010, 0x0000))) == rule_1  # matches both
    assert rules.get_rule(DatEF(tag=(0x5110, 0x0000))) == rule_2
def test_rule_set_human_readable(some_rules):
    """The human-readable listing should name both known and repeater tags"""
    listing = RuleSet(some_rules).as_human_readable_list()
    for expected in ("PatientName - (0010, 0010)", "Unknown Repeater tag"):
        assert expected in listing
def a_core_with_some_rules(some_rules) -> Core:
    """Core instance with a three-rule profile"""
    profile = Profile([RuleSet(some_rules)])
    return Core(profile=profile)
def test_rule_set_remove():
    """Removing rules shrinks the set; removing twice raises KeyError"""
    name_rule = Rule(SingleTag("PatientName"), Hash())
    curve_rule = Rule(RepeatingGroup("50xx,xxxx"), Remove())
    private_rule = Rule(PrivateTags(), Remove())
    ruleset = RuleSet(rules=[name_rule, curve_rule, private_rule])

    assert len(ruleset.as_dict()) == 3
    ruleset.remove(private_rule)
    assert len(ruleset.as_dict()) == 2
    ruleset.remove(name_rule)
    assert len(ruleset.as_dict()) == 1

    # removing a rule that is no longer present should raise
    for already_gone in (private_rule, name_rule):
        with pytest.raises(KeyError):
            ruleset.remove(already_gone)