Example #1
def test_profile_flatten(some_pid_rules):
    """A profile can have multiple rule sets, but with flatten you should end up
    with one rule per DICOM tag
    """
    hash_name = Rule(SingleTag("PatientName"), Hash())

    # initial set
    set1 = RuleSet(rules=[some_pid_rules[0], hash_name])
    # set with a different rule for PatientID
    set2 = RuleSet(
        rules=[some_pid_rules[1],
               Rule(SingleTag("Modality"), Remove())])

    profile = Profile(rule_sets=[set1, set2])

    # The PatientID rule of set2 should be chosen when flattening
    assert some_pid_rules[1] in profile.flatten().rules
    assert some_pid_rules[0] not in profile.flatten().rules

    # if another set is added, its rules should overrule the earlier ones
    set3 = RuleSet(name="another set", rules=[some_pid_rules[2]])
    assert some_pid_rules[2] in profile.flatten(
        additional_rule_sets=[set3]).rules
    # but any original rule that was not overwritten should still be present
    assert hash_name in profile.flatten(additional_rule_sets=[set3]).rules
Example #2
    def compile(self, action_mapping: Dict[ActionCode, Operator]) -> RuleSet:
        """Replace each action code (string) with actual operator (function)"""

        compiled = []
        for identifier, actioncode in self.rules:
            try:
                operation = action_mapping[actioncode]
            except KeyError:
                raise IDISCoreException(
                    f'Unknown actioncode "{actioncode}". I do not'
                    f" know which operation to add here")
            compiled.append(Rule(identifier=identifier, operation=operation))

        return RuleSet(rules=compiled, name=self.name)
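
A minimal usage sketch of the action-code idea, independent of the surrounding class (which is not shown above). The string codes "HASH" and "REMOVE" are made up for illustration; Rule, RuleSet, Hash and Remove are the same names used in the other examples.

from idiscore.identifiers import SingleTag
from idiscore.operators import Hash, Remove
from idiscore.rules import Rule, RuleSet

# hypothetical action codes mapped to real operator instances
action_mapping = {"HASH": Hash(), "REMOVE": Remove()}

# stand-in for the (identifier, actioncode) pairs the class keeps in self.rules
raw_rules = [(SingleTag("PatientName"), "HASH"),
             (SingleTag("Modality"), "REMOVE")]

compiled = RuleSet(
    name="compiled example",
    rules=[Rule(identifier=identifier, operation=action_mapping[code])
           for identifier, code in raw_rules],
)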
Example #3
    def apply_rules(self, rules: RuleSet, dataset: Dataset) -> Dataset:
        """Apply rules to each element in dataset, recursing into sequence elements

        This creates a deep copy of the input dataset, except for PixelData,
        which is added as a reference because copying it can take up a lot
        of memory
        """
        deidentified = Dataset()
        pixel_data_tag = 0x7FE00010

        for element in dataset:
            if element.tag == pixel_data_tag:
                deidentified.add(
                    element)  # add pixel data as reference to save mem
            elif element.VR == VRs.Sequence.short_name:  # recurse into sequences
                deidentified.add(
                    DataElement(
                        tag=element.tag,
                        VR=element.VR,
                        value=Sequence([
                            self.apply_rules(rules, sub_dataset)
                            for sub_dataset in element
                        ]),
                    ))
            elif rule := rules.get_rule(element):
                if type(rule.operation) == Remove:
                    continue  # special handling. should be removed, do not add
                try:
                    new = rule.operation.apply(element, dataset)
                    deidentified.add(new)
                except ElementShouldBeRemoved:  # Operator signals removal
                    continue
            else:  # no rule found. Just copy the element over
                deidentified.add(
                    DataElement(tag=element.tag,
                                VR=element.VR,
                                value=element.value))

        return deidentified
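
A minimal sketch of the recursion into sequences via the public Core.deidentify() entry point used in the other examples. The in-memory dataset below is made up; a real dataset read from file (as in the script example) is the safer path, since this sketch assumes deidentify() accepts a bare Dataset.

from pydicom.dataset import Dataset
from pydicom.sequence import Sequence

from idiscore.core import Core, Profile
from idiscore.identifiers import SingleTag
from idiscore.operators import Hash
from idiscore.rules import Rule, RuleSet

core = Core(profile=Profile(
    [RuleSet(rules=[Rule(SingleTag("PatientName"), Hash())])]))

inner = Dataset()
inner.PatientName = "Nested^Name"

outer = Dataset()
outer.PatientName = "Top^Name"
outer.ReferencedSeriesSequence = Sequence([inner])  # VR SQ, triggers recursion

deidentified = core.deidentify(outer)
# apply_rules recurses into the sequence, so both the top-level and the
# nested PatientName end up hashed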
Example #4
    def flatten(self, additional_rule_sets: List[RuleSet] = None) -> RuleSet:
        """Collapse all rule sets into one, ensuring only one rule per DICOM tag
        If a sets disagree, later sets (higher index in the list) take precedence.

        Parameters
        ----------
        additional_rule_sets: List[RuleSet]
            Append these to the existing rule sets, so they overrule them. Useful
            for one-time additions without changing the profile itself. For example
            when adding dataset-specific safe private rules.

        """
        if not additional_rule_sets:
            additional_rule_sets = []

        output = {}
        for rule_set in self.rule_sets + additional_rule_sets:
            output.update({x.identifier: x for x in rule_set.rules})

        return RuleSet(name="flattened", rules=set(output.values()))
Example #5
def test_core_deidentify_safe_private(a_dataset, a_safe_private_definition):
    """Private elements marked as safe should not be removed by Clean()"""

    assert Tag("00b10010") in a_dataset  # a private creator tag
    assert Tag("00b11001") in a_dataset  # and a private tag

    # A core instance that should clean() private tags, but one tag is deemed safe
    ruleset = RuleSet(
        [Rule(PrivateTags(), Clean(safe_private=a_safe_private_definition))])
    core = Core(profile=Profile([ruleset]))

    # The safe private tag should be kept; its private creator is still removed
    deltas = extract_signature(deidentifier=core, dataset=a_dataset)
    assert {x.tag: x for x in deltas}[Tag("00b10010")].status == "REMOVED"
    assert {x.tag: x for x in deltas}[Tag("00b11001")].status == "UNCHANGED"

    # but only as long as the dataset has Modality 'CT'
    a_dataset.Modality = "US"
    deltas = extract_signature(deidentifier=core, dataset=a_dataset)
    assert {x.tag: x for x in deltas}[Tag("00b10010")].status == "REMOVED"
    assert {x.tag: x for x in deltas}[Tag("00b11001")].status == "REMOVED"
Example #6
def test_rule_set():
    """Rule set should be able to find the proper rules for tags"""

    # some rules
    rule1 = Rule(SingleTag("PatientName"), Hash())
    rule2 = Rule(RepeatingGroup("50xx,xxxx"), Remove())
    rule3 = Rule(PrivateTags(), Remove())
    rules = RuleSet(rules=[rule1, rule2, rule3])

    assert rules.get_rule(DatEF(tag="PatientName")) == rule1
    assert rules.get_rule(
        DatEF(tag="Modality")) is None  # This rule is not defined
    assert rules.get_rule(
        DatEF(tag=(0x5000, 0x0001))) == rule2  # try a repeating rule
    assert (rules.get_rule(DatEF(tag=(0x1301, 0x0001))) == rule3
            )  # try a private tag rule
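
A minimal sketch of get_rule() with a plain pydicom DataElement instead of the DatEF test helper (which is not defined in these examples); the element value is made up.

from pydicom.dataset import Dataset

from idiscore.identifiers import RepeatingGroup, SingleTag
from idiscore.operators import Hash, Remove
from idiscore.rules import Rule, RuleSet

rules = RuleSet(rules=[Rule(SingleTag("PatientName"), Hash()),
                       Rule(RepeatingGroup("50xx,xxxx"), Remove())])

dataset = Dataset()
dataset.PatientName = "Some^Patient"

element = dataset["PatientName"]  # a pydicom DataElement
rule = rules.get_rule(element)    # the Hash rule for PatientName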
Example #7
"""You can set your own rules for specific DICOM tags. Be aware that this might

mean the deidentification is no longer DICOM-complient
"""

import pydicom
from idiscore.core import Core, Profile
from idiscore.defaults import get_dicom_rule_sets
from idiscore.identifiers import RepeatingGroup, SingleTag
from idiscore.operators import Hash, Remove
from idiscore.rules import Rule, RuleSet

# Custom rules that will hash the patient name and remove all curve data
my_ruleset = RuleSet(
    rules=[
        Rule(SingleTag("PatientName"), Hash()),
        Rule(RepeatingGroup("50xx,xxxx"), Remove()),
    ],
    name="My Custom RuleSet",
)

sets = get_dicom_rule_sets()  # Contains official DICOM deidentification rules
profile = Profile(  # add custom rules to basic profile
    rule_sets=[sets.basic_profile, my_ruleset])
core = Core(profile)  # Create a deidentification core

# read a DICOM dataset from file and write to another
core.deidentify(pydicom.dcmread("my_file.dcm")).save_as("deidentified.dcm")
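
# Note: Profile.flatten() gives later rule sets precedence, so for any tag that
# both rule sets cover, my_ruleset overrules the official basic profile.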
Example #8
def test_rule_precedence():
    """Rules are applied in order of generality - most specific first. Verify"""

    # Some rules with a potentially ambiguous order
    rule_a = Rule(PrivateTags(), Remove())  # Remove all private tags
    rule_b = Rule(Tag(0x1301, 0x0000), Keep())  # but keep this private tag
    rule_c = Rule(RepeatingGroup("50xx,xxxx"), Hash())  # match all these
    rule_d = Rule(Tag(0x5002, 0x0002), Keep())  # but specifically keep this one
    rule_e = Rule(SingleTag("PatientName"), Hash())  # and one regular rule
    rules = RuleSet(rules=[rule_a, rule_b, rule_c, rule_d, rule_e])

    # now in all these cases, the most specific rule should be returned:
    assert rules.get_rule(
        DatEF(tag=(0x1301, 0x0000))) == rule_b  # also matches a
    assert rules.get_rule(
        DatEF(tag=(0x5002, 0x0002))) == rule_d  # also matches c
    assert rules.get_rule(DatEF(tag=(0x5002, 0x0001))) == rule_c
    assert rules.get_rule(
        DatEF(tag=(0x5001, 0x0001))) == rule_c  # also matches a
    assert rules.get_rule(DatEF(tag=(0x0010, 0x0010))) == rule_e
    assert rules.get_rule(DatEF(tag="Modality")) is None

    # For rules with identical generality, just keep the order of input
    rule_1 = Rule(RepeatingGroup("50xx,xxxx"), Hash())
    rule_2 = Rule(RepeatingGroup("xx10,xxxx"), Hash())
    rules = RuleSet(rules=[rule_1, rule_2])

    assert rules.get_rule(
        DatEF(tag=(0x5010, 0x0000))) == rule_1  # matches both, first one wins
    assert rules.get_rule(
        DatEF(tag=(0x5110, 0x0000))) == rule_2  # only matches rule_2
Example #9
def test_rule_set_human_readable(some_rules):

    as_string = RuleSet(some_rules).as_human_readable_list()
    assert "PatientName - (0010, 0010)" in as_string
    assert "Unknown Repeater tag" in as_string
Example #10
def a_core_with_some_rules(some_rules) -> Core:
    """Core instance with a three-rule profile"""
    return Core(profile=Profile([RuleSet(some_rules)]))
Example #11
def test_rule_set_remove():

    # some rules
    rule1 = Rule(SingleTag("PatientName"), Hash())
    rule2 = Rule(RepeatingGroup("50xx,xxxx"), Remove())
    rule3 = Rule(PrivateTags(), Remove())
    rules = RuleSet(rules=[rule1, rule2, rule3])

    assert len(rules.as_dict()) == 3
    rules.remove(rule3)
    assert len(rules.as_dict()) == 2
    rules.remove(rule1)
    assert len(rules.as_dict()) == 1

    with pytest.raises(KeyError):
        rules.remove(rule3)

    with pytest.raises(KeyError):
        rules.remove(rule1)