def test_fieldset_remove_private(self):
        """Remove every field named by a %fields group mixing a private and a public tag.

        RECIPE
        %fields field_set2_private
        FIELD 00090010
        FIELD PatientID
        %header
        REMOVE fields:field_set2_private
        """

        print("Test private tag fieldset")
        dicom_file = get_file(self.dataset)

        actions = [{"action": "REMOVE", "field": "fields:field_set2_private"}]
        fields = OrderedDict()
        fields["field_set2_private"] = [
            {"field": "00090010", "action": "FIELD"},
            {"field": "PatientID", "action": "FIELD"},
        ]
        recipe = create_recipe(actions, fields)

        parser = DicomParser(dicom_file, recipe=recipe)
        parser.parse()

        # The group lookup is keyed by the string form of each tag.
        # assertIn (rather than assertTrue(x in y)) prints the missing key
        # and the container when the check fails.
        field_group = parser.lookup["field_set2_private"]
        self.assertIn("(0009, 0010)", field_group)
        self.assertIn("(0010, 0020)", field_group)

        # After parse() neither field may be reachable on the dataset
        for removed in ("00090010", "PatientID"):
            with self.assertRaises(KeyError):
                parser.dicom[removed].value
Exemple #2
0
def get_identifiers(
    dicom_files, force=True, config=None, strip_sequences=False, remove_private=False
):
    """Collect the fields of one or more dicom files into a lookup.

    Every input is handed to a DicomParser; the returned dictionary maps
    the parser's ``dicom_file`` to the result of ``parser.get_fields()``.

    Parameters
    ==========
    dicom_files: a single dicom file, or a list of them
    force: forwarded to DicomParser as ``force`` (default True)
    config: path to a json config; if None, "config.json" under ``here``
    strip_sequences: accepted, but not referenced by this function
    remove_private: accepted, but not referenced by this function

    """
    config_path = "%s/config.json" % here if config is None else config

    if not os.path.exists(config_path):
        # NOTE(review): the message says "exiting" but execution continues
        # to read_json below - confirm whether bot.error is meant to stop.
        bot.error("Cannot find config %s, exiting" % (config_path))

    # NOTE(review): the "get" section is loaded (so a missing file or key
    # still fails here) but nothing below reads it.
    get_section = read_json(config_path, ordered_dict=True)["get"]

    targets = dicom_files if isinstance(dicom_files, list) else [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(targets))

    # One parser per file; fields are extracted before the key is read
    found = {}
    for target in targets:
        parser = DicomParser(target, force=force)
        extracted = parser.get_fields()
        found[parser.dicom_file] = extracted

    return found
    def test_fieldset_remove(self):
        """Remove every field collected by a public-tag %fields group.

        RECIPE
        %fields field_set1
        FIELD Manufacturer
        FIELD contains:Collimation
        %header
        REMOVE fields:field_set1
        """

        print("Test public tag fieldset")
        dicom_file = get_file(self.dataset)

        actions = [{"action": "REMOVE", "field": "fields:field_set1"}]
        fields = OrderedDict()
        fields["field_set1"] = [
            {"field": "Manufacturer", "action": "FIELD"},
            {"field": "contains:Collimation", "action": "FIELD"},
        ]

        recipe = create_recipe(actions, fields)

        # Method 1: use DicomParser to learn how many fields the group matches
        parser = DicomParser(dicom_file, recipe=recipe)
        # Field count is taken before parse() is called
        # (160 according to the original author's note)
        number_fields = len(parser.dicom)
        parser.parse()

        # The number of fields to be removed
        to_remove = len(parser.lookup["field_set1"])

        expected_number = number_fields - to_remove

        # Example of the lookup recorded by the original author:
        # {'field_set1': {'(0008, 0070)': (0008, 0070) Manufacturer                        LO: 'SIEMENS'  [Manufacturer],
        # '(0018, 9306)': (0018, 9306) Single Collimation Width            FD: 1.2  [SingleCollimationWidth],
        # '(0018, 9307)': (0018, 9307) Total Collimation Width             FD: 14.399999999999999  [TotalCollimationWidth]}}

        # Method 2: use replace_identifiers and compare against the count above
        result = replace_identifiers(
            dicom_files=dicom_file,
            deid=recipe,
            save=False,
            remove_private=False,
            strip_sequences=False,
        )
        self.assertEqual(1, len(result))
        self.assertEqual(expected_number, len(result[0]))

        # Every field matched by the group must be gone from the result
        for removed in (
            "Manufacturer",
            "TotalCollimationWidth",
            "SingleCollimationWidth",
        ):
            with self.assertRaises(KeyError):
                result[0][removed].value
    def test_remove(self):
        """Remove one public and one private tag with plain REMOVE rules.

        RECIPE RULE
        REMOVE InstitutionName
        REMOVE 00190010
        """

        print("Test remove of public and private tags")
        dicom_file = get_file(self.dataset)

        field1name = "InstitutionName"
        field2name = "00190010"

        actions = [
            {"action": "REMOVE", "field": field1name},
            {"action": "REMOVE", "field": field2name},
        ]
        recipe = create_recipe(actions)

        # Parse the input without a recipe to confirm both fields start out present
        parser = DicomParser(dicom_file)
        parser.parse()

        # The first in the list is the highest level
        field1 = list(parser.find_by_name(field1name).values())[0]
        field2 = list(parser.find_by_name(field2name).values())[0]

        self.assertIsNotNone(field1.element.value)
        self.assertIsNotNone(field2.element.value)

        result = replace_identifiers(
            dicom_files=dicom_file,
            deid=recipe,
            save=False,
            remove_private=False,
            strip_sequences=False,
        )

        # Check the size first so an empty result fails as an assertion
        # instead of an IndexError on result[0] below
        self.assertEqual(1, len(result))

        # Parse the returned dataset to search it by name
        parser = DicomParser(result[0])
        parser.parse()

        # Removed means we don't find them; unittest assertions are used
        # because bare assert statements are stripped under python -O
        self.assertFalse(parser.find_by_name(field1name))
        self.assertFalse(parser.find_by_name(field2name))

        for removed in (field1name, field2name):
            with self.assertRaises(KeyError):
                result[0][removed].value
Exemple #5
0
def replace_identifiers(
    dicom_files,
    ids=None,
    deid=None,
    save=False,
    overwrite=False,
    output_folder=None,
    force=True,
    config=None,
    strip_sequences=False,
    remove_private=False,
):
    """Run a deid recipe over one or more dicom files.

    Each file gets its own DicomParser (built with ``force``, ``config``
    and ``deid`` as the recipe). If ``ids`` has an entry for the parser's
    ``dicom_file`` it is merged into ``parser.lookup`` before parsing.

    Returns a list with one item per input: the value returned by
    ``save_dicom`` when ``save is True``, otherwise ``parser.dicom``.
    """
    if isinstance(dicom_files, list):
        targets = dicom_files
    else:
        targets = [dicom_files]

    # Without a recipe the parser is left to its own defaults; tell the user
    if not deid:
        bot.warning("No deid specification provided, will use defaults.")

    # A custom lookup is optional
    custom_lookups = ids if ids else {}

    outputs = []
    for target in targets:
        parser = DicomParser(target, force=force, config=config, recipe=deid)

        # Merge caller-provided variables for this particular file
        file_key = parser.dicom_file
        if file_key in custom_lookups:
            parser.lookup.update(custom_lookups[file_key])

        parser.parse(strip_sequences=strip_sequences, remove_private=remove_private)

        # Only the literal True triggers writing to disk
        if save is not True:
            outputs.append(parser.dicom)
            continue

        saved = save_dicom(
            dicom=parser.dicom,
            dicom_file=parser.dicom_file,
            output_folder=output_folder,
            overwrite=overwrite,
        )
        outputs.append(saved)

    return outputs
    def test_valueset_remove_one_empty(self):
        """
        A valueset built from two fields, one of which is empty.

        Per the original author's note, ConversionType holds "No Value" in
        the test dataset, so value_set1 is expected to contain only the
        Manufacturer value ("SIEMENS") and the REMOVE rule should only hit
        fields carrying that value.

        %values value_set1
        FIELD ConversionType
        FIELD Manufacturer
        %header
        REMOVE values:value_set1
        """
        import pydicom

        print("Test one empty value valueset")
        dicom_file = get_file(self.dataset)
        original_dataset = pydicom.dcmread(dicom_file)

        actions = [{"action": "REMOVE", "field": "values:value_set1"}]
        values = OrderedDict()
        values["value_set1"] = [
            {"field": source, "action": "FIELD"}
            for source in ("ConversionType", "Manufacturer")
        ]
        recipe = create_recipe(actions, values=values)

        # The parsed valueset should hold exactly one value
        parser = DicomParser(dicom_file, recipe=recipe)
        parser.parse()
        value_set = parser.lookup["value_set1"]
        self.assertEqual(len(value_set), 1)
        self.assertTrue("SIEMENS" in value_set)

        # Apply the recipe through the public entry point
        cleaned = replace_identifiers(
            dicom_files=dicom_file,
            deid=recipe,
            save=False,
            remove_private=False,
            strip_sequences=False,
        )
        self.assertEqual(1, len(cleaned))
        self.assertNotEqual(len(original_dataset), len(cleaned[0]))
        for gone in ("00090010", "Manufacturer"):
            with self.assertRaises(KeyError):
                _ = cleaned[0][gone].value
    def test_replace_with_constant(self):
        """ RECIPE RULE
        REPLACE AccessionNumber 987654321
        REPLACE 00190010 NEWVALUE!

        Checks that a public and a private tag both take the constant
        given in the rule.
        """

        print("Test replace tags with constant values")
        dicom_file = get_file(self.dataset)

        # (field, replacement value) pairs, in rule order
        replacements = [
            ("AccessionNumber", "987654321"),
            ("00190010", "NEWVALUE!"),
        ]

        actions = [
            {"action": "REPLACE", "field": name, "value": constant}
            for name, constant in replacements
        ]
        recipe = create_recipe(actions)

        # Parse without a recipe to look at the untouched values
        parser = DicomParser(dicom_file)
        parser.parse()

        # For each name, the first match is the highest level
        before = [
            list(parser.find_by_name(name).values())[0] for name, _ in replacements
        ]

        # Guard against the dataset already holding the new values
        for (_, constant), found in zip(replacements, before):
            self.assertNotEqual(constant, found.element.value)

        updated = replace_identifiers(
            dicom_files=dicom_file,
            deid=recipe,
            save=False,
            remove_private=False,
            strip_sequences=False,
        )

        self.assertEqual(1, len(updated))
        for name, constant in replacements:
            self.assertEqual(constant, updated[0][name].value)
    def test_valueset_remove(self):
        """
        Remove fields whose value appears in a valueset built from public tags.

        %values value_set1
        FIELD contains:Manufacturer
        SPLIT contains:Physician by="^";minlength=3
        %header
        REMOVE values:value_set1
        """

        print("Test public tag valueset")
        dicom_file = get_file(self.dataset)

        manufacturer_rule = {"field": "contains:Manufacturer", "action": "FIELD"}
        physician_rule = {
            "value": 'by="^";minlength=3',
            "field": "contains:Physician",
            "action": "SPLIT",
        }
        values = OrderedDict()
        values["value_set1"] = [manufacturer_rule, physician_rule]

        actions = [{"action": "REMOVE", "field": "values:value_set1"}]
        recipe = create_recipe(actions, values=values)

        # Both the FIELD value and a SPLIT piece must land in the valueset
        parser = DicomParser(dicom_file, recipe=recipe)
        parser.parse()
        for expected in ("SIEMENS", "HIBBARD"):
            self.assertTrue(expected in parser.lookup["value_set1"])

        # Apply the recipe through the public entry point
        cleaned = replace_identifiers(
            dicom_files=dicom_file,
            deid=recipe,
            save=False,
            remove_private=False,
            strip_sequences=False,
        )
        self.assertEqual(1, len(cleaned))
        for gone in ("00090010", "Manufacturer", "PhysiciansOfRecord"):
            with self.assertRaises(KeyError):
                _ = cleaned[0][gone].value
    def test_valueset_empty_remove(self):
        """
        A valueset with no data must leave the header untouched.

        Per the original author's note, ConversionType holds "No Value" in
        the test dataset, so value_set1 ends up empty and the REMOVE rule
        should match nothing. The test compares the number of elements
        before and after.

        %values value_set1
        FIELD ConversionType
        %header
        REMOVE values:value_set1
        """
        import pydicom

        print("Test empty value valueset")
        dicom_file = get_file(self.dataset)
        original_dataset = pydicom.dcmread(dicom_file)

        values = OrderedDict()
        values["value_set1"] = [{"field": "ConversionType", "action": "FIELD"}]
        recipe = create_recipe(
            [{"action": "REMOVE", "field": "values:value_set1"}], values=values
        )

        # The valueset is expected to come out empty
        parser = DicomParser(dicom_file, recipe=recipe)
        parser.parse()
        self.assertEqual(len(parser.lookup["value_set1"]), 0)

        # Apply the recipe; the element count must not change
        cleaned = replace_identifiers(
            dicom_files=dicom_file,
            deid=recipe,
            save=False,
            remove_private=False,
            strip_sequences=False,
        )
        self.assertEqual(1, len(cleaned))
        self.assertEqual(len(original_dataset), len(cleaned[0]))
    def test_valueset_private(self):
        """
        Build a valueset from private tags and remove every field matching it.

        %values value_set2_private
        FIELD 00311020
        SPLIT 00090010 by=" ";minlength=4
        %header
        REMOVE values:value_set2_private
        """

        print("Test private tag valueset")
        dicom_file = get_file(self.dataset)

        whole_value_rule = {"field": "00311020", "action": "FIELD"}
        split_rule = {
            "value": 'by=" ";minlength=4',
            "field": "00090010",
            "action": "SPLIT",
        }
        values = OrderedDict()
        values["value_set2_private"] = [whole_value_rule, split_rule]

        actions = [{"action": "REMOVE", "field": "values:value_set2_private"}]
        recipe = create_recipe(actions, values=values)

        parser = DicomParser(dicom_file, recipe=recipe)
        parser.parse()

        # Values the two rules are expected to contribute
        for expected in ("SIEMENS", "M1212121", "DUMMY"):
            assert expected in parser.lookup["value_set2_private"]

        # Fields carrying any of those values must be gone after parse()
        for gone in ("OtherPatientIDs", "Manufacturer", "00190010"):
            with self.assertRaises(KeyError):
                _ = parser.dicom[gone].value
    def test_remove_all_func(self):
        """
        Remove every field for which a user-defined function says so.

        %header
        REMOVE ALL func:contains_hibbard
        """
        print("Test tag removal by")
        dicom_file = get_file(self.dataset)

        def contains_hibbard(dicom, value, field, item):
            """Tell whether the field's current value mentions "hibbard"."""
            from pydicom.tag import Tag

            tag = Tag(field.element.tag)
            if tag not in dicom:
                # No such tag in the dataset: the result is None, not False
                return None
            return "hibbard" in str(dicom.get(tag).value).lower()

        recipe = create_recipe(
            [{"action": "REMOVE", "field": "ALL", "value": "func:contains_hibbard"}]
        )

        # The function has to be registered on the parser before parse()
        parser = DicomParser(dicom_file, recipe=recipe)
        parser.define("contains_hibbard", contains_hibbard)
        parser.parse()

        self.assertEqual(156, len(parser.dicom))
        for gone in (
            "ReferringPhysicianName",
            "PhysiciansOfRecord",
            "RequestingPhysician",
            "00331019",
        ):
            with self.assertRaises(KeyError):
                _ = parser.dicom[gone].value
Exemple #12
0
    def test_extract_groups(self):
        """Exercise value/field group extraction, then use the groups in a recipe.

        Covers, in order: extract_values_list with SPLIT and FIELD actions,
        extract_fields_list with a "contains:" filter, a DicomParser run with
        the groups and replacement variables defined by hand, and finally
        replace_identifiers with the same variables passed through ``ids``.
        """
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        dicom = get_dicom(self.dataset)
        fields = get_fields(dicom)

        # Test split action: the expected values are PatientID split on "^"
        actions = [{
            "action": "SPLIT",
            "field": "PatientID",
            "value": 'by="^";minlength=4'
        }]
        expected_names = dicom.get("PatientID").split("^")
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_names)

        # Test field action: expected values come from every field whose
        # keyword starts with "Operator"
        actions = [{"action": "FIELD", "field": "startswith:Operator"}]
        expected_operator = [
            x.element.value for uid, x in fields.items()
            if x.element.keyword.startswith("Operator")
        ]
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_operator)

        print("Test deid.dicom.groups extract_fields_list")
        actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = {
            uid: x
            for uid, x in fields.items() if "Instance" in x.element.keyword
        }
        actual = extract_fields_list(dicom, actions)
        # Only checks that every expected uid is present; extras are allowed
        for uid in expected:
            assert uid in actual

        # Get identifiers for file
        # (only the type is checked; ids is reassigned further down)
        ids = get_identifiers(dicom)
        self.assertTrue(isinstance(ids, dict))

        # Add keys to be used for replace to ids - these first are for values
        # NOTE(review): self.deid is set up outside this method; presumably it
        # refers to these group/variable names - confirm against the fixture.
        parser = DicomParser(dicom, recipe=self.deid)
        parser.define("cookie_names", expected_names)
        parser.define("operator_names", expected_operator)

        # This is for fields
        parser.define("instance_fields", expected)
        parser.define("id", "new-cookie-id")
        parser.define("source_id", "new-operator-id")
        parser.parse()

        # Were the changes made?
        assert parser.dicom.get("PatientID") == "new-cookie-id"
        assert parser.dicom.get("OperatorsName") == "new-operator-id"

        # Instance fields should be removed based on recipe
        for uid, field in parser.lookup["instance_fields"].items():
            self.assertTrue(field.element.keyword not in parser.dicom)

        # Start over with a freshly loaded dataset
        dicom = get_dicom(self.dataset)

        # We need to provide ids with variables "id" and "source_id"
        # (keyed by the dataset's filename)
        ids = {
            dicom.filename: {
                "id": "new-cookie-id",
                "source_id": "new-operator-id"
            }
        }

        # Returns list of updated dicom, since save is False
        replaced = replace_identifiers(dicom,
                                       save=False,
                                       deid=self.deid,
                                       ids=ids)
        cleaned = replaced.pop()

        # Same replacements as the hand-driven parser run above
        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")