Beispiel #1
0
    def test_extract_groups(self):
        # Checks the two group extractors against values computed by hand,
        # then feeds those groups to replace_identifiers through the ids lookup.
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        ds = get_dicom(self.dataset)
        field_names = get_fields(ds)  # removes empty / null

        # SPLIT: PatientID is broken apart on "^"
        split_actions = [
            {
                "action": "SPLIT",
                "field": "PatientID",
                "value": 'by="^";minlength=4',
            }
        ]
        patient_id = ds.get("PatientID")
        expected_names = patient_id.split("^")
        self.assertEqual(extract_values_list(ds, split_actions), expected_names)

        # FIELD: values of every field whose name starts with "Operator"
        operator_actions = [{"action": "FIELD", "field": "startswith:Operator"}]
        expected_operator = []
        for name in field_names:
            if name.startswith("Operator"):
                expected_operator.append(ds.get(name))
        self.assertEqual(
            extract_values_list(ds, operator_actions), expected_operator
        )

        print("Test deid.dicom.groups extract_fields_list")
        instance_actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = []
        for name in field_names:
            if "Instance" in name:
                expected.append(name)
        self.assertEqual(extract_fields_list(ds, instance_actions), expected)

        # Identifiers for the file come back as a dict
        ids = get_identifiers(ds)
        self.assertIsInstance(ids, dict)

        # Keys consumed during replacement: the first two hold values,
        # "instance_fields" holds field names, the last two are replacements
        ids[ds.filename].update(
            {
                "cookie_names": expected_names,
                "operator_names": expected_operator,
                "instance_fields": expected,
                "id": "new-cookie-id",
                "source_id": "new-operator-id",
            }
        )

        results = replace_identifiers(ds, ids=ids, save=False, deid=self.deid)
        cleaned = results.pop()
        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

        # Currently we don't well handle tag types, so we convert to string
        for operator_value in expected_operator:
            self.assertNotIn(str(operator_value), cleaned)
Beispiel #2
0
    def parse(self, strip_sequences=False, remove_private=False):
        """Walk the dicom once: optionally prune it, collect its fields
           (sequences expanded), fill the lookup from the recipe's values
           and fields lists, then run every recipe action followed by the
           "put" actions of the default config.
        """

        def run_actions(entries):
            # Every entry is forwarded to perform_action as field / value / action
            for entry in entries:
                self.perform_action(
                    field=entry.get("field"),
                    value=entry.get("value"),
                    action=entry.get("action"),
                )

        # Sequences are stripped before anything else when asked for
        if strip_sequences is True:
            remove_sequences(self.dicom)

        # Private tags are dropped up front on request
        if remove_private:
            self.remove_private()

        # Flat collection of fields that the extractors search over
        fields = self.get_fields(expand_sequences=True)

        # Recipe-driven work only happens when a deid recipe was loaded
        if self.recipe.deid is not None:

            # Groups of values, stored in the lookup under the group name
            if self.recipe.has_values_lists():
                for name, group_actions in self.recipe.get_values_lists().items():
                    self.lookup[name] = extract_values_list(
                        dicom=self.dicom, actions=group_actions, fields=fields
                    )

            # Groups of fields, stored in the same lookup
            if self.recipe.has_fields_lists():
                for name, group_actions in self.recipe.get_fields_lists().items():
                    self.lookup[name] = extract_fields_list(
                        dicom=self.dicom, actions=group_actions, fields=fields
                    )

            run_actions(self.recipe.get_actions())

        # The default config's actions always run last
        run_actions(self.config["put"]["actions"])
Beispiel #3
0
def replace_identifiers(
    dicom_files,
    ids=None,
    deid=None,
    save=True,
    overwrite=False,
    output_folder=None,
    force=True,
    config=None,
    strip_sequences=True,
    remove_private=True,
):
    """replace identifiers using pydicom, can be slow when writing
       and saving new files. If you want to replace sequences, they need
       to be extracted with get_identifiers and expand_sequences to True.

       Parameters
       ==========
       dicom_files: files (paths) or pydicom Datasets to process; passed
                    through _prepare_replace_config before iterating.
       ids: optional lookup keyed by dicom file path. Each value is the
            dict handed to perform_action as "item"; an entry is created
            for a file that has none when a recipe is loaded.
       deid: forwarded to _prepare_replace_config to obtain the recipe.
       save: when exactly True, each rebuilt dataset is passed to
             save_dicom and the value it returns is collected instead.
       overwrite: forwarded to save_dicom.
       output_folder: forwarded to save_dicom.
       force: forwarded to read_file when a path has to be read.
       config: forwarded to _prepare_replace_config; the resulting config
               supplies the default config["put"]["actions"].
       strip_sequences: when exactly True, remove_sequences is applied.
       remove_private: when truthy, private tags are removed first.

       Returns
       =======
       a list with one entry per processed file: the rebuilt Dataset, or
       whatever save_dicom returned when save is True.
    """
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # ids (a lookup) is not required
    ids = ids or {}

    # Data attributes copied from the original onto the rebuilt dataset
    attributes = (
        "is_little_endian",
        "is_implicit_VR",
        "is_decompressed",
        "read_encoding",
        "read_implicit_vr",
        "read_little_endian",
        "_parent_encoding",
    )

    # Parse through dicom files, update headers, and save
    updated_files = []
    for dicom_file in dicom_files:

        # An already loaded Dataset is used directly, keyed by its filename
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        # Remove private tags at the onset, if requested
        if remove_private:
            try:
                dicom.remove_private_tags()
            except Exception:
                bot.error(
                    """Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others."""
                    % dicom_name)

                # Fall back to deleting the private tags one at a time, then
                # keep processing the file so it is still part of the result
                # (previously the file was silently skipped at this point)
                for ptag in get_private(dicom):
                    del dicom[ptag.tag]

        # Include private tags (if not removed) plus dicom.dir
        fields = dicom_dir(dicom)

        if recipe.deid is not None:

            if dicom_file not in ids:
                ids[dicom_file] = {}

            # Prepare additional lists of values and fields (updates item)
            if recipe.has_values_lists():
                for group, actions in recipe.get_values_lists().items():
                    ids[dicom_file][group] = extract_values_list(
                        dicom=dicom, actions=actions)

            if recipe.has_fields_lists():
                for group, actions in recipe.get_fields_lists().items():
                    ids[dicom_file][group] = extract_fields_list(
                        dicom=dicom, actions=actions)

            for action in recipe.get_actions():
                dicom = perform_action(dicom=dicom,
                                       item=ids[dicom_file],
                                       action=action)

        # Next perform the default config actions, limited to fields that
        # were present before the recipe actions ran
        for action in config["put"]["actions"]:
            if action["field"] in fields:
                dicom = perform_action(dicom=dicom, action=action)

        # Assemble a new dataset, again accounting for private tags
        ds = Dataset()
        for field in dicom_dir(dicom):
            try:
                # Most fields are strings
                if isinstance(field, str):
                    ds.add(dicom.data_element(field))

                # Remainder are tags
                else:
                    ds.add(dicom.get(field))
            except Exception:
                # Best effort: an element that cannot be copied is left out,
                # but interrupts and interpreter exit are no longer swallowed
                continue

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128

        # Copy original data attributes
        for attribute in attributes:
            if hasattr(dicom, attribute):
                setattr(ds, attribute, getattr(dicom, attribute))

        # Original meta data                     # or default empty dataset
        file_metas = getattr(dicom, "file_meta", Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, "MediaStorageSOPInstanceUID"):
            file_metas.MediaStorageSOPInstanceUID = ""

        # Save meta data
        ds.file_meta = file_metas

        # Save to file?
        if save is True:
            ds = save_dicom(
                dicom=ds,
                dicom_file=dicom_file,
                output_folder=output_folder,
                overwrite=overwrite,
            )
        updated_files.append(ds)

    return updated_files
Beispiel #4
0
    def test_extract_groups(self):
        # Covers the group extractors, a DicomParser run driven by lookups
        # defined up front, and replace_identifiers driven by an ids lookup.
        # Only unittest assertions are used so that no check is stripped
        # when Python runs with -O.
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        dicom = get_dicom(self.dataset)
        fields = get_fields(dicom)

        # Test split action
        actions = [{
            "action": "SPLIT",
            "field": "PatientID",
            "value": 'by="^";minlength=4'
        }]
        expected_names = dicom.get("PatientID").split("^")
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_names)

        # Test field action
        actions = [{"action": "FIELD", "field": "startswith:Operator"}]
        expected_operator = [
            x.element.value for x in fields.values()
            if x.element.keyword.startswith("Operator")
        ]
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_operator)

        print("Test deid.dicom.groups extract_fields_list")
        actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = {
            uid: x
            for uid, x in fields.items() if "Instance" in x.element.keyword
        }
        actual = extract_fields_list(dicom, actions)

        # Every expected uid must be among the extracted fields
        for uid in expected:
            self.assertIn(uid, actual)

        # Get identifiers for file
        ids = get_identifiers(dicom)
        self.assertIsInstance(ids, dict)

        # Add keys to be used for replace to ids - these first are for values
        parser = DicomParser(dicom, recipe=self.deid)
        parser.define("cookie_names", expected_names)
        parser.define("operator_names", expected_operator)

        # This is for fields
        parser.define("instance_fields", expected)
        parser.define("id", "new-cookie-id")
        parser.define("source_id", "new-operator-id")
        parser.parse()

        # Were the changes made?
        self.assertEqual(parser.dicom.get("PatientID"), "new-cookie-id")
        self.assertEqual(parser.dicom.get("OperatorsName"), "new-operator-id")

        # Instance fields should be removed based on recipe
        for field in parser.lookup["instance_fields"].values():
            self.assertNotIn(field.element.keyword, parser.dicom)

        # Start over
        dicom = get_dicom(self.dataset)

        # We need to provide ids with variables "id" and "source_id"
        ids = {
            dicom.filename: {
                "id": "new-cookie-id",
                "source_id": "new-operator-id"
            }
        }

        # Returns list of updated dicom, since save is False
        replaced = replace_identifiers(dicom,
                                       save=False,
                                       deid=self.deid,
                                       ids=ids)
        cleaned = replaced.pop()

        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")