Ejemplo n.º 1
0
    def test_field_expansion(self):
        """Check that expand_field_expression filters fields by expression.

        Two expressions are run against the fields of self.dataset: an
        ``endswith:Time`` expression, whose results are checked by field
        name, and a ``contains:0019`` expression, whose results are checked
        by the key each field is stored under. The "animals" dataset is then
        loaded for the nested private tag case.
        """
        print("Test deid.dicom.fields expand_field_expression")
        from deid.dicom.fields import expand_field_expression

        dicom = get_dicom(self.dataset)
        candidates = get_fields(dicom)

        print("Testing that field expansion works for basic tags")
        time_fields = expand_field_expression(
            dicom=dicom, field="endswith:Time", contenders=candidates
        )

        # Every match must have a name carrying the requested suffix
        for matched in time_fields.values():
            assert matched.name.endswith("Time")

        print("Testing that we can also search private tags based on numbers.")
        private_fields = expand_field_expression(
            dicom=dicom, field="contains:0019", contenders=candidates
        )

        # Every match must have 0019 somewhere in its key
        for identifier in private_fields:
            assert "0019" in identifier

        print("Testing nested private tags")
        # Per the original note, this dataset includes nested private tags
        dataset = get_dataset("animals")
        dicom = get_dicom(dataset)
Ejemplo n.º 2
0
    def test_extract_groups(self):
        """Exercise value/field list extraction, then identifier replacement.

        First compares extract_values_list and extract_fields_list against
        expectations computed directly from the dicom, then stores those
        expectations in the identifiers lookup and checks that
        replace_identifiers writes the new PatientID and OperatorsName.
        """
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        dicom = get_dicom(self.dataset)
        # Original note: empty / null fields are removed by this call
        fields = get_fields(dicom)

        # SPLIT action: PatientID broken apart on "^"
        split_actions = [
            {
                "action": "SPLIT",
                "field": "PatientID",
                "value": 'by="^";minlength=4',
            }
        ]
        expected_names = dicom.get("PatientID").split("^")
        self.assertEqual(
            extract_values_list(dicom, split_actions), expected_names
        )

        # FIELD action: values of each field whose name starts with Operator
        operator_actions = [
            {"action": "FIELD", "field": "startswith:Operator"}
        ]
        expected_operator = []
        for name in fields:
            if name.startswith("Operator"):
                expected_operator.append(dicom.get(name))
        self.assertEqual(
            extract_values_list(dicom, operator_actions), expected_operator
        )

        print("Test deid.dicom.groups extract_fields_list")
        instance_actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = [name for name in fields if "Instance" in name]
        self.assertEqual(
            extract_fields_list(dicom, instance_actions), expected
        )

        # Identifiers come back as a lookup keyed by file name
        ids = get_identifiers(dicom)
        self.assertTrue(isinstance(ids, dict))

        # The first two entries hold values, the third holds field names,
        # and the last two are the replacement variables
        ids[dicom.filename].update(
            {
                "cookie_names": expected_names,
                "operator_names": expected_operator,
                "instance_fields": expected,
                "id": "new-cookie-id",
                "source_id": "new-operator-id",
            }
        )

        # With save=False the updated dicoms are returned; take the last one
        replaced = replace_identifiers(
            dicom, deid=self.deid, save=False, ids=ids
        )
        cleaned = replaced.pop()
        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

        # Tag types are not handled well yet, hence the string conversion
        for value in expected_operator:
            self.assertTrue(str(value) not in cleaned)
Ejemplo n.º 3
0
def get_identifiers(dicom_files,
                    force=True,
                    config=None,
                    expand_sequences=True,
                    skip_fields=None):
    """ extract the fields of one or more dicom images, returning a lookup
        keyed by file name. (The original docstring states that private
        tags are not included; that depends on get_fields and the config,
        which are not visible here.)

        Parameters
        ==========
        dicom_files: a single dicom (path or Dataset), or a list / tuple
                     of them
        force: passed to read_file when a path must be loaded (default True)
        config: path to a json config. If None, config.json in the module
                folder is used
        expand_sequences: forwarded unchanged to get_fields
        skip_fields: a field, or a list / tuple of fields, to skip in
                     addition to the "skip" list of the config

        Returns
        =======
        dict mapping each file name to the result of get_fields for that
        dicom. For a Dataset input, the key is its filename attribute.

    """
    if config is None:
        config = "%s/config.json" % here

    # NOTE(review): the message says "exiting", but nothing visible in this
    # function stops execution after the error is reported - confirm whether
    # bot.error exits, otherwise read_json is called on a missing file.
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    # A single path or Dataset is treated as a collection of one
    if not isinstance(dicom_files, (list, tuple)):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))

    # Fields to leave out: the config's list plus any caller additions.
    # Concatenation builds a new list, so the config's list is not modified.
    skip = config["skip"]
    if skip_fields is not None:
        if not isinstance(skip_fields, (list, tuple)):
            skip_fields = [skip_fields]
        skip = skip + list(skip_fields)

    ids = dict()  # identifiers, keyed by file name
    for dicom_file in dicom_files:

        # An already loaded Dataset is used directly, and keyed by the
        # filename it carries. Anything else is read from disk.
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)

        ids[dicom_file] = get_fields(dicom,
                                     skip=skip,
                                     expand_sequences=expand_sequences)
    return ids
Ejemplo n.º 4
0
 def get_fields(self, expand_sequences=True):
     """return the expanded dicom fields, extracting them on first use.
        The result of the module-level get_fields (called with this
        instance's dicom and seen attributes) is stored on self.fields and
        returned as-is while that attribute is truthy, so expand_sequences
        only matters on a call that actually performs the extraction.
        The original docstring describes the entries as DicomField objects,
        with sequences unwrapped and named by location
        (e.g., Sequence__Child).
     """
     # Reuse what an earlier call already extracted
     if self.fields:
         return self.fields

     self.fields = get_fields(
         dicom=self.dicom, expand_sequences=expand_sequences, seen=self.seen
     )
     return self.fields
Ejemplo n.º 5
0
def get_shared_identifiers(dicom_files,
                           force=True,
                           config=None,
                           aggregate=None,
                           expand_sequences=True):
    """

    extract shared identifiers across a set of dicom files, intended for
    cases when a set of images (dicom) are being compressed into one file
    and the file (still) should have some searchable metadata. By default,
    we remove fields that differ between files. To aggregate unique, define
    a list of aggregate fields (aggregate).

    Parameters
    ==========
    dicom_files: a single dicom path, or a list / tuple of paths
    force: passed to read_file for every file (default True)
    config: path to a json config. If None, config.json in the module
            folder is used
    aggregate: fields whose distinct values are collected into a list
               instead of being dropped when they differ
    expand_sequences: forwarded unchanged to get_fields

    Returns
    =======
    dict of field -> value. An aggregate field holds a list of its distinct
    values, unless only one value was seen, in which case the value itself.

    """

    # Wrap a single input first, so the count logged below is a number of
    # files rather than the length of one path string
    if not isinstance(dicom_files, (list, tuple)):
        dicom_files = [dicom_files]

    bot.debug("Extracting shared identifiers for %s dicom" %
              (len(dicom_files)))

    if aggregate is None:
        aggregate = []

    if config is None:
        config = "%s/config.json" % (here)

    # NOTE(review): the message says "exiting", but nothing visible in this
    # function stops execution after the error is reported - confirm whether
    # bot.error exits, otherwise read_json is called on a missing file.
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    ids = dict()  # identifiers

    # Fields to leave out, starting from the config's "skip" list. The list
    # grows below as fields are found to differ between files.
    skip = config["skip"]
    for dicom_file in dicom_files:

        # Honor the caller's force setting (previously hard-coded to True)
        dicom = read_file(dicom_file, force=force)

        # Get list of fields, expanded sequences are flattened
        fields = get_fields(dicom,
                            skip=skip,
                            expand_sequences=expand_sequences)

        # NOTE(review): a field that first appears in a later file is stored
        # even though earlier files did not have it - confirm this is wanted.
        for key, val in fields.items():

            # First sighting: store it (aggregates start as a one-item list)
            if key not in ids:
                ids[key] = [val] if key in aggregate else val
                continue

            # Items to aggregate are appended, not removed
            if key in aggregate:
                if val not in ids[key]:
                    ids[key].append(val)
                continue

            # Keep only if equal between files
            if ids[key] == val:
                continue

            # The value differs: drop the field and add it to skip, which is
            # handed to get_fields again for the remaining files
            del ids[key]
            skip.append(key)

    # For any aggregates that are one item, unwrap again
    for field in aggregate:
        if field in ids and len(ids[field]) == 1:
            ids[field] = ids[field][0]

    return ids
Ejemplo n.º 6
0
    def test_extract_groups(self):
        """Exercise list extraction, DicomParser variables and replacement.

        Compares extract_values_list / extract_fields_list with expectations
        built from the dicom's fields, defines those expectations as parser
        variables and parses, then repeats the replacement on a freshly
        loaded dicom through replace_identifiers.
        """
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        dicom = get_dicom(self.dataset)
        fields = get_fields(dicom)

        # SPLIT action: PatientID broken apart on "^"
        split_actions = [
            {
                "action": "SPLIT",
                "field": "PatientID",
                "value": 'by="^";minlength=4',
            }
        ]
        expected_names = dicom.get("PatientID").split("^")
        self.assertEqual(
            extract_values_list(dicom, split_actions), expected_names
        )

        # FIELD action: values of elements whose keyword starts with Operator
        operator_actions = [
            {"action": "FIELD", "field": "startswith:Operator"}
        ]
        expected_operator = [
            entry.element.value
            for entry in fields.values()
            if entry.element.keyword.startswith("Operator")
        ]
        self.assertEqual(
            extract_values_list(dicom, operator_actions), expected_operator
        )

        print("Test deid.dicom.groups extract_fields_list")
        instance_actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = {}
        for uid, entry in fields.items():
            if "Instance" in entry.element.keyword:
                expected[uid] = entry
        actual = extract_fields_list(dicom, instance_actions)
        for uid in expected:
            assert uid in actual

        # Identifiers come back as a dict
        ids = get_identifiers(dicom)
        self.assertTrue(isinstance(ids, dict))

        # Define the variables in order: two value lists, the instance
        # fields, then the two replacement values
        parser = DicomParser(dicom, recipe=self.deid)
        definitions = (
            ("cookie_names", expected_names),
            ("operator_names", expected_operator),
            ("instance_fields", expected),
            ("id", "new-cookie-id"),
            ("source_id", "new-operator-id"),
        )
        for variable, content in definitions:
            parser.define(variable, content)
        parser.parse()

        # The parsed dicom must carry the replacement values
        assert parser.dicom.get("PatientID") == "new-cookie-id"
        assert parser.dicom.get("OperatorsName") == "new-operator-id"

        # The original comment says the recipe removes the instance fields
        for entry in parser.lookup["instance_fields"].values():
            self.assertTrue(entry.element.keyword not in parser.dicom)

        # Second pass on a freshly loaded dicom
        dicom = get_dicom(self.dataset)

        # Only the variables "id" and "source_id" are supplied this time
        replacements = {"id": "new-cookie-id", "source_id": "new-operator-id"}
        ids = {dicom.filename: replacements}

        # With save=False the updated dicoms are returned; take the last one
        replaced = replace_identifiers(
            dicom, ids=ids, deid=self.deid, save=False
        )
        cleaned = replaced.pop()

        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")