Ejemplo n.º 1
0
    def test_field_expansion(self):
        """Exercise deid.dicom.fields.expand_field_expression on public tags,
        private tags, and numeric private-tag searches.

        NOTE(review): the visible block ends right after loading the
        "animals" dataset — assertions for nested private tags likely
        continue beyond this excerpt; confirm against the full file.
        """
        print("Test deid.dicom.fields expand_field_expression")
        from deid.dicom.fields import expand_field_expression

        # Load the test dataset; dicom.dir() yields only public field names
        dicom = get_dicom(self.dataset)
        contenders = dicom.dir()

        print("Testing that field expansion works for basic tags")
        expand_field_expression(dicom=dicom,
                                field="endswith:Time",
                                contenders=contenders)

        print("Testing that field expansion works including private tags")
        # Private tags are absent from dicom.dir(); add their tag objects
        contenders += [e.tag for e in get_private(dicom)]
        expand_field_expression(dicom=dicom,
                                field="endswith:Time",
                                contenders=contenders)

        print("Testing that we can also search private tags based on numbers.")
        fields = expand_field_expression(dicom=dicom,
                                         field="contains:0019",
                                         contenders=contenders)

        # We should have a tag object in the list now!
        assert isinstance(fields[0], BaseTag)

        print("Testing nested private tags")
        dataset = get_dataset("animals")  # includes nested private tags
        dicom = get_dicom(dataset)
Ejemplo n.º 2
0
 def remove_private(self):
     """Remove private tags from the loaded dicom.

     Tries pydicom's bulk removal first. If that fails (usually due to an
     invalid data type in a private element) the error is logged and each
     private tag found by get_private is deleted individually instead.
     """
     try:
         self.dicom.remove_private_tags()
     # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
     # are not swallowed; the best-effort fallback behavior is unchanged.
     except Exception:
         bot.error(
             """Private tags for %s could not be completely removed, usually
                      this is due to invalid data type. Removing others."""
             % self.dicom_name)
         for ptag in get_private(self.dicom):
             del self.dicom[ptag.tag]
Ejemplo n.º 3
0
def dicom_dir(dicom):
    """Return every key usable as an index into the dicom, public and
       private alike. With private tags present the result resembles:

       ...
      'WindowCenterWidthExplanation',
      'WindowWidth',
      (0011, 0003),
      (0019, 0010),

      and both strings and tags work with dicom.get(x)
    """
    # Start from the public field names, then append private tag objects
    keys = dicom.dir()
    keys.extend(entry.tag for entry in get_private(dicom))
    return keys
Ejemplo n.º 4
0
def replace_identifiers(
    dicom_files,
    ids=None,
    deid=None,
    save=True,
    overwrite=False,
    output_folder=None,
    force=True,
    config=None,
    strip_sequences=True,
    remove_private=True,
):
    """replace identifiers using pydicom, can be slow when writing
       and saving new files. If you want to replace sequences, they need
       to be extracted with get_identifiers and expand_sequences to True.

    Parameters
    ==========
    dicom_files: one or more dicom file paths or pydicom Dataset objects
    ids: optional lookup of extracted identifiers, keyed by file path
    deid: the deid recipe (parsed by _prepare_replace_config)
    save: if True, pass each rebuilt dataset through save_dicom
    overwrite: forwarded to save_dicom
    output_folder: forwarded to save_dicom
    force: forwarded to read_file to read non-conformant files
    config: optional config; loaded by _prepare_replace_config when None
    strip_sequences: remove sequences before applying actions
    remove_private: remove private tags before applying actions

    Returns
    =======
    list of updated datasets (or whatever save_dicom returns when save=True)
    """
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # ids (a lookup) is not required
    ids = ids or {}

    # Parse through dicom files, update headers, and save
    updated_files = []
    for _, dicom_file in enumerate(dicom_files):

        # Accept either an already-loaded Dataset or a path on disk
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        # Remove private tags at the onset, if requested
        if remove_private:
            try:
                dicom.remove_private_tags()
            # Narrowed from a bare `except:` — do not catch SystemExit or
            # KeyboardInterrupt while best-effort removing private tags.
            except Exception:
                bot.error(
                    """Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others."""
                    % dicom_name)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this skips all remaining processing (and
                # saving) for this file — confirm the skip is intended.
                continue

        # Include private tags (if not removed) plus dicom.dir
        fields = dicom_dir(dicom)

        if recipe.deid is not None:

            if dicom_file not in ids:
                ids[dicom_file] = {}

            # Prepare additional lists of values and fields (updates item)
            if recipe.has_values_lists():
                for group, actions in recipe.get_values_lists().items():
                    ids[dicom_file][group] = extract_values_list(
                        dicom=dicom, actions=actions)

            if recipe.has_fields_lists():
                for group, actions in recipe.get_fields_lists().items():
                    ids[dicom_file][group] = extract_fields_list(
                        dicom=dicom, actions=actions)

            for action in recipe.get_actions():
                dicom = perform_action(dicom=dicom,
                                       item=ids[dicom_file],
                                       action=action)

        # Next perform actions in default config, only if not done
        for action in config["put"]["actions"]:
            if action["field"] in fields:
                dicom = perform_action(dicom=dicom, action=action)

        # Assemble a new dataset, again accounting for private tags
        ds = Dataset()
        for field in dicom_dir(dicom):

            try:
                # Most fields are strings
                if isinstance(field, str):
                    ds.add(dicom.data_element(field))

                # Remainder are tags
                else:
                    ds.add(dicom.get(field))
            # Best-effort copy: silently skip any element that cannot be
            # read back (narrowed from a bare `except:`).
            except Exception:
                pass

        # Copy original data attributes
        attributes = [
            "is_little_endian",
            "is_implicit_VR",
            "is_decompressed",
            "read_encoding",
            "read_implicit_vr",
            "read_little_endian",
            "_parent_encoding",
        ]

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128

        for attribute in attributes:
            if hasattr(dicom, attribute):
                ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Original meta data                     # or default empty dataset
        file_metas = getattr(dicom, "file_meta", Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, "MediaStorageSOPInstanceUID"):
            file_metas.MediaStorageSOPInstanceUID = ""

        # Save meta data
        ds.file_meta = file_metas

        # Save to file?
        if save is True:
            ds = save_dicom(
                dicom=ds,
                dicom_file=dicom_file,
                output_folder=output_folder,
                overwrite=overwrite,
            )
        updated_files.append(ds)

    return updated_files
Ejemplo n.º 5
0
def replace_identifiers(dicom_files,
                        ids,
                        deid=None,
                        save=True,
                        overwrite=False,
                        output_folder=None,
                        force=True,
                        config=None,
                        strip_sequences=True,
                        remove_private=True):
    '''replace identifiers using pydicom, can be slow when writing
       and saving new files. If you want to replace sequences, they need
       to be extracted with get_identifiers and expand_sequences to True.

       Parameters
       ==========
       dicom_files: one or more dicom file paths to update
       ids: required lookup of extracted identifiers, keyed by file path;
            files without an entry are warned about and skipped
       deid: the deid recipe (parsed by _prepare_replace_config)
       save: if True, pass each rebuilt dataset through save_dicom
       overwrite / output_folder: forwarded to save_dicom
       force: forwarded to read_file to read non-conformant files
       config: optional config; loaded by _prepare_replace_config when None
       strip_sequences: remove sequences before applying actions
       remove_private: remove private tags after applying actions

       Returns
       =======
       list of updated datasets (or save_dicom results when save=True)
    '''
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # Parse through dicom files, update headers, and save
    updated_files = []
    for _, dicom_file in enumerate(dicom_files):
        dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)
        fields = dicom.dir()

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        if recipe.deid is not None:
            if dicom_file in ids:
                for action in deid.get_actions():
                    dicom = perform_action(dicom=dicom,
                                           item=ids[dicom_file],
                                           action=action)
            else:
                bot.warning("%s is not in identifiers." % dicom_name)
                continue

        # Next perform actions in default config, only if not done
        for action in config['put']['actions']:
            if action['field'] in fields:
                dicom = perform_action(dicom=dicom, action=action)
        if remove_private is True:
            try:
                dicom.remove_private_tags()
            # Narrowed from a bare `except:` — do not catch SystemExit or
            # KeyboardInterrupt while best-effort removing private tags.
            except Exception:
                bot.error(
                    '''Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others.'''
                    % dicom_name)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this skips rebuilding and saving this file —
                # confirm the skip is intended.
                continue
        else:
            bot.warning("Private tags were not removed!")

        # Rebuild a fresh dataset from the (now de-identified) fields
        ds = Dataset()
        for field in dicom.dir():
            try:
                ds.add(dicom.data_element(field))
            # Best-effort copy: silently skip any element that cannot be
            # read back (narrowed from a bare `except:`).
            except Exception:
                pass

        # Copy original data attributes
        attributes = [
            'is_little_endian', 'is_implicit_VR', 'is_decompressed',
            'read_encoding', 'read_implicit_vr', 'read_little_endian',
            '_parent_encoding'
        ]

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128

        for attribute in attributes:
            if hasattr(dicom, attribute):
                ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Original meta data                     # or default empty dataset
        file_metas = getattr(dicom, 'file_meta', Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, 'MediaStorageSOPInstanceUID'):
            file_metas.MediaStorageSOPInstanceUID = ''

        # Save meta data
        ds.file_meta = file_metas

        # Save to file?
        if save is True:
            ds = save_dicom(dicom=ds,
                            dicom_file=dicom_file,
                            output_folder=output_folder,
                            overwrite=overwrite)
        updated_files.append(ds)

    return updated_files
Ejemplo n.º 6
0
def replace_identifiers(dicom_files,
                        ids,
                        deid=None,
                        save=True,
                        overwrite=False,
                        output_folder=None,
                        force=True,
                        config=None,
                        strip_sequences=True,
                        remove_private=True):
    '''replace identifiers using pydicom, can be slow when writing
    and saving new files

    Parameters
    ==========
    dicom_files: one or more dicom file paths to update
    ids: required lookup of extracted identifiers, keyed by file basename;
         files without an entry are warned about and skipped
    deid: the deid structure (a dict with a 'header' list of actions here)
    save: if True, pass each rebuilt dataset through save_dicom
    overwrite / output_folder: forwarded to save_dicom
    force: forwarded to read_file to read non-conformant files
    config: optional config; loaded by _prepare_replace_config when None
    strip_sequences: remove sequences before applying actions
    remove_private: remove private tags after applying actions

    Returns
    =======
    list of updated datasets (or save_dicom results when save=True)
    '''

    dicom_files, deid, config = _prepare_replace_config(dicom_files,
                                                        deid=deid,
                                                        config=config)

    # Parse through dicom files, update headers, and save
    updated_files = []
    for d in range(len(dicom_files)):
        dicom_file = dicom_files[d]
        dicom = read_file(dicom_file, force=force)
        idx = os.path.basename(dicom_file)
        fields = dicom.dir()

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)
        if deid is not None:
            if idx in ids:
                for action in deid['header']:
                    dicom = perform_action(dicom=dicom,
                                           item=ids[idx],
                                           action=action)
            else:
                bot.warning("%s is not in identifiers." % idx)
                continue
        # Next perform actions in default config, only if not done
        for action in config['put']['actions']:
            if action['field'] in fields:
                dicom = perform_action(dicom=dicom, action=action)
        if remove_private is True:
            try:
                dicom.remove_private_tags()
            # Narrowed from a bare `except:` — do not catch SystemExit or
            # KeyboardInterrupt while best-effort removing private tags.
            except Exception:
                bot.error(
                    '''Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others.'''
                    % idx)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this skips rebuilding and saving this file —
                # confirm the skip is intended.
                continue
        else:
            bot.warning("Private tags were not removed!")

        # Rebuild a fresh dataset from the (now de-identified) fields
        ds = Dataset()
        for field in dicom.dir():
            try:
                ds.add(dicom.data_element(field))
            # Best-effort copy: silently skip any element that cannot be
            # read back (narrowed from a bare `except:`).
            except Exception:
                pass

        # Copy original data types
        # NOTE(review): unlike later revisions, this copy is unguarded —
        # __getattribute__ raises if 'preamble' etc. is missing; confirm
        # all inputs carry these attributes.
        attributes = [
            'is_little_endian', 'is_implicit_VR', 'preamble',
            '_parent_encoding'
        ]
        for attribute in attributes:
            ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Retain required meta data
        file_metas = getattr(dicom, 'file_meta', Dataset())

        # Retain required meta data - not identifying
        # file_metas.MediaStorageSOPClassUID
        # file_metas.MediaStorageSOPInstanceUID
        # file_metas.ImplementationVersionName
        # file_metas.ImplementationClassUID

        # File attributes for meta
        attributes = [
            'TransferSyntaxUID', 'FileMetaInformationGroupLength',
            'FileMetaInformationVersion'
        ]
        for attribute in attributes:
            file_metas.add(dicom.file_meta.data_element(attribute))

        # Preamble is required
        ds.file_meta = file_metas
        # NOTE(review): vars(dicom)['preamble'] raises KeyError when the
        # instance dict lacks a preamble — confirm inputs always have one.
        ds.preamble = vars(dicom)['preamble']

        # Save to file?
        if save is True:
            ds = save_dicom(dicom=ds,
                            dicom_file=dicom_file,
                            output_folder=output_folder,
                            overwrite=overwrite)
        updated_files.append(ds)

    return updated_files