def test_anonymise_directory(tmp_path):
    """Check ``anonymise_directory`` with and without deleting the originals.

    A test DICOM file is copied into ``tmp_path`` and the directory is
    anonymised twice, keeping the original filenames (suffixed as anonymised)
    both times.
    """
    original_path = tmp_path / "test.dcm"
    anonymised_path = label_dicom_filepath_as_anonymised(original_path)
    keep_names = {"anonymise_filenames": False}

    try:
        copyfile(TEST_FILEPATH, original_path)
        assert not is_anonymised_directory(tmp_path)

        # First pass: originals are kept. The anonymised copy must exist, but
        # the directory as a whole is not anonymised because the identifying
        # original is still sitting next to it.
        anonymise_directory(tmp_path, delete_original_files=False, **keep_names)
        assert is_anonymised_file(anonymised_path)
        assert exists(original_path)
        assert not is_anonymised_directory(tmp_path)

        remove_file(anonymised_path)

        # Second pass: originals are deleted, so both the file and the
        # directory must now be reported as anonymised.
        anonymise_directory(tmp_path, delete_original_files=True, **keep_names)
        assert is_anonymised_file(anonymised_path)
        assert not exists(original_path)
        assert is_anonymised_directory(tmp_path)
    finally:
        remove_file(anonymised_path)
def compare_dicom_cli(command, original, expected):
    """Run a CLI command on a DICOM file and compare the result to ``expected``.

    ``original`` is written to ``ORIGINAL_DICOM_FILENAME``, ``command`` is run
    as a subprocess, and the dataset read back from
    ``ADJUSTED_DICOM_FILENAME`` must have the same string representation as
    ``expected``. Both files are removed afterwards, whether or not the
    comparison succeeded.

    Parameters
    ----------
    command : sequence of ``str``
        The command line passed to ``subprocess.check_call``.
    original : ``pydicom.Dataset``
        The dataset written to disk as the input of the command.
    expected : ``pydicom.Dataset``
        The dataset the command is expected to produce.

    Raises
    ------
    subprocess.CalledProcessError
        If ``command`` exits with a non-zero status.
    AssertionError
        If the adjusted dataset differs from ``expected``.
    """
    try:
        # Written inside the ``try`` so that a partially written input file is
        # still cleaned up when the write itself fails.
        # ``dcmwrite``/``dcmread`` replace the deprecated ``write_file`` /
        # ``read_file`` aliases.
        pydicom.dcmwrite(ORIGINAL_DICOM_FILENAME, original)
        subprocess.check_call(command)

        cli_adjusted_ds = pydicom.dcmread(ADJUSTED_DICOM_FILENAME, force=True)
        assert str(cli_adjusted_ds) == str(expected)
    finally:
        remove_file(ORIGINAL_DICOM_FILENAME)
        remove_file(ADJUSTED_DICOM_FILENAME)
def _check_is_anonymised_dataset_file_and_dir(
    ds, tmp_path, anon_is_expected=True, ignore_private_tags=False
):
    """Assert that dataset, file and directory checks agree with expectation.

    ``ds`` is saved as ``test.dcm`` inside ``tmp_path`` and the three
    ``is_anonymised_*`` checks are run, in the order dataset, file, directory.
    Each result must match ``anon_is_expected``. The saved file is removed
    afterwards.

    Parameters
    ----------
    ds : ``pydicom.Dataset``
        The dataset to check. Its transfer syntax and ``file_meta`` are set
        by this helper before saving.
    tmp_path : ``pathlib.Path``
        Directory in which the temporary DICOM file is written.
    anon_is_expected : ``bool``, optional
        Whether every check is expected to report "anonymised".
    ignore_private_tags : ``bool``, optional
        Forwarded to each of the ``is_anonymised_*`` checks.
    """
    dicom_path = str(tmp_path / "test.dcm")
    expected = bool(anon_is_expected)

    try:
        create.set_default_transfer_syntax(ds)
        ds.file_meta = TEST_FILE_META
        ds.save_as(dicom_path, write_like_original=False)

        dataset_ok = bool(is_anonymised_dataset(ds, ignore_private_tags))
        assert dataset_ok == expected

        file_ok = bool(is_anonymised_file(dicom_path, ignore_private_tags))
        assert file_ok == expected

        directory_ok = bool(is_anonymised_directory(tmp_path, ignore_private_tags))
        assert directory_ok == expected
    finally:
        remove_file(dicom_path)
def _test_pseudonymise_cli_for_file(tmp_path, test_file_path):
    """Run the pseudonymisation CLI on a single file and then on its directory.

    ``test_file_path`` is copied to ``tmp_path`` as ``test.dcm``. The expected
    output filename is derived from the pseudonymised SOP Instance UID and the
    mode prefix looked up from the SOP Class name. The CLI must exit with
    status zero in both runs and leave the original file in place.

    Parameters
    ----------
    tmp_path : ``str`` or ``pathlib.Path``
        Working directory for the copied and pseudonymised files.
    test_file_path : ``str`` or ``pathlib.Path``
        The DICOM file to pseudonymise.
    """
    source_path = pjoin(tmp_path, "test.dcm")

    try:
        logging.info("CLI test on %s", test_file_path)
        copyfile(test_file_path, source_path)

        # --- Single-file pseudonymisation --------------------------------
        # For now only require a zero exit status and an output file.
        assert not is_anonymised_file(source_path)

        # The input dataset supplies the SOP Instance UID and SOP Class name
        # needed to predict the output filename, and is reused below to
        # compare against the pseudonymised result.
        input_ds = pydicom.dcmread(source_path, force=True)
        pseudonymise_uid = pseudonymisation_api.pseudonymisation_dispatch["UI"]
        pseudo_sop_instance_uid = pseudonymise_uid(input_ds.SOPInstanceUID)
        sop_class_name = input_ds.SOPClassUID.name  # pylint: disable = no-member
        mode_prefix = DICOM_SOP_CLASS_NAMES_MODE_PREFIXES[sop_class_name]

        pseudo_basename = "{}.{}_Anonymised.dcm".format(
            mode_prefix, pseudo_sop_instance_uid
        )
        pseudo_path = pjoin(tmp_path, pseudo_basename)
        assert not exists(pseudo_path)

        file_command = (
            "pymedphys --verbose experimental dicom anonymise --pseudo".split()
            + [source_path]
        )
        logging.info("Command line: %s", file_command)
        try:
            subprocess.check_call(file_command)
            assert exists(pseudo_path)
            # NOTE: the content-level check
            # ``is_anonymised_file(pseudo_path)`` is not asserted here.
            assert exists(source_path)

            pseudo_ds = pydicom.dcmread(pseudo_path, force=True)
            assert input_ds["PatientID"].value != pseudo_ds["PatientID"].value
        finally:
            remove_file(pseudo_path)

        # --- Directory pseudonymisation ----------------------------------
        assert not is_anonymised_directory(tmp_path)
        assert not exists(pseudo_path)

        directory_command = (
            "pymedphys --verbose experimental dicom anonymise --pseudo".split()
            + [str(tmp_path)]
        )
        try:
            subprocess.check_call(directory_command)
            # NOTE: as above, ``is_anonymised_file(pseudo_path)`` is not
            # asserted for the directory run.
            assert exists(source_path)
        finally:
            remove_file(pseudo_path)
    finally:
        remove_file(source_path)
def test_anonymise_file():
    """Check ``anonymise_file`` options against the shared test DICOM file.

    Covers private tag handling, the anonymised filename, deletion of the
    original file, and preservation of the original filename. Every file the
    test may have produced is removed afterwards.
    """
    # Local import: ``splitext`` is only needed by this test.
    from os.path import splitext

    assert not is_anonymised_file(TEST_FILEPATH)

    # ``splitext`` yields the path without its extension. Using it directly
    # (rather than joining the stem back onto ``dirname(TEST_FILEPATH)``)
    # avoids duplicating the directory when ``TEST_FILEPATH`` is relative, and
    # copes with dots in directory names.
    test_file_stem = splitext(TEST_FILEPATH)[0]
    temp_filepath = "{}_{}.dcm".format(test_file_stem, uuid4())

    # Initialised up front so the ``finally`` clause can always run, even if
    # the test fails before a given file has been produced.
    anon_private_filepath = ""
    anon_filepath_orig = ""
    anon_filepath_pres = ""

    try:
        # Private tag handling
        anon_private_filepath = anonymise_file(TEST_FILEPATH, delete_private_tags=False)
        assert not is_anonymised_file(anon_private_filepath, ignore_private_tags=False)
        assert is_anonymised_file(anon_private_filepath, ignore_private_tags=True)

        anon_private_filepath = anonymise_file(TEST_FILEPATH, delete_private_tags=True)
        assert is_anonymised_file(anon_private_filepath, ignore_private_tags=False)

        # Filename is anonymised?
        assert basename(anon_private_filepath) == TEST_ANON_BASENAME

        # Deletion of original file
        copyfile(TEST_FILEPATH, temp_filepath)
        anon_filepath_orig = anonymise_file(temp_filepath, delete_original_file=True)
        assert is_anonymised_file(anon_filepath_orig)
        assert not exists(temp_filepath)

        # Preservation of filename if desired
        expected_filepath = "{}_Anonymised.dcm".format(test_file_stem)
        anon_filepath_pres = anonymise_file(TEST_FILEPATH, anonymise_filename=False)
        assert anon_filepath_pres == expected_filepath
    finally:
        remove_file(temp_filepath)
        remove_file(anon_private_filepath)
        remove_file(anon_filepath_orig)
        remove_file(anon_filepath_pres)
def test_anonymise_cli(tmp_path):
    """Exercise the ``pymedphys dicom anonymise`` command line interface.

    A test DICOM file is copied into ``tmp_path`` and the CLI is run against
    it once per option (default, ``-f``, ``-c``, ``-k``, ``-p``) and finally
    against the directory itself. Each scenario removes its own output so the
    next one starts from the same state.
    """
    source_path = str(tmp_path / "test.dcm")

    try:
        copyfile(TEST_FILEPATH, source_path)
        anon_path = str(tmp_path / TEST_ANON_BASENAME)

        # Scenario: default single-file anonymisation.
        assert not is_anonymised_file(source_path)
        assert not exists(anon_path)
        default_command = "pymedphys dicom anonymise".split() + [source_path]
        try:
            subprocess.check_call(default_command)
            assert is_anonymised_file(anon_path)
            assert exists(source_path)
        finally:
            remove_file(anon_path)

        # Scenario: ``-f`` keeps the original filename (with the anonymised
        # label appended).
        assert not is_anonymised_file(source_path)
        labelled_path = label_dicom_filepath_as_anonymised(source_path)
        assert not exists(labelled_path)
        preserve_command = "pymedphys dicom anonymise -f".split() + [source_path]
        try:
            subprocess.check_call(preserve_command)
            assert is_anonymised_file(labelled_path)
            assert exists(source_path)
        finally:
            remove_file(labelled_path)

        # Scenario: ``-c`` clears values instead of replacing them.
        assert not is_anonymised_file(source_path)
        assert not exists(anon_path)
        cleared_path = str(tmp_path / TEST_ANON_BASENAME)
        clear_command = "pymedphys dicom anonymise -c".split() + [source_path]
        try:
            subprocess.check_call(clear_command)
            assert is_anonymised_file(cleared_path)
            assert pydicom.dcmread(cleared_path).PatientName == ""
            assert exists(source_path)
        finally:
            remove_file(cleared_path)

        # Scenario: ``-k PatientName`` leaves that keyword untouched, so the
        # output only counts as anonymised once the name is overwritten here.
        assert not is_anonymised_file(source_path)
        assert not exists(anon_path)
        keep_command = (
            "pymedphys dicom anonymise".split()
            + [source_path]
            + "-k PatientName".split()
        )
        try:
            subprocess.check_call(keep_command)
            assert not is_anonymised_file(anon_path)
            kept_ds = pydicom.dcmread(anon_path)
            kept_ds.PatientName = "Anonymous"
            assert is_anonymised_dataset(kept_ds)
            assert exists(source_path)
        finally:
            remove_file(anon_path)

        # Scenario: ``-p`` keeps private tags, so the output is anonymised
        # only when private tags are ignored.
        assert not is_anonymised_file(source_path)
        assert not exists(anon_path)
        private_command = "pymedphys dicom anonymise -p".split() + [source_path]
        try:
            subprocess.check_call(private_command)
            assert not is_anonymised_file(anon_path)
            assert is_anonymised_file(anon_path, ignore_private_tags=True)
            assert exists(source_path)
        finally:
            remove_file(anon_path)

        # TODO: File anonymisation - unknown tag handling
        # # Calling a subprocess reloads BASELINE_DICOM_DICT...

        # Scenario: anonymising the whole directory.
        assert not is_anonymised_directory(tmp_path)
        assert not exists(anon_path)
        directory_command = "pymedphys dicom anonymise".split() + [str(tmp_path)]
        try:
            subprocess.check_call(directory_command)
            assert is_anonymised_file(anon_path)
            assert exists(source_path)
        finally:
            remove_file(anon_path)
    finally:
        remove_file(source_path)
def anonymise_directory(
    dicom_dirpath,
    output_dirpath=None,
    delete_original_files=False,
    anonymise_filenames=True,
    replace_values=True,
    keywords_to_leave_unchanged=(),
    delete_private_tags=True,
    delete_unknown_tags=None,
    replacement_strategy=None,
    identifying_keywords=None,
    fail_fast=True,
):
    r"""A simple tool to anonymise all DICOM files in a directory and
    its subdirectories.

    Parameters
    ----------
    dicom_dirpath : ``str`` or ``pathlib.Path``
        The path to the directory containing DICOM files to be
        anonymised. Every ``*.dcm`` file found recursively is processed.

    output_dirpath : ``str`` or ``pathlib.Path``, optional
        If given, each file's path relative to ``dicom_dirpath`` is
        joined onto this directory and passed to ``anonymise_file()`` as
        its ``output_filepath``. If ``None`` (the default), no output
        path is passed.

    delete_original_files : ``bool``, optional
        If set to `True` and anonymisation completes successfully, then
        the original DICOM files are deleted. Files that failed to
        anonymise are never deleted. Defaults to `False`.

    anonymise_filenames : ``bool``, optional
        If ``True``, the DICOM filenames are replaced by filenames of
        the form:

        "<2 char DICOM modality>.<SOP Instance UID>_Anonymised.dcm".

        E.g.: "RP.2.16.840.1.113669.[...]_Anonymised.dcm"

        This ensures that the filenames contain no identifying
        information. If ``False``, ``anonymise_directory()`` simply
        appends "_Anonymised" to the original DICOM filenames. Defaults
        to ``True``.

    replace_values : ``bool``, optional
        If set to ``True``, DICOM tags will be anonymised using dummy
        "anonymous" values. This is often required for commercial
        software to successfully read anonymised DICOM files. If set to
        ``False``, anonymised tags are simply given empty string values.
        Defaults to ``True``.

    keywords_to_leave_unchanged : ``sequence``, optional
        A sequence of DICOM keywords (corresponding to tags) to exclude
        from anonymisation. Private and unknown tags can be supplied.
        Empty by default.

    delete_private_tags : ``bool``, optional
        A boolean to flag whether or not to remove all private
        (non-standard) DICOM tags from the DICOM file. These may also
        contain identifying information. Defaults to ``True``.

    delete_unknown_tags : ``bool``, pseudo-optional
        If left as the default value of ``None`` and ``ds`` contains
        tags that are not present in PyMedPhys' copy of ``pydicom``'s
        DICOM dictionary, ``anonymise_dataset()`` will raise an error.
        The user must then either pass ``True`` or ``False`` to proceed.
        If set to ``True``, all unrecognised tags that haven't been
        listed in ``keywords_to_leave_unchanged`` will be deleted. If
        set to ``False``, these tags are simply ignored. Pass ``False``
        with caution, since unrecognised tags may contain identifying
        information.

    replacement_strategy : ``dict`` (keys are VR, value is dispatch function), optional
        If left as the default value of ``None``, the hardcoded
        replacement strategy is used.

    identifying_keywords : ``list``, optional
        If left as ``None``, the default list of identifying keywords is
        used.

    fail_fast : ``bool``, optional
        If ``True`` (the default), the first anonymisation error is
        re-raised immediately and no original files are deleted. If
        ``False``, the remaining files are still attempted; only after
        all files have been attempted and the originals of the
        successful ones deleted (if requested) is the first error
        raised to indicate that not all files could be anonymised.

    Returns
    -------
    ``list`` of anonymised file paths

    Raises
    ------
    AttributeError, LookupError, TypeError, OSError, ValueError
        The first such error raised by ``anonymise_file()``.
    """
    dicom_dirpath = str(dicom_dirpath)
    dicom_filepaths = glob(dicom_dirpath + "/**/*.dcm", recursive=True)

    failing_filepaths = set()
    successful_filepaths = []
    anon_filepaths = []
    errors = []

    for dicom_filepath in dicom_filepaths:
        if output_dirpath is not None:
            relative_path = os.path.relpath(dicom_filepath, start=dicom_dirpath)
            output_filepath = os.path.join(output_dirpath, relative_path)
        else:
            output_filepath = None

        try:
            dicom_anon_filepath = anonymise_file(
                dicom_filepath,
                output_filepath=output_filepath,
                # Deletion of originals is deferred to the loop below so
                # that a failure part-way through the batch (with
                # ``fail_fast=True``) leaves every original in place.
                delete_original_file=False,
                anonymise_filename=anonymise_filenames,
                replace_values=replace_values,
                keywords_to_leave_unchanged=keywords_to_leave_unchanged,
                delete_private_tags=delete_private_tags,
                delete_unknown_tags=delete_unknown_tags,
                replacement_strategy=replacement_strategy,
                identifying_keywords=identifying_keywords,
            )
        except (AttributeError, LookupError, TypeError, OSError, ValueError) as error:
            errors.append(error)
            failing_filepaths.add(dicom_filepath)
            logging.warning("Unable to anonymise %s", dicom_filepath)
            logging.warning(str(error))
            if fail_fast:
                raise
        else:
            successful_filepaths.append(dicom_filepath)
            anon_filepaths.append(dicom_anon_filepath)

    # Separate loop provides the ability to raise Exceptions from the
    # unsuccessful deletion of the original DICOM files while preventing
    # these Exceptions from interrupting the batch anonymisation.
    if delete_original_files:
        for dicom_filepath in dicom_filepaths:
            if dicom_filepath not in failing_filepaths:
                remove_file(dicom_filepath)

    if errors:
        logging.info("Succeeded in anonymising: \n%s", "\n".join(successful_filepaths))
        raise errors[0]

    return anon_filepaths
def anonymise_file(
    dicom_filepath,
    output_filepath=None,
    delete_original_file=False,
    anonymise_filename=True,
    replace_values=True,
    keywords_to_leave_unchanged=(),
    delete_private_tags=True,
    delete_unknown_tags=None,
    replacement_strategy=None,
    identifying_keywords=None,
):
    r"""A simple tool to anonymise a DICOM file.

    Parameters
    ----------
    dicom_filepath : ``str`` or ``pathlib.Path``
        The path to the DICOM file to be anonymised.

    output_filepath : ``str`` or ``pathlib.Path``, optional
        The path on which the anonymised file's location is based. Its
        parent directory is created if it does not exist. If
        ``anonymise_filename`` is ``True`` only the directory part is
        used; otherwise the whole path is used and labelled as
        anonymised. If ``None`` (the default), ``dicom_filepath`` is
        used instead.

    delete_original_file : ``bool``, optional
        If `True` and anonymisation completes successfully, then the
        original DICOM is deleted. Defaults to ``False``.

    anonymise_filename : ``bool``, optional
        If ``True``, the DICOM filename is replaced by a filename of the
        form:

        "<2 char DICOM modality>.<SOP Instance UID>_Anonymised.dcm".

        E.g.: "RP.2.16.840.1.113669.[...]_Anonymised.dcm"

        This ensures that the filename contains no identifying
        information. If set to ``False``, ``anonymise_file()`` simply
        appends "_Anonymised" to the original DICOM filename. Defaults
        to ``True``.

    replace_values : ``bool``, optional
        If set to ``True``, DICOM tags will be anonymised using dummy
        "anonymous" values. This is often required for commercial
        software to successfully read anonymised DICOM files. If set to
        ``False``, anonymised tags are simply given empty string values.
        Defaults to ``True``.

    keywords_to_leave_unchanged : ``sequence``, optional
        A sequence of DICOM keywords (corresponding to tags) to exclude
        from anonymisation. Private and unknown tags can be supplied.
        Empty by default.

    delete_private_tags : ``bool``, optional
        A boolean to flag whether or not to remove all private
        (non-standard) DICOM tags from the DICOM file. These may also
        contain identifying information. Defaults to ``True``.

    delete_unknown_tags : ``bool``, pseudo-optional
        If left as the default value of ``None`` and ``ds`` contains
        tags that are not present in PyMedPhys' copy of ``pydicom``'s
        DICOM dictionary, ``anonymise_dataset()`` will raise an error.
        The user must then either pass ``True`` or ``False`` to proceed.
        If set to ``True``, all unrecognised tags that haven't been
        listed in ``keywords_to_leave_unchanged`` will be deleted. If
        set to ``False``, these tags are simply ignored. Pass ``False``
        with caution, since unrecognised tags may contain identifying
        information.

    replacement_strategy : ``dict`` (keys are VR, value is dispatch function), optional
        If left as the default value of ``None``, the hardcoded
        replacement strategy is used.

    identifying_keywords : ``list``, optional
        If left as ``None``, the default list of identifying keywords is
        used.

    Returns
    -------
    ``str``
        The file path of the anonymised file
    """
    dicom_filepath = str(dicom_filepath)
    ds = pydicom.dcmread(dicom_filepath, force=True)

    # ``copy_dataset=False``: the freshly read dataset is anonymised in place.
    anonymise_dataset(
        ds=ds,
        replace_values=replace_values,
        keywords_to_leave_unchanged=keywords_to_leave_unchanged,
        delete_private_tags=delete_private_tags,
        delete_unknown_tags=delete_unknown_tags,
        copy_dataset=False,
        replacement_strategy=replacement_strategy,
        identifying_keywords=identifying_keywords,
    )

    if output_filepath is None:
        output_filepath = dicom_filepath
    else:
        output_dirpath = dirname(output_filepath)
        # A bare filename has an empty directory part, and
        # ``os.makedirs("")`` raises ``FileNotFoundError``; in that case the
        # file goes to the current directory, which needs no creating.
        if output_dirpath:
            os.makedirs(output_dirpath, exist_ok=True)

    if anonymise_filename:
        filepath_used = core.create_filename_from_dataset(
            ds, dirpath=dirname(output_filepath)
        )
    else:
        filepath_used = output_filepath

    dicom_anon_filepath = core.label_dicom_filepath_as_anonymised(filepath_used)

    print(f"{dicom_filepath} --> {dicom_anon_filepath}")

    ds.save_as(dicom_anon_filepath)

    # Only reached when the anonymised file has been written successfully.
    if delete_original_file:
        remove_file(dicom_filepath)

    return dicom_anon_filepath
def pseudonymise_buffer_list(file_buffer_list: list):
    """Pseudonymises the contents of the file_buffer_list (list of DICOM files)
    and places the pseudonymised files in to a set of zips, each less than 50 MBytes.
    Those zips are then made available for download through a set of links in the
    streamlit.sidebar

    The 50MByte limit is imposed by the href download link limit.

    The compression used is DEFLATE, but the uncompressed contents are kept at just under 50 MBytes
    If a single original uncompressed file is > 50MByte, and does not compress to under 50 MByte
    that file will fail to be made available for download, and may cause the entire pseudonymisation
    attempt to fail.

    Parameters
    ----------
    file_buffer_list : list
        DICOM files that were uploaded using streamlit.file_uploader

    Returns
    -------
    zip_path_list : list
        a list of full paths to the Zipped, pseudonymised files.
        If the io.BytesIO() approach is used (current default), the list will be empty.
        An empty list is also returned when there is nothing to process or
        when a batch could not be processed.
    """
    zip_path_list: List = list()
    if not file_buffer_list:
        return zip_path_list

    str_now_datetime = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    zipfile_basename = f"Pseudonymised_{str_now_datetime}"

    index_to_fifty_mbyte_increment = _gen_index_list_to_fifty_mbyte_increment(
        file_buffer_list
    )
    st.write(index_to_fifty_mbyte_increment)

    start_index = 0
    for zip_count, end_index in enumerate(index_to_fifty_mbyte_increment, start=1):
        # An empty slice means there is nothing left to zip.
        if start_index == end_index:
            break

        zipfile_name = f"{zipfile_basename}.{zip_count}.zip"
        zipfile_path = DOWNLOADS_PATH.joinpath(zipfile_name)

        # In-memory zip buffer (current default). The ``None`` checks below
        # keep the file-based alternative working should this be switched off.
        zip_bytes_io = io.BytesIO()
        try:
            bad_data = _zip_pseudo_fifty_mbytes(
                file_buffer_list[start_index:end_index],
                str(zipfile_path),
                zip_bytes_io=zip_bytes_io,
            )
            start_index = end_index

            if bad_data:
                if zip_bytes_io is None:
                    # Remove the zip by its full path: the bare file name
                    # would be resolved against the current directory.
                    remove_file(str(zipfile_path))
                st.text("Problem processing DICOM data")
                return list()

            if zip_bytes_io is not None:
                link_to_zipbuffer_download(zipfile_name, zip_bytes_io.getvalue())
            else:
                link_to_zip_download(pathlib.Path(zipfile_path))
                zip_path_list.append(zipfile_path)
        finally:
            # Release the buffer on every exit path, including exceptions
            # raised by the zip or link helpers.
            if zip_bytes_io is not None:
                zip_bytes_io.close()

    return zip_path_list
def anonymise_directory(
    dicom_dirpath,
    output_dirpath=None,
    delete_original_files=False,
    anonymise_filenames=True,
    replace_values=True,
    keywords_to_leave_unchanged=(),
    delete_private_tags=True,
    delete_unknown_tags=None,
):
    r"""A simple tool to anonymise all DICOM files in a directory and
    its subdirectories.

    Parameters
    ----------
    dicom_dirpath : ``str`` or ``pathlib.Path``
        The path to the directory containing DICOM files to be
        anonymised. Every ``*.dcm`` file found recursively is processed.

    output_dirpath : ``str`` or ``pathlib.Path``, optional
        If given, each file's path relative to ``dicom_dirpath`` is
        joined onto this directory and passed to ``anonymise_file()`` as
        its ``output_filepath``. If ``None`` (the default), no output
        path is passed.

    delete_original_files : ``bool``, optional
        If set to `True` and anonymisation completes successfully, then
        the original DICOM files are deleted. Defaults to `False`.

    anonymise_filenames : ``bool``, optional
        If ``True``, the DICOM filenames are replaced by filenames of
        the form:

        "<2 char DICOM modality>.<SOP Instance UID>_Anonymised.dcm".

        E.g.: "RP.2.16.840.1.113669.[...]_Anonymised.dcm"

        This ensures that the filenames contain no identifying
        information. If ``False``, ``anonymise_directory()`` simply
        appends "_Anonymised" to the original DICOM filenames. Defaults
        to ``True``.

    replace_values : ``bool``, optional
        If set to ``True``, DICOM tags will be anonymised using dummy
        "anonymous" values. This is often required for commercial
        software to successfully read anonymised DICOM files. If set to
        ``False``, anonymised tags are simply given empty string values.
        Defaults to ``True``.

    keywords_to_leave_unchanged : ``sequence``, optional
        A sequence of DICOM keywords (corresponding to tags) to exclude
        from anonymisation. Private and unknown tags can be supplied.
        Empty by default.

    delete_private_tags : ``bool``, optional
        A boolean to flag whether or not to remove all private
        (non-standard) DICOM tags from the DICOM file. These may also
        contain identifying information. Defaults to ``True``.

    delete_unknown_tags : ``bool``, pseudo-optional
        If left as the default value of ``None`` and ``ds`` contains
        tags that are not present in PyMedPhys' copy of ``pydicom``'s
        DICOM dictionary, ``anonymise_dataset()`` will raise an error.
        The user must then either pass ``True`` or ``False`` to proceed.
        If set to ``True``, all unrecognised tags that haven't been
        listed in ``keywords_to_leave_unchanged`` will be deleted. If
        set to ``False``, these tags are simply ignored. Pass ``False``
        with caution, since unrecognised tags may contain identifying
        information.
    """
    dicom_dirpath = str(dicom_dirpath)
    dicom_filepaths = glob(dicom_dirpath + "/**/*.dcm", recursive=True)

    for dicom_filepath in dicom_filepaths:
        if output_dirpath is not None:
            relative_path = os.path.relpath(dicom_filepath, start=dicom_dirpath)
            output_filepath = os.path.join(output_dirpath, relative_path)
        else:
            output_filepath = None

        anonymise_file(
            dicom_filepath,
            output_filepath=output_filepath,
            # Deletion of originals is deferred to the loop below, so an
            # error part-way through the batch leaves every original in
            # place ("deleted only if anonymisation completes successfully").
            delete_original_file=False,
            anonymise_filename=anonymise_filenames,
            replace_values=replace_values,
            keywords_to_leave_unchanged=keywords_to_leave_unchanged,
            delete_private_tags=delete_private_tags,
            delete_unknown_tags=delete_unknown_tags,
        )

    # Separate loop provides the ability to raise Exceptions from the
    # unsuccessful deletion of the original DICOM files while preventing
    # these Exceptions from interrupting the batch anonymisation.
    if delete_original_files:
        for dicom_filepath in dicom_filepaths:
            remove_file(dicom_filepath)
def test_anonymise_directory(tmp_path):
    """Check ``anonymise_directory``: deletion of originals and ``fail_fast``.

    The first part anonymises a single RT plan file with and without deleting
    the original and checks the returned list of anonymised paths. The second
    part adds a treatment record file carrying an element that is not in the
    DICOM dictionary, so that anonymisation of that file fails, and checks how
    ``fail_fast`` affects deletion of the originals.
    """
    temp_filepath = tmp_path / "test.dcm"
    temp_anon_filepath = label_dicom_filepath_as_anonymised(temp_filepath)
    temp_record_filepath = tmp_path / "test_record.dcm"
    temp_anon_record_filepath = label_dicom_filepath_as_anonymised(temp_record_filepath)

    try:
        copyfile(get_rtplan_test_file_path(), temp_filepath)
        assert not is_anonymised_directory(tmp_path)

        # Test file deletion
        anon_path_list = anonymise_directory(
            tmp_path, delete_original_files=False, anonymise_filenames=False
        )
        # File should be anonymised but not dir, since original file
        # is still present.
        assert is_anonymised_file(temp_anon_filepath)
        assert exists(temp_filepath)
        assert not is_anonymised_directory(tmp_path)
        assert anon_path_list is not None
        assert anon_path_list[0] == temp_anon_filepath

        remove_file(temp_anon_filepath)

        anon_path_list = anonymise_directory(
            tmp_path, delete_original_files=True, anonymise_filenames=False
        )
        # File and dir should be anonymised since original file should
        # have been deleted.
        assert is_anonymised_file(temp_anon_filepath)
        assert not exists(temp_filepath)
        assert is_anonymised_directory(tmp_path)
        assert anon_path_list[0] == temp_anon_filepath

        # Test fail fast vs. fail at last:
        # - if the function fails fast, the specified removal will not
        #   take place;
        # - if the function does not fail fast, the specified removal
        #   will take place.
        logging.warning("Testing fail fast")
        remove_file(temp_anon_filepath)
        copyfile(get_rtplan_test_file_path(), temp_filepath)
        copyfile(get_treatment_record_test_file_path(), temp_record_filepath)

        ds_record = pydicom.dcmread(temp_record_filepath, force=True)
        # deliberately add a DICOM element that is not in the current
        # dictionary
        ds_record.add_new([0x300A, 0x9999], "FL", [1.0, 1.0])
        pydicom.dcmwrite(temp_record_filepath, ds_record)

        # The calls below are expected to raise, so they never return a
        # value: nothing is assigned or logged from them, and the checks on
        # the original file run after each ``with`` block so that they are
        # actually executed.
        with pytest.raises((KeyError, ValueError)):
            anonymise_directory(
                tmp_path,
                delete_original_files=True,
                anonymise_filenames=False,
                fail_fast=True,
            )
        assert exists(temp_filepath)

        with pytest.raises((KeyError, ValueError)):
            anonymise_directory(
                tmp_path,
                delete_original_files=True,
                anonymise_filenames=False,
                fail_fast=False,
            )
        assert not exists(temp_filepath)
    finally:
        remove_file(temp_anon_filepath)
        remove_file(temp_anon_record_filepath)