def test_DICOM_validator():
    # A known-good DICOM file should pass validation.
    file_name = get_test_DICOM_path()
    success, _ = DICOM_validate.file(file_name)
    assert success

    # A non-DICOM file (plain text) should fail validation.
    file_name = get_testdata_files("README.txt")[0]
    success, _ = DICOM_validate.file(file_name)
    assert not success
def filelist(file_list):
    """
    Decompress all compressed files in the list, OVERWRITING the files.
    :param file_list:
    :return:
    """
    for file in tqdm(file_list, position=0):
        logger.debug(f"Checking decompression status for: {file}")

        # Find out if the file is DICOM; if not, skip this file.
        is_DICOM_file, _ = DICOM_validate.file(file)
        if not is_DICOM_file:
            continue

        # Check if the file is compressed.
        TransferSyntax = DICOM_decompress.get_transferSyntax(file)
        try:
            RequireDecompression = DICOM_decompress.check_decompression(TransferSyntax)
            if RequireDecompression:
                DICOM_decompress.save(file)
        except ValueError:
            logger.warning("Unknown DICOM transfer syntax. Are you sure this is a DICOM file?")
            continue
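# A minimal usage sketch for filelist() above, assuming it can be called directly
# as defined here; the file paths are placeholders.
def _example_decompress_batch():
    dicom_files = [
        "/data/incoming/IM-0001-0001.dcm",  # placeholder paths
        "/data/incoming/IM-0001-0002.dcm",
    ]
    filelist(dicom_files)  # compressed DICOM files are decompressed and overwritten in place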
def save_as(in_path: str, NewID: str, out_path: str) -> bool:
    """
    Anonymize the DICOM by removing any identifiable information from a file and
    writing the result to the output file provided. This operates in memory, so it
    should be quite a bit faster. The DICOM file check happens at the lowest
    DICOM_element level.
    # NOTE! Expand here if you need to anonymize additional fields.
    :param in_path:
    :param NewID:
    :param out_path:
    :return:
    """
    success, DICOM = DICOM_validate.file(in_path)
    if not success:
        return False

    # Anonymize PatientID with the NewID provided.
    success1, DICOM_updated = DICOM_elements.update_in_memory(DICOM, "PatientID", NewID)
    if not success1:
        return False

    # Anonymize PatientName with the NewID provided.
    success2, DICOM_updated = DICOM_elements.update_in_memory(DICOM_updated, "PatientName", NewID)

    # Only write out the file after ensuring both anonymization steps succeeded.
    if success2:
        DICOM_updated.save_as(out_path)
        return True
    else:
        return False
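# A minimal usage sketch for save_as() above; the input/output paths and the
# replacement ID are placeholders.
def _example_anonymize_one_file():
    ok = save_as(
        in_path="/data/raw/IM-0001-0001.dcm",
        NewID="SUBJ0001",
        out_path="/data/anon/IM-0001-0001.dcm",
    )
    if not ok:
        raise RuntimeError("Anonymization failed: invalid DICOM or field update error.")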
def get_dicom_files(self, consistency_check=True):
    """
    A safer way of getting the DICOM files than reading the attribute directly
    (as it can be None).
    :return:
    """
    # Validate all files and load them if they have not been loaded before.
    if self.dicom_files is None:
        self.validity, self.dicom_files = DICOM_validate.path(
            self.dicom_folder, consistency_check
        )
    return self.dicom_files
def read_upload_file(path_file):
    # Check that the file is a valid DICOM before continuing.
    # NOTE: DICOM_validate.file returns a (success, DICOM) tuple, so unpack it
    # instead of testing the tuple itself (which is always truthy).
    success, _ = DICOM_validate.file(path_file)
    if success:
        content = read_file(path_file)
        # Upload and keep track of success.
        success = orthanc_query.upload(path_file, credential, content)
        logger.debug(f"Finished uploading: {path_file}")
        return success
    else:
        return False
def retrieve(file_path: str, data_element: str) -> (bool, Optional[str]):
    """
    A low-level function used to retrieve elements from a DICOM file and return a
    LIST of matching elements. ACCEPTS PARTIAL MATCHES but NO regular expressions.
    :param file_path:
    :param data_element:
    :return: LIST of all data elements whose names match the pattern provided in
        data_element, together with their values.
    """
    success, DICOM = DICOM_validate.file(file_path)
    if not success:
        return False, None
    return DICOM_elements.retrieve_fast(DICOM, data_element)
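# A minimal usage sketch for retrieve() above; the file path is a placeholder and
# the partial-match behaviour (e.g. "Patient" matching several elements) follows
# the docstring.
def _example_retrieve_patient_fields():
    found, values = retrieve("/data/anon/IM-0001-0001.dcm", "Patient")
    if found:
        print(values)  # all data elements whose names contain "Patient", with their values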
def check_decompression_quick(file_path):
    import pydicom.uid

    # Validity check:
    success, DICOM = DICOM_validate.file(file_path)
    if not success:
        raise IOError("File is not DICOM")

    # Now read the meta information: True if the transfer syntax is already uncompressed.
    return (
        DICOM.file_meta.TransferSyntaxUID
        in pydicom.uid.UncompressedPixelTransferSyntaxes
    )
def get_transferSyntax(file_path):
    """
    Read the TransferSyntaxUID from the file meta information, which indicates
    whether the file is compressed.
    :param file_path:
    :return:
    """
    # Validity check:
    success, _ = DICOM_validate.file(file_path)
    if not success:
        raise IOError("File is not DICOM")

    # Now read only the meta information (no pixel data).
    dicom_file = read_file_meta_info(file_path)
    transfer_syntax = dicom_file.TransferSyntaxUID
    return transfer_syntax
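# A minimal usage sketch for get_transferSyntax() above, reusing the pydicom.uid
# list already referenced in check_decompression_quick(); the path is a placeholder.
def _example_inspect_transfer_syntax():
    import pydicom.uid

    syntax = get_transferSyntax("/data/scan/IM-0001-0001.dcm")
    compressed = syntax not in pydicom.uid.UncompressedPixelTransferSyntaxes
    print(f"{syntax} (compressed: {compressed})")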
def check_anonymization(files: list, anonymized_name) -> bool:
    """
    Double check a list of files against the KNOWN anonymized value. This ensures
    that the anonymization actually got carried out.
    NOTE!!!! This is the part where we have to ensure all the values are properly
    anonymized.
    @todo: generalize this such that it will provide a list of fields and then
        anonymize them all from the database etc.
    :param files: files must be given as absolute paths!
    :param anonymized_name:
    :return:
    """
    from DICOMTransit.DICOM.elements import DICOM_elements
    from DICOMTransit.DICOM.elements_batch import DICOM_elements_batch
    from DICOMTransit.DICOM.validate import DICOM_validate

    # Check every single file in the DICOM collection.
    for file in tqdm(files, position=0):
        success, DICOM = DICOM_validate.file(file)
        if not success:
            return False

        properties = ["PatientID", "PatientName"]
        properties_output = DICOM_elements_batch.retrieval(DICOM, properties)

        success1, patient_id = DICOM_elements.retrieve_fast(DICOM, "PatientID")
        success2, name = DICOM_elements.retrieve_fast(DICOM, "PatientName")

        # Bad retrieval.
        if not success1 or not success2:
            return False
        # PatientID not properly anonymized.
        if not patient_id == anonymized_name:
            return False
        # PatientName not properly anonymized.
        if not name == anonymized_name:
            return False
    return True
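# A minimal usage sketch combining save_as() and check_anonymization() from above;
# the source/destination paths and the replacement ID are placeholders.
def _example_anonymize_and_verify():
    sources = ["/data/raw/IM-0001-0001.dcm", "/data/raw/IM-0001-0002.dcm"]
    outputs = [path.replace("/raw/", "/anon/") for path in sources]
    for src, dst in zip(sources, outputs):
        save_as(src, "SUBJ0001", dst)
    assert check_anonymization(outputs, "SUBJ0001"), "Some files still carry identifiable fields."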
def check_validity(self):
    """
    @todo: KEY WRAPPER FUNCTION to check the validity of the object before
        conducting anything else. Currently not working because I am not using
        decorator functions properly.
    :param package_function: the package_function to be performed
    :return:
    """
    # Update validity and dicom_files if they have not been determined before.
    if self.validity is None:
        # Force an individual file level naming/user ID consistency check.
        self.validity, self.dicom_files = DICOM_validate.path(
            self.dicom_folder, consistency_check=True
        )

    # Check validity before moving forward with the update process:
    if self.validity is True:
        # package_function()
        return True
    else:
        return False
def compute_age(file_path):
    """
    Compute the subject's age at scan time from the SeriesDate and the
    PatientBirthDate fields of the DICOM file.
    :param file_path:
    :return: Age as a relativedelta object.
    """
    # @todo: refactor using existing functions.
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    success, DICOM = DICOM_validate.file(file_path)
    if not success:
        return False, None

    scan_date = datetime.strptime(DICOM.SeriesDate, "%Y%m%d")
    birthday = datetime.strptime(DICOM.PatientBirthDate, "%Y%m%d")
    age = relativedelta(scan_date, birthday)
    # age = scan_date - birthday
    return True, age
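# A minimal usage sketch for compute_age() above; the path is a placeholder and
# the .years/.months attributes come from dateutil's relativedelta.
def _example_report_age_at_scan():
    ok, age = compute_age("/data/scan/IM-0001-0001.dcm")
    if ok:
        print(f"Age at scan: {age.years} years, {age.months} months")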
def update(
    file_path: str, data_element: str, element_value, out_path
) -> (bool, str):
    """
    Update a particular data_element to the desired value, then write the result
    to the output path provided.
    BE AWARE that if the key does not exist, it will NOT be created currently!
    :param file_path:
    :param data_element:
    :param element_value:
    :param out_path:
    :return: bool on operation success, and string on reason.
    """
    success, DICOM = DICOM_validate.file(file_path)
    if not success:
        return False, "DICOM not valid."

    success, DICOM_updated = DICOM_elements.update_in_memory(
        DICOM, data_element, element_value
    )
    if success:
        DICOM_updated.save_as(out_path)
        return True, "No error"
    return False, "Failed to update the data element in memory."
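# A minimal usage sketch for update() above; the paths and the new value are
# placeholders. Note that only existing data elements can be updated.
def _example_update_patient_id():
    ok, reason = update(
        "/data/raw/IM-0001-0001.dcm", "PatientID", "SUBJ0001", "/data/out/IM-0001-0001.dcm"
    )
    print(ok, reason)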
def BatchDateCalculation(path):
    file_list = recursive_list(path)
    output = []

    for file in file_list:
        # DICOM_validate.file returns a (success, DICOM) tuple; only proceed on success.
        is_DICOM, _ = DICOM_validate.file(file)
        if is_DICOM:
            # Try to extract this information from the file.
            success1, StudyDate = DICOM_elements.retrieve(file, "StudyDate")
            success2, PatientBirthDate = DICOM_elements.retrieve(
                file, "PatientBirthDate"
            )
            success3, age = DICOM_elements.compute_age(file)

            # Skip this file if any retrieval was unsuccessful.
            if not success1 or not success2 or not success3:
                continue

            # Print, store and append the information acquired.
            A = [file, StudyDate, PatientBirthDate, str(age)]
            print(A)
            output.append(A)

    with open("output.csv", "w") as resultFile:
        wr = csv.writer(resultFile, dialect="excel")
        wr.writerows(output)  # one row per file
def traversal(dir_path: str, consistency_check: bool = True):
    """
    Some basic information about the participant must be consistent across the
    files, such as the SCAN DATE (assuming they are not scanning across the
    MIDNIGHT POINT). Birth date, subject name, etc. MUST BE CONSISTENT across a
    SINGLE subject's folder.
    :param dir_path:
    :returns: 1) whether the path is valid, 2) a list of ONLY the valid DICOM
        files, 3) the list of unique SeriesInstanceUIDs encountered.
    """
    from DICOMTransit.DICOM.validate import DICOM_validate

    # Reject bad input paths.
    if not os.path.exists(dir_path) or not os.path.isdir(dir_path):
        logger.error("Bad data folder path")
        return False, None

    # Get all possible files from the folder.
    files = recursive_list(dir_path)

    # Record the first encountered PatientID and PatientName, and check subsequent
    # files against them for matching information.
    PatientID = ""
    PatientName = ""

    # List to store all validated DICOM files.
    validated_DICOM_files = []

    from DICOMTransit.DICOM.elements import DICOM_elements

    logger.info("Traversing individual DICOM files for validation information.")

    list_unique_sUID = []
    previous_sUID = None  # a shorthand to bypass the list check.

    # Check individual DICOM files for consistency.
    for file in tqdm(files, position=0):

        # Skip the current file if it is not a DICOM file.
        is_DICOM, dicom_obj = DICOM_validate.file(file)
        if not is_DICOM:
            logger.error(
                f"Bad DICOM files detected: {file}. They are not returned in the validated list!"
            )
            continue

        # The following section checks individual files and determines whether all
        # files have consistent name/patient information.
        # Useful for unanticipated ZIP files which can be contaminated.
        # Not useful when dealing with ORTHANC output files.
        if consistency_check:
            # @todo: what if one of them is NONE?
            # @todo: what if the date and other things are inconsistent?

            # Record the first instance of patient ID and patient name.
            if PatientID == "" and PatientName == "":
                Success, PatientID = DICOM_elements.retrieve_fast(
                    dicom_obj, "PatientID"
                )
                Success, PatientName = DICOM_elements.retrieve_fast(
                    dicom_obj, "PatientName"
                )

                # Raise an issue if not successful.
                if not Success:
                    logger.error(
                        "DICOM meta data retrieval failure EVEN for the first DICOM FILE?! Checking next one."
                    )
                else:
                    name = PatientName.original_string.decode("latin_1")
                    logger.debug(
                        f"DICOM meta data retrieval success: {PatientID} {name}"
                    )

                # Regardless of success or failure, continue to the next file.
                continue

            # Check consistency across the folder in terms of patient ID and NAME.
            Success1, CurrentPatientID = DICOM_elements.retrieve_fast(
                dicom_obj, "PatientID"
            )
            Success2, CurrentPatientName = DICOM_elements.retrieve_fast(
                dicom_obj, "PatientName"
            )
            if not Success1 or not Success2:
                logger.error(
                    "Could not retrieve fields for comparison. At least ONE DICOM file has an inconsistent Patient ID/NAME field."
                )
                return False, None

            if not (PatientID == CurrentPatientID) or not (
                PatientName == CurrentPatientName
            ):
                logger.info("PatientID or PatientName mismatch within the DICOM archive.")
                return False, None

        success, UID = DICOM_elements.retrieve_fast(dicom_obj, "SeriesInstanceUID")

        # A quick UID check before the HEAVY list operation.
        if not UID == previous_sUID and UID not in list_unique_sUID:
            list_unique_sUID.append(UID)

        validated_DICOM_files.append(file)
        previous_sUID = UID

    return True, validated_DICOM_files, list_unique_sUID
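# A minimal usage sketch for traversal() above; the folder path is a placeholder.
# On failure only two values are returned, so the star-unpacking below keeps the
# example safe either way.
def _example_traverse_subject_folder():
    ok, *rest = traversal("/data/subject01", consistency_check=True)
    if ok:
        validated_files, unique_series = rest
        print(f"{len(validated_files)} valid DICOM files across {len(unique_series)} series")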