def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
    """Walk a directory tree and yield DICOM file (or directory) paths.

    Parameters
    ----------
    root_path
        Path to the root directory specifying the file hierarchy.
    yield_directories, optional
        Whether to yield paths to directories containing DICOM files
        or separately to each file (default).

    Yields
    ------
    The paths to DICOM files or DICOM-containing directories (if
    `yield_directories` is True).
    """
    # TODO add some filtering options
    for dir_path, _, file_names in os.walk(root_path):
        if yield_directories:
            # A directory qualifies as soon as one of its files is DICOM.
            contains_dicom = any(
                is_dicom(pathlib.Path(dir_path, name).as_posix())
                for name in file_names
            )
            if contains_dicom:
                yield dir_path
        else:
            for name in file_names:
                candidate = pathlib.Path(dir_path, name).as_posix()
                if is_dicom(candidate):
                    yield candidate
def test_is_dicom(self):
    """Verify is_dicom on a valid scan, a non-DICOM file, and a bad path."""
    bogus_path = test_file.replace('CT_', 'CT')  # path that does not exist
    plain_file = osp.abspath(__file__)  # this test module: real but not DICOM
    # a genuine DICOM file is recognized
    self.assertTrue(is_dicom(test_file))
    # an existing non-DICOM file is rejected
    self.assertFalse(is_dicom(plain_file))
    # a nonexistent path raises IOError
    with self.assertRaises(IOError):
        is_dicom(bogus_path)
def test_is_dicom(self):
    """Verify is_dicom across valid, non-DICOM, missing, and headerless files."""
    own_source = os.path.abspath(__file__)  # real file, but not DICOM
    # a genuine DICOM file is recognized
    assert is_dicom(test_file)
    # an existing non-DICOM file is rejected
    assert not is_dicom(own_source)
    # a nonexistent path raises IOError
    with pytest.raises(IOError):
        is_dicom('xxxx.dcm')
    # a file lacking the meta prefix/preamble is rejected
    assert not is_dicom(no_meta_file)
def test_is_dicom(self):
    """Verify is_dicom across valid, non-DICOM, missing, and headerless files."""
    missing_path = test_file.replace('CT_', 'CT')  # path that does not exist
    own_source = os.path.abspath(__file__)  # real file, but not DICOM
    # a genuine DICOM file is recognized
    assert is_dicom(test_file)
    # an existing non-DICOM file is rejected
    assert not is_dicom(own_source)
    # a nonexistent path raises IOError
    with pytest.raises(IOError):
        is_dicom(missing_path)
    # a file lacking the meta prefix/preamble is rejected
    assert not is_dicom(no_meta_file)
def scan_dcm(path):
    """Return a list of DICOM files found anywhere under *path*.

    A nonexistent *path* yields an empty list; directories are skipped.
    """
    root = Path(path)
    if not root.exists():
        return []
    # rglob("*") enumerates every entry recursively; keep only DICOM files.
    return [entry for entry in root.rglob("*")
            if entry.is_file() and is_dicom(entry)]
def __init__(self, source_directory):
    """Scan *source_directory* for .dcm files and read those that are DICOM.

    NOTE(review): chdir is a process-wide side effect; the glob below relies
    on the working directory having been changed here.
    """
    self.source_directory = source_directory
    os.chdir(self.source_directory)
    print ('In ReadDICOMFiles...')
    for candidate in glob.glob("*.dcm"):
        print(candidate)
        if not is_dicom(candidate):
            print("It's probably not DICOM")
            continue
        print("It is indeed DICOM!")
        dcm_file = pydicom.dcmread(candidate)
def find_dicom_files(directory, pattern="*", directory_exclude_pattern='', recursive=True):
    """
    search a root directory for all files matching a given pattern
    (in Glob format - *.dcm etc) and that have the "DICM" magic number

    :param directory: root directory to walk
    :param pattern: fnmatch glob that file basenames must match
    :param directory_exclude_pattern: fnmatch glob; matching subdirectories
        are pruned from the walk
    :param recursive: when False, only the top-level directory is scanned

    yields a full path name for each matching DICOM file
    """
    for root, dirs, files in os.walk(directory):
        if not recursive:
            # BUG FIX: the original did `dirs = []`, which only rebinds the
            # local name — os.walk keeps recursing. Mutating the list in
            # place is what actually prunes the walk.
            del dirs[:]
        # BUG FIX: iterate over a copy; removing from the list being
        # iterated skips the element after each removal.
        for subdir in list(dirs):
            if fnmatch.fnmatch(subdir, directory_exclude_pattern):
                dirs.remove(subdir)
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                if is_dicom(filename):
                    yield filename
def retrieve(self):
    """Parse every .dcm file in the source directory and queue the results.

    For each DICOM file: extracts study/patient metadata and the XML
    body-composition report embedded in ImageComments, serializes everything
    to JSON (stored on ``self.json_result``), and puts a work tuple on
    ``self.ins_var_global_fifo_q``. Non-DICOM files are moved to
    ``self.error_directory`` (or deleted if the move fails).
    """
    #global global_fifo_q
    os.chdir(self.source_directory)
    for file in glob.glob("*.dcm"):
        print(file)
        if is_dicom(file):
            print("It is indeed DICOM!")
            dcm_file = pydicom.dcmread(file)
            print(dcm_file)
            parsed_result = {}
            testType = dcm_file.StudyDescription
            studyDate = dcm_file.StudyDate
            studyTime = dcm_file.StudyTime
            deviceSerialNumber = dcm_file.DeviceSerialNumber
            institutionName = dcm_file.InstitutionName
            manufacturer = dcm_file.Manufacturer
            manufacturerModelName = dcm_file.ManufacturerModelName
            entranceDose = dcm_file.EntranceDoseInmGy
            studyInstanceUID = dcm_file.StudyInstanceUID
            seriesInstanceUID = dcm_file.SeriesInstanceUID
            parsed_result["testInfo"] = {
                "testType": testType,
                "studyDate": studyDate,
                "studyTime": studyTime,
                "deviceSerialNumber": deviceSerialNumber,
                # NOTE(review): misspelled key kept as-is; downstream
                # consumers may depend on it — fix in coordination with them.
                "institutuionName": institutionName,
                "manufacturerModelName": manufacturerModelName,
                "manufacturer": manufacturer,
                "entranceDoseinmGy": entranceDose,
                "studyInstanceUID": studyInstanceUID,
                "seriesInstanceUID": seriesInstanceUID
            }
            user_firstName = dcm_file.PatientName.given_name
            user_lastName = dcm_file.PatientName.family_name
            ethnic_group = dcm_file.EthnicGroup
            user_birthdate = dcm_file.PatientBirthDate
            user_sex = dcm_file.PatientSex
            user_id = dcm_file.PatientID
            # BUG FIX: was `user_id is ''` — identity comparison with a
            # string literal only works by accident of interning; use ==.
            if user_id == '':
                # synthesize a stand-in identifier when PatientID is blank
                user_id = str(user_firstName) + "." + str(
                    user_lastName) + "." + str(
                    user_birthdate) + "@noemail.unk"
            user_age = dcm_file.PatientAge
            user_Size = dcm_file.PatientSize
            user_Weight = dcm_file.PatientWeight
            parsed_result["userInfo"] = {
                "firstName": user_firstName,
                "lastName": user_lastName,
                "email": user_id,
                "ethnicGroup": ethnic_group,
                "birthDate": user_birthdate,
                "userSex": user_sex,
                "userAge": user_age,
                "userSize": user_Size,
                "userWeight": user_Weight
            }
            # ImageComments carries an XML report; parse its regions.
            xml_string = dcm_file.ImageComments
            xml_root = etree.fromstring(xml_string)

            parsed_result["bodyComposition"] = {}
            for leaf in xml_root.iter('COMP_ROI'):
                regionName = lowerCamelCase(leaf.attrib['region'])
                parsed_result["bodyComposition"][regionName] = {}
                for reading in leaf.iter():
                    # skip attributes that don't have a value
                    if 'units' not in reading.attrib:
                        continue
                    # Normalize the value (% or lbs)
                    key = lowerCamelCase(reading.tag)
                    units = reading.attrib['units'].strip()
                    if units == '%':
                        value = normalizePercentageValue(float(reading.text))
                    else:
                        value = normalizeWeightValue(float(reading.text), units)
                    # save the reading
                    parsed_result["bodyComposition"][regionName][key] = value

            parsed_result["BMD"] = {}
            #print ("XML_ROOT", xml_root)
            for leaf in xml_root.iter('ROI'):
                regionName = lowerCamelCase(leaf.attrib['region'])
                parsed_result["BMD"][regionName] = {}
                for reading in leaf.iter():
                    if reading.text is None:
                        continue
                    # BUG FIX: was `reading.text is '-'` — identity test
                    # against a string literal; use equality.
                    elif reading.text == '-':
                        continue
                    key = lowerCamelCase(reading.tag)
                    # units = reading.attrib['units'].strip()
                    value = float(reading.text)
                    parsed_result["BMD"][regionName][key] = value

            parsed_result["visceralFat"] = {}
            for leaf in xml_root.iter('VAT_MASS'):
                regionName = lowerCamelCase(
                    'Estimated Visceral Adipose Tissue')
                parsed_result["visceralFat"][regionName] = {}
                for reading in leaf.iter():
                    # skip attributes that don't have a value
                    if 'units' not in reading.attrib:
                        continue
                    # Normalize the value (% or lbs)
                    key = lowerCamelCase(reading.tag)
                    units = reading.attrib['units'].strip()
                    if units == '%':
                        value = normalizePercentageValue(float(reading.text))
                    else:
                        value = normalizeWeightValue(float(reading.text), units)
                    # save the reading
                    parsed_result["visceralFat"][regionName][key] = value

            # convert it all to JSON and Save
            self.json_result = json.dumps(parsed_result)
            print(self.json_result)

            # Add data to queue
            #data_to_process = (user_id, json_result, studyDate, studyTime, file)
            #global_fifo_q.put(data_to_process)
            data_to_process = (user_id, self.json_result, studyDate, studyTime,
                               file, user_firstName, user_lastName,
                               user_birthdate, studyInstanceUID)
            self.ins_var_global_fifo_q.put(data_to_process)
        else:
            print("It's probably not DICOM")
            print("Trying to move NON-DICOM file to:" + self.error_directory)
            #Handle Error and send notifiction
            #Move to error directory
            try:
                shutil.move(file, self.error_directory)
            except shutil.Error as e:
                # move failed (e.g. destination already has the file):
                # drop the offending file instead
                print('Error: %s' % e)
                os.remove(file)
            except IOError as e:
                # eg. source or destination doesn't exist
                print('Error: %s' % e.strerror)
                os.remove(file)
# NOTE(review): this `return result` is the tail of a function whose `def`
# line falls outside the visible chunk — presumably the weight normalizer
# referenced below; confirm against the full file.
return result


# Normalize percentage values into preferred values [0, 1]
def normalizePercentageValue(value):
    """Convert a percentage reading (0-100) to a fraction in [0, 1]."""
    result = float(value) / float(100)
    return result


''' JSON Parse Execution '''
# Change to os.walk path
#dcm_file = pydicom.read_file('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')
# Probe the hard-coded scan path before reading it.
if is_dicom('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm'):
    print("It is indeed DICOM!")
    dcm_file = pydicom.dcmread('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')
else:
    print("It's probably not DICOM")

# NOTE(review): if the file above was not DICOM, `dcm_file` is never bound and
# the attribute reads below raise NameError — confirm intended flow.
parsed_result = {}
testType = dcm_file.StudyDescription
studyDate = dcm_file.StudyDate
studyTime = dcm_file.StudyTime
deviceSerialNumber = dcm_file.DeviceSerialNumber
institutionName = dcm_file.InstitutionName
manufacturer = dcm_file.Manufacturer
#manufacturerModelName = dcm_file.ManufacturersModelName
#entranceDose = dcm_file.EntranceDoseinmGy
def dump_series2json(dcm_root, mode='one_per_dir', series_file_pattern='00000001.dcm'):
    """Summarize the DICOM series found under ``dcm_root``.

    :param dcm_root: root directory to scan; a nonexistent path returns ``{}``.
    :param mode: ``'one_per_dir'`` (default) reads only the first
        pattern-matching file per directory; any other value reads every file.
    :param series_file_pattern: glob-like pattern; ``*`` is translated to the
        regex ``.*`` before matching file names.
    :return: dict of parallel lists keyed ``'01_PatientName'`` ..
        ``'09_SeriesFiles'``.
    """
    if not os.path.exists(dcm_root):
        return {}
    series = {'01_PatientName': [], '02_PatientID': [], '03_StudyDate': [],
              '04_AcquisitionDateTime': [], '05_SeriesDescription': [],
              '06_NumberofSlices': [], '07_Load': [], '08_SeriesRoot': [],
              '09_SeriesFiles': []}
    for subdir, _, files in os.walk(dcm_root):
        if len(files) <= 0:
            continue
        print('working on %s' % (subdir))
        # find right patterned files
        re_pattern = series_file_pattern.replace('*', '.*')
        r_files = []
        if mode != 'one_per_dir':
            r_files = files
        else:
            for file in files:
                r_files += re.findall(re_pattern, file)
            r_files.sort()
            # BUG FIX: `r_files[0]` raised IndexError when no file in the
            # directory matched the pattern; skip such directories instead.
            if not r_files:
                continue
            r_files = [r_files[0]]
        # iterate all found files, collecting per-directory metadata
        pnames = []
        pids = []
        sdates = []
        sdecrps = []
        acqdatetimes = []
        subdirs = []
        slices = {}
        series_keys = []
        for file in r_files:
            dcm_file = os.path.join(subdir, file)
            if not is_dicom(dcm_file):
                continue
            ds = pydicom.read_file(dcm_file)
            # Every attribute is optional in a DICOM header; default to 'NA'.
            acqdate = str(ds.AcquisitionDate) if hasattr(ds, 'AcquisitionDate') else 'NA'
            acqtime = str(ds.AcquisitionTime) if hasattr(ds, 'AcquisitionTime') else 'NA'
            pname = str(ds.PatientName) if hasattr(ds, 'PatientName') else 'NA'
            pid = str(ds.PatientID) if hasattr(ds, 'PatientID') else 'NA'
            sdate = str(ds.StudyDate) if hasattr(ds, 'StudyDate') else 'NA'
            sdecrp = str(ds.SeriesDescription) if hasattr(ds, 'SeriesDescription') else 'NA'
            # description + directory uniquely identifies a series here
            series_key = str(sdecrp + subdir)
            if series_key not in series_keys:
                series_keys.append(series_key)
                slices[series_key] = []
            pnames.append(pname)
            pids.append(pid)
            sdates.append(sdate)
            sdecrps.append(sdecrp)
            acqdatetimes.append(acqdate + acqtime)
            subdirs.append(subdir)
            slices[series_key].append(file)
        # append to series
        series['01_PatientName'] += pnames
        series['02_PatientID'] += pids
        series['03_StudyDate'] += sdates
        series['04_AcquisitionDateTime'] += acqdatetimes
        series['05_SeriesDescription'] += sdecrps
        series['07_Load'] += [False] * len(pnames)
        series['08_SeriesRoot'] += subdirs
        for series_key in series_keys:
            series['06_NumberofSlices'].append(len(slices[series_key]))
            series['09_SeriesFiles'].append(slices[series_key])
    return series
def Main():
    """Parse a hard-coded DEXA DICOM file into JSON and enqueue the result.

    Extracts study/user metadata and the XML body-composition report embedded
    in ImageComments, then puts (user_id, json, date, time) on global_fifo_q.
    NOTE(review): the input path is hard-coded — presumably a placeholder for
    the os.walk-based flow mentioned below.
    """
    # Change to os.walk path
    # dcm_file = pydicom.read_file('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')
    if is_dicom('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm'):
        print("It is indeed DICOM!")
        dcm_file = pydicom.dcmread(
            'c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')
    else:
        print("It's probably not DICOM")
        # BUG FIX: bail out here — previously execution fell through and the
        # dcm_file attribute reads below raised NameError for non-DICOM input.
        return

    parsed_result = {}
    testType = dcm_file.StudyDescription
    studyDate = dcm_file.StudyDate
    studyTime = dcm_file.StudyTime
    deviceSerialNumber = dcm_file.DeviceSerialNumber
    institutionName = dcm_file.InstitutionName
    manufacturer = dcm_file.Manufacturer
    # manufacturerModelName = dcm_file.ManufacturersModelName
    # entranceDose = dcm_file.EntranceDoseinmGy
    studyInstanceUID = dcm_file.StudyInstanceUID
    seriesInstanceUID = dcm_file.SeriesInstanceUID
    parsed_result["testInfo"] = {
        "testType": testType,
        "studyDate": studyDate,
        "studyTime": studyTime,
        "deviceSerialNumber": deviceSerialNumber,
        # NOTE(review): misspelled key kept as-is; downstream consumers may
        # depend on it — fix in coordination with them.
        "institutuionName": institutionName,
        # "manufacturerModelName": manufacturerModelName,
        "manufacturer": manufacturer,
        # "entranceDoseinmGy": entranceDose,
        "studyInstanceUID": studyInstanceUID,
        "seriesInstanceUID": seriesInstanceUID
    }
    # user_firstName = dcm_file.PatientsName.given_name
    # user_lastName = dcm_file.PatientsName.family_name
    user_id = dcm_file.PatientID
    ethnic_group = dcm_file.EthnicGroup
    # user_birthdate = dcm_file.PatientsBirthDate
    # user_sex = dcm_file.PatientsSex
    # user_age = dcm_file.PatientsAge
    # user_Size = dcm_file.PatientsSize
    # user_Weight = dcm_file.PatientsWeight
    parsed_result["userInfo"] = {
        # "firstName": user_firstName,
        # "lastName": user_lastName,
        "email": user_id,
        "ethnicGroup": ethnic_group,
        # "birthDate": user_birthdate,
        # "userSex": user_sex,
        # "userAge": user_age,
        # "userSize": user_Size,
        # "userWeight": user_Weight
    }
    # ImageComments carries an XML report; parse its regions.
    xml_string = dcm_file.ImageComments
    xml_root = etree.fromstring(xml_string)

    parsed_result["bodyComposition"] = {}
    for leaf in xml_root.iter('COMP_ROI'):
        regionName = lowerCamelCase(leaf.attrib['region'])
        parsed_result["bodyComposition"][regionName] = {}
        for reading in leaf.iter():
            # skip attributes that don't have a value
            if 'units' not in reading.attrib:
                continue
            # Normalize the value (% or lbs)
            key = lowerCamelCase(reading.tag)
            units = reading.attrib['units'].strip()
            if units == '%':
                value = normalizePercentageValue(float(reading.text))
            else:
                value = normalizeWeightValue(float(reading.text), units)
            # save the reading
            parsed_result["bodyComposition"][regionName][key] = value

    parsed_result["BMD"] = {}
    for leaf in xml_root.iter('ROI'):
        regionName = lowerCamelCase(leaf.attrib['region'])
        parsed_result["BMD"][regionName] = {}
        for reading in leaf.iter():
            if reading.text is None:
                continue
            # BUG FIX: was `reading.text is '-'` — identity comparison with a
            # string literal only works by accident of interning; use ==.
            elif reading.text == '-':
                continue
            key = lowerCamelCase(reading.tag)
            # units = reading.attrib['units'].strip()
            value = float(reading.text)
            parsed_result["BMD"][regionName][key] = value

    parsed_result["visceralFat"] = {}
    for leaf in xml_root.iter('VAT_MASS'):
        regionName = lowerCamelCase('Estimated Visceral Adipose Tissue')
        parsed_result["visceralFat"][regionName] = {}
        for reading in leaf.iter():
            # skip attributes that don't have a value
            if 'units' not in reading.attrib:
                continue
            # Normalize the value (% or lbs)
            key = lowerCamelCase(reading.tag)
            units = reading.attrib['units'].strip()
            if units == '%':
                value = normalizePercentageValue(float(reading.text))
            else:
                value = normalizeWeightValue(float(reading.text), units)
            # save the reading
            parsed_result["visceralFat"][regionName][key] = value

    # convert it all to JSON and Save
    json_result = json.dumps(parsed_result)
    print(json_result)

    # Add data to queue
    data_to_process = (user_id, json_result, studyDate, studyTime)
    global_fifo_q.put(data_to_process)

    # When ready to save as JSON file.
    # with open('DXAParse.json','w') as outfile:
    #     outfile.write(json_result)
    # DICOM File has been parsed to JSON and saved as a new file.


# NOTE(review): the driver below is placed at module level; confirm against
# the original file's indentation that it was not meant to live inside Main().
source_directory = 'c:/temp/pending_dexafit_files/'
f = ReadDICOMFiles(source_directory)

#Store in DB
# Database Connection Details
# SECURITY(review): credentials are hard-coded in source — move them to
# environment variables or a secrets store, and rotate this password.
dsn_database = "dexafitpostgres"  # e.g. "compose"
dsn_hostname = "dexafit-postgres-instance.cnhfhogvgcm2.us-east-2.rds.amazonaws.com"  # e.g.: "aws-us-east-1-portal.4.dblayer.com"
dsn_port = "5432"  # e.g. 11101
dsn_uid = "aws_dexafit"  # e.g. "admin"
dsn_pwd = "$$dicomaws$$"  # e.g. "xxx"

s = StoreData(dsn_hostname, dsn_port, dsn_database, dsn_uid, dsn_pwd)
# Drain the queue into the database on a worker thread, then wait for it.
st = threading.Thread(target=s.retrieve_and_store, name='Retrieve Q Data', args=())
st.start()
st.join()
def dump_series2json(dcm_root, mode='one_per_dir', series_file_pattern='00000001.dcm'):
    """Summarize the DICOM series found under ``dcm_root``.

    :param dcm_root: root directory to scan; a nonexistent path returns ``{}``.
    :param mode: ``'one_per_dir'`` (default) reads only the first
        pattern-matching file per directory; any other value reads every file.
    :param series_file_pattern: glob-like pattern; ``*`` is translated to the
        regex ``.*`` before matching file names.
    :return: dict of parallel lists keyed ``'01_PatientName'`` .. ``'12_Task'``.
    """
    if not os.path.exists(dcm_root):
        return {}
    series = {'01_PatientName': [], '02_PatientID': [], '03_StudyDate': [],
              '04_AcquisitionDateTime': [], '05_SeriesDescription': [],
              '06_NumberofSlices': [], '07_Load': [], '08_SeriesRoot': [],
              '09_SeriesFiles': [], '10_Type': [], '11_Func': [], '12_Task': []}
    for subdir, _, files in os.walk(dcm_root):
        if len(files) <= 0:
            continue
        print('working on %s' % (subdir))
        # find right patterned files
        #TODO: verify whether re_pattern is doing anything
        re_pattern = series_file_pattern.replace('*', '.*')
        r_files = []
        if mode != 'one_per_dir':
            r_files = files
        else:
            for file in files:
                r_files += re.findall(re_pattern, file)
            r_files.sort()
            # BUG FIX: `r_files[0]` raised IndexError when no file in the
            # directory matched the pattern; skip such directories instead.
            if not r_files:
                continue
            r_files = [r_files[0]]
        # iterate all found files, collecting per-directory metadata
        pnames = []
        pids = []
        stypes = []
        sdates = []
        sdecrps = []
        acqdatetimes = []
        subdirs = []
        slices = {}
        series_keys = []
        tasks = []
        funcs = []
        for file in r_files:
            dcm_file = os.path.join(subdir, file)
            if not is_dicom(dcm_file):
                continue
            ds = pydicom.read_file(dcm_file)
            # Every attribute is optional in a DICOM header; default to 'NA'.
            stype = str(ds.Modality) if hasattr(ds, 'Modality') else 'NA'
            acqdate = str(ds.AcquisitionDate) if hasattr(ds, 'AcquisitionDate') else 'NA'
            acqtime = str(ds.AcquisitionTime) if hasattr(ds, 'AcquisitionTime') else 'NA'
            pname = str(ds.PatientName) if hasattr(ds, 'PatientName') else 'NA'
            pid = str(ds.PatientID) if hasattr(ds, 'PatientID') else 'NA'
            sdate = str(ds.StudyDate) if hasattr(ds, 'StudyDate') else 'NA'
            # Private tags (group 0x0065) hold vendor-specific func/task info.
            # Narrowed the bare `except:` to Exception so KeyboardInterrupt
            # and SystemExit still propagate.
            try:
                func = ds[0x0065, 0x102b].value.split('\\')[1]
            except Exception:
                func = 'NA'
            try:
                task = ds[0x0065, 0x100c].value
            except Exception:
                task = 'NA'
            sdecrp = str(ds.SeriesDescription) if hasattr(ds, 'SeriesDescription') else 'NA'
            # description + directory uniquely identifies a series here
            series_key = str(sdecrp + subdir)
            if series_key not in series_keys:
                series_keys.append(series_key)
                slices[series_key] = []
            pnames.append(pname)
            pids.append(pid)
            sdates.append(sdate)
            sdecrps.append(sdecrp)
            acqdatetimes.append(acqdate + acqtime)
            subdirs.append(subdir)
            stypes.append(stype)
            funcs.append(func)
            tasks.append(task)
            slices[series_key].append(file)
        # append to series
        series['01_PatientName'] += pnames
        series['02_PatientID'] += pids
        series['03_StudyDate'] += sdates
        series['04_AcquisitionDateTime'] += acqdatetimes
        series['05_SeriesDescription'] += sdecrps
        series['07_Load'] += [False] * len(pnames)
        series['08_SeriesRoot'] += subdirs
        series['10_Type'] += stypes
        series['11_Func'] += funcs
        series['12_Task'] += tasks
        for series_key in series_keys:
            series['06_NumberofSlices'].append(len(slices[series_key]))
            series['09_SeriesFiles'].append(slices[series_key])
    return series