def save_image_pointer(filepath, pat_name, output_dir, image_path,
                       image_ptr_list, suffix):
    """Write the image pointer entries of one patient to a text file.

    The file is created (or overwritten) in ``output_dir`` and is named after
    ``pat_name`` with spaces replaced by underscores, followed by ``suffix``.
    Its first line is a header that describes the columns and embeds
    ``filepath``; each following line is one item of ``image_ptr_list``.

    ``image_path`` is accepted but not used here.  Returns ``None``.
    """
    file_name = "{}{}".format(pat_name.replace(" ", "_"), suffix)
    output_file = os.path.join(output_dir, file_name)

    header_line = (
        "### SeriesNum(0=normal cine, 1=tagged cine) SliceNum IndexNum Path delimiter = \"\\t\" ({}) ###\n"
        .format(filepath))

    with open(output_file, "w") as handle:
        handle.write(header_line)
        # one pointer entry per line, in the order given by the caller
        handle.writelines("{}\n".format(entry) for entry in image_ptr_list)

    log_and_print(
        ["Created image pointer file location: {}".format(output_file)])
def prepare_img_ptrs(basepath, filepath, suffix, cim_dir, cim_models,
                     output_dir, output_dir_missing, output_dir_match):
    """Run the user-selected image pointer preparation steps in order.

    The three output directories are created when missing, then
    ``prompt_user()`` supplies the list of steps to run:

    * ``1`` - ``create_img_ptrs``
    * ``2`` - ``move_img_ptrs``
    * ``3`` - ``find_match``

    A ``KeyboardInterrupt`` raised by a step stops the remaining steps; the
    final summary is still logged.  The function reads the module-level
    names ``start_`` (program start time) and ``logname`` (log file name).
    """
    # make sure every output directory exists before any step runs
    for directory in (output_dir, output_dir_missing, output_dir_match):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # ask the user which steps should be executed
    steps = prompt_user()

    try:
        for step in steps:
            try:
                if step == 1:
                    create_img_ptrs(filepath, output_dir, suffix)
                if step == 2:
                    move_img_ptrs(filepath, output_dir, output_dir_missing,
                                  suffix)
                if step == 3:
                    find_match(filepath, output_dir, cim_models, cim_dir,
                               output_dir_match, suffix)
            except KeyboardInterrupt:
                # the user cancelled this step: skip the remaining ones
                break

        hrs, mins, secs = calculate_time_elapsed(start_)
        elapsed_line = (
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs))
        log_and_print([
            "====================MAIN PROGRAM FINISHED!====================",
            elapsed_line,
        ])
    except KeyboardInterrupt:
        print("Operation Interrupted")

    log_location = os.path.join(basepath, logname)
    print("Log file stored in: {}".format(log_location))
def prepare_h5_files(filepaths, ptr_files_path, ed_h5_filepath, cim_patients,
                     output_dir, output_filename, num_cases):
    """Split the matched pointer files into train/validation/test and build the h5 file.

    Parameters
    ----------
    filepaths, cim_patients
        Passed through unchanged to ``create_h5_file``.
    ptr_files_path
        Directory scanned for files ending in ``"_match.img_imageptr"``.
    ed_h5_filepath
        Forwarded to ``create_h5_file`` as its ``eds_h5_filepath`` argument.
    output_dir, output_filename
        Where the h5 file is written; ``output_dir`` is created if missing.
    num_cases
        When not ``None``, only the first ``num_cases`` pointer files (in
        ``os.listdir`` order, i.e. before shuffling) are used.

    The selected pointer files are shuffled in place and divided 60% / 20% /
    20% into the ``"train"``, ``"validation"`` and ``"test"`` sets.
    """
    # create the output directory if it is non-existent
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    log_and_print("h5 file will be stored in {}".format(output_dir))
    log_and_print("Output filename: {}".format(output_filename))

    # collect the names of all matched pointer files
    ptr_files = [
        f for f in os.listdir(ptr_files_path)
        if f.endswith("_match.img_imageptr")
    ]
    if num_cases is not None:
        ptr_files = ptr_files[:num_cases]
    log_and_print("Creating h5 file from {} pointer files\n".format(
        len(ptr_files)))

    shuffle_data = True
    if shuffle_data:
        log_and_print("Shuffling data...")
        shuffle(ptr_files)

    # Divide the data into 60% train, 20% validation, and 20% test
    # ref code: http://machinelearninguru.com/deep_learning/data_preparation/hdf5/hdf5.html
    n_files = len(ptr_files)
    train_end = int(0.6 * n_files)
    val_end = int(0.8 * n_files)
    dataset_dict = {
        "train": ptr_files[0:train_end],
        "validation": ptr_files[train_end:val_end],
        "test": ptr_files[val_end:],
    }

    # The original passed the undefined name ``eds_h5_filepath`` here, which
    # raised NameError; the parameter of this function is ``ed_h5_filepath``.
    create_h5_file(filepaths, ptr_files_path, ed_h5_filepath, cim_patients,
                   output_dir, output_filename, dataset_dict)
def find_match(filepath, ptr_files_path, cim_models, cim_dir,
               output_dir_match, suffix):
    """Create "<patient>_match.img_imageptr" files pairing cine and tagged slices.

    For each pointer file in ``ptr_files_path`` (starting at an index typed
    in by the user) the CIM pointer file whose path contains the patient name
    is looked up below ``cim_dir/<model>`` for every model in ``cim_models``.
    For every first tagged frame listed in the CIM pointer file, the tagged
    slice with the same file name is located in our pointer file, and the
    cine slices whose DICOM ``SliceLocation`` agrees (within ``1e-5`` on the
    absolute values, and with equal ``PatientPosition``) are collected.  The
    slice numbers of the collected series are replaced by the CIM slice
    number.  When matches were found for the patient, the cine rows followed
    by the tagged rows are written to ``output_dir_match``.

    ``suffix`` is stripped from the pointer file name to obtain the patient
    name.

    NOTE(review): ``filepath`` is not used.  As in the original code, the
    image root is hard-coded to ``E:\\Original Images\\2015`` with a
    per-patient fallback to ``E:\\Original Images\\2014``.

    ``KeyboardInterrupt`` produces a cancellation summary; any other
    ``Exception`` is logged as "UNEXPECTED ERROR" and ends the function.
    """
    start = time()  # start time for this function
    output_messages = [
        "====================FINDING MATCHES====================",
        "Operation started at {}".format(datetime.now().time()),
        "Finding matches for the image pointers stored in {}\n".format(
            ptr_files_path)
    ]
    log_and_print(output_messages)

    # list each pointer file
    ptr_files = [
        ptr for ptr in os.listdir(ptr_files_path)
        if ptr.endswith("img_imageptr")
    ]

    # get all the paths to the cim ptr files
    # (loop variable renamed so it no longer shadows the ``cim_dir`` parameter)
    cim_ptr_files = []
    for model_dir in [os.path.join(cim_dir, d) for d in cim_models]:
        cim_ptr_files += get_pointer_paths(model_dir)

    # initialise other variables
    file_c = len([
        f for f in os.listdir(output_dir_match) if f.endswith(".img_imageptr")
    ])  # variable to keep file count
    initial_file_c = file_c  # number of files at the start of the program
    tol = 1e-5  # tolerance to check if two slices match
    match = False  # initialise match state
    start_i = int(input("Enter the patient number you want to start with: "))
    print()

    # Pre-bind the names used by the summaries and handlers below so they
    # cannot raise NameError when no patient was processed.
    i = -1
    patient_name = None

    datatype = [('series', '<i4'), ('slice', '<i4'), ('index', '<i4'),
                ('path', 'U255')]
    dicom_tags = ["SliceLocation", "PatientPosition"]

    # looping through each pointer file
    log_and_print(
        ["Creating new image pointers in {}\n".format(output_dir_match)])
    try:
        for i, ptr in enumerate(ptr_files):
            if i < start_i:
                continue

            ptr_path = os.path.join(ptr_files_path, ptr)
            # list all image pointers created by this program so far
            match_ip = [
                f for f in os.listdir(output_dir_match)
                if f.endswith(".img_imageptr")
            ]
            patient_name = ptr.replace(suffix, "")
            image_root = "E:\\Original Images\\2015"  # default image root
            cine_image_file = None  # last cine image read for THIS patient
            print("Checking patient {} of {}: {}".format(
                i + 1, len(ptr_files), patient_name))
            output_filename = "{}_match.img_imageptr".format(patient_name)

            if output_filename in match_ip:
                # the matching pointer file already exists for this patient
                continue

            # get the cim pointer path of the current patient
            try:
                cim_ptr_path = [
                    cim_ptr for cim_ptr in cim_ptr_files
                    if patient_name.lower() in cim_ptr.lower()
                ][0]
                logger.info(
                    "Patient#{}: {} - CIM image pointer found: {}".format(
                        i + 1, patient_name, cim_ptr_path))
            except IndexError:
                try:
                    # second attempt without the "_bio" part of the name
                    cim_ptr_path = [
                        cim_ptr for cim_ptr in cim_ptr_files
                        if patient_name.lower().replace("_bio", "") in
                        cim_ptr.lower()
                    ][0]
                    logger.info(
                        "Patient#{}: {} - CIM image pointer found: {}".format(
                            i + 1, patient_name, cim_ptr_path))
                except IndexError:
                    logger.error(
                        "Patient#{}: {} - No image pointer file found in the CIM folders\n"
                        .format(i + 1, patient_name))
                    continue

            # reading the content of the pointer files (ours and the cim one)
            ptr_content = np.genfromtxt(ptr_path,
                                        delimiter='\t',
                                        names='series, slice, index, path',
                                        skip_header=1,
                                        dtype=datatype)
            cim_ptr_content = np.genfromtxt(
                cim_ptr_path,
                delimiter='\t',
                names='series, slice, index, path',
                skip_header=1,
                dtype=datatype)
            # header is written back to the output file with the matches
            with open(ptr_path, "r") as f:
                header = f.readline().strip()

            # only extract the first frame of each slice
            condition_1 = np.logical_and(ptr_content["series"] == 0,
                                         ptr_content["index"] == 0)
            condition_2 = np.logical_and(ptr_content["series"] == 1,
                                         ptr_content["index"] == 0)
            condition_3 = np.logical_and(cim_ptr_content["series"] == 0,
                                         cim_ptr_content["index"] == 0)
            first_fr_cine = ptr_content[condition_1]  # normal cine series
            first_fr_tagged = ptr_content[condition_2]  # tagged cine series
            cim_first_fr_tagged = cim_ptr_content[condition_3]  # cim file

            # loop through the slices in cim tagged cine
            tagged_array = []  # reset tagged array
            cine_array = []  # reset cine array
            for cim_frame_t in cim_first_fr_tagged:
                cim_file_name = os.path.basename(cim_frame_t["path"])
                try:
                    # index of the slice in our pointer file that matches
                    # the current slice of the cim pointer file
                    tagged_slice_i = [
                        curr_i
                        for curr_i, curr_slice in enumerate(first_fr_tagged)
                        if cim_file_name in curr_slice["path"]
                    ][0]
                except IndexError:
                    # a cim slice without counterpart discards the patient
                    match = False
                    logger.error("Error Occurred\n", exc_info=True)
                    break

                tagged_slice = first_fr_tagged[tagged_slice_i]
                # the whole tagged series of that slice, renumbered with the
                # slice number used in the cim pointer file
                tagged_series = ptr_content[np.logical_and(
                    ptr_content["series"] == 1,
                    ptr_content["slice"] == tagged_slice["slice"])]
                tagged_series[:]["slice"] = cim_frame_t["slice"]
                # drop the consumed slice to reduce later search time
                first_fr_tagged = np.delete(first_fr_tagged, tagged_slice_i)

                tagged_image_file = tagged_slice["path"].replace(
                    "IMAGEPATH", image_root)
                if not os.path.exists(tagged_image_file):
                    # the file lives in the 2014 folder
                    tagged_image_file = tagged_slice["path"].replace(
                        "IMAGEPATH", "E:\\Original Images\\2014")
                    image_root = "E:\\Original Images\\2014"
                ds_tag = pydicom.dcmread(tagged_image_file,
                                         specific_tags=dicom_tags)

                # Loop through each slice in the cine series.  Matched rows
                # are removed only AFTER the loop: deleting while iterating
                # (as the original did) makes later indices point at the
                # wrong row of the shortened array.
                matched_cine_idx = []
                for k, frame_c in enumerate(first_fr_cine):
                    cine_image_file = frame_c["path"].replace(
                        "IMAGEPATH", image_root)
                    ds_cine = pydicom.dcmread(cine_image_file,
                                              specific_tags=dicom_tags)
                    if ds_tag.PatientPosition != ds_cine.PatientPosition:
                        log_and_print(["Patient position not the same"])
                        continue
                    if abs(
                            abs(ds_tag.SliceLocation) -
                            abs(ds_cine.SliceLocation)) > tol:
                        continue

                    # the entire cine series matching the tagged slice,
                    # renumbered with the cim slice number
                    cine_series = ptr_content[np.logical_and(
                        ptr_content["series"] == 0,
                        ptr_content["slice"] == frame_c["slice"])]
                    cine_series[:]["slice"] = cim_frame_t["slice"]
                    if not match:
                        # first match initialises the arrays to be saved
                        match = True
                        cine_array = cine_series.flatten()
                        tagged_array = tagged_series.flatten()
                    else:
                        cine_array = np.append(cine_array,
                                               cine_series.flatten())
                        tagged_array = np.append(tagged_array,
                                                 tagged_series.flatten())
                    matched_cine_idx.append(k)

                if matched_cine_idx:
                    first_fr_cine = np.delete(first_fr_cine, matched_cine_idx)

            if match:
                output_array = np.append(cine_array, tagged_array)
                # create the image pointer file containing the matching
                # slices of the cine and tagged series
                output_path = os.path.join(output_dir_match, output_filename)
                logger.info("New image pointer file: {}\n".format(output_path))
                np.savetxt(output_path,
                           output_array,
                           fmt="%2d\t%2d\t%2d\t%s",
                           delimiter="\t",
                           header=header,
                           comments="")
                file_c += 1
                match = False  # reset match status
            else:
                # ``cine_image_file`` is None when no cine image was read for
                # this patient; the original raised NameError here (or
                # reported the previous patient's directory).
                if cine_image_file is not None:
                    patient_dir = os.path.dirname(cine_image_file)
                else:
                    patient_dir = (
                        "unknown (no cine image was read; pointer file: {})"
                        .format(ptr_path))
                logger.error("No match found! Patient directory: {}\n".format(
                    patient_dir))
                continue

        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================MATCHES FOUND!=========================",
            "Looped through {} of {} patients".format(
                max(0, i + 1 - start_i), len(ptr_files)),
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir_match),
            "Operation finished at {}".format(str(datetime.now())),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
    except KeyboardInterrupt:
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================CREATION OF IMAGE POINTERS CANCELLED(MATCH)!=========================",
            "Operation cancelled at {}".format(str(datetime.now())),
            "Looped through {} of {} patients".format(
                max(0, i + 1 - start_i), len(ptr_files)),
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir_match),
            "Last iteration info: Patient#{} {}".format(i, patient_name),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
    except Exception:
        # top-level boundary of this step: log with traceback and return
        logger.error("UNEXPECTED ERROR", exc_info=True)
def move_img_ptrs(filepath, ptr_path, output_dir_missing, suffix):
    """Move pointer files that contain no tagged series to ``output_dir_missing``.

    Every ``*.img_imageptr`` file in ``ptr_path`` is read; when it has no row
    with ``series == 1`` (tagged cine) the file is moved with
    ``shutil.move``.  ``suffix`` is only used to derive the patient name
    shown in the progress output; ``filepath`` is not used.

    A summary is logged on completion.  On ``KeyboardInterrupt`` a
    cancellation summary is logged instead and the function returns.
    """
    start = time()  # start time for this function
    output_messages = [
        "====================MOVING IMAGE POINTERS====================",
        "Operation started at {}".format(datetime.now().time())
    ]
    log_and_print(output_messages)

    # get the path to each pointer file
    ptr_files = [
        os.path.join(ptr_path, ptr) for ptr in os.listdir(ptr_path)
        if ptr.endswith(".img_imageptr")
    ]

    # count of pointer files already present in the destination directory
    moved_c = len([
        f for f in os.listdir(output_dir_missing)
        if f.endswith(".img_imageptr")
    ])
    initial_moved_c = moved_c

    datatype = [('series', '<i4'), ('slice', '<i4'), ('index', '<i4'),
                ('path', 'U255')]

    # looping through each pointer file
    log_and_print(
        ["Moving image pointer files to {}\n".format(output_dir_missing)])
    i = -1  # keeps the summaries valid when there are no pointer files
    try:
        for i, ptr in enumerate(ptr_files):
            # reading the content of the pointer file
            ptr_content = np.genfromtxt(ptr,
                                        delimiter='\t',
                                        names='series, slice, index, path',
                                        skip_header=1,
                                        dtype=datatype)

            # all rows that belong to the tagged series
            tagged_rows = ptr_content[ptr_content["series"] == 1]

            patient_name = os.path.basename(ptr).replace(suffix, "")
            print("Checking patient {} of {}: {}".format(
                i + 1, len(ptr_files), patient_name))

            if len(tagged_rows) == 0:
                log_and_print(
                    ["Moving image pointer {}".format(os.path.basename(ptr))])
                shutil.move(ptr, output_dir_missing)
                # count only after the move succeeded
                moved_c += 1

        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================IMAGE POINTERS MOVED!=========================",
            "Image pointers moved to {}".format(output_dir_missing),
            "Looped through {} of {} patients".format(i + 1, len(ptr_files)),
            "Number of pointer files moved: {}".format(moved_c),
            "Operation finished at {}".format(str(datetime.now().time())),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
    except KeyboardInterrupt:
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================TRANSFER OF IMAGE POINTERS CANCELLED!=========================",
            "Operation cancelled at {}".format(str(datetime.now())),
            "Looped through {} of {} patients".format(i + 1, len(ptr_files)),
            # the original subtracted in the wrong order and reported a
            # negative number of moved files
            "Number of pointer files moved during operation: {}".format(
                moved_c - initial_moved_c),
            "Total elapsed time: {} hours {} minutes {} seconds".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
        # NOTE(review): the original logs the user's cancellation as an
        # "unexpected error" with traceback; kept as is - confirm intent.
        logger.error("Unexpected error occured at {}\n".format(
            str(datetime.now())),
                     exc_info=True)
def create_img_ptrs(filepath, output_dir, suffix):
    """Create one image pointer file per patient directory found below ``filepath``.

    ``filepath`` is walked with ``os.walk``; every directory that directly
    contains at least one ``.dcm`` file counts as one patient (numbered from
    0).  The user is asked for the patient number to start with.  For each
    patient from that number on whose pointer file
    ``<patient name with underscores><suffix>`` does not yet exist in
    ``output_dir``, ``create_new_image_pointer`` is called.

    A summary is logged on completion; ``KeyboardInterrupt`` produces a
    cancellation summary instead.
    """
    start = time()  # start time for this function
    output_messages = [
        "====================CREATING IMAGE POINTERS====================",
        "Operation started at {}".format(datetime.now().time())
    ]
    log_and_print(output_messages)

    # series identifiers (series of interest)
    sax_cine_prefix = "CINE_segmented_SAX_b"
    tagged_cine_prefix = "cine_tagging_3sl_SAX_b"

    # initialise other variables
    file_c = len([
        f for f in os.listdir(output_dir) if f.endswith(".img_imageptr")
    ])  # variable to keep file count
    initial_file_c = file_c  # number of files at the start of the program
    start_i = int(
        input(
            "Enter the patient number you want to start with (beginning = 0): "
        ))
    print()

    # Bound before the try block so the cancellation summary cannot raise
    # NameError when the user interrupts before the first patient.
    i = -1
    patient_name = None

    log_and_print(["Creating image pointer files in {}\n".format(output_dir)])
    try:
        for root, dirs, files in os.walk(filepath):
            # only directories that hold dicom files are patients
            if not any(f.endswith(".dcm") for f in files):
                continue
            i += 1  # keep count of the patient number
            if i < start_i:
                continue

            existing_pointers = [
                imgptr for imgptr in os.listdir(output_dir)
                if imgptr.endswith(".img_imageptr")
            ]
            patient_path = root
            patient_name = get_patient_name(patient_path)
            pointer_name = "{}{}".format(patient_name.replace(" ", "_"),
                                         suffix)

            if pointer_name in existing_pointers:
                # info for user if image pointer already exists
                print("{}{} already exists".format(
                    patient_name.replace(" ", "_"), suffix))
                print("Iteration info: Patient#{} {}\n".format(
                    i, patient_name))
                continue

            # pointer files store paths relative to a generic root marker
            gen_patient_path = patient_path.replace(filepath, "IMAGEPATH")
            try:
                create_new_image_pointer(filepath, patient_name, patient_path,
                                         gen_patient_path, sax_cine_prefix,
                                         tagged_cine_prefix, output_dir,
                                         suffix)
            except FileNotFoundError:
                logger.error(
                    "\nThe system cannot find the path specified {}\n".format(
                        patient_path),
                    exc_info=True)
                continue

            # create_new_image_pointer can return without writing a file
            # (unparsable series name), so count only files that exist.
            if not os.path.isfile(os.path.join(output_dir, pointer_name)):
                logger.error(
                    "No image pointer file was written for {}\n".format(
                        patient_path))
                continue
            file_c += 1

            # info for user
            output_messages = [
                "Total # of image pointers created: {}".format(file_c),
                "Iteration info: Patient#{} {}\n".format(i, patient_name)
            ]
            log_and_print(output_messages)

        # display when code is finished running
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================IMAGE POINTERS CREATED!=========================",
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir),
            "Operation finished at {}".format(str(datetime.now())),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
    except KeyboardInterrupt:
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================CREATION OF IMAGE POINTERS CANCELLED!=========================",
            "Operation cancelled at {}".format(str(datetime.now())),
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir),
            "Last iteration info: Patient#{} {}".format(i, patient_name),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
def _slice_number_from_series(series_desc, prefix, image_path,
                              drop_last_char_first):
    """Return the zero-based slice number encoded after ``prefix``.

    Two readings of the text that follows ``prefix`` are tried in turn: the
    whole remainder, and the remainder without its last character.
    ``drop_last_char_first`` selects which one is tried first.  Returns
    ``None`` when neither reading is an integer.
    """
    remainder = series_desc[len(prefix):]
    remainder_trimmed = series_desc[len(prefix):len(series_desc) - 1]
    if drop_last_char_first:
        first_guess, second_guess = remainder_trimmed, remainder
    else:
        first_guess, second_guess = remainder, remainder_trimmed

    try:
        return int(first_guess) - 1
    except ValueError:
        logger.error(
            "Special name case found | Patient dir: {}".format(image_path))

    try:
        logger.error("Trying different approach...")
        slice_number = int(second_guess) - 1
    except ValueError:
        logger.error("Issue unresolved\n")
        return None
    logger.info("Issue resolved!\n")
    return slice_number


def create_new_image_pointer(filepath, pat_name, image_path, gen_image_path,
                             sax_cine_prefix, tagged_cine_prefix, output_dir,
                             suffix):
    """Build and save the image pointer file of one patient directory.

    Every ``.dcm`` file in ``image_path`` is read for its
    ``SeriesDescription`` and ``InstanceNumber``.  Files whose description
    contains ``sax_cine_prefix`` become series 0 entries, files whose
    description contains ``tagged_cine_prefix`` become series 1 entries; all
    other files are ignored.  Each entry is a tab separated line
    ``series, slice, index, <gen_image_path>\\<file name>``.  The sorted
    entries are written with ``save_image_pointer``.

    Files without a ``SeriesDescription`` are skipped.  When the slice number
    of a series cannot be parsed the function returns early and nothing is
    saved.
    """
    start = time()
    dicom_names = [f for f in os.listdir(image_path) if f.endswith(".dcm")]
    entries = []  # cine and tagged image files
    wanted_tags = ["SeriesDescription", "InstanceNumber"]

    log_and_print([
        "Making image pointer file for {} from {}".format(
            pat_name, image_path)
    ])

    for dicom_name in dicom_names:
        dicom_path = os.path.join(image_path, dicom_name)
        try:
            ds = pydicom.dcmread(dicom_path, specific_tags=wanted_tags)
        except pydicom.errors.InvalidDicomError:
            print("Forcing read...")
            logger.error("Forcing read...")
            ds = pydicom.dcmread(dicom_path,
                                 force=True,
                                 specific_tags=wanted_tags)

        try:
            curr_series = ds.SeriesDescription
        except AttributeError:
            print("Skipping file in {}".format(dicom_path))
            logger.error("Skipping file in {}".format(dicom_path))
            continue

        if sax_cine_prefix in curr_series:
            # normal cines: name is "CINE_segmented_SAX_b#", # = slice number
            series_n = 0
            slice_n = _slice_number_from_series(curr_series, sax_cine_prefix,
                                                image_path, False)
        elif tagged_cine_prefix in curr_series:
            # tagged cines: name is "cine_tagging_3sl_SAX_b#s"
            series_n = 1
            slice_n = _slice_number_from_series(curr_series,
                                                tagged_cine_prefix,
                                                image_path, True)
        else:
            continue

        if slice_n is None:
            # the series name could not be interpreted: give up on patient
            return

        index = ds.InstanceNumber - 1
        entries.append("{:>2}\t{:>2}\t{:>2}\t{}\\{}".format(
            series_n, slice_n, index, gen_image_path, dicom_name))

    log_and_print([
        "Looped through {} of {} files in {}".format(len(dicom_names),
                                                     len(dicom_names),
                                                     pat_name)
    ])
    print("Sorting...")
    entries.sort()

    # saving the image pointer as a file
    save_image_pointer(filepath, pat_name, output_dir, image_path, entries,
                       suffix)

    # time keeping
    hrs, mins, secs = calculate_time_elapsed(start)
    log_and_print(["Elapsed time: {} minutes {} seconds".format(mins, secs)])
''' This is the main function To be modified: basepath, filepath, cim_dir, cim_models ''' if __name__ == "__main__": # start logging start_ = time() # to keep time ts = datetime.fromtimestamp(start_).strftime( '%Y-%m-%d') #time stamp for the log file logname = "{}-prepare-img-ptrs.log".format(ts) logging.basicConfig(filename=logname, level=logging.DEBUG) output_messages = [ "====================STARTING MAIN PROGRAM====================", "Operation started at {}\n".format(datetime.now().time()) ] log_and_print(output_messages) # where you want the outputs to be stored basepath = os.path.join(os.getcwd()) #basepath = "" #uncomment and specify if you want the basepath to be different from the working directory # where the multipatient folders are stored filepath = "E:\\Original Images" # output image pointer file suffix suffix = "_cine_and_tagged.img_imageptr" # dirnames and paths of the cim models cim_dir = "C:\\Users\\arad572\\Downloads\\all CIM" cim_models = [ "CIM_DATA_AB", "CIM_DATA_EL1", "CIM_DATA_EL2", "CIM_DATA_EM",
def _pad_with_border(image, h_diff, w_diff):
    """Add a constant zero border of ``h_diff`` rows and ``w_diff`` columns in total."""
    top = h_diff // 2
    left = w_diff // 2
    return cv2.copyMakeBorder(image,
                              top,
                              h_diff - top,
                              left,
                              w_diff - left,
                              cv2.BORDER_CONSTANT,
                              value=[0, 0, 0])


def prepare_dicom_images(filepaths, ptr_content, slices, view):
    """Load, resize and pad the cine and tagged frames of the given slices.

    For every slice number in ``slices`` the rows of ``ptr_content`` with
    that slice are split into cine (``series == 0``) and tagged
    (``series == 1``) frames.  Up to 20 tagged frames are read and padded
    with a zero border using the sizes returned by
    ``get_dimensions(img, 256, 256)``; images for which padding fails are
    first shrunk (aspect ratio kept) so their longest side is 256.  Up to 50
    cine frames are read, resized to the size of the first tagged frame and
    padded with the same border.  Missing frames are filled with
    ``np.zeros((256, 256))`` and an empty path.

    Returns a 10-tuple: cine paths, tagged paths (lists of lists), cine
    images, tagged images, cine/tagged pixel spacing (first component),
    cine/tagged ``ImagePositionPatient`` and cine/tagged
    ``ImageOrientationPatient``.  The metadata of a slice is taken from the
    last cine / tagged DICOM file read for that slice.

    Raises ``ValueError`` when a slice has no cine or no tagged frame (the
    original failed with ``NameError`` or silently reused the previous
    slice's metadata).

    When ``view`` is true, the first cine and tagged frame of the last slice
    are shown with ``view_images``.
    """
    target = 256  # side length of the output images
    n_tagged = 20  # number of tagged frames kept per slice
    n_cine = 50  # number of cine frames kept per slice

    cine_dicom_paths = []
    tagged_dicom_paths = []
    cine_images = []
    tagged_images = []
    cine_px_spaces = []
    tagged_px_spaces = []
    cine_img_pos = []
    tagged_img_pos = []
    cine_img_orient = []
    tagged_img_orient = []

    # loop through the slices
    for index, i in enumerate(slices):
        tmp_cine_dp = []
        tmp_tagged_dp = []
        tmp_cine_images = []
        tmp_tagged_images = []

        # separate the cine frames and the tagged frames
        slice_con = ptr_content["slice"] == i
        cine_frames = ptr_content[np.logical_and(ptr_content["series"] == 0,
                                                 slice_con)]
        tagged_frames = ptr_content[np.logical_and(
            ptr_content["series"] == 1, slice_con)]
        if len(cine_frames) == 0 or len(tagged_frames) == 0:
            raise ValueError(
                "Slice {} has {} cine frame(s) and {} tagged frame(s); "
                "both series are required".format(i, len(cine_frames),
                                                  len(tagged_frames)))

        for j in range(n_tagged):
            if j < len(tagged_frames):
                gen_tagged_imagepath = tagged_frames["path"][j]
                tmp_tagged_dp.append(gen_tagged_imagepath)
                tagged_imagepath = get_image_path(gen_tagged_imagepath,
                                                  filepaths)
                dst = pydicom.dcmread(tagged_imagepath)
                dst_img = dst.pixel_array
                tag_h, tag_w, h_diff, w_diff = get_dimensions(
                    dst_img, target, target)
                try:
                    dst_img_res = _pad_with_border(dst_img, h_diff, w_diff)
                except ValueError:
                    log_and_print(
                        "Dicom width/height larger than 256 for patient {}".
                        format(dst.PatientName))
                    # Shrink so the longest side becomes 256.  cv2.resize
                    # needs integer sizes, and the comparison must include
                    # square images (the original handled neither).
                    scale = min(1.0, float(target) / max(tag_h, tag_w))
                    tag_w = max(1, int(round(tag_w * scale)))
                    tag_h = max(1, int(round(tag_h * scale)))
                    dst_img_res = cv2.resize(dst_img, (tag_w, tag_h),
                                             interpolation=cv2.INTER_CUBIC)
                    # border sizes must come from the RESIZED image
                    tag_h, tag_w, h_diff, w_diff = get_dimensions(
                        dst_img_res, target, target)
                    dst_img_res = _pad_with_border(dst_img_res, h_diff,
                                                   w_diff)
                tmp_tagged_images.append(dst_img_res)

                if j == 0:
                    # we'll use the end diastolic image info to resize our
                    # cine frames; only take 50 frames
                    for k in range(n_cine):
                        if k < len(cine_frames):
                            gen_cine_imagepath = cine_frames["path"][k]
                            tmp_cine_dp.append(gen_cine_imagepath)
                            cine_imagepath = get_image_path(
                                gen_cine_imagepath, filepaths)
                            dsc = pydicom.dcmread(cine_imagepath)
                            dsc_img_res = cv2.resize(
                                dsc.pixel_array, (tag_w, tag_h),
                                interpolation=cv2.INTER_CUBIC)
                            tmp_cine_images.append(
                                _pad_with_border(dsc_img_res, h_diff, w_diff))
                        else:
                            # the slice has less than 50 cine frames
                            if k == len(cine_frames):
                                log_and_print(
                                    "Adding {} cine frame(s) for patient {}".
                                    format(n_cine - len(cine_frames),
                                           dsc.PatientName))
                            tmp_cine_dp.append("")
                            tmp_cine_images.append(
                                np.zeros((target, target)))
            else:
                # the slice has less than 20 tagged frames
                if j == len(tagged_frames):
                    log_and_print(
                        "Adding {} tagged frame(s) for patient {}".format(
                            n_tagged - len(tagged_frames), dst.PatientName))
                tmp_tagged_dp.append("")
                tmp_tagged_images.append(np.zeros((target, target)))

        cine_dicom_paths.append(tmp_cine_dp)
        tagged_dicom_paths.append(tmp_tagged_dp)
        cine_images.append(tmp_cine_images)
        tagged_images.append(tmp_tagged_images)
        cine_px_spaces.append(dsc.PixelSpacing[0])
        tagged_px_spaces.append(dst.PixelSpacing[0])
        cine_img_pos.append(dsc.ImagePositionPatient)
        tagged_img_pos.append(dst.ImagePositionPatient)
        cine_img_orient.append(dsc.ImageOrientationPatient)
        tagged_img_orient.append(dst.ImageOrientationPatient)

        if index == len(slices) - 1:
            cine_images = np.array(cine_images)
            tagged_images = np.array(tagged_images)
            # NOTE(review): the nesting of this call could not be recovered
            # from the flattened source; it may originally have run for
            # every slice instead of only the last one.
            if view:
                # viewing the ED frame
                view_images(cine_images[index][0], tagged_images[index][0])

    cine_px_spaces = np.array(cine_px_spaces)
    tagged_px_spaces = np.array(tagged_px_spaces)
    cine_img_pos = np.array(cine_img_pos)
    tagged_img_pos = np.array(tagged_img_pos)
    cine_img_orient = np.array(cine_img_orient)
    tagged_img_orient = np.array(tagged_img_orient)

    return (cine_dicom_paths, tagged_dicom_paths, cine_images, tagged_images,
            cine_px_spaces, tagged_px_spaces, cine_img_pos, tagged_img_pos,
            cine_img_orient, tagged_img_orient)
def get_data_from_h5_file(h5_filepath, cim_path, patient_name, ptr_slices):
    """Collect bounding boxes and landmark coordinates of one patient.

    ``h5_filepath`` is searched for files matching ``*.seq.noresize.*.h5``
    whose path contains the model part of ``cim_path`` (case-insensitive).
    ``cim_path`` is split with ``os.path.split`` into model and patient
    name.  In each candidate file the rows of the patient are located in the
    ``patients`` dataset and, for every slice number ``sl`` in
    ``ptr_slices``, the row labelled ``"series_1_slice_<sl + 1>"`` in
    ``slices`` is looked up.

    Returns ``(patient_names, cim_paths, slices, bbox_corners,
    landmark_coords)`` for the first file in which at least one slice was
    found, or five ``None`` values when nothing was found.
    """
    # h5 files in the directory that follow the expected naming scheme
    all_h5_files = []
    for entry in os.listdir(h5_filepath):
        if fnmatch.fnmatch(entry, "*.seq.noresize.*.h5"):
            all_h5_files.append(os.path.join(h5_filepath, entry))

    # cim path is MODEL\PatientName
    cim_model_name, cim_pat_name = os.path.split(cim_path)

    # keep only the files of the observer (model) of this patient
    model_key = cim_model_name.lower()
    candidate_files = [f for f in all_h5_files if model_key in f.lower()]

    patient_names = []
    cim_paths = []
    slices = []
    bbox_corners = []
    landmark_coords = []

    for path in candidate_files:
        with h5py.File(path, 'r') as hf:
            patients = np.array(hf.get("patients"))
            p_indices = np.array(np.where(patients == cim_pat_name))[0]
            if len(p_indices) == 0:
                continue  # patient is not stored in this file

            lo = p_indices[0]
            hi = p_indices[-1] + 1
            tmp_slices = np.array(hf.get("slices"))[lo:hi]
            tmp_bbox_corners = np.array(hf.get("bbox_corners"))[lo:hi, :]
            tmp_landmark_coords = np.array(
                hf.get("ed_coords"))[lo:hi, :, :, :]

            for sl in ptr_slices:
                wanted_label = "series_1_slice_{}".format(sl + 1)
                p_index = np.array(np.where(tmp_slices == wanted_label))[0]
                if len(p_index) == 0:
                    continue  # no bbox corners stored for this slice

                if len(p_index) > 1:
                    log_error_and_print(
                        "Duplicate slice! Patient: {}, CIM_Path: {}, h5 filename: {}, indices: {}"
                        .format(patient_name, cim_path, path, p_indices))
                    log_and_print("Adding only one slice...")
                    p_index = [p_index[0]]

                is_first_hit = len(slices) == 0
                slices.append(sl)
                cim_paths.append(cim_path)
                patient_names.append(patient_name.replace("_", " "))

                if is_first_hit:
                    bbox_corners = tmp_bbox_corners[p_index, :]
                    landmark_coords = tmp_landmark_coords[p_index, :, :, :]
                else:
                    bbox_corners = np.append(bbox_corners,
                                             tmp_bbox_corners[p_index, :],
                                             axis=0)
                    landmark_coords = np.append(
                        landmark_coords,
                        tmp_landmark_coords[p_index, :, :, :],
                        axis=0)

            if len(slices) != 0:
                # data found: hand it back to be added to the dataset model
                slices = np.array(slices)
                if slices.shape[0] != bbox_corners.shape[0]:
                    print(slices.shape[0], bbox_corners.shape[0])
                    log_error_and_print(
                        "Adding unequal number of data...\nPatient: {}, CIM_Path: {}, h5 filename: {}, indices: {}"
                        .format(patient_name, cim_path, path, p_indices))
                return (patient_names, cim_paths, slices, bbox_corners,
                        landmark_coords)

    return None, None, None, None, None
def create_h5_file(filepaths, ptr_files_path, eds_h5_filepath, cim_patients,
                   output_dir, output_filename, dataset_dict):
    """Write the data of every set in ``dataset_dict`` to one h5 file.

    ``dataset_dict`` maps a set name (group key) to a list of pointer file
    names.  The data of each pointer file is gathered into a
    ``DataSetModel`` with ``get_all_data``.  To keep memory usage bounded
    the model is flushed to ``output_dir/output_filename`` whenever it holds
    5 unique patient names, and at the last pointer file of a set; after a
    flush a fresh model is started.  The group is created with
    ``create_datasets`` the first time and extended with ``add_datasets``
    afterwards.

    Author's design notes (contents are produced by ``create_datasets`` /
    ``add_datasets``, not verified here): one group per set (train, val,
    test) holding patients, cim_paths, slices, dicom paths, cine/tagged
    pixel spacings, cine/tagged images (N x T x 256 x 256), landmark_coords
    (N x 20 x 2 x 168), region, es_frame_idx and bbox corners (N x 4).
    """
    dsm = DataSetModel()  # initialise datasetmodel
    h5_path = os.path.join(output_dir, output_filename)
    print("Creating h5file...")

    for key, ptr_files in dataset_dict.items():
        log_and_print("Obtaining data for {} set".format(key))
        batch_start = time()  # restarted after every flush
        set_start = time()  # start of the whole set
        p_cnt = 0  # number of unique cases added to the set
        s_cnt = 0  # number of slices added to the set
        last_i = len(ptr_files) - 1

        for i, ptr in enumerate(ptr_files):
            get_all_data(dsm, filepaths, ptr_files_path, eds_h5_filepath,
                         cim_patients, ptr)

            batch_is_full = len(set(dsm.patient_names)) == 5
            if not (batch_is_full or i == last_i):
                continue

            n_cases = len(set(dsm.patient_names))
            n_slices = len(dsm.slices)
            p_cnt += n_cases
            s_cnt += n_slices
            print("Looped through {}/{} patients for {} set".format(
                i + 1, len(ptr_files), key))

            if os.path.isfile(h5_path):
                # append to the existing file
                with h5py.File(h5_path, 'a') as hf:
                    if "//{}".format(key) in hf:
                        add_datasets(hf, key, dsm)
                    else:
                        create_datasets(hf, key, dsm)
            else:
                # first write: the file does not exist yet
                with h5py.File(h5_path, 'w') as hf:
                    create_datasets(hf, key, dsm)

            hrs, mins, secs = calculate_time_elapsed(batch_start)
            print("Added {} unique cases to {} set".format(
                len(set(dsm.patient_names)), key))
            print("Added {} slices to {} set".format(len(dsm.slices), key))
            print("Elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs))
            batch_start = time()
            dsm = DataSetModel()  # reset the datasetmodel

            if i == last_i:
                hrs, mins, secs = calculate_time_elapsed(set_start)
                log_and_print("Finished creating {} set".format(key))
                log_and_print(
                    "Total number of unique cases: {}".format(p_cnt))
                log_and_print("Total number of slices: {}".format(s_cnt))
                log_and_print(
                    "Elapsed time: {} hours {} minutes {} seconds\n".format(
                        hrs, mins, secs))
def get_all_data(dsm, filepaths, ptr_files_path, eds_h5_filepath,
                 cim_patients, ptr):
    """Add the data of one matched pointer file to the dataset model ``dsm``.

    The patient name is ``ptr`` without ``"_match.img_imageptr"``.  The
    pointer file is read from ``ptr_files_path``, its landmark data is
    fetched with ``get_data_from_h5_file`` and, when found, its DICOM images
    are prepared with ``prepare_dicom_images``.  List attributes of ``dsm``
    are extended; array attributes are assigned when ``dsm.slices`` was
    empty on entry and appended to otherwise.

    When no landmark data exists for the patient only a message is logged.
    """
    # an empty model needs its numpy arrays initialised instead of appended
    model_is_empty = len(dsm.slices) == 0

    patient_name = ptr.replace("_match.img_imageptr", "")

    # get the cim path for the patient MODEL\PatientName
    cim_path = get_cim_path(patient_name, cim_patients)

    # read the content of the image pointer
    ptr_path = os.path.join(ptr_files_path, ptr)
    datatype = [('series', '<i4'), ('slice', '<i4'), ('index', '<i4'),
                ('path', 'U255')]
    ptr_content = np.genfromtxt(ptr_path,
                                delimiter='\t',
                                names='series, slice, index, path',
                                skip_header=1,
                                dtype=datatype)
    ptr_slices = get_slices(ptr_content)

    h5_data = get_data_from_h5_file(eds_h5_filepath, cim_path, patient_name,
                                    ptr_slices)
    patient_names, cim_paths, slices, bbox_corners, landmark_coords = h5_data

    if cim_paths is None:
        log_and_print(
            "No landmark coordinates for patient {}".format(patient_name))
        return

    print("Landmark coordinates found for patient {}".format(patient_name))
    print("Image pointer path: {}".format(ptr_path))

    # preprocess the dicom images
    (cine_dicom_paths, tagged_dicom_paths, cine_images, tagged_images,
     cine_px_spaces, tagged_px_spaces, cine_img_pos, tagged_img_pos,
     cine_img_orient, tagged_img_orient) = prepare_dicom_images(filepaths,
                                                                ptr_content,
                                                                slices,
                                                                view=False)

    # list-valued attributes simply grow
    dsm.patient_names.extend(patient_names)
    dsm.cim_paths.extend(cim_paths)
    dsm.cine_dicom_paths.extend(cine_dicom_paths)
    dsm.tagged_dicom_paths.extend(tagged_dicom_paths)

    # (attribute name, new values, axis used when appending); the pixel
    # spacings are appended without an axis, i.e. flattened
    array_fields = (
        ("slices", slices, 0),
        ("bbox_corners", bbox_corners, 0),
        ("landmark_coords", landmark_coords, 0),
        ("cine_images", cine_images, 0),
        ("tagged_images", tagged_images, 0),
        ("cine_px_spaces", cine_px_spaces, None),
        ("tagged_px_spaces", tagged_px_spaces, None),
        ("cine_img_pos", cine_img_pos, 0),
        ("tagged_img_pos", tagged_img_pos, 0),
        ("cine_img_orient", cine_img_orient, 0),
        ("tagged_img_orient", tagged_img_orient, 0),
    )
    for attr_name, new_values, axis in array_fields:
        if model_is_empty:
            setattr(dsm, attr_name, new_values)
        else:
            merged = np.append(getattr(dsm, attr_name), new_values, axis=axis)
            setattr(dsm, attr_name, merged)