def save_image_pointer(filepath, pat_name, output_dir, image_path,
                       image_ptr_list, suffix):
    """Write one patient's image pointer entries to a text file.

    The file is created inside ``output_dir``; its name is ``pat_name`` with
    spaces turned into underscores, followed by ``suffix``.  The first line
    is a column-description header that quotes ``filepath``, and each item of
    ``image_ptr_list`` is then written on its own line.  The location of the
    new file is reported through ``log_and_print``.

    ``image_path`` is accepted for interface compatibility but is not used.
    """
    file_name = "{}{}".format(pat_name.replace(" ", "_"), suffix)
    output_file = os.path.join(output_dir, file_name)

    with open(output_file, "w") as ptr_file:
        header_template = (
            "### SeriesNum(0=normal cine, 1=tagged cine) SliceNum IndexNum Path delimiter = \"\\t\" ({}) ###\n"
        )
        ptr_file.write(header_template.format(filepath))
        # one pointer entry per line, in the order given by the caller
        ptr_file.writelines(
            "{}\n".format(entry) for entry in image_ptr_list)

    log_and_print(
        ["Created image pointer file location: {}".format(output_file)])
def prepare_img_ptrs(basepath, filepath, suffix, cim_dir, cim_models,
                     output_dir, output_dir_missing, output_dir_match):
    """Run the pointer-preparation steps selected by the user.

    The three output directories are created when they do not exist yet, then
    ``prompt_user()`` supplies the step numbers to execute, in order:

    * 1 -> ``create_img_ptrs``
    * 2 -> ``move_img_ptrs``
    * 3 -> ``find_match``

    A ``KeyboardInterrupt`` raised by a step stops the remaining steps; the
    closing summary is still logged afterwards.

    Note: the summary reads the module-level names ``start_`` and ``logname``,
    which this function does not define itself.
    """
    # make sure every output directory exists before any step runs
    for directory in (output_dir, output_dir_missing, output_dir_match):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # ask the user which steps should be executed
    steps = prompt_user()

    try:
        for step in steps:
            try:
                if step == 1:
                    create_img_ptrs(filepath, output_dir, suffix)
                elif step == 2:
                    move_img_ptrs(filepath, output_dir, output_dir_missing,
                                  suffix)
                elif step == 3:
                    find_match(filepath, output_dir, cim_models, cim_dir,
                               output_dir_match, suffix)
            except KeyboardInterrupt:
                # an interrupted step cancels all steps that would follow
                break

        hrs, mins, secs = calculate_time_elapsed(start_)
        elapsed_line = "Total elapsed time: {} hours {} minutes {} seconds\n".format(
            hrs, mins, secs)
        log_and_print([
            "====================MAIN PROGRAM FINISHED!====================",
            elapsed_line,
        ])

    except KeyboardInterrupt:
        log_location = os.path.join(basepath, logname)
        print("Operation Interrupted")
        print("Log file stored in: {}".format(log_location))
def prepare_h5_files(filepaths, ptr_files_path, ed_h5_filepath, cim_patients,
                     output_dir, output_filename, num_cases):
    """Split the matched pointer files into datasets and build the h5 file.

    Pointer files are the entries of ``ptr_files_path`` whose name ends with
    "_match.img_imageptr".  They are optionally truncated to ``num_cases``,
    shuffled in place, and divided 60% / 20% / 20% into the "train",
    "validation" and "test" lists handed to ``create_h5_file``.

    Args:
        filepaths: Forwarded unchanged to ``create_h5_file``.
        ptr_files_path: Directory that is listed for matched pointer files.
        ed_h5_filepath: Forwarded unchanged to ``create_h5_file``.
        cim_patients: Forwarded unchanged to ``create_h5_file``.
        output_dir: Directory for the h5 file; created when missing.
        output_filename: Name of the h5 file, forwarded to ``create_h5_file``.
        num_cases: Maximum number of pointer files to use, or ``None`` to use
            all of them.  The limit is applied before shuffling.
    """
    # create the output directory if it is non-existent
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    log_and_print("h5 file will be stored in {}".format(output_dir))
    log_and_print("Output filename: {}".format(output_filename))

    # get all the pointer file names
    ptr_files = [
        f for f in os.listdir(ptr_files_path)
        if f.endswith("_match.img_imageptr")
    ]
    if num_cases is not None:
        ptr_files = ptr_files[:num_cases]
    log_and_print("Creating h5 file from {} pointer files\n".format(
        len(ptr_files)))

    # shuffling is currently always enabled; kept as a flag so it can be
    # switched off while debugging
    shuffle_data = True

    if shuffle_data:
        log_and_print("Shuffling data...")
        shuffle(ptr_files)

    # Divide the data into 60% train, 20% validation, and 20% test
    # ref code: http://machinelearninguru.com/deep_learning/data_preparation/hdf5/hdf5.html
    n_files = len(ptr_files)
    train_end = int(0.6 * n_files)
    val_end = int(0.8 * n_files)

    dataset_dict = {
        "train": ptr_files[:train_end],
        "validation": ptr_files[train_end:val_end],
        "test": ptr_files[val_end:],
    }

    # pass the ``ed_h5_filepath`` parameter; the previous spelling
    # ``eds_h5_filepath`` was an undefined name and raised NameError here
    create_h5_file(filepaths, ptr_files_path, ed_h5_filepath, cim_patients,
                   output_dir, output_filename, dataset_dict)
def find_match(filepath, ptr_files_path, cim_models, cim_dir, output_dir_match,
               suffix):
    """Create "<patient>_match.img_imageptr" files holding only matched slices.

    For every image pointer file in ``ptr_files_path`` the CIM image pointer
    of the same patient is looked up by patient name.  Each tagged slice of
    the CIM pointer (series 0, first frame) is located in our pointer file by
    DICOM file name.  A cine slice counts as its match when both DICOM
    headers report the same PatientPosition and their absolute SliceLocation
    values differ by at most ``1e-5``.  Matched cine and tagged series get the
    CIM slice number and are written to ``output_dir_match`` (cine rows
    first, then tagged rows) under the header line of the source pointer
    file.

    The user is prompted for the index of the first pointer file to process.
    Patients whose output file already exists are skipped.  If a CIM tagged
    slice cannot be found in our pointer file, nothing is written for that
    patient.

    Args:
        filepath: Currently ignored.  NOTE(review): the image root is
            hard-coded below to the 2015 folder with a fallback to the 2014
            folder; confirm whether this argument should be used instead.
        ptr_files_path: Directory with the pointer files to process (names
            ending in "img_imageptr").
        cim_models: Sub-directory names of ``cim_dir`` that are searched with
            ``get_pointer_paths``.
        cim_dir: Directory containing the CIM model folders.
        output_dir_match: Directory receiving the new pointer files.
        suffix: Text removed from a pointer file name to obtain the patient
            name.
    """
    start = time()  # start time for this function
    log_and_print([
        "====================FINDING MATCHES====================",
        "Operation started at {}".format(datetime.now().time()),
        "Finding matches for the image pointers stored in {}\n".format(
            ptr_files_path)
    ])

    # list each pointer file
    ptr_files = [
        ptr for ptr in os.listdir(ptr_files_path)
        if ptr.endswith("img_imageptr")
    ]

    # get all the paths to the cim ptr files
    cim_ptr_files = []
    for model_dir in cim_models:
        cim_ptr_files += get_pointer_paths(os.path.join(cim_dir, model_dir))

    # initialise other variables
    file_c = len([
        f for f in os.listdir(output_dir_match) if f.endswith(".img_imageptr")
    ])  # variable to keep file count
    initial_file_c = file_c  # number of files at the start of the program
    tol = 1e-5  # tolerance to check if two slices match
    root_2015 = "E:\\Original Images\\2015"  # default image root
    root_2014 = "E:\\Original Images\\2014"  # fallback image root
    datatype = [('series', '<i4'), ('slice', '<i4'), ('index', '<i4'),
                ('path', 'U255')]
    start_i = int(input("Enter the patient number you want to start with: "))
    print()

    # Defaults for the summaries below: without them an empty pointer folder
    # (or an interrupt before the first patient) raised NameError.
    i = start_i - 1
    patient_name = None

    # looping through each pointer file
    log_and_print(
        ["Creating new image pointers in {}\n".format(output_dir_match)])
    try:
        for i, ptr in enumerate(ptr_files):
            if i < start_i:
                continue

            ptr_path = os.path.join(ptr_files_path, ptr)
            # image pointers already created by this program
            match_ip = [
                f for f in os.listdir(output_dir_match)
                if f.endswith(".img_imageptr")
            ]
            patient_name = ptr.replace(suffix, "")
            image_root = root_2015  # every patient starts with the default
            print("Checking patient {} of {}: {}".format(
                i + 1, len(ptr_files), patient_name))
            output_filename = "{}_match.img_imageptr".format(patient_name)
            if output_filename in match_ip:
                continue  # matching pointer already exists for this patient

            # find the CIM pointer of this patient; retry without "_bio"
            name_key = patient_name.lower()
            candidates = [
                cim_ptr for cim_ptr in cim_ptr_files
                if name_key in cim_ptr.lower()
            ]
            if not candidates:
                bare_key = name_key.replace("_bio", "")
                candidates = [
                    cim_ptr for cim_ptr in cim_ptr_files
                    if bare_key in cim_ptr.lower()
                ]
            if not candidates:
                logger.error(
                    "Patient#{}: {} - No image pointer file found in the CIM folders\n"
                    .format(i + 1, patient_name))
                continue
            cim_ptr_path = candidates[0]
            logger.info("Patient#{}: {} - CIM image pointer found: {}".format(
                i + 1, patient_name, cim_ptr_path))

            # reading the content of the pointer files (ours and the CIM one)
            ptr_content = np.genfromtxt(ptr_path,
                                        delimiter='\t',
                                        names='series, slice, index, path',
                                        skip_header=1,
                                        dtype=datatype)
            cim_ptr_content = np.genfromtxt(
                cim_ptr_path,
                delimiter='\t',
                names='series, slice, index, path',
                skip_header=1,
                dtype=datatype)
            # header is written to the output file together with the matches
            with open(ptr_path, "r") as f:
                header = f.readline().strip()

            # only keep the first frame (index 0) of each slice
            first_fr_cine = ptr_content[np.logical_and(
                ptr_content["series"] == 0, ptr_content["index"] == 0)]
            first_fr_tagged = ptr_content[np.logical_and(
                ptr_content["series"] == 1, ptr_content["index"] == 0)]
            cim_first_fr_tagged = cim_ptr_content[np.logical_and(
                cim_ptr_content["series"] == 0,
                cim_ptr_content["index"] == 0)]

            # per-patient state
            match = False
            tagged_array = []
            cine_array = []
            # reset so the "no match" log cannot report the previous patient's
            # file (or fail with NameError for the first patient)
            cine_image_file = None

            # loop through the slices in cim tagged cine
            for cim_frame_t in cim_first_fr_tagged:
                cim_file_name = os.path.basename(cim_frame_t["path"])
                try:
                    # index of our tagged slice whose path holds that file
                    tagged_slice_i = [
                        curr_i
                        for curr_i, curr_slice in enumerate(first_fr_tagged)
                        if cim_file_name in curr_slice["path"]
                    ][0]
                except IndexError:
                    # a CIM slice is missing from our pointer: drop the patient
                    match = False
                    logger.error("Error Occurred\n", exc_info=True)
                    break
                tagged_slice = first_fr_tagged[tagged_slice_i]
                # whole tagged series of that slice, renumbered with the CIM
                # slice number (boolean indexing returns a copy, so
                # ptr_content itself is not modified)
                tagged_series = ptr_content[np.logical_and(
                    ptr_content["series"] == 1,
                    ptr_content["slice"] == tagged_slice["slice"])]
                tagged_series[:]["slice"] = cim_frame_t["slice"]
                # delete added slice to reduce computational time
                first_fr_tagged = np.delete(first_fr_tagged, tagged_slice_i)

                tagged_image_file = tagged_slice["path"].replace(
                    "IMAGEPATH", image_root)
                if not os.path.exists(tagged_image_file):
                    # file is in the 2014 folder; the cine frames of this
                    # patient are then read from there as well
                    image_root = root_2014
                    tagged_image_file = tagged_slice["path"].replace(
                        "IMAGEPATH", image_root)

                # get the slice location from metaheader
                ds_tag = pydicom.dcmread(
                    tagged_image_file,
                    specific_tags=["SliceLocation", "PatientPosition"])

                # Loop through each slice in the cine series
                matched_rows = []
                for k, frame_c in enumerate(first_fr_cine):
                    cine_image_file = frame_c["path"].replace(
                        "IMAGEPATH", image_root)
                    ds_cine = pydicom.dcmread(
                        cine_image_file,
                        specific_tags=["SliceLocation", "PatientPosition"])
                    if ds_tag.PatientPosition == ds_cine.PatientPosition:
                        if abs(
                                abs(ds_tag.SliceLocation) -
                                abs(ds_cine.SliceLocation)) <= tol:
                            # entire cine series matching the tagged slice,
                            # renumbered with the CIM slice number
                            cine_series = ptr_content[np.logical_and(
                                ptr_content["series"] == 0,
                                ptr_content["slice"] == frame_c["slice"])]
                            cine_series[:]["slice"] = cim_frame_t["slice"]
                            if not match:
                                # first match initialises the output arrays
                                match = True
                                cine_array = cine_series.flatten()
                                tagged_array = tagged_series.flatten()
                            else:
                                cine_array = np.append(
                                    cine_array, cine_series.flatten())
                                tagged_array = np.append(
                                    tagged_array, tagged_series.flatten())
                            matched_rows.append(k)
                    else:
                        log_and_print(["Patient position not the same"])

                # Remove matched cine slices only after the scan: deleting
                # inside the loop shifted the rows while ``k`` still indexed
                # the array being iterated, so a second match in the same
                # scan removed the wrong row or raised IndexError.
                if matched_rows:
                    first_fr_cine = np.delete(first_fr_cine, matched_rows)

            if match:
                output_array = np.append(cine_array,
                                         tagged_array)  # combine two arrays
                # create the image pointer file containing the slices in the
                # cine and tagged series that match
                output_path = os.path.join(output_dir_match, output_filename)
                logger.info(
                    "New image pointer file: {}\n".format(output_path))
                np.savetxt(output_path,
                           output_array,
                           fmt="%2d\t%2d\t%2d\t%s",
                           delimiter="\t",
                           header=header,
                           comments="")
                file_c += 1
            else:
                if cine_image_file is not None:
                    patient_dir = os.path.dirname(cine_image_file)
                else:
                    patient_dir = "unknown (no cine frame was read)"
                logger.error(
                    "No match found! Patient directory: {}\n".format(
                        patient_dir))

        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================MATCHES FOUND!=========================",
            "Looped through {} of {} patients".format(i + 1 - start_i,
                                                      len(ptr_files)),
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir_match),
            "Operation finished at {}".format(str(datetime.now())),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)

    except KeyboardInterrupt:
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================CREATION OF IMAGE POINTERS CANCELLED(MATCH)!=========================",
            "Operation cancelled at {}".format(str(datetime.now())),
            "Looped through {} of {} patients".format(i + 1 - start_i,
                                                      len(ptr_files)),
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir_match),
            "Last iteration info: Patient#{} {}".format(i, patient_name),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)

    except Exception:
        # log and give up on the remaining patients; SystemExit and similar
        # control-flow exceptions are no longer swallowed here
        logger.error("UNEXPECTED ERROR", exc_info=True)
def move_img_ptrs(filepath, ptr_path, output_dir_missing, suffix):
    """Move pointer files that list no tagged series to another directory.

    Every ".img_imageptr" file in ``ptr_path`` is parsed; when it contains no
    row with series number 1 (tagged cine) the file is moved to
    ``output_dir_missing``.  A summary is logged when the loop finishes or is
    cancelled with Ctrl+C.

    Args:
        filepath: Not used by this function.
        ptr_path: Directory containing the pointer files to check.
        output_dir_missing: Destination for pointer files without tagged rows.
        suffix: Text removed from the file name to obtain the patient name
            shown in the progress output.
    """
    start = time()  # start time for this function
    output_messages = [
        "====================MOVING IMAGE POINTERS====================",
        "Operation started at {}".format(datetime.now().time())
    ]
    log_and_print(output_messages)

    # get the path to each pointer file
    ptr_files = [
        os.path.join(ptr_path, ptr) for ptr in os.listdir(ptr_path)
        if ptr.endswith(".img_imageptr")
    ]

    # number of pointer files already present in the destination; moved files
    # are counted on top of it
    moved_c = len([
        f for f in os.listdir(output_dir_missing)
        if f.endswith(".img_imageptr")
    ])
    initial_moved_c = moved_c

    datatype = [('series', '<i4'), ('slice', '<i4'), ('index', '<i4'),
                ('path', 'U255')]

    # ``i`` is read by both summaries; without a default an empty pointer
    # folder raised NameError.  -1 makes them report "0 of 0".
    i = -1

    # looping through each pointer file
    log_and_print(
        ["Moving image pointer files to {}\n".format(output_dir_missing)])
    try:
        for i, ptr in enumerate(ptr_files):
            # reading the content of the pointer file
            ptr_content = np.genfromtxt(ptr,
                                        delimiter='\t',
                                        names='series, slice, index, path',
                                        skip_header=1,
                                        dtype=datatype)

            # all rows that belong to the tagged series (series == 1)
            tagged_rows = ptr_content[ptr_content["series"] == 1]

            patient_name = os.path.basename(ptr).replace(
                suffix, "")  # get the patient name
            print("Checking patient {} of {}: {}".format(
                i + 1, len(ptr_files), patient_name))
            if len(tagged_rows) == 0:
                moved_c += 1
                log_and_print(
                    ["Moving image pointer {}".format(os.path.basename(ptr))])
                shutil.move(ptr, output_dir_missing)

        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================IMAGE POINTERS MOVED!=========================",
            "Image pointers moved to {}".format(output_dir_missing),
            "Looped through {} of {} patients".format(i + 1, len(ptr_files)),
            "Number of pointer files moved: {}".format(moved_c),
            "Operation finished at {}".format(str(datetime.now().time())),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)

    except KeyboardInterrupt:
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================TRANSFER OF IMAGE POINTERS CANCELLED!=========================",
            "Operation cancelled at {}".format(str(datetime.now())),
            "Looped through {} of {} patients".format(i + 1, len(ptr_files)),
            # current minus initial: the reversed subtraction reported a
            # negative number of moved files
            "Number of pointer files moved during operation: {}".format(
                moved_c - initial_moved_c),
            "Total elapsed time: {} hours {} minutes {} seconds".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
        logger.error("Unexpected error occured at {}\n".format(
            str(datetime.now())),
                     exc_info=True)
def create_img_ptrs(filepath, output_dir, suffix):
    """Create an image pointer file for every patient folder under a root.

    ``filepath`` is walked recursively; each directory that contains at least
    one ".dcm" file counts as one patient (numbered from 0 in walk order).
    The user is prompted for the patient number to start with.  For every
    patient from that number on, a pointer file is created with
    ``create_new_image_pointer`` unless a file with the expected name already
    exists in ``output_dir``.  A summary is logged when the walk finishes or
    is cancelled with Ctrl+C.

    Args:
        filepath: Root directory holding the patient folders.  It is replaced
            by the placeholder "IMAGEPATH" in the paths handed on.
        output_dir: Directory in which the pointer files are stored.
        suffix: File name suffix of the pointer files.
    """
    start = time()  # start time for this function
    output_messages = [
        "====================CREATING IMAGE POINTERS====================",
        "Operation started at {}".format(datetime.now().time())
    ]
    log_and_print(output_messages)

    # series identifiers (series of interest)
    sax_cine_prefix = "CINE_segmented_SAX_b"
    tagged_cine_prefix = "cine_tagging_3sl_SAX_b"

    # initialise other variables
    file_c = len([
        f for f in os.listdir(output_dir) if f.endswith(".img_imageptr")
    ])  # variable to keep file count
    initial_file_c = file_c  # number of files at the start of the program
    start_i = int(
        input(
            "Enter the patient number you want to start with (beginning = 0): "
        ))
    print()

    # Defaults for the cancel summary: an interrupt that arrives before the
    # first patient folder is reached used to fail with NameError on
    # ``patient_name``.
    i = -1
    patient_name = None

    # creating image pointers for tagged and cines based on the file path found in the image pointer
    log_and_print(["Creating image pointer files in {}\n".format(output_dir)])
    try:
        for root, dirs, files in os.walk(filepath):
            # only directories that hold DICOM files are patient folders
            if not any(f.endswith(".dcm") for f in files):
                continue

            i += 1  # keep count of the patient number
            if i < start_i:
                continue

            # re-listed for every patient so files written meanwhile are seen
            image_pointer_list = [
                imgptr for imgptr in os.listdir(output_dir)
                if imgptr.endswith(".img_imageptr")
            ]
            patient_path = root
            patient_name = get_patient_name(patient_path)
            pointer_name = "{}{}".format(patient_name.replace(" ", "_"),
                                         suffix)

            if pointer_name in image_pointer_list:
                # info for user if image pointer already exists
                print("{} already exists".format(pointer_name))
                print("Iteration info: Patient#{} {}\n".format(
                    i, patient_name))
                continue

            gen_patient_path = patient_path.replace(filepath, "IMAGEPATH")
            try:
                create_new_image_pointer(filepath, patient_name, patient_path,
                                         gen_patient_path, sax_cine_prefix,
                                         tagged_cine_prefix, output_dir,
                                         suffix)
            except FileNotFoundError:
                logger.error(
                    "\nThe system cannot find the path specified {}\n".format(
                        patient_path),
                    exc_info=True)
                continue
            # NOTE(review): create_new_image_pointer can return without
            # writing a file; the count below does not detect that case.
            file_c += 1
            # info for user
            output_messages = [
                "Total # of image pointers created: {}".format(file_c),
                "Iteration info: Patient#{} {}\n".format(i, patient_name)
            ]
            log_and_print(output_messages)

        # display when code is finished running
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================IMAGE POINTERS CREATED!=========================",
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir),
            "Operation finished at {}".format(str(datetime.now())),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)

    except KeyboardInterrupt:
        hrs, mins, secs = calculate_time_elapsed(start)
        output_messages = [
            "=========================CREATION OF IMAGE POINTERS CANCELLED!=========================",
            "Operation cancelled at {}".format(str(datetime.now())),
            "Image pointers created during operation: {}".format(
                file_c - initial_file_c),
            "Total # of image pointers created: {}".format(file_c),
            "Image pointer files stored in {}".format(output_dir),
            "Last iteration info: Patient#{} {}".format(i, patient_name),
            "Total elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs)
        ]
        log_and_print(output_messages)
def create_new_image_pointer(filepath, pat_name, image_path, gen_image_path,
                             sax_cine_prefix, tagged_cine_prefix, output_dir,
                             suffix):
    """Build and save the image pointer file of a single patient.

    The SeriesDescription and InstanceNumber of every ".dcm" file in
    ``image_path`` are read.  Files whose series description contains
    ``sax_cine_prefix`` are recorded as series 0 and files containing
    ``tagged_cine_prefix`` as series 1; all other files are ignored.  The
    slice number is parsed from the text after the prefix (minus one) and the
    frame index is InstanceNumber minus one.  The recorded lines are sorted
    and written with ``save_image_pointer``, using ``gen_image_path`` as the
    directory part of each path.

    If a slice number cannot be parsed by either naming scheme, the function
    logs the problem and returns without writing a file.
    """
    start = time()
    files = [name for name in os.listdir(image_path) if name.endswith(".dcm")]
    cine_and_tagged = []  # cine and tagged image files
    log_and_print([
        "Making image pointer file for {} from {}".format(
            pat_name, image_path)
    ])

    def read_header(path, **extra):
        # only the two tags needed to classify the file are loaded
        return pydicom.dcmread(
            path,
            specific_tags=["SeriesDescription", "InstanceNumber"],
            **extra)

    def slice_number(primary, fallback):
        # try the usual naming scheme first, then the alternative one;
        # None signals that neither text is a number
        try:
            return int(primary) - 1
        except ValueError:
            logger.error("Special name case found | Patient dir: {}".format(
                image_path))
        logger.error("Trying different approach...")
        try:
            number = int(fallback) - 1
        except ValueError:
            logger.error("Issue unresolved\n")
            return None
        logger.info("Issue resolved!\n")
        return number

    for file_name in files:
        dicom_path = os.path.join(image_path, file_name)
        try:
            ds = read_header(dicom_path)
        except pydicom.errors.InvalidDicomError:
            print("Forcing read...")
            logger.error("Forcing read...")
            ds = read_header(dicom_path, force=True)

        try:
            curr_series = ds.SeriesDescription
        except AttributeError:
            skip_message = "Skipping file in {}".format(dicom_path)
            print(skip_message)
            logger.error(skip_message)
            continue

        if sax_cine_prefix in curr_series:  # normal cines
            # series name is "CINE_segmented_SAX_b#", # being the slice number
            series_n = 0
            remainder = curr_series[len(sax_cine_prefix):]
            attempts = (remainder, remainder[:-1])
        elif tagged_cine_prefix in curr_series:  # tagged cines
            # series name is "cine_tagging_3sl_SAX_b#s", # being the slice number
            series_n = 1
            remainder = curr_series[len(tagged_cine_prefix):]
            attempts = (remainder[:-1], remainder)
        else:
            continue  # not a series of interest

        slice_n = slice_number(*attempts)
        if slice_n is None:
            return
        index = ds.InstanceNumber - 1
        cine_and_tagged.append("{:>2}\t{:>2}\t{:>2}\t{}\\{}".format(
            series_n, slice_n, index, gen_image_path, file_name))

    total_files = len(files)
    log_and_print([
        "Looped through {} of {} files in {}".format(total_files, total_files,
                                                     pat_name)
    ])
    print("Sorting...")
    cine_and_tagged.sort()

    # saving the image pointer as a file
    save_image_pointer(filepath, pat_name, output_dir, image_path,
                       cine_and_tagged, suffix)

    # time keeping
    _, mins, secs = calculate_time_elapsed(start)
    log_and_print(["Elapsed time: {} minutes {} seconds".format(mins, secs)])
'''
This is the main function
To be modified: basepath, filepath, cim_dir, cim_models
'''
if __name__ == "__main__":
    # start logging
    start_ = time()  # to keep time
    ts = datetime.fromtimestamp(start_).strftime(
        '%Y-%m-%d')  #time stamp for the log file
    logname = "{}-prepare-img-ptrs.log".format(ts)
    logging.basicConfig(filename=logname, level=logging.DEBUG)
    output_messages = [
        "====================STARTING MAIN PROGRAM====================",
        "Operation started at {}\n".format(datetime.now().time())
    ]
    log_and_print(output_messages)

    # where you want the outputs to be stored
    basepath = os.path.join(os.getcwd())
    #basepath = "" #uncomment and specify if you want the basepath to be different from the working directory

    # where the multipatient folders are stored
    filepath = "E:\\Original Images"

    # output image pointer file suffix
    suffix = "_cine_and_tagged.img_imageptr"

    # dirnames and paths of the cim models
    cim_dir = "C:\\Users\\arad572\\Downloads\\all CIM"
    cim_models = [
        "CIM_DATA_AB", "CIM_DATA_EL1", "CIM_DATA_EL2", "CIM_DATA_EM",
예제 #9
0
def _pad_centered(img, h_diff, w_diff):
    """Zero-pad ``img`` by ``h_diff`` rows and ``w_diff`` columns in total.

    The padding is split between both sides of each axis; when the amount is
    odd the extra pixel goes to the bottom/right.
    """
    top = h_diff // 2
    left = w_diff // 2
    return cv2.copyMakeBorder(img,
                              top,
                              h_diff - top,
                              left,
                              w_diff - left,
                              cv2.BORDER_CONSTANT,
                              value=[0, 0, 0])


def prepare_dicom_images(filepaths, ptr_content, slices, view):
    """Load and pad the cine and tagged DICOM frames of every requested slice.

    For each slice, up to 20 tagged frames (series == 1) and up to 50 cine
    frames (series == 0) are read from the paths listed in ``ptr_content``.
    Tagged frames are zero-padded to 256 x 256 (and shrunk first if they are
    larger); cine frames are resized to the dimensions of the first tagged
    frame of the slice and padded with the same borders.  Missing frames are
    filled with an empty path and a 256 x 256 zero image.

    Args:
        filepaths: passed through to ``get_image_path`` to resolve each
            pointer path to a file on disk.
        ptr_content: structured array with at least the fields ``series``,
            ``slice`` and ``path``.
        slices: iterable of slice numbers to process.
        view: when truthy, ``view_images`` is called with the first cine and
            first tagged frame of every slice.

    Returns:
        A 10-tuple: cine paths, tagged paths, cine images, tagged images,
        cine pixel spacings, tagged pixel spacings, cine image positions,
        tagged image positions, cine image orientations and tagged image
        orientations.  The per-slice metadata comes from the last cine and
        last tagged dataset that was read.
    """
    cine_dicom_paths = []
    tagged_dicom_paths = []
    cine_images = []
    tagged_images = []
    cine_px_spaces = []
    tagged_px_spaces = []
    cine_img_pos = []
    tagged_img_pos = []
    cine_img_orient = []
    tagged_img_orient = []

    # loop through the slices
    for index, i in enumerate(slices):
        tmp_cine_dp = []
        tmp_tagged_dp = []
        tmp_cine_images = []
        tmp_tagged_images = []

        # separate the cine frames and the tagged frames
        slice_con = ptr_content["slice"] == i
        cine_con = np.logical_and(ptr_content["series"] == 0, slice_con)
        tagged_con = np.logical_and(ptr_content["series"] == 1, slice_con)

        cine_frames = ptr_content[cine_con]
        tagged_frames = ptr_content[tagged_con]

        # only get 20 frames
        for j in range(20):
            if j < len(tagged_frames):
                gen_tagged_imagepath = tagged_frames["path"][j]

                tmp_tagged_dp.append(gen_tagged_imagepath)

                tagged_imagepath = get_image_path(gen_tagged_imagepath,
                                                  filepaths)

                dst = pydicom.dcmread(tagged_imagepath)
                dst_img = dst.pixel_array
                # presumably (height, width, rows to pad, cols to pad) for a
                # 256 x 256 target -- confirm against get_dimensions
                tag_h, tag_w, h_diff, w_diff = get_dimensions(
                    dst_img, 256, 256)
                try:
                    dst_img_res = _pad_centered(dst_img, h_diff, w_diff)
                except (ValueError, cv2.error):
                    # OpenCV rejects negative border sizes with cv2.error, so
                    # that must be caught for this fallback to be reachable.
                    log_and_print(
                        "Dicom width/height larger than 256 for patient {}".
                        format(dst.PatientName))
                    aspect_ratio = tag_w / tag_h
                    # Shrink the longer side to 256 and keep the aspect ratio.
                    # ">=" makes square oversize images take the first branch;
                    # cv2.resize needs integer sizes.
                    if tag_h > 256 and tag_h >= tag_w:
                        tag_h = 256
                        tag_w = max(1, int(round(tag_h * aspect_ratio)))
                    elif tag_w > 256 and tag_w > tag_h:
                        tag_w = 256
                        tag_h = max(1, int(round(tag_w / aspect_ratio)))
                    dst_img_res = cv2.resize(dst_img, (tag_w, tag_h),
                                             interpolation=cv2.INTER_CUBIC)
                    # Recompute the padding from the *resized* image so the
                    # borders are non-negative and the cine frames below are
                    # resized to the shrunken dimensions.
                    tag_h, tag_w, h_diff, w_diff = get_dimensions(
                        dst_img_res, 256, 256)
                    dst_img_res = _pad_centered(dst_img_res, h_diff, w_diff)

                tmp_tagged_images.append(dst_img_res)

                if j == 0:  # we'll use the end diastolic image info to resize our cine frames
                    # only take 50 frames
                    for k in range(50):
                        if k < len(cine_frames):
                            gen_cine_imagepath = cine_frames["path"][k]
                            tmp_cine_dp.append(gen_cine_imagepath)

                            cine_imagepath = get_image_path(
                                gen_cine_imagepath, filepaths)

                            dsc = pydicom.dcmread(cine_imagepath)
                            dsc_img = dsc.pixel_array
                            dsc_img_res = cv2.resize(
                                dsc_img, (tag_w, tag_h),
                                interpolation=cv2.INTER_CUBIC)
                            dsc_img_res = _pad_centered(
                                dsc_img_res, h_diff, w_diff)

                            tmp_cine_images.append(dsc_img_res)
                        else:
                            # the slice has fewer than 50 cine frames: pad
                            if k == len(cine_frames):
                                log_and_print(
                                    "Adding {} cine frame(s) for patient {}".
                                    format(50 - len(cine_frames),
                                           dsc.PatientName))
                            tmp_cine_dp.append("")
                            tmp_cine_images.append(np.zeros((256, 256)))
            else:
                # the slice has fewer than 20 tagged frames: pad
                # NOTE(review): dst is only bound once a tagged frame has been
                # read; a slice with no tagged frames is not supported here.
                if j == len(tagged_frames):
                    log_and_print(
                        "Adding {} tagged frame(s) for patient {}".format(
                            20 - len(tagged_frames), dst.PatientName))
                tmp_tagged_dp.append("")
                tmp_tagged_images.append(np.zeros((256, 256)))

        cine_dicom_paths.append(tmp_cine_dp)
        tagged_dicom_paths.append(tmp_tagged_dp)
        cine_images.append(tmp_cine_images)
        tagged_images.append(tmp_tagged_images)
        cine_px_spaces.append(dsc.PixelSpacing[0])
        tagged_px_spaces.append(dst.PixelSpacing[0])
        cine_img_pos.append(dsc.ImagePositionPatient)
        tagged_img_pos.append(dst.ImagePositionPatient)
        cine_img_orient.append(dsc.ImageOrientationPatient)
        tagged_img_orient.append(dst.ImageOrientationPatient)

        # the image lists become arrays once the last slice has been added
        if index == len(slices) - 1:
            cine_images = np.array(cine_images)
            tagged_images = np.array(tagged_images)

        if (view):
            view_images(cine_images[index][0],
                        tagged_images[index][0])  #viewing the ED frame

    cine_px_spaces = np.array(cine_px_spaces)
    tagged_px_spaces = np.array(tagged_px_spaces)
    cine_img_pos = np.array(cine_img_pos)
    tagged_img_pos = np.array(tagged_img_pos)
    cine_img_orient = np.array(cine_img_orient)
    tagged_img_orient = np.array(tagged_img_orient)

    return cine_dicom_paths, tagged_dicom_paths, cine_images, tagged_images, cine_px_spaces, tagged_px_spaces, cine_img_pos, tagged_img_pos, cine_img_orient, tagged_img_orient
# Example #10
# 0
def get_data_from_h5_file(h5_filepath, cim_path, patient_name, ptr_slices):
    """Collect bounding boxes and landmarks for one patient from the h5 files.

    Files in ``h5_filepath`` named ``*.seq.noresize.*.h5`` whose full path
    contains the model part of ``cim_path`` (case-insensitive) are searched in
    turn.  The first file that holds at least one of ``ptr_slices`` for the
    patient supplies the result.

    Args:
        h5_filepath: directory containing the h5 files.
        cim_path: ``<model name>/<cim patient name>`` path; it is split with
            ``os.path.split``.
        patient_name: patient name used for the returned list (underscores
            replaced by spaces) and in log messages.
        ptr_slices: zero-based slice numbers; slice ``n`` is looked up as
            ``"series_1_slice_<n + 1>"``.

    Returns:
        ``(patient_names, cim_paths, slices, bbox_corners, landmark_coords)``
        with one entry per matched slice, or five ``None`` values when no
        file contains a matching slice.
    """
    # candidate h5 files in the directory
    all_h5 = []
    for fname in os.listdir(h5_filepath):
        if fnmatch.fnmatch(fname, "*.seq.noresize.*.h5"):
            all_h5.append(os.path.join(h5_filepath, fname))

    # model name and cim patient name come from the two halves of cim_path
    cim_model_name, cim_pat_name = os.path.split(cim_path)

    # keep only the files belonging to the requested model/observer
    model_key = cim_model_name.lower()
    candidates = [p for p in all_h5 if model_key in p.lower()]

    for path in candidates:
        with h5py.File(path, 'r') as hf:
            patients = np.array(hf.get("patients"))
            p_indices = np.array(np.where(patients == cim_pat_name))[0]
            if len(p_indices) == 0:
                continue

            # rows between the first and last occurrence of the patient
            rows = slice(p_indices[0], p_indices[-1] + 1)
            pat_slices = np.array(hf.get("slices"))[rows]
            pat_bbox = np.array(hf.get("bbox_corners"))[rows, :]
            pat_landmarks = np.array(hf.get("ed_coords"))[rows, :, :, :]

            found_slices = []
            found_cim_paths = []
            found_names = []
            bbox_chunks = []
            landmark_chunks = []
            for sl in ptr_slices:
                wanted = "series_1_slice_{}".format(sl + 1)
                p_index = np.array(np.where(pat_slices == wanted))[0]
                if len(p_index) == 0:
                    continue  # this slice is not in the file
                if len(p_index) > 1:
                    log_error_and_print(
                        "Duplicate slice! Patient: {}, CIM_Path: {}, h5 filename: {}, indices: {}"
                        .format(patient_name, cim_path, path, p_indices))
                    log_and_print("Adding only one slice...")
                    p_index = [p_index[0]]
                found_slices.append(sl)
                found_cim_paths.append(cim_path)
                found_names.append(patient_name.replace("_", " "))
                bbox_chunks.append(pat_bbox[p_index, :])
                landmark_chunks.append(pat_landmarks[p_index, :, :, :])

            if len(found_slices) == 0:
                continue  # nothing usable here, try the next file

            slices = np.array(found_slices)
            bbox_corners = np.concatenate(bbox_chunks, axis=0)
            landmark_coords = np.concatenate(landmark_chunks, axis=0)

            if slices.shape[0] != bbox_corners.shape[0]:
                print(slices.shape[0], bbox_corners.shape[0])
                log_error_and_print(
                    "Adding unequal number of data...\nPatient: {}, CIM_Path: {}, h5 filename: {}, indices: {}"
                    .format(patient_name, cim_path, path, p_indices))

            return found_names, found_cim_paths, slices, bbox_corners, landmark_coords

    return None, None, None, None, None
# Example #11
# 0
def create_h5_file(filepaths, ptr_files_path, eds_h5_filepath, cim_patients,
                   output_dir, output_filename, dataset_dict):
    """Build the output h5 file incrementally, one dataset group at a time.

    ``dataset_dict`` maps a set name (the h5 group key) to the list of image
    pointer files belonging to that set.  Data for each pointer is gathered
    into a ``DataSetModel`` via ``get_all_data``; whenever the model holds 5
    unique patients, or the last pointer of the set has been processed, the
    model is written to ``output_dir/output_filename`` and reset.  Writing in
    batches avoids holding the whole dataset in memory.

    Author's notes on the intended per-group datasets (what is actually
    written is decided by ``create_datasets`` / ``add_datasets``):
        + patients (as in the header) - N rows
        + cim_paths (model and patient name) - N rows
        + slices (only the ones with landmark_coords) - N x slices
        + dicom_path (path from the cim image pointer) - N x 50
        + cine_px_spaces
        + tagged_px_spaces
        + cine_images (all frames) - N x T x 256 x 256 (resized)
        + tagged_images (all frames) - N x T x 256 x 256 (resized)
        + landmark_coords - N x 20 x 2 x 168
        + region
        + es_frame_idx (index of the end systolic frame) - N
        + bbox corners - N x 4
    """
    dsm = DataSetModel()  # accumulates the current batch
    print("Creating h5file...")
    for key, ptr_files in dataset_dict.items():
        log_and_print("Obtaining data for {} set".format(key))
        batch_timer = time()
        set_timer = time()
        total_cases = 0  # unique cases written to this set so far
        total_slices = 0  # slices written to this set so far
        for i, ptr in enumerate(ptr_files):
            get_all_data(dsm, filepaths, ptr_files_path, eds_h5_filepath,
                         cim_patients, ptr)

            # flush once 5 unique patients are collected or the set is done
            batch_full = len(set(dsm.patient_names)) == 5
            if not (batch_full or i == len(ptr_files) - 1):
                continue

            total_cases += len(set(dsm.patient_names))
            total_slices += len(dsm.slices)
            print("Looped through {}/{} patients for {} set".format(
                i + 1, len(ptr_files), key))

            h5_path = os.path.join(output_dir, output_filename)
            file_is_new = not os.path.isfile(h5_path)
            with h5py.File(h5_path, 'w' if file_is_new else 'a') as hf:
                # a fresh file, or a file without this group, needs the
                # datasets created; otherwise the batch is appended
                if file_is_new or "//{}".format(key) not in hf:
                    create_datasets(hf, key, dsm)
                else:
                    add_datasets(hf, key, dsm)

            hrs, mins, secs = calculate_time_elapsed(batch_timer)
            print("Added {} unique cases to {} set".format(
                len(set(dsm.patient_names)), key))
            print("Added {} slices to {} set".format(len(dsm.slices), key))
            print("Elapsed time: {} hours {} minutes {} seconds\n".format(
                hrs, mins, secs))
            batch_timer = time()
            dsm = DataSetModel()  # start a new batch

            if i == len(ptr_files) - 1:
                hrs, mins, secs = calculate_time_elapsed(set_timer)
                log_and_print("Finished creating {} set".format(key))
                log_and_print(
                    "Total number of unique cases: {}".format(total_cases))
                log_and_print(
                    "Total number of slices: {}".format(total_slices))
                log_and_print(
                    "Elapsed time: {} hours {} minutes {} seconds\n".format(
                        hrs, mins, secs))
# Example #12
# 0
def get_all_data(dsm, filepaths, ptr_files_path, eds_h5_filepath, cim_patients,
                 ptr):
    """Add the data of one image pointer file to the dataset model ``dsm``.

    The patient name is derived from the pointer file name, the pointer file
    is parsed, and landmark data for its slices is looked up through
    ``get_data_from_h5_file``.  When landmarks exist, the DICOM frames are
    loaded with ``prepare_dicom_images`` and everything is stored on ``dsm``:
    assigned directly if ``dsm`` has no slices yet, appended otherwise.  When
    no landmarks are found only a message is logged and ``dsm`` is unchanged.

    Args:
        dsm: dataset model that is updated in place.
        filepaths: forwarded to ``prepare_dicom_images``.
        ptr_files_path: directory containing the image pointer files.
        eds_h5_filepath: directory with the h5 files holding the landmarks.
        cim_patients: forwarded to ``get_cim_path``.
        ptr: file name of the image pointer, expected to end in
            ``_match.img_imageptr``.
    """
    # the first batch is assigned, later ones are appended to the arrays
    init = len(dsm.slices) == 0

    patient_name = ptr.replace("_match.img_imageptr",
                               "")  #get the patient name

    # get the cim path for the patient MODEL\PatientName
    cim_path = get_cim_path(patient_name, cim_patients)

    # get the path of the pointer
    ptr_path = os.path.join(ptr_files_path, ptr)

    # read the content of the image pointer
    datatype = [('series', '<i4'), ('slice', '<i4'), ('index', '<i4'),
                ('path', 'U255')]
    # genfromtxt returns a 0-d array for a file with a single data row;
    # atleast_1d keeps the boolean-mask indexing done downstream valid.
    ptr_content = np.atleast_1d(
        np.genfromtxt(ptr_path,
                      delimiter='\t',
                      names='series, slice, index, path',
                      skip_header=1,
                      dtype=datatype))

    ptr_slices = get_slices(ptr_content)

    patient_names, cim_paths, slices, bbox_corners, landmark_coords = get_data_from_h5_file(
        eds_h5_filepath, cim_path, patient_name, ptr_slices)

    if cim_paths is None:
        log_and_print(
            "No landmark coordinates for patient {}".format(patient_name))
        return

    print("Landmark coordinates found for patient {}".format(patient_name))
    print("Image pointer path: {}".format(ptr_path))
    # preprocess the dicom images
    cine_dicom_paths, tagged_dicom_paths, cine_images, tagged_images, cine_px_spaces, tagged_px_spaces, cine_img_pos, tagged_img_pos, cine_img_orient, tagged_img_orient = prepare_dicom_images(
        filepaths, ptr_content, slices, view=False)

    # list-valued fields are always extended
    dsm.patient_names.extend(patient_names)
    dsm.cim_paths.extend(cim_paths)
    dsm.cine_dicom_paths.extend(cine_dicom_paths)
    dsm.tagged_dicom_paths.extend(tagged_dicom_paths)

    # array-valued fields: (attribute name, new values, axis used by np.append)
    array_fields = [
        ("slices", slices, 0),
        ("bbox_corners", bbox_corners, 0),
        ("landmark_coords", landmark_coords, 0),
        ("cine_images", cine_images, 0),
        ("tagged_images", tagged_images, 0),
        ("cine_px_spaces", cine_px_spaces, None),
        ("tagged_px_spaces", tagged_px_spaces, None),
        ("cine_img_pos", cine_img_pos, 0),
        ("tagged_img_pos", tagged_img_pos, 0),
        ("cine_img_orient", cine_img_orient, 0),
        ("tagged_img_orient", tagged_img_orient, 0),
    ]
    for attr, values, axis in array_fields:
        if init:
            setattr(dsm, attr, values)
        else:
            setattr(dsm, attr,
                    np.append(getattr(dsm, attr), values, axis=axis))