def slide_info(display_all_properties=False):
    """
    Display information (such as properties) about training images.

    Every training slide numbered 1..get_num_training_slides() is opened,
    its level/dimension details are printed, and the slide number is grouped
    by objective power (20x, 40x, or anything else). A summary of the groups
    is printed at the end.

    NOTE(review): this file also contains a later
    ``slide_info(datapath, display_all_properties=False)`` definition; if both
    live in the same module the later one replaces this one. Confirm which
    version is intended to be callable.

    Args:
        display_all_properties: If True, display all available slide properties.
    """
    t = Time()

    num_train_images = get_num_training_slides()
    obj_pow_20_list = []
    obj_pow_40_list = []
    obj_pow_other_list = []
    lists_by_power = {20: obj_pow_20_list, 40: obj_pow_40_list}

    for slide_num in range(1, num_train_images + 1):
        slide_filepath = get_training_slide_path(slide_num)
        print("\nOpening Slide #%d: %s" % (slide_num, slide_filepath))
        slide = open_slide(slide_filepath)
        try:
            print("Level count: %d" % slide.level_count)
            print("Level dimensions: " + str(slide.level_dimensions))
            print("Level downsamples: " + str(slide.level_downsamples))
            print("Dimensions: " + str(slide.dimensions))

            # The objective-power property may be absent or non-integer for
            # some slides; treat those as "unknown" instead of aborting the
            # whole report.
            raw_power = slide.properties.get(
                openslide.PROPERTY_NAME_OBJECTIVE_POWER)
            try:
                objective_power = int(raw_power)
            except (TypeError, ValueError):
                objective_power = None
            print("Objective power: " + str(objective_power))
            lists_by_power.get(objective_power,
                               obj_pow_other_list).append(slide_num)

            print("Associated images:")
            for ai_key, ai_value in slide.associated_images.items():
                print(" " + str(ai_key) + ": " + str(ai_value))
            print("Format: " + str(slide.detect_format(slide_filepath)))

            if display_all_properties:
                print("Properties:")
                for prop_key, prop_value in slide.properties.items():
                    print(" Property: " + str(prop_key) + ", value: "
                          + str(prop_value))
        finally:
            # Release the underlying slide handle even if printing failed.
            slide.close()

    print("\n\nSlide Magnifications:")
    print(" 20x Slides: " + str(obj_pow_20_list))
    print(" 40x Slides: " + str(obj_pow_40_list))
    print(" ??x Slides: " + str(obj_pow_other_list) + "\n")

    t.elapsed_display()


# if __name__ == "__main__":
#   show_slide(2)
#   slide_info(display_all_properties=True)
#   slide_stats()
#   training_slide_to_image(4)
#   img_path = get_training_image_path(4)
#   img = open_image(img_path)
#   img.show()
#   slide_to_scaled_pil_image(5)[0].show()
#   singleprocess_training_slides_to_images()
#   multiprocess_training_slides_to_images()
def singleprocess_training_slides_to_images():
    """
    Convert all WSI training slides to smaller images using a single process.

    The conversion itself is delegated to training_slide_range_to_images(),
    which is asked to handle slide numbers 1 through the number of training
    slides reported by get_num_training_slides().

    NOTE(review): a later definition with the same name (taking a
    ``slides_path`` argument) appears further down in this file; if both stay
    in one module the later one wins.
    """
    timer = Time()

    # Slides are numbered from 1, so the slide count is also the last number.
    last_slide_num = get_num_training_slides()
    training_slide_range_to_images(1, last_slide_num)

    timer.elapsed_display()
def _slide_id_from_path(slide_filepath):
    """Return the slide id for a '.svs' path: an int when the file stem is numeric, else the stem string."""
    stem = os.path.basename(slide_filepath).split('.svs')[0]
    try:
        return int(stem)
    except ValueError:
        return stem


def slide_info(datapath, display_all_properties=False):  ## ap
    """
    Display information (such as properties) about the slides in a directory.

    Every '*.svs' file directly inside ``datapath`` is opened, its
    level/dimension details are printed, and its id is grouped by objective
    power (20x, 40x, or anything else). A summary of the groups is printed at
    the end.

    Args:
        datapath: Directory that is searched (non-recursively) for '*.svs' files.
        display_all_properties: If True, display all available slide properties.
    """
    t = Time()

    obj_pow_20_list = []
    obj_pow_40_list = []
    obj_pow_other_list = []
    lists_by_power = {20: obj_pow_20_list, 40: obj_pow_40_list}

    # glob order is unspecified; sort so the report is deterministic.
    # Numeric ids come first (in numeric order), then non-numeric names.
    slides = [(_slide_id_from_path(path), path)
              for path in glob.glob(os.path.join(datapath, '*.svs'))]  ## ap
    slides.sort(key=lambda item: (isinstance(item[0], str), item[0]))

    for slide_num, slide_filepath in slides:  ## ap
        # %s renders an int id exactly like %d did, and also accepts the
        # string ids used for non-numeric file names.
        print("\nOpening Slide #%s: %s" % (slide_num, slide_filepath))
        slide = open_slide(slide_filepath)
        try:
            print("Level count: %d" % slide.level_count)
            print("Level dimensions: " + str(slide.level_dimensions))
            print("Level downsamples: " + str(slide.level_downsamples))
            print("Dimensions: " + str(slide.dimensions))

            # The objective-power property may be absent or non-integer for
            # some slides; treat those as "unknown" instead of aborting the
            # whole report.
            raw_power = slide.properties.get(
                openslide.PROPERTY_NAME_OBJECTIVE_POWER)
            try:
                objective_power = int(raw_power)
            except (TypeError, ValueError):
                objective_power = None
            print("Objective power: " + str(objective_power))
            lists_by_power.get(objective_power,
                               obj_pow_other_list).append(slide_num)

            print("Associated images:")
            for ai_key, ai_value in slide.associated_images.items():
                print(" " + str(ai_key) + ": " + str(ai_value))
            print("Format: " + str(slide.detect_format(slide_filepath)))

            if display_all_properties:
                print("Properties:")
                for prop_key, prop_value in slide.properties.items():
                    print(" Property: " + str(prop_key) + ", value: "
                          + str(prop_value))
        finally:
            # Release the underlying slide handle even if printing failed.
            slide.close()

    print("\n\nSlide Magnifications:")
    print(" 20x Slides: " + str(obj_pow_20_list))
    print(" 40x Slides: " + str(obj_pow_40_list))
    print(" ??x Slides: " + str(obj_pow_other_list) + "\n")

    t.elapsed_display()
def multiprocess_training_slides_to_images(num_processes=12):
    """
    Convert all WSI training slides to smaller images using multiple processes.
    Each process will process a contiguous range of slide numbers.

    Args:
        num_processes: Maximum number of worker processes. Defaults to 12,
            the value this function previously hard-coded. Pass None to use
            one process per core (multiprocessing.cpu_count()). The value is
            capped at the number of training slides.

    Raises:
        ValueError: If num_processes is smaller than 1.
    """
    timer = Time()

    # how many processes to use
    if num_processes is None:
        num_processes = multiprocessing.cpu_count()
    if num_processes < 1:
        raise ValueError("num_processes must be at least 1, got %r"
                         % (num_processes,))

    num_train_images = get_num_training_slides()
    if num_train_images < 1:
        # Nothing to split up; avoid dividing by zero and spawning idle workers.
        print("Number of training images: " + str(num_train_images))
        timer.elapsed_display()
        return

    # Never start more workers than there are slides to convert.
    num_processes = min(num_processes, num_train_images)

    print("Number of processes: " + str(num_processes))
    print("Number of training images: " + str(num_train_images))

    # each task specifies a range of slides
    tasks = []
    for num_process in range(1, num_processes + 1):
        # Integer arithmetic keeps the ranges contiguous and exact:
        # task k covers floor((k-1)*n/p)+1 .. floor(k*n/p), so the last task
        # always ends on the final slide.
        start_index = (num_process - 1) * num_train_images // num_processes + 1
        end_index = num_process * num_train_images // num_processes
        tasks.append((start_index, end_index))
        if start_index == end_index:
            print("Task #" + str(num_process) + ": Process slide "
                  + str(start_index))
        else:
            print("Task #" + str(num_process) + ": Process slides "
                  + str(start_index) + " to " + str(end_index))

    # The pool is created only once the final worker count is known, and the
    # context manager guarantees the workers are torn down afterwards.
    with multiprocessing.Pool(num_processes) as pool:
        # start tasks
        results = [pool.apply_async(training_slide_range_to_images, task)
                   for task in tasks]

        # Each task is expected to return the (start, end) range it handled.
        for result in results:
            (start_ind, end_ind) = result.get()
            if start_ind == end_ind:
                print("Done converting slide %d" % start_ind)
            else:
                print("Done converting slides %d through %d"
                      % (start_ind, end_ind))

    timer.elapsed_display()
def singleprocess_training_slides_to_images(slides_path,
                                            n_slides: Optional[int] = None
                                            ):  ## ap
    """
    Convert WSI training slides to smaller images using a single process.

    Args:
        slides_path: Forwarded unchanged to the conversion helper; presumably
            the directory that holds the slides (confirm against the helper).
        n_slides: Forwarded unchanged as the helper's ``n_slides`` keyword;
            presumably an optional cap on how many slides are converted
            (confirm against the helper).
    """
    timer = Time()

    # NOTE(review): the helper name is spelled "trainig_..." here; confirm it
    # matches the spelling used where the helper is defined.
    trainig_slides_to_images(slides_path, n_slides=n_slides)  ## ap

    timer.elapsed_display()
def _dimension_extreme(values, pick):
    """Return (value, 1-based slide number) of the first entry selected by pick (min or max)."""
    index = pick(range(len(values)), key=values.__getitem__)
    return values[index], index + 1


def _format_dimension_summary(dimensions):
    """Build the max/min/avg width, height and size report for a non-empty list of (width, height) tuples."""
    widths = [w for (w, _) in dimensions]
    heights = [h for (_, h) in dimensions]
    sizes = [w * h for (w, h) in dimensions]
    measures = (("width:", widths), ("height:", heights), ("size:", sizes))

    lines = []
    for prefix, pick in (("Max", max), ("Min", min)):
        for label, values in measures:
            value, which = _dimension_extreme(values, pick)
            lines.append("{:<11s} {:14,d} pixels (slide #{:d})".format(
                prefix + " " + label, value, which))
    for label, values in measures:
        average = round(sum(values) / len(values))
        lines.append("{:<11s} {:14,d} pixels".format("Avg " + label, average))
    return "\n".join(lines) + "\n"


def _save_and_show_plot(filename):
    """Apply tight layout to the current figure, save it under STATS_DIR and display it."""
    plt.tight_layout()
    plt.savefig(os.path.join(STATS_DIR, filename))
    plt.show()


def _histogram(values, xlabel, title, filename):
    """Draw, save and display a 64-bin histogram of values with an '# images' y axis."""
    plt.hist(values, bins=64)
    plt.xlabel(xlabel)
    plt.ylabel("# images")
    plt.title(title)
    _save_and_show_plot(filename)


def slide_stats():
    """
    Display statistics/graphs about training slides.

    The dimensions of every training slide (numbered 1..N) are collected and
    then used to:
      * print a max/min/average summary of width, height and size
        (width * height), noting which slide holds each extreme;
      * write that summary plus a "slide number,width,height" table to
        "stats.txt" in STATS_DIR;
      * save (in STATS_DIR) and show two scatter plots of width vs. height
        and three histograms (size in millions of pixels, width/height ratio,
        height/width ratio).

    If there are no training slides, a message is printed and nothing is
    written or plotted.
    """
    t = Time()

    os.makedirs(STATS_DIR, exist_ok=True)

    num_train_images = get_num_training_slides()
    dimensions = []
    for slide_num in range(1, num_train_images + 1):
        slide_filepath = get_training_slide_path(slide_num)
        print("Opening Slide #%d: %s" % (slide_num, slide_filepath))
        slide = open_slide(slide_filepath)
        try:
            (width, height) = slide.dimensions
        finally:
            # Only the dimensions are needed; release the slide handle now.
            slide.close()
        print(" Dimensions: {:,d} x {:,d}".format(width, height))
        dimensions.append((width, height))

    if not dimensions:
        # Averages and plots are undefined without data.
        print("No training slides found; no statistics generated.")
        t.elapsed_display()
        return

    summary = _format_dimension_summary(dimensions)
    print(summary)

    # The file holds the printed summary followed by a per-slide table.
    table_lines = ["slide number,width,height"]
    table_lines.extend(
        "%d,%d,%d" % (slide_num, width, height)
        for slide_num, (width, height) in enumerate(dimensions, start=1))
    with open(os.path.join(STATS_DIR, "stats.txt"), "w") as stats_file:
        stats_file.write(summary + "\n" + "\n".join(table_lines) + "\n")

    t.elapsed_display()

    x, y = zip(*dimensions)
    # One random colour value per slide; mapped through the "prism" colormap.
    colors = np.random.rand(len(dimensions))
    sizes = [10] * len(dimensions)

    plt.scatter(x, y, s=sizes, c=colors, alpha=0.7)
    plt.xlabel("width (pixels)")
    plt.ylabel("height (pixels)")
    plt.title("SVS Image Sizes")
    plt.set_cmap("prism")
    _save_and_show_plot("svs-image-sizes.png")

    plt.clf()
    plt.scatter(x, y, s=sizes, c=colors, alpha=0.7)
    plt.xlabel("width (pixels)")
    plt.ylabel("height (pixels)")
    plt.title("SVS Image Sizes (Labeled with slide numbers)")
    plt.set_cmap("prism")
    for slide_num, point in enumerate(zip(x, y), start=1):
        plt.annotate(str(slide_num), point)
    _save_and_show_plot("svs-image-sizes-slide-numbers.png")

    plt.clf()
    _histogram([w * h / 1000000 for (w, h) in dimensions],
               "width x height (M of pixels)",
               "Distribution of image sizes in millions of pixels",
               "distribution-of-svs-image-sizes.png")

    plt.clf()
    _histogram([w / h for (w, h) in dimensions],
               "width to height ratio",
               "Image shapes (width to height)",
               "w-to-h.png")

    plt.clf()
    _histogram([h / w for (w, h) in dimensions],
               "height to width ratio",
               "Image shapes (height to width)",
               "h-to-w.png")