def save_mask_compare(slides_dir, slide_filenames):
    """Generate mask-comparison visualizations for every slide.

    Results are written into a "Visualization/Masks" directory that is a
    sibling of ``slides_dir``; the directory is recreated from scratch.

    Args:
        slides_dir: Directory containing the slide files.
        slide_filenames: Filenames of the slides to process.
    """
    slide_num = len(slide_filenames)
    mask_save_dir = os.path.join(os.path.dirname(slides_dir), "Visualization/Masks")
    filesystem.overwrite_dir(mask_save_dir)
    # Plain range() is the idiomatic loop index; np.arange allocated an
    # unnecessary array just to iterate.
    for ind in range(slide_num):
        print("processing {}/{}".format(ind + 1, slide_num))
        check_slide_mask(slides_dir, slide_filenames, ind)
def unzip_slides(slides_dir):
    """Unzip all slide archives into a sibling "LiverImages" directory.

    The output directory is recreated from scratch; every file in
    ``slides_dir`` whose name contains "zip" is extracted into it.

    Args:
        slides_dir: Directory containing the zipped slide archives.
    """
    unzip_dir = os.path.join(os.path.dirname(slides_dir), "LiverImages")
    filesystem.overwrite_dir(unzip_dir)
    zip_list = [ele for ele in os.listdir(slides_dir) if "zip" in ele]
    for ind, ele in enumerate(zip_list):
        print("processing {}/{}".format(ind + 1, len(zip_list)))
        # Context manager guarantees the archive handle is closed even if
        # extractall raises (the original leaked it on error).
        with zipfile.ZipFile(os.path.join(slides_dir, ele), 'r') as zip_ref:
            zip_ref.extractall(unzip_dir)
def annotate_images(data_dir, cur_set, cur_cat, slide_level):
    """Overlay annotated region contours and labels on each slide image.

    Reads every "tiff" slide under ``Slides/<cur_set>/<cur_cat>``, draws the
    annotated region contours color-coded by diagnosis (Benign=blue,
    Uncertain=green, Malignant=red) plus a text label, and saves the result
    as a PNG under ``L4AnnotatedImgs/<cur_set>/<cur_cat>``.

    Args:
        data_dir: Dataset root containing "Slides" and "Annotations".
        cur_set: Dataset split subdirectory name.
        cur_cat: Category subdirectory name.
        slide_level: Pyramid level to read; level-0 annotation coordinates
            are divided by ``2 ** slide_level``.
    """
    slides_dir = os.path.join(data_dir, "Slides", cur_set, cur_cat)
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    annotation_dir = os.path.join(data_dir, "Annotations", cur_set, cur_cat)
    l2_annotate_dir = os.path.join(data_dir, "L4AnnotatedImgs", cur_set, cur_cat)
    filesystem.overwrite_dir(l2_annotate_dir)
    # RGB color for each diagnosis description.
    label_colors = {
        "Benign": (0, 0, 255),
        "Uncertain": (0, 255, 0),
        "Malignant": (255, 0, 0),
    }
    # Hoisted: level-0 -> display-level scaling factor is loop-invariant.
    scale = np.power(2, slide_level)
    for cur_slide in slide_list:
        slide_name = os.path.splitext(cur_slide)[0]
        slide_head = openslide.OpenSlide(os.path.join(slides_dir, cur_slide))
        slide_img = slide_head.read_region(
            location=(0, 0), level=slide_level,
            size=slide_head.level_dimensions[slide_level])
        slide_img = np.asarray(slide_img)[:, :, :3]
        annotate_json_path = os.path.join(annotation_dir, slide_name + ".json")
        annotation_dict = format.json_to_dict(annotate_json_path)
        region_dict = annotation_dict['regions']
        for key in region_dict.keys():
            cur_label = region_dict[key]['desp']
            draw_rgb = label_colors.get(cur_label)
            if draw_rgb is None:
                print("Unknown description: {}".format(cur_label))
                continue
            cur_cnts = region_dict[key]['cnts']
            num_points = len(cur_cnts["h"])
            points_coors = np.zeros((2, num_points), dtype=np.int32)
            for ind in range(num_points):
                points_coors[0, ind] = int(round(cur_cnts['h'][ind] / scale))
                points_coors[1, ind] = int(round(cur_cnts['w'][ind] / scale))
            slide_img = combine.overlay_contour(slide_img, points_coors, draw_rgb,
                                                cnt_width=5)
            # Label anchored at the contour's centroid.
            tl_pos = (int(np.mean(points_coors[0])), int(np.mean(points_coors[1])))
            cv2.putText(slide_img, cur_label, tl_pos, cv2.FONT_HERSHEY_SIMPLEX,
                        3, (148, 24, 32), 3, cv2.LINE_AA)
        annotate_slide_path = os.path.join(l2_annotate_dir, slide_name + ".png")
        io.imsave(annotate_slide_path, slide_img)
def extract_karyotypes(karyotype_path):
    """Crop individual chromosome images out of a karyotype image.

    Each chromosome's crop window comes from ``CHROMOSOME_POS_DICT``; crops
    are saved as BMP files in a freshly-created directory named after the
    source image (spaces stripped from the name).

    Args:
        karyotype_path: Path to the karyotype image file.
    """
    img = io.imread(karyotype_path)
    # Keep a single channel when the image is multi-channel.
    if len(img.shape) == 3:
        img = img[:, :, 0]
    img_fullname = os.path.basename(karyotype_path)
    img_dir = os.path.dirname(karyotype_path)
    img_name = os.path.splitext(img_fullname)[0].replace(" ", "")
    karyotype_dir = os.path.join(img_dir, img_name)
    filesystem.overwrite_dir(karyotype_dir)
    # .items() avoids four repeated dict lookups per chromosome key.
    for key, pos in CHROMOSOME_POS_DICT.items():
        sub_img = img[pos['h_start']:pos['h_end'], pos['w_start']:pos['w_end']]
        cur_chromosome_path = os.path.join(karyotype_dir, key + ".bmp")
        io.imsave(cur_chromosome_path, sub_img)
def locate_tissue(slides_dir):
    """Locate tissue regions in each WSI and save contour overlays as PNGs.

    Collects all "svs"/"SVS" slides under ``slides_dir``, finds tissue
    contours, draws the largest contour (replaced by its convex hull when the
    polygon is invalid) in green, and writes the overlay images into a
    sibling "Visualization/TissueLoc" directory, recreated from scratch.

    Args:
        slides_dir: Directory containing the whole-slide image files.
    """
    slide_list = []
    slide_list.extend(filesystem.find_ext_files(slides_dir, "svs"))
    slide_list.extend(filesystem.find_ext_files(slides_dir, "SVS"))
    tissue_dir = os.path.join(os.path.dirname(slides_dir), "Visualization/TissueLoc")
    filesystem.overwrite_dir(tissue_dir)
    for ind, slide_path in enumerate(slide_list):
        print("processing {}/{}".format(ind + 1, len(slide_list)))
        # Locate tissue contours with default parameters.
        cnts, d_factor = tl.locate_tissue_cnts(slide_path,
                                               max_img_size=2048,
                                               smooth_sigma=13,
                                               thresh_val=0.88,
                                               min_tissue_size=120000)
        # Largest contour first; only the largest is drawn below.
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
        # Load the slide at a level small enough to fit within max_size.
        select_level, select_factor = tl.select_slide_level(slide_path, max_size=2048)
        wsi_head = pyramid.load_wsi_head(slide_path)
        slide_img = wsi_head.read_region((0, 0), select_level,
                                         wsi_head.level_dimensions[select_level])
        slide_img = np.asarray(slide_img)[:, :, :3]
        slide_img = np.ascontiguousarray(slide_img, dtype=np.uint8)
        # An invalid (e.g. self-intersecting) polygon is replaced by its
        # convex hull before drawing.
        cnt_arr = cv_cnt_to_np_arr(cnts[0])
        cnt_poly = np_arr_to_poly(cnt_arr)
        if cnt_poly.is_valid:
            valid_cnt = cnts[0].astype(int)
        else:
            valid_arr = poly_to_np_arr(cnt_poly.convex_hull)
            valid_cnt = np_arr_to_cv_cnt(valid_arr).astype(int)
        cv2.drawContours(slide_img, [valid_cnt], 0, (0, 255, 0), 8)
        tissue_save_name = os.path.splitext(os.path.basename(slide_path))[0] + ".png"
        tissue_save_path = os.path.join(tissue_dir, tissue_save_name)
        io.imsave(tissue_save_path, slide_img)
def gen_contour_overlay(slides_dir, annotation_dir, overlap_dir, img_level=4,
                        save_roi=False, save_wsi=True):
    """Overlay malignant-region contours from JSON annotations onto slides.

    For each "tiff" slide, draws every region annotated as "Malignant" in
    green at pyramid level ``img_level``.

    Args:
        slides_dir: Directory with the "tiff" slide files.
        annotation_dir: Directory with the matching "json" annotation files.
        overlap_dir: Output directory (recreated from scratch).
        img_level: Pyramid level to read; level-0 coordinates are divided
            by ``2 ** img_level``.
        save_roi: When true, additionally save a crop of each region's
            bounding box.
        save_wsi: When true, save the full annotated slide image.

    Raises:
        AssertionError: If the slide and annotation counts differ.
    """
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")
    filesystem.overwrite_dir(overlap_dir)
    # Hoisted: scaling factor is the same for every slide and region.
    scale = np.power(2, img_level)
    for ele in slide_list:
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        wsi_img = slide_head.read_region((0, 0), img_level,
                                         slide_head.level_dimensions[img_level])
        wsi_img = np.ascontiguousarray(np.array(wsi_img)[:, :, :3])
        json_path = os.path.join(annotation_dir, os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("Not annotated regions in {}".format(ele))
        for cur_r in region_annos:
            r_desp = region_annos[cur_r]['desp']
            # Only malignant regions are drawn.
            if r_desp != "Malignant":
                continue
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps), np.float32)
            cnt_arr[0] = cur_cnt['h'] / scale
            cnt_arr[1] = cur_cnt['w'] / scale
            cv_cnt = cv2_transform.np_arr_to_cv_cnt(cnt_arr).astype(np.int32)
            cv2.drawContours(wsi_img, [cv_cnt], 0, (0, 255, 0), 3)
            if save_roi:
                overlay_roi_path = os.path.join(
                    overlap_dir, os.path.splitext(ele)[0] + "_r" + cur_r + ".png")
                start_h, end_h = int(min(cnt_arr[0])), int(max(cnt_arr[0]))
                start_w, end_w = int(min(cnt_arr[1])), int(max(cnt_arr[1]))
                io.imsave(overlay_roi_path, wsi_img[start_h:end_h, start_w:end_w])
        if save_wsi:
            overlay_path = os.path.join(overlap_dir, os.path.splitext(ele)[0] + ".png")
            io.imsave(overlay_path, wsi_img)
def test_slide_seg(args):
    """Run tumor segmentation inference on all slides and save predictions.

    Loads the selected model (UNet or PSPNet) from ``args.model_dir``, tiles
    each slide at ``args.slide_level`` into overlapping patches, averages the
    per-patch sigmoid outputs with a weight map, thresholds at 0.5, and saves
    the binary mask as a TIFF per slide. When ``args.save_org`` is set and
    the tumor type is "viable", an additional level-0-resolution mask is
    saved.

    Args:
        args: Parsed CLI namespace; reads model_name, in_channels, class_num,
            model_dir, tumor_type, split, best_model, result_dir, slides_dir,
            slide_level, patch_len, stride_len, normalize, batch_size,
            save_org.

    Raises:
        AssertionError: If ``args.model_name`` is not recognized.
    """
    model = None
    if args.model_name == "UNet":
        model = UNet(n_channels=args.in_channels, n_classes=args.class_num)
    elif args.model_name == "PSP":
        model = pspnet.PSPNet(n_classes=19, input_size=(512, 512))
        model.classification = nn.Conv2d(512, args.class_num, kernel_size=1)
    else:
        raise AssertionError("Unknown model: {}".format(args.model_name))
    model_path = os.path.join(args.model_dir, args.tumor_type, args.split,
                              args.best_model)
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load(model_path))
    model.cuda()
    model.eval()

    since = time.time()
    result_dir = os.path.join(args.result_dir, args.tumor_type)
    filesystem.overwrite_dir(result_dir)
    slide_names = get_slide_filenames(args.slides_dir)
    if args.save_org and args.tumor_type == "viable":
        org_result_dir = os.path.join(result_dir, "Level0")
        filesystem.overwrite_dir(org_result_dir)

    for num, cur_slide in enumerate(slide_names):
        print("--{:02d}/{:02d} Slide:{}".format(num + 1, len(slide_names), cur_slide))
        metrics = defaultdict(float)
        # Load the slide at the prediction level; fall back to ".SVS" case.
        slide_path = os.path.join(args.slides_dir, cur_slide + ".svs")
        if not os.path.exists(slide_path):
            slide_path = os.path.join(args.slides_dir, cur_slide + ".SVS")
        wsi_head = pyramid.load_wsi_head(slide_path)
        p_level = args.slide_level
        pred_h, pred_w = (wsi_head.level_dimensions[p_level][1],
                          wsi_head.level_dimensions[p_level][0])
        slide_img = wsi_head.read_region((0, 0), p_level,
                                         wsi_head.level_dimensions[p_level])
        slide_img = np.asarray(slide_img)[:, :, :3]
        coors_arr = wsi_stride_splitting(pred_h, pred_w, patch_len=args.patch_len,
                                         stride_len=args.stride_len)
        patch_arr, wmap = gen_patch_wmap(slide_img, coors_arr, plen=args.patch_len)
        patch_dset = PatchDataset(patch_arr, mask_arr=None,
                                  normalize=args.normalize,
                                  tumor_type=args.tumor_type)
        patch_loader = DataLoader(patch_dset, batch_size=args.batch_size,
                                  shuffle=False, num_workers=4, drop_last=False)
        ttl_samples = 0
        pred_map = np.zeros_like(wmap).astype(np.float32)
        # batch_ind must not be shadowed: the original reused "ind" for both
        # the batch loop and the per-patch loop.
        for batch_ind, patches in enumerate(patch_loader):
            inputs = Variable(patches.cuda())
            with torch.no_grad():
                outputs = model(inputs)
                # torch.sigmoid replaces the deprecated F.sigmoid.
                preds = torch.sigmoid(outputs)
                preds = torch.squeeze(preds, dim=1).data.cpu().numpy()
            # Slicing clamps past the end, so one slice covers the last
            # (possibly short) batch too.
            patch_coors = coors_arr[batch_ind * args.batch_size:
                                    (batch_ind + 1) * args.batch_size]
            for p_ind, coor in enumerate(patch_coors):
                ph, pw = coor[0], coor[1]
                pred_map[ph:ph + args.patch_len, pw:pw + args.patch_len] += preds[p_ind]
            ttl_samples += inputs.size(0)
        # Average overlapping patch predictions via the weight map.
        prob_pred = np.divide(pred_map, wmap)
        slide_pred = (prob_pred > 0.5).astype(np.uint8)
        pred_save_path = os.path.join(result_dir,
                                      cur_slide + "_" + args.tumor_type + ".tif")
        io.imsave(pred_save_path, slide_pred * 255)
        if args.save_org and args.tumor_type == "viable":
            org_w, org_h = wsi_head.level_dimensions[0]
            org_pred = transform.resize(prob_pred, (org_h, org_w))
            org_pred = (org_pred > 0.5).astype(np.uint8)
            org_save_path = os.path.join(org_result_dir, cur_slide[-3:] + ".tif")
            imsave(org_save_path, org_pred, compress=9)
    time_elapsed = time.time() - since
    print('Testing takes {:.0f}m {:.2f}s'.format(time_elapsed // 60,
                                                 time_elapsed % 60))
mode=args.mode) net.cuda() # Dataset preparetion train_data_root = os.path.join(args.data_dir, "Feas", args.model_type, "train") val_data_root = os.path.join(args.data_dir, "Feas", args.model_type, "val") # create dataset train_dataset = ThyroidDataSet(train_data_root, testing=False, pre_load=args.pre_load) val_dataset = ThyroidDataSet(val_data_root, testing=True, testing_num=128, pre_load=args.pre_load) train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, num_workers=4, pin_memory=True) val_dataloader = DataLoader(dataset=val_dataset, batch_size=args.batch_size, num_workers=4, pin_memory=True) print(">> START training") model_root = os.path.join(args.data_dir, "Models", "SlideModels", args.model_type, args.mode) filesystem.overwrite_dir(model_root) train_cls(train_dataloader, val_dataloader, model_root, net, args)
def test_overwrite_dir():
    """Smoke-test overwrite_dir by recreating the test results directory."""
    target_dir = os.path.join(DATA_DIR, 'input/TestResults')
    overwrite_dir(target_dir)