def gen_patches(imgs_dir, patch_dir, img_list, dset, patch_size=256):
    """
    Cut annotated regions of images into patches and save them per label.

    For each name in img_list the image and its sibling ``<name>.json``
    annotation are read from imgs_dir. Every annotated region is tiled with
    ``patch.wsi_coor_splitting``; a tile is kept when its centre lies within
    the region polygon, resized to 256x256 and written to
    ``patch_dir/dset/<label_map[label]>/<8 random chars>.png``.
    Entries whose image or annotation file is missing are reported and
    skipped.

    Parameters
    -------
    imgs_dir: str
        directory holding the images and their json annotations
    patch_dir: str
        root directory receiving the patches
    img_list: list
        image file names (with extension) inside imgs_dir
    dset: str
        sub-directory of patch_dir the patches are written under
    patch_size: int
        side length of the square tile cut out before resizing

    Returns
    -------
    None

    """
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(imgs_dir, ele)
        img_name = os.path.splitext(ele)[0]
        json_path = os.path.join(imgs_dir, img_name + ".json")
        if not (os.path.exists(img_path) and os.path.exists(json_path)):
            print("File not available")
            # Skip this entry: reading a missing file below would abort the
            # whole run right after announcing the problem.
            continue

        img = io.imread(img_path)
        anno_dict = format.json_to_dict(json_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            # Contour array layout: row 0 holds heights (y), row 1 widths (x).
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            # Tile the bounding box of the region.
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size, overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + patch_size / 2,
                                     patch_start_h + patch_size / 2)
                # Only tiles centred inside the annotated polygon are kept.
                if not patch_center.within(poly_cnt):
                    continue
                patch_img = img[patch_start_h:patch_start_h + patch_size,
                                patch_start_w:patch_start_w + patch_size, :]
                patch_img = transform.resize(patch_img, (256, 256))
                patch_cat_dir = os.path.join(patch_dir, dset,
                                             str(label_map[cur_anno['label']]))
                if not os.path.exists(patch_cat_dir):
                    os.makedirs(patch_cat_dir)
                patch_path = os.path.join(patch_cat_dir, str(uuid.uuid4())[:8] + '.png')
                io.imsave(patch_path, patch_img)
def check_contour_valid(slides_dir, annotation_dir):
    """
    Report annotated regions whose mean point lies outside their contour.

    File names containing "tiff" in slides_dir are matched against file
    names containing "json" in annotation_dir. For each slide the "regions"
    entry of its annotation is read, and every region whose mean (w, h)
    point is not within the region polygon is printed.

    Parameters
    -------
    slides_dir: str
        directory listing the slide files
    annotation_dir: str
        directory holding one ``<slide name>.json`` per slide

    Raises
    -------
    AssertionError
        when the number of slides and annotation files differ

    """
    tiff_names = [name for name in os.listdir(slides_dir) if "tiff" in name]
    json_names = [name for name in os.listdir(annotation_dir) if "json" in name]
    if len(tiff_names) != len(json_names):
        raise AssertionError("Annotation not complete")

    for slide_file in tiff_names:
        stem = os.path.splitext(slide_file)[0]
        annotations = format.json_to_dict(os.path.join(annotation_dir, stem + ".json"))
        regions = annotations["regions"]
        if len(regions) == 0:
            print("Not annotated regions in {}".format(slide_file))

        for region_id in regions:
            contour = regions[region_id]['cnts']
            # Row 0 carries the heights, row 1 the widths.
            cnt_arr = np.zeros((2, len(contour['h'])))
            cnt_arr[0], cnt_arr[1] = contour['h'], contour['w']
            region_poly = poly_transform.np_arr_to_poly(cnt_arr)
            mean_point = Point(np.mean(cnt_arr[1]), np.mean(cnt_arr[0]))
            if mean_point.within(region_poly):
                continue
            print("{} in {}".format(region_id, slide_file))
def gen_gist_features(roi_dir, fea_dir, mode, args):
    """
    Extract gist descriptors from the annotated regions of each ROI image.

    Images whose name contains "png" are listed from ``roi_dir/mode``; each
    needs a sibling ``<name>.json`` annotation. Every annotated region is
    tiled with ``patch.wsi_coor_splitting`` and ``gist.extract`` is run on
    the tiles whose centre lies within the region polygon. One file per
    region is written to
    ``fea_dir/args.model_name/mode/<label_map[label]>/<image>_r<region>.h5``
    holding 'feat' (descriptors) and 'bbox' ([start_h, start_w, size, size]).
    Entries whose image or annotation file is missing are reported and
    skipped.

    Parameters
    -------
    roi_dir: str
        root directory of the ROI images
    fea_dir: str
        root directory receiving the feature files
    mode: str
        sub-directory of roi_dir to process, mirrored under fea_dir
    args: object
        must expose ``model_name`` and ``patch_size``

    Returns
    -------
    None

    """
    fea_dir = os.path.join(fea_dir, args.model_name, mode)
    data_dir = os.path.join(roi_dir, mode)
    img_list = [ele for ele in os.listdir(data_dir) if "png" in ele]
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        cur_img_path = os.path.join(data_dir, ele)
        img_name = os.path.splitext(ele)[0]
        cur_anno_path = os.path.join(data_dir, img_name + ".json")
        if not (os.path.exists(cur_img_path) and os.path.exists(cur_anno_path)):
            print("File not available")
            # Skip this entry: loading a missing file below would abort the
            # whole run right after announcing the problem.
            continue

        img = io.imread(cur_img_path)
        anno_dict = format.json_to_dict(cur_anno_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            region_label = str(label_map[cur_anno['label']])
            region_name = "_".join([img_name, 'r' + cur_r])
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            # Contour array layout: row 0 holds heights (y), row 1 widths (x).
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            # Tile the bounding box of the region.
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, args.patch_size, overlap_flag=True)

            Feas, BBoxes = [], []
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + args.patch_size / 2,
                                     patch_start_h + args.patch_size / 2)
                # Only tiles centred inside the annotated polygon are described.
                if not patch_center.within(poly_cnt):
                    continue
                patch_img = img[patch_start_h:patch_start_h + args.patch_size,
                                patch_start_w:patch_start_w + args.patch_size, :]
                Feas.append(gist.extract(patch_img))
                BBoxes.append([patch_start_h, patch_start_w,
                               args.patch_size, args.patch_size])
            fea_dict = {'feat': np.asarray(Feas), 'bbox': np.asarray(BBoxes)}

            # save features
            cat_fea_dir = os.path.join(fea_dir, region_label)
            if not os.path.exists(cat_fea_dir):
                os.makedirs(cat_fea_dir)
            dd.io.save(os.path.join(cat_fea_dir, region_name + ".h5"), fea_dict)
def test_poly_transform():
    """
    Round-trip a small contour through the polygon conversion helpers.

    Builds a polygon directly from the numpy contour and via a point list,
    requires both to compare equal, builds the bounding-box polygon, and
    finally requires that converting the first polygon back yields the
    original array.

    Raises
    -------
    AssertionError
        when either conversion check fails

    """
    np_arr = np.array([
        [1., 2., 4., 5., 3.],
        [1., 3., 4., 2., 0.],
    ])

    # numpy array -> polygon, directly and through a point list
    direct_poly = np_arr_to_poly(np_arr)
    print("Bounds of poly1 is: ", direct_poly.exterior.bounds)
    via_points_poly = point_list_to_poly(np_arr_to_point_list(np_arr))
    print("Bounds of poly2 is: ", via_points_poly.exterior.bounds)
    if direct_poly != via_points_poly:
        raise AssertionError("Conversion error")

    # bounding box of the contour: row 0 is height, row 1 is width
    heights, widths = np_arr[0], np_arr[1]
    min_h, max_h = np.min(heights), np.max(heights)
    min_w, max_w = np.min(widths), np.max(widths)
    bbox_poly = bbox_to_poly(min_h, min_w, max_h, max_w)
    print("Bounds of poly3 is: ", bbox_poly.exterior.bounds)

    # polygon -> numpy array must give back the input
    restored_arr = poly_to_np_arr(direct_poly)
    print("Numpy coordinates of poly1 is:")
    if not np.array_equal(np_arr, restored_arr):
        raise AssertionError("Conversion error")
def contour_valid(cnt_arr):
    """
    Tell whether a contour forms a valid polygon.

    Parameters
    -------
    cnt_arr: np.array
        contour with standard numpy 2d array format

    Returns
    -------
    valid: boolean
        True when the polygon built from cnt_arr reports ``is_valid``,
        otherwise False

    """
    return bool(np_arr_to_poly(cnt_arr).is_valid)
def locate_tissue(slides_dir):
    """
    Draw the largest tissue contour of every slide and save the overlay.

    Slides with extension "svs" or "SVS" are collected from slides_dir. For
    each one the tissue contours are located, the contour with the largest
    ``cv2.contourArea`` is chosen (replaced by its convex hull when it does
    not form a valid polygon), drawn in colour (0, 255, 0) with thickness 8
    on the slide image read at the level picked by
    ``tl.select_slide_level``, and the result is saved as
    ``<slide name>.png`` under ``Visualization/TissueLoc`` next to
    slides_dir. That output directory is reset with
    ``filesystem.overwrite_dir`` first.

    Parameters
    -------
    slides_dir: str
        directory holding the slide files

    Returns
    -------
    None

    """
    slide_paths = list(filesystem.find_ext_files(slides_dir, "svs"))
    slide_paths.extend(filesystem.find_ext_files(slides_dir, "SVS"))

    tissue_dir = os.path.join(os.path.dirname(slides_dir), "Visualization/TissueLoc")
    filesystem.overwrite_dir(tissue_dir)

    total = len(slide_paths)
    for position, slide_path in enumerate(slide_paths, start=1):
        print("processing {}/{}".format(position, total))

        # tissue contours with the default parameters, largest area first
        cnts, _ = tl.locate_tissue_cnts(slide_path, max_img_size=2048,
                                        smooth_sigma=13, thresh_val=0.88,
                                        min_tissue_size=120000)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

        # slide image at the selected level, alpha channel dropped
        select_level, _ = tl.select_slide_level(slide_path, max_size=2048)
        wsi_head = pyramid.load_wsi_head(slide_path)
        level_size = wsi_head.level_dimensions[select_level]
        region = wsi_head.read_region((0, 0), select_level, level_size)
        slide_img = np.ascontiguousarray(np.asarray(region)[:, :, :3], dtype=np.uint8)

        # an invalid polygon is replaced by its convex hull before drawing
        largest_cnt = cnts[0]
        cnt_poly = np_arr_to_poly(cv_cnt_to_np_arr(largest_cnt))
        if cnt_poly.is_valid:
            valid_cnt = largest_cnt.astype(int)
        else:
            hull_arr = poly_to_np_arr(cnt_poly.convex_hull)
            valid_cnt = np_arr_to_cv_cnt(hull_arr).astype(int)
        cv2.drawContours(slide_img, [valid_cnt], 0, (0, 255, 0), 8)

        # save the overlay under the slide's base name
        slide_stem = os.path.splitext(os.path.basename(slide_path))[0]
        io.imsave(os.path.join(tissue_dir, slide_stem + ".png"), slide_img)
def contour_to_poly_valid(cnt_arr):
    """
    Return a contour that forms a valid polygon.

    Parameters
    -------
    cnt_arr: np.array
        contour with standard numpy 2d array format

    Returns
    -------
    cnt_valid_arr: np.array
        cnt_arr itself when its polygon is valid, otherwise the contour of
        the polygon's convex hull, in standard numpy 2d array format

    """
    poly = np_arr_to_poly(cnt_arr)
    if poly.is_valid:
        return cnt_arr
    # An invalid polygon is replaced by its convex hull.
    return poly_to_np_arr(poly.convex_hull)
def gen_slide_patches(slide_dir, slide_name, patch_dir, patch_size=256):
    """
    Cut the annotated regions of one slide image into patches per label.

    Reads ``<slide_name>.png`` and ``<slide_name>.json`` from slide_dir.
    Every annotated region is tiled with ``patch.wsi_coor_splitting``; a
    tile is kept when its centre lies within the region polygon and is
    written unchanged to
    ``patch_dir/<label_map[label]>/<8 random chars>.png``.
    When the image or the annotation file is missing, the problem is
    reported and nothing is generated.

    Parameters
    -------
    slide_dir: str
        directory holding the slide image and its json annotation
    slide_name: str
        base name (without extension) of the slide
    patch_dir: str
        root directory receiving the patches
    patch_size: int
        side length of the square tiles

    Returns
    -------
    None

    """
    img_path = os.path.join(slide_dir, slide_name + ".png")
    json_path = os.path.join(slide_dir, slide_name + ".json")
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print("File not available")
        # Stop here: reading a missing file below would only raise right
        # after announcing the problem.
        return

    img = io.imread(img_path)
    anno_dict = format.json_to_dict(json_path)
    for cur_r in anno_dict:
        cur_anno = anno_dict[cur_r]
        x_coors, y_coors = cur_anno['w'], cur_anno['h']
        # Contour array layout: row 0 holds heights (y), row 1 widths (x).
        cnt_arr = np.zeros((2, len(x_coors)), np.int32)
        cnt_arr[0], cnt_arr[1] = y_coors, x_coors
        poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

        # Tile the bounding box of the region.
        start_x, start_y = min(x_coors), min(y_coors)
        cnt_w = max(x_coors) - start_x + 1
        cnt_h = max(y_coors) - start_y + 1
        coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size, overlap_flag=True)
        for cur_h, cur_w in coors_arr:
            patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
            patch_center = Point(patch_start_w + patch_size / 2,
                                 patch_start_h + patch_size / 2)
            # Only tiles centred inside the annotated polygon are kept.
            if not patch_center.within(poly_cnt):
                continue
            patch_img = img[patch_start_h:patch_start_h + patch_size,
                            patch_start_w:patch_start_w + patch_size, :]
            patch_cat_dir = os.path.join(patch_dir, str(label_map[cur_anno['label']]))
            if not os.path.exists(patch_cat_dir):
                os.makedirs(patch_cat_dir)
            patch_path = os.path.join(patch_cat_dir, str(uuid.uuid4())[:8] + '.png')
            io.imsave(patch_path, patch_img)
def gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256):
    """
    Export slides and their usable annotations at one pyramid level.

    File names containing "tiff" in slides_dir are matched against file
    names containing "json" in annotation_dir. For every slide with
    annotated regions, each region contour is divided by ``2 ** level`` and
    dropped when it has negative coordinates or exceeds the dimensions of
    the requested level. A region is kept as soon as one tile produced by
    ``patch.wsi_coor_splitting`` has its centre within the region polygon.
    When at least one region is kept, the slide image at ``level`` is saved
    as ``<slide name>.png`` and the kept regions (label, h, w) as
    ``<slide name>.json`` in level_dir; otherwise a notice is printed.

    Parameters
    -------
    slides_dir: str
        directory holding the slides
    annotation_dir: str
        directory holding one ``<slide name>.json`` per slide
    level_dir: str
        output directory for the level images and annotations
    level: int
        slide pyramid level to export
    size: int
        side length of the tiles used to test a region

    Raises
    -------
    AssertionError
        when the number of slides and annotation files differ

    """
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    # NOTE(review): assumes each pyramid level halves the previous one;
    # verify against slide_head.level_downsamples for the slides in use.
    scale = np.power(2, level)
    for ind, ele in enumerate(slide_list):
        if ind > 0 and ind % 20 == 0:
            print("Processing {:3d}/{}".format(ind, len(slide_list)))
        slide_name = os.path.splitext(ele)[0]
        json_path = os.path.join(annotation_dir, slide_name + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            continue

        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        # The handle is closed even when a region fails, so a long run does
        # not accumulate open slide files.
        try:
            level_dim = slide_head.level_dimensions[level]
            img_w, img_h = level_dim
            new_anno_dict = {}
            for cur_r in region_annos:
                cur_cnt = region_annos[cur_r]['cnts']
                cur_desp = region_annos[cur_r]['desp']
                num_ps = len(cur_cnt['h'])
                # Row 0 carries heights, row 1 widths; the int32 array
                # truncates the scaled coordinates.
                cnt_arr = np.zeros((2, num_ps), np.int32)
                cnt_arr[0] = [val / scale for val in cur_cnt['h']]
                cnt_arr[1] = [val / scale for val in cur_cnt['w']]
                if np.min(cnt_arr[0]) < 0 or np.min(cnt_arr[1]) < 0:
                    continue
                if np.max(cnt_arr[0]) > img_h or np.max(cnt_arr[1]) > img_w:
                    continue

                poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
                start_h, start_w = np.min(cnt_arr[0]), np.min(cnt_arr[1])
                cnt_h = np.max(cnt_arr[0]) - start_h + 1
                cnt_w = np.max(cnt_arr[1]) - start_w + 1
                coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, size, overlap_flag=True)
                for cur_h, cur_w in coors_arr:
                    patch_center = Point(cur_w + start_w + size / 2,
                                         cur_h + start_h + size / 2)
                    # One tile centred inside the polygon is enough to keep
                    # the region.
                    if patch_center.within(poly_cnt):
                        new_anno_dict[cur_r] = {
                            'label': cur_desp,
                            'h': cnt_arr[0].tolist(),
                            'w': cnt_arr[1].tolist()
                        }
                        break

            if len(new_anno_dict) > 0:
                wsi_img = slide_head.read_region((0, 0), level, level_dim)
                wsi_img = np.array(wsi_img)[:, :, :3]
                io.imsave(os.path.join(level_dir, slide_name + ".png"), wsi_img)
                format.dict_to_json(new_anno_dict,
                                    os.path.join(level_dir, slide_name + ".json"))
            else:
                print("---{} have no proper regions---".format(slide_name))
        finally:
            slide_head.close()
def gen_patches(slide_path, annotation_dict, args):
    """
    Generate patch images and masks based on annotations as well as slide
    information.

    For every annotated region, 10 random crops of ``args.crop_size`` are
    drawn with their top-left corner inside the region's bounding box and
    the crop fully inside the slide level. A crop is used when its centre
    lies within the region polygon: the image is read from the slide, the
    mask from ``gen_patch_mask`` is zeroed outside the polygon, both are
    resized to ``args.save_size`` and saved under the same random 8-char
    name in the directories returned by ``get_save_dirs``. Regions in which
    no crop can fit are reported and skipped.

    Parameters
    -------
    slide_path: str
        path of the slide opened with openslide
    annotation_dict: dict
        region key -> coordinate array; NOTE(review): assumed to be an
        (N, 2) array of (w, h) points in level-0 coordinates - confirm
    args: object
        must expose ``slide_level``, ``crop_size`` and ``save_size`` (plus
        whatever ``get_save_dirs`` and ``gen_patch_mask`` read)

    Returns
    -------
    None
        the process exits through ``sys.exit`` when ``args.slide_level``
        is not available in the slide

    """
    # load slide header information
    slide_head = openslide.OpenSlide(slide_path)
    # The handle is released on every exit path, including sys.exit().
    try:
        slide_name = os.path.basename(slide_path)
        # Levels are numbered from 0 (highest resolution) to
        # level_count - 1 (lowest resolution).
        if args.slide_level < 0 or args.slide_level >= slide_head.level_count:
            print("level {} not availabel in {}".format(args.slide_level, slide_name))
            sys.exit()

        img_save_dir, mask_save_dir = get_save_dirs(args)
        downsample = slide_head.level_downsamples[args.slide_level]
        # level_dimensions entries are (width, height)
        level_w, level_h = slide_head.level_dimensions[args.slide_level]
        for cur_reg in annotation_dict:
            # Bring the annotation into the coordinates of the chosen level.
            coords = (annotation_dict[cur_reg] / downsample).astype(np.int32)
            coords = np.transpose(np.array(coords))
            coords[[0, 1]] = coords[[1, 0]]  # swap width and height
            min_h, max_h = np.min(coords[0, :]), np.max(coords[0, :])  # row 0 is height
            min_w, max_w = np.min(coords[1, :]), np.max(coords[1, :])  # row 1 is width
            # Polygon built from the 2d array: height first, width second.
            cur_poly = poly_transform.np_arr_to_poly(np.asarray(coords))

            # Draw corners only where the whole crop fits inside the level.
            # This equals rejecting overflowing draws, but cannot loop
            # forever when no position fits.
            high_h = min(max_h, level_h - args.crop_size)
            high_w = min(max_w, level_w - args.crop_size)
            if high_h <= min_h or high_w <= min_w:
                print("no valid patch location for region {} in {}".format(
                    cur_reg, slide_name))
                continue

            num = 0
            while num < 10:  # 10 attempts per region
                num += 1
                rand_h = np.random.randint(min_h, high_h)
                rand_w = np.random.randint(min_w, high_w)
                cen_h = int(rand_h + args.crop_size / 2)
                cen_w = int(rand_w + args.crop_size / 2)
                # The crop is used only when its centre is in the polygon.
                if not Point(cen_w, cen_h).within(cur_poly):
                    continue

                # crop patch image; read_region takes the top-left corner in
                # level-0 coordinates, so the level coordinates are scaled
                # back up.
                location = (int(rand_w * downsample), int(rand_h * downsample))
                cur_patch = slide_head.read_region(
                    location, args.slide_level, (args.crop_size, args.crop_size))
                cur_patch = np.asarray(cur_patch)[:, :, :3]  # drop alpha

                # correct patch mask on ignore pixels
                ## very slow (crop_size * crop_size point tests), need to
                ## speed this ignore part
                patch_mask = gen_patch_mask(args)
                for pw in range(rand_w, rand_w + args.crop_size):
                    for ph in range(rand_h, rand_h + args.crop_size):
                        # pixels outside the polygon are cleared in the mask
                        if not Point(pw, ph).within(cur_poly):
                            patch_mask[ph - rand_h, pw - rand_w] = 0

                save_img = transform.resize(
                    cur_patch, (args.save_size, args.save_size))
                # order=0: Nearest-neighbor interpolation
                save_mask = transform.resize(
                    patch_mask, (args.save_size, args.save_size), order=0)
                save_mask = (save_mask * 255).astype(np.uint8)

                # image and mask share one random 8-character name
                img_fullname = str(uuid.uuid4())[:8] + ".png"
                save_img_path = os.path.join(img_save_dir, img_fullname)
                save_mask_path = os.path.join(mask_save_dir, img_fullname)
                io.imsave(save_img_path, save_img)
                io.imsave(save_mask_path, save_mask)
    finally:
        slide_head.close()
def _slide_patch_generator(slide_path, annotation_dict, annotation_label):
    """
    generates the image and ground-truth patches from a slide and yields them.

    For every annotated region, ``opt.patch_gen_try_num`` random crops of
    ``opt.crop_size`` are drawn with their top-left corner inside the
    region's bounding box and the crop fully inside the slide level. A crop
    is used when its centre lies within the region polygon. Regions whose
    polygon cannot be built, or in which no crop can fit, are logged and
    skipped.

    :param slide_path: path of the slide opened with openslide
    :param annotation_dict: region key -> coordinate array;
        NOTE(review): assumed to be an (N, 2) array of (w, h) points in
        level-0 coordinates - confirm
    :param annotation_label: key of annotation_label_dict whose value is
        passed to _generate_ground_truth as pixel_annotation_value
    :return: generator of (patch, ground_truth) uint8 arrays resized to
        (opt.save_size, opt.save_size)
    :raises Exception: when opt.slide_level is not available in the slide
        or annotation_label is not in annotation_label_dict
    """
    slide = openslide.OpenSlide(slide_path)
    # The handle is released when the generator finishes, fails or is closed.
    try:
        slide_name = os.path.basename(slide_path)

        if opt.slide_level < 0 or opt.slide_level >= slide.level_count:
            raise Exception(
                f'level {opt.slide_level} is not available in the {slide_name}')

        if annotation_label not in annotation_label_dict:
            raise Exception(
                f"a value of an annotation '{annotation_label}' is not set")

        downsample = slide.level_downsamples[opt.slide_level]
        # level_dimensions entries are (width, height)
        level_w, level_h = slide.level_dimensions[opt.slide_level]
        for curr_annotation_region in annotation_dict:
            # Bring the annotation into the coordinates of the chosen level.
            annotation_coords = \
                (annotation_dict[curr_annotation_region] / downsample).astype(np.int32)
            annotation_coords = np.transpose(np.array(annotation_coords))
            # swap width and height
            annotation_coords[[0, 1]] = annotation_coords[[1, 0]]

            min_h, max_h = np.min(annotation_coords[0, :]), np.max(
                annotation_coords[0, :])
            min_w, max_w = np.min(annotation_coords[1, :]), np.max(
                annotation_coords[1, :])

            try:
                annotation_polygon = poly_transform.np_arr_to_poly(
                    np.asarray(annotation_coords))
            except ValueError as e:
                logging.error(
                    f'Failed to transform coordinates to polygon, this will be skipped,'
                    f' slide_name: {slide_name}, region: {curr_annotation_region}')
                logging.error(f'Exception: {e}')
                continue

            # Draw corners only where the whole crop fits inside the level.
            # This equals rejecting overflowing draws, but cannot loop
            # forever when no position fits.
            high_h = min(max_h, level_h - opt.crop_size)
            high_w = min(max_w, level_w - opt.crop_size)
            if high_h <= min_h or high_w <= min_w:
                logging.warning(
                    f'No valid patch location, this will be skipped,'
                    f' slide_name: {slide_name}, region: {curr_annotation_region}')
                continue

            patch_generate_try_cnt = 0
            while patch_generate_try_cnt < opt.patch_gen_try_num:
                patch_generate_try_cnt += 1
                rand_h = np.random.randint(min_h, high_h)
                rand_w = np.random.randint(min_w, high_w)

                patch_center_coord_h = int(rand_h + opt.crop_size / 2)
                patch_center_coord_w = int(rand_w + opt.crop_size / 2)
                patch_center_point = Point(patch_center_coord_w,
                                           patch_center_coord_h)
                # The crop is used only when its centre is in the polygon.
                if not patch_center_point.within(annotation_polygon):
                    continue

                # read_region takes the top-left corner in level-0
                # coordinates, so the level coordinates are scaled back up.
                location = (int(rand_w * downsample), int(rand_h * downsample))
                curr_patch = slide.read_region(location, opt.slide_level,
                                               (opt.crop_size, opt.crop_size))
                curr_patch = np.asarray(curr_patch)[:, :, :3]  # drop alpha

                curr_patch_ground_truth = _generate_ground_truth(
                    rand_w, rand_h,
                    crop_size=opt.crop_size,
                    annotation_polygon=annotation_polygon,
                    pixel_annotation_value=annotation_label_dict[annotation_label])

                curr_patch = transform.resize(curr_patch,
                                              (opt.save_size, opt.save_size))
                # order=0: Nearest-neighbor interpolation
                curr_patch_ground_truth = transform.resize(
                    curr_patch_ground_truth, (opt.save_size, opt.save_size),
                    order=0, anti_aliasing=False)

                # the resized image is scaled by 255 before the uint8 cast
                curr_patch = (curr_patch * 255).astype(np.uint8)
                curr_patch_ground_truth = curr_patch_ground_truth.astype(np.uint8)

                yield curr_patch, curr_patch_ground_truth
    finally:
        slide.close()
def gen_patches(slide_path, annotation_dict, args):
    """
    Generate patch images and masks based on annotations as well as slide
    information.

    For every annotated region, 10 random crops of ``args.crop_size`` are
    drawn with their top-left corner inside the region's bounding box and
    the crop fully inside the slide level. A crop is used when its centre
    lies within the region polygon: the image is read from the slide, the
    mask from ``gen_patch_mask`` is zeroed outside the polygon, both are
    resized to ``args.save_size`` and saved under the same random 8-char
    name in the directories returned by ``get_save_dirs``. Regions in which
    no crop can fit are reported and skipped.

    Parameters
    -------
    slide_path: str
        path of the slide opened with openslide
    annotation_dict: dict
        region key -> coordinate array; NOTE(review): assumed to be an
        (N, 2) array of (w, h) points in level-0 coordinates - confirm
    args: object
        must expose ``slide_level``, ``crop_size`` and ``save_size`` (plus
        whatever ``get_save_dirs`` and ``gen_patch_mask`` read)

    Returns
    -------
    None
        the process exits through ``sys.exit`` when ``args.slide_level``
        is not available in the slide

    """
    # load slide header information
    slide_head = openslide.OpenSlide(slide_path)
    # The handle is released on every exit path, including sys.exit().
    try:
        slide_name = os.path.basename(slide_path)
        if args.slide_level < 0 or args.slide_level >= slide_head.level_count:
            print("level {} not availabel in {}".format(args.slide_level, slide_name))
            sys.exit()

        img_save_dir, mask_save_dir = get_save_dirs(args)
        downsample = slide_head.level_downsamples[args.slide_level]
        # level_dimensions entries are (width, height)
        level_w, level_h = slide_head.level_dimensions[args.slide_level]
        for cur_reg in annotation_dict:
            # Bring the annotation into the coordinates of the chosen level.
            coords = (annotation_dict[cur_reg] / downsample).astype(np.int32)
            coords = np.transpose(np.array(coords))
            coords[[0, 1]] = coords[[1, 0]]  # swap width and height
            min_h, max_h = np.min(coords[0, :]), np.max(coords[0, :])
            min_w, max_w = np.min(coords[1, :]), np.max(coords[1, :])
            cur_poly = poly_transform.np_arr_to_poly(np.asarray(coords))

            # Draw corners only where the whole crop fits inside the level.
            # This equals rejecting overflowing draws, but cannot loop
            # forever when no position fits.
            high_h = min(max_h, level_h - args.crop_size)
            high_w = min(max_w, level_w - args.crop_size)
            if high_h <= min_h or high_w <= min_w:
                print("no valid patch location for region {} in {}".format(
                    cur_reg, slide_name))
                continue

            num = 0
            while num < 10:
                num += 1
                rand_h = np.random.randint(min_h, high_h)
                rand_w = np.random.randint(min_w, high_w)
                cen_h = int(rand_h + args.crop_size / 2)
                cen_w = int(rand_w + args.crop_size / 2)
                # The crop is used only when its centre is in the polygon.
                if not Point(cen_w, cen_h).within(cur_poly):
                    continue

                # crop patch image; read_region takes the top-left corner in
                # level-0 coordinates, so the level coordinates are scaled
                # back up.
                location = (int(rand_w * downsample), int(rand_h * downsample))
                cur_patch = slide_head.read_region(
                    location, args.slide_level, (args.crop_size, args.crop_size))
                cur_patch = np.asarray(cur_patch)[:, :, :3]

                # correct patch mask on ignore pixels
                ## very slow, need to speed this ignore part
                patch_mask = gen_patch_mask(args)
                for pw in range(rand_w, rand_w + args.crop_size):
                    for ph in range(rand_h, rand_h + args.crop_size):
                        if not Point(pw, ph).within(cur_poly):
                            patch_mask[ph - rand_h, pw - rand_w] = 0

                save_img = transform.resize(
                    cur_patch, (args.save_size, args.save_size))
                # order=0: Nearest-neighbor interpolation
                save_mask = transform.resize(
                    patch_mask, (args.save_size, args.save_size), order=0)
                save_mask = (save_mask * 255).astype(np.uint8)

                # image and mask share one random 8-character name
                img_fullname = str(uuid.uuid4())[:8] + ".png"
                save_img_path = os.path.join(img_save_dir, img_fullname)
                save_mask_path = os.path.join(mask_save_dir, img_fullname)
                io.imsave(save_img_path, save_img)
                io.imsave(save_mask_path, save_mask)
    finally:
        slide_head.close()
def gen_samples(slides_dir, patch_level, patch_size, tumor_type, slide_list, dset, overlap_mode):
    """
    Cut tissue patches and the matching mask patches out of each slide.

    For every name in slide_list the largest tissue contour is located,
    scaled to patch_level and split into patch coordinates according to
    overlap_mode. The slide image at patch_level and the mask
    ``<name>_<tumor_type>.tif`` are cropped at those coordinates. Crops that
    are not full size, whose ``patch.patch_bk_ratio`` exceeds 0.88, or (for
    "half_overlap" with tumor_type "viable") whose mask has less than 5%
    non-zero pixels are dropped. Images go to
    ``<parent of slides_dir>/Patches/<tumor_type>/<dset>/imgs`` as jpg and
    masks to ``.../masks`` as png. For dset "val" the slide and its mask are
    also copied to ``<parent of slides_dir>/TestSlides``.

    Parameters
    -------
    slides_dir: str
        directory holding the slides and their mask files
    patch_level: int
        slide pyramid level the patches are cut from
    patch_size: int
        side length of the square patches
    tumor_type: str
        mask suffix and output sub-directory name
    slide_list: list
        slide base names (without extension); sorted in place
    dset: str
        output sub-directory name; "val" triggers the TestSlides copy
    overlap_mode: str
        "half_overlap" or "self_overlap"

    Raises
    -------
    NotImplementedError
        when overlap_mode is neither of the two supported values (raised
        while processing the first slide)

    """
    # prepare saving directory
    patch_path = os.path.join(os.path.dirname(slides_dir), "Patches", tumor_type)
    patch_img_dir = os.path.join(patch_path, dset, "imgs")
    patch_mask_dir = os.path.join(patch_path, dset, "masks")
    for save_dir in (patch_img_dir, patch_mask_dir):
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    # processing slide one-by-one
    ttl_patch = 0
    slide_list.sort()
    for ind, ele in enumerate(slide_list):
        print("Processing {} {}/{}".format(ele, ind+1, len(slide_list)))
        cur_slide_path = os.path.join(slides_dir, ele + ".svs")
        # NOTE(review): the previous check re-assigned the same ".svs" path,
        # which had no effect. Falling back to an existing ".SVS" file is
        # the presumed intent - confirm.
        if not os.path.exists(cur_slide_path):
            upper_ext_path = os.path.join(slides_dir, ele + ".SVS")
            if os.path.exists(upper_ext_path):
                cur_slide_path = upper_ext_path

        # locate contours and generate batches based on tissue contours
        cnts, _ = tl.locate_tissue_cnts(cur_slide_path, max_img_size=2048,
                                        smooth_sigma=13, thresh_val=0.88,
                                        min_tissue_size=120000)
        select_level, select_factor = tl.select_slide_level(cur_slide_path, max_size=2048)
        cnts = sorted(cnts, key=lambda x: cv2.contourArea(x), reverse=True)

        # scale the largest contour to patch_level
        wsi_head = pyramid.load_wsi_head(cur_slide_path)
        cnt_scale = select_factor / int(wsi_head.level_downsamples[patch_level])
        tissue_arr = cv_cnt_to_np_arr(cnts[0] * cnt_scale).astype(np.int32)
        # convert tissue_arr to convex if poly is not valid
        tissue_poly = np_arr_to_poly(tissue_arr)
        if not tissue_poly.is_valid:
            tissue_arr = poly_to_np_arr(tissue_poly.convex_hull).astype(int)

        if overlap_mode == "half_overlap":
            level_w, level_h = wsi_head.level_dimensions[patch_level]
            coors_arr = contour.contour_patch_splitting_half_overlap(
                tissue_arr, level_h, level_w, patch_size, inside_ratio=0.80)
        elif overlap_mode == "self_overlap":
            coors_arr = contour.contour_patch_splitting_self_overlap(
                tissue_arr, patch_size, inside_ratio=0.80)
        else:
            raise NotImplementedError("unknown overlapping mode")

        wsi_img = wsi_head.read_region((0, 0), patch_level,
                                       wsi_head.level_dimensions[patch_level])
        wsi_img = np.asarray(wsi_img)[:, :, :3]
        mask_path = os.path.join(slides_dir, "_".join([ele, tumor_type+".tif"]))
        mask_img = io.imread(mask_path)
        # NOTE(review): the result is scaled by 255 twice (before and after
        # the uint8 cast); left as found - confirm against the value range
        # of the mask files.
        wsi_mask = (transform.resize(mask_img, wsi_img.shape[:2], order=0) * 255).astype(np.uint8) * 255

        if dset == "val":
            test_slides_dir = os.path.join(os.path.dirname(slides_dir), "TestSlides")
            # Without an existing directory shutil.copy would create a plain
            # file called "TestSlides".
            if not os.path.exists(test_slides_dir):
                os.makedirs(test_slides_dir)
            for src_path in (cur_slide_path, mask_path):
                # Compare against the destination file name. Joining the
                # directory with the full source path never points at the
                # copied file.
                dst_path = os.path.join(test_slides_dir, os.path.basename(src_path))
                if not os.path.exists(dst_path):
                    shutil.copy(src_path, dst_path)

        for cur_arr in coors_arr:
            cur_h, cur_w = cur_arr[0], cur_arr[1]
            cur_patch = wsi_img[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            # crops cut short by the image border are dropped
            if cur_patch.shape[0] != patch_size or cur_patch.shape[1] != patch_size:
                continue
            cur_mask = wsi_mask[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            # background RGB (235, 210, 235) * [0.299, 0.587, 0.114]
            if patch.patch_bk_ratio(cur_patch, bk_thresh=0.864) > 0.88:
                continue

            if overlap_mode == "half_overlap" and tumor_type == "viable":
                pixel_ratio = np.sum(cur_mask > 0) * 1.0 / cur_mask.size
                if pixel_ratio < 0.05:
                    continue

            patch_name = ele + "_" + str(uuid.uuid1())[:8]
            io.imsave(os.path.join(patch_img_dir, patch_name+".jpg"), cur_patch)
            io.imsave(os.path.join(patch_mask_dir, patch_name+".png"), cur_mask)
            ttl_patch += 1

    print("There are {} patches in total.".format(ttl_patch))