def gen_patches(imgs_dir, patch_dir, img_list, dset, patch_size=256):
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(imgs_dir, ele)
        img_name = os.path.splitext(ele)[0]
        json_path = os.path.join(imgs_dir, img_name + ".json")
        if not (os.path.exists(img_path) and os.path.exists(json_path)):
            print("File not available")
            continue
        img = io.imread(img_path)
        anno_dict = format.json_to_dict(json_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            # build the region contour polygon from the annotated coordinates
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            # bounding box of the region
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size,
                                                 overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + patch_size / 2,
                                     patch_start_h + patch_size / 2)
                # keep the patch only when its center falls inside the region contour
                if patch_center.within(poly_cnt):
                    patch_img = img[patch_start_h:patch_start_h + patch_size,
                                    patch_start_w:patch_start_w + patch_size, :]
                    patch_img = transform.resize(patch_img, (256, 256))
                    patch_cat_dir = os.path.join(patch_dir, dset,
                                                 str(label_map[cur_anno['label']]))
                    if not os.path.exists(patch_cat_dir):
                        os.makedirs(patch_cat_dir)
                    patch_path = os.path.join(patch_cat_dir,
                                              str(uuid.uuid4())[:8] + '.png')
                    io.imsave(patch_path, patch_img)
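# Illustrative sketch, not part of the original pipeline: gen_patches keeps a candidate
# patch only when its center point falls inside the annotated region contour. The helper
# below reproduces that rule with shapely alone; the triangle contour, patch origins, and
# the helper name itself are made-up values for demonstration.
from shapely.geometry import Point, Polygon


def _center_in_region_demo(patch_size=256):
    region = Polygon([(0, 0), (500, 0), (250, 500)])    # hypothetical contour in (x, y)
    for start_x, start_y in [(60, 60), (400, 400)]:     # hypothetical patch origins
        center = Point(start_x + patch_size / 2, start_y + patch_size / 2)
        print("patch at ({}, {}) kept: {}".format(start_x, start_y,
                                                  center.within(region)))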
def gen_gist_features(roi_dir, fea_dir, mode, args):
    fea_dir = os.path.join(fea_dir, args.model_name, mode)
    data_dir = os.path.join(roi_dir, mode)
    img_list = [ele for ele in os.listdir(data_dir) if "png" in ele]
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        cur_img_path = os.path.join(data_dir, ele)
        img_name = os.path.splitext(ele)[0]
        cur_anno_path = os.path.join(data_dir, img_name + ".json")
        if not (os.path.exists(cur_img_path) and os.path.exists(cur_anno_path)):
            print("File not available")
            continue
        img = io.imread(cur_img_path)
        anno_dict = format.json_to_dict(cur_anno_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            region_label = str(label_map[cur_anno['label']])
            region_name = "_".join([img_name, 'r' + cur_r])
            # build the region contour polygon from the annotated coordinates
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            # bounding box of the region
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, args.patch_size,
                                                 overlap_flag=True)
            Feas, BBoxes = [], []
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + args.patch_size / 2,
                                     patch_start_h + args.patch_size / 2)
                # extract GIST features for patches whose center lies inside the region
                if patch_center.within(poly_cnt):
                    patch_img = img[patch_start_h:patch_start_h + args.patch_size,
                                    patch_start_w:patch_start_w + args.patch_size, :]
                    patch_desp = gist.extract(patch_img)
                    Feas.append(patch_desp)
                    BBoxes.append([patch_start_h, patch_start_w,
                                   args.patch_size, args.patch_size])
            fea_dict = {'feat': np.asarray(Feas), 'bbox': np.asarray(BBoxes)}
            # save features
            cat_fea_dir = os.path.join(fea_dir, region_label)
            if not os.path.exists(cat_fea_dir):
                os.makedirs(cat_fea_dir)
            dd.io.save(os.path.join(cat_fea_dir, region_name + ".h5"), fea_dict)
def gen_patches(img_dir, patch_dir, patch_size=448):
    img_list = pydaily.filesystem.find_ext_files(img_dir, "jpg")
    img_list = [os.path.basename(ele) for ele in img_list]
    pos_patch_dir = os.path.join(patch_dir, "1Pos")
    if not os.path.exists(pos_patch_dir):
        os.makedirs(pos_patch_dir)
    neg_patch_dir = os.path.join(patch_dir, "0Neg")
    if not os.path.exists(neg_patch_dir):
        os.makedirs(neg_patch_dir)

    pos_num, neg_num = 0, 0
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(img_dir, ele)
        mask_path = os.path.join(img_dir, os.path.splitext(ele)[0] + ".png")
        cur_img = io.imread(img_path)
        cur_mask = io.imread(mask_path)
        # split coors and save patches
        coors_arr = patch.wsi_coor_splitting(cur_img.shape[0], cur_img.shape[1],
                                             patch_size, overlap_flag=True)
        for coor in coors_arr:
            start_h, start_w = coor[0], coor[1]
            patch_img = cur_img[start_h:start_h + patch_size,
                                start_w:start_w + patch_size]
            # image background control
            if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) > 0.88:
                continue
            # mask control
            patch_mask = cur_mask[start_h:start_h + patch_size,
                                  start_w:start_w + patch_size]
            pixel_ratio = np.sum(patch_mask > 0) * 1.0 / patch_mask.size
            patch_name = str(uuid.uuid4())[:8]
            if pixel_ratio >= 0.05:
                io.imsave(os.path.join(pos_patch_dir, patch_name + ".png"), patch_img)
                pos_num += 1
            else:
                if np.random.random_sample() <= 0.80 and "neg" in img_dir:
                    continue
                io.imsave(os.path.join(neg_patch_dir, patch_name + ".png"), patch_img)
                neg_num += 1
    print("There are {} pos samples and {} neg samples".format(pos_num, neg_num))
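# Illustrative sketch, not the pyslide implementation: patch.patch_bk_ratio above is used
# to drop patches dominated by white slide background. A rough stand-in for the same idea
# converts the patch to grayscale and counts near-white pixels; the 0.864 intensity cutoff
# mirrors the threshold passed in gen_patches, but the helper name and the exact
# computation here are assumptions, not the library's code.
import numpy as np
from skimage import color


def _approx_bk_ratio(patch_img, bk_thresh=0.864):
    gray = color.rgb2gray(patch_img)    # grayscale intensities scaled to [0, 1]
    return float(np.sum(gray > bk_thresh)) / gray.size
# A patch would then be skipped when _approx_bk_ratio(patch_img) > 0.88, matching the
# filtering rule in gen_patches above.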
def gen_slide_patches(slide_dir, slide_name, patch_dir, patch_size=256):
    img_path = os.path.join(slide_dir, slide_name + ".png")
    json_path = os.path.join(slide_dir, slide_name + ".json")
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print("File not available")
        return
    img = io.imread(img_path)
    anno_dict = format.json_to_dict(json_path)
    for cur_r in anno_dict:
        cur_anno = anno_dict[cur_r]
        # build the region contour polygon from the annotated coordinates
        x_coors, y_coors = cur_anno['w'], cur_anno['h']
        cnt_arr = np.zeros((2, len(x_coors)), np.int32)
        cnt_arr[0], cnt_arr[1] = y_coors, x_coors
        poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
        # bounding box of the region
        start_x, start_y = min(x_coors), min(y_coors)
        cnt_w = max(x_coors) - start_x + 1
        cnt_h = max(y_coors) - start_y + 1
        coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size,
                                             overlap_flag=True)
        for cur_h, cur_w in coors_arr:
            patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
            patch_center = Point(patch_start_w + patch_size / 2,
                                 patch_start_h + patch_size / 2)
            # keep the patch only when its center falls inside the region contour
            if patch_center.within(poly_cnt):
                patch_img = img[patch_start_h:patch_start_h + patch_size,
                                patch_start_w:patch_start_w + patch_size, :]
                patch_cat_dir = os.path.join(patch_dir,
                                             str(label_map[cur_anno['label']]))
                if not os.path.exists(patch_cat_dir):
                    os.makedirs(patch_cat_dir)
                patch_path = os.path.join(patch_cat_dir,
                                          str(uuid.uuid4())[:8] + '.png')
                io.imsave(patch_path, patch_img)
def gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256):
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ind, ele in enumerate(slide_list):
        if ind > 0 and ind % 20 == 0:
            print("Processing {:3d}/{}".format(ind, len(slide_list)))
        slide_name = os.path.splitext(ele)[0]
        json_path = os.path.join(annotation_dir, slide_name + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            continue
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        level_dim = slide_head.level_dimensions[level]
        img_w, img_h = level_dim

        new_anno_dict = {}
        for cur_r in region_annos:
            cur_cnt = region_annos[cur_r]['cnts']
            cur_desp = region_annos[cur_r]['desp']
            # scale level-0 contour coordinates down to the target level
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps), np.int32)
            cnt_arr[0] = [coor / np.power(2, level) for coor in cur_cnt['h']]
            cnt_arr[1] = [coor / np.power(2, level) for coor in cur_cnt['w']]
            # discard regions that fall outside the level image
            if np.min(cnt_arr[0]) < 0 or np.min(cnt_arr[1]) < 0:
                continue
            if np.max(cnt_arr[0]) > img_h or np.max(cnt_arr[1]) > img_w:
                continue
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            start_h, start_w = np.min(cnt_arr[0]), np.min(cnt_arr[1])
            cnt_h = np.max(cnt_arr[0]) - start_h + 1
            cnt_w = np.max(cnt_arr[1]) - start_w + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, size,
                                                 overlap_flag=True)
            # keep the region if at least one patch center lies inside its contour
            for cur_h, cur_w in coors_arr:
                patch_center = Point(cur_w + start_w + size / 2,
                                     cur_h + start_h + size / 2)
                if patch_center.within(poly_cnt):
                    new_anno_dict[cur_r] = {
                        'label': cur_desp,
                        'h': cnt_arr[0].tolist(),
                        'w': cnt_arr[1].tolist()
                    }
                    break

        if len(new_anno_dict) > 0:
            wsi_img = slide_head.read_region((0, 0), level, level_dim)
            wsi_img = np.array(wsi_img)[:, :, :3]
            io.imsave(os.path.join(level_dir, slide_name + ".png"), wsi_img)
            format.dict_to_json(new_anno_dict,
                                os.path.join(level_dir, slide_name + ".json"))
        else:
            print("---{} has no proper regions---".format(slide_name))
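# Illustrative sketch on an assumption in gen_l2_data: the coordinate scaling above divides
# level-0 contour points by 2**level, which only holds for pyramids whose downsample factor
# doubles at every level. OpenSlide reports the actual factor per level, so a more general
# scaling could look like the helper below (the helper name and slide path are hypothetical).
import numpy as np
import openslide


def _scale_coords_to_level(coords, slide_path, level):
    slide = openslide.OpenSlide(slide_path)
    factor = slide.level_downsamples[level]    # true downsample factor, e.g. 4.0 at level 2
    return (np.asarray(coords, dtype=np.float64) / factor).astype(np.int32)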
def test_wsi_coor_splitting():
    coors_arr = patch.wsi_coor_splitting(wsi_h=1536, wsi_w=2048, length=224,
                                         overlap_flag=True)
    # sanity check: every split coordinate should start a patch that fits inside the image
    assert len(coors_arr) > 0
    for cur_h, cur_w in coors_arr:
        assert 0 <= cur_h <= 1536 - 224
        assert 0 <= cur_w <= 2048 - 224