def gen_contour_overlay(slides_dir, annotation_dir, overlap_dir, img_level=4):
    """Overlay every annotated contour and its description on the whole-slide image."""
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ele in slide_list:
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        wsi_img = slide_head.read_region(
            (0, 0), img_level, slide_head.level_dimensions[img_level])
        wsi_img = np.ascontiguousarray(np.array(wsi_img)[:, :, :3])
        json_path = os.path.join(annotation_dir, os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("No annotated regions in {}".format(ele))
        for cur_r in region_annos:
            r_desp = region_annos[cur_r]['desp']
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            # contour coordinates are stored at level 0; scale them to img_level
            cnt_arr = np.zeros((2, num_ps), np.float32)
            cnt_arr[0] = np.asarray(cur_cnt['h']) / np.power(2, img_level)
            cnt_arr[1] = np.asarray(cur_cnt['w']) / np.power(2, img_level)
            cv_cnt = cv2_transform.np_arr_to_cv_cnt(cnt_arr).astype(np.int32)
            cv2.drawContours(wsi_img, [cv_cnt], 0, (0, 255, 0), 3)
            tl_pos = (int(np.mean(cnt_arr[1])), int(np.mean(cnt_arr[0])))
            cv2.putText(wsi_img, r_desp, tl_pos, cv2.FONT_HERSHEY_SIMPLEX, 3,
                        (0, 255, 0), 3, cv2.LINE_AA)
        overlay_path = os.path.join(overlap_dir, os.path.splitext(ele)[0] + ".png")
        io.imsave(overlay_path, wsi_img)
def gen_gist_features(roi_dir, fea_dir, mode, args):
    """Extract GIST descriptors from patches inside each annotated region and save them as HDF5."""
    fea_dir = os.path.join(fea_dir, args.model_name, mode)
    data_dir = os.path.join(roi_dir, mode)
    img_list = [ele for ele in os.listdir(data_dir) if "png" in ele]

    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        cur_img_path = os.path.join(data_dir, ele)
        img_name = os.path.splitext(ele)[0]
        cur_anno_path = os.path.join(data_dir, img_name + ".json")
        if not (os.path.exists(cur_img_path) and os.path.exists(cur_anno_path)):
            print("File not available")
            continue
        img = io.imread(cur_img_path)
        anno_dict = format.json_to_dict(cur_anno_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            region_label = str(label_map[cur_anno['label']])
            region_name = "_".join([img_name, 'r' + cur_r])
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, args.patch_size,
                                                 overlap_flag=True)
            Feas, BBoxes = [], []
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + args.patch_size / 2,
                                     patch_start_h + args.patch_size / 2)
                # keep only patches whose center lies inside the region polygon
                if patch_center.within(poly_cnt):
                    patch_img = img[patch_start_h:patch_start_h + args.patch_size,
                                    patch_start_w:patch_start_w + args.patch_size, :]
                    patch_desp = gist.extract(patch_img)
                    Feas.append(patch_desp)
                    BBoxes.append([patch_start_h, patch_start_w,
                                   args.patch_size, args.patch_size])
            fea_dict = {'feat': np.asarray(Feas), 'bbox': np.asarray(BBoxes)}
            # save features
            cat_fea_dir = os.path.join(fea_dir, region_label)
            if not os.path.exists(cat_fea_dir):
                os.makedirs(cat_fea_dir)
            dd.io.save(os.path.join(cat_fea_dir, region_name + ".h5"), fea_dict)
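
# Usage sketch (not from the original code): gen_gist_features only reads
# args.model_name and args.patch_size, so a plain argparse.Namespace is enough.
# The directory names and the "train" mode below are hypothetical placeholders.
def demo_gen_gist_features():
    """Extract GIST features for a hypothetical "train" split of ROI images."""
    import argparse
    args = argparse.Namespace(model_name="gist", patch_size=256)
    gen_gist_features("../data/ROIs", "../data/Features", "train", args)
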
def check_contour_valid(slides_dir, annotation_dir):
    """Flag annotated contours whose center point falls outside the contour polygon."""
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ele in slide_list:
        # slide_path = os.path.join(slides_dir, ele)
        json_path = os.path.join(annotation_dir, os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("No annotated regions in {}".format(ele))
        for cur_r in region_annos:
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps))
            cnt_arr[0] = cur_cnt['h']
            cnt_arr[1] = cur_cnt['w']
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            center_point = Point(np.mean(cnt_arr[1]), np.mean(cnt_arr[0]))
            if not center_point.within(poly_cnt):
                print("{} in {}".format(cur_r, ele))
def gen_patches(imgs_dir, patch_dir, img_list, dset, patch_size=256):
    """Cut patches from annotated regions and save them under patch_dir/dset/<label>."""
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(imgs_dir, ele)
        img_name = os.path.splitext(ele)[0]
        json_path = os.path.join(imgs_dir, img_name + ".json")
        if not (os.path.exists(img_path) and os.path.exists(json_path)):
            print("File not available")
            continue
        img = io.imread(img_path)
        anno_dict = format.json_to_dict(json_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size,
                                                 overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + patch_size / 2,
                                     patch_start_h + patch_size / 2)
                if patch_center.within(poly_cnt):
                    patch_img = img[patch_start_h:patch_start_h + patch_size,
                                    patch_start_w:patch_start_w + patch_size, :]
                    patch_img = transform.resize(patch_img, (256, 256))
                    patch_cat_dir = os.path.join(patch_dir, dset,
                                                 str(label_map[cur_anno['label']]))
                    if not os.path.exists(patch_cat_dir):
                        os.makedirs(patch_cat_dir)
                    patch_path = os.path.join(patch_cat_dir,
                                              str(uuid.uuid4())[:8] + '.png')
                    io.imsave(patch_path, patch_img)
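
# Usage sketch (not from the original code): cut patches from annotated level-2
# images with a simple 90/10 train/val split. The "L2Imgs"/"Patches" folders and
# the split ratio are assumptions; gen_patches itself only needs an image
# directory, an output directory, an image list, and a split name.
def demo_gen_patches(l2_dir="../data/L2Imgs", patch_dir="../data/Patches"):
    img_list = sorted(ele for ele in os.listdir(l2_dir) if ele.endswith(".png"))
    split = int(len(img_list) * 0.9)
    gen_patches(l2_dir, patch_dir, img_list[:split], "train", patch_size=256)
    gen_patches(l2_dir, patch_dir, img_list[split:], "val", patch_size=256)
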
def annotate_images(data_dir, cur_set, cur_cat, slide_level):
    """Overlay region contours and their labels on the slides at slide_level and save PNGs."""
    slides_dir = os.path.join(data_dir, "Slides", cur_set, cur_cat)
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    annotation_dir = os.path.join(data_dir, "Annotations", cur_set, cur_cat)
    l2_annotate_dir = os.path.join(data_dir, "L4AnnotatedImgs", cur_set, cur_cat)
    filesystem.overwrite_dir(l2_annotate_dir)

    for cur_slide in slide_list:
        slide_name = os.path.splitext(cur_slide)[0]
        slide_head = openslide.OpenSlide(os.path.join(slides_dir, cur_slide))
        slide_img = slide_head.read_region(
            location=(0, 0), level=slide_level,
            size=slide_head.level_dimensions[slide_level])
        slide_img = np.asarray(slide_img)[:, :, :3]
        annotate_json_path = os.path.join(annotation_dir, slide_name + ".json")
        annotation_dict = format.json_to_dict(annotate_json_path)
        region_dict = annotation_dict['regions']
        for key in region_dict.keys():
            cur_label = region_dict[key]['desp']
            if cur_label == "Benign":
                draw_rgb = (0, 0, 255)
            elif cur_label == "Uncertain":
                draw_rgb = (0, 255, 0)
            elif cur_label == "Malignant":
                draw_rgb = (255, 0, 0)
            else:
                print("Unknown description: {}".format(cur_label))
                continue
            cur_cnts = region_dict[key]['cnts']
            num_points = len(cur_cnts["h"])
            points_coors = np.zeros((2, num_points), dtype=np.int32)
            for ind in range(num_points):
                points_coors[0, ind] = int(round(cur_cnts['h'][ind] / np.power(2, slide_level)))
                points_coors[1, ind] = int(round(cur_cnts['w'][ind] / np.power(2, slide_level)))
            slide_img = combine.overlay_contour(slide_img, points_coors, draw_rgb,
                                                cnt_width=5)
            # cv2.putText expects the text origin as (x, y), i.e. (w, h)
            tl_pos = (int(np.mean(points_coors[1])), int(np.mean(points_coors[0])))
            cv2.putText(slide_img, cur_label, tl_pos, cv2.FONT_HERSHEY_SIMPLEX, 3,
                        (148, 24, 32), 3, cv2.LINE_AA)
        annotate_slide_path = os.path.join(l2_annotate_dir, slide_name + ".png")
        io.imsave(annotate_slide_path, slide_img)
def save_wsi_annotation(slide_path, json_path, h5_path, slide_level):
    """Crop each annotated region at slide_level and save contours, labels, and crops to HDF5."""
    slide_head = openslide.OpenSlide(slide_path)
    region_dict = {}
    annotation_dict = format.json_to_dict(json_path)
    regions = annotation_dict['regions']
    for region_id in regions.keys():
        region_name = 'r' + str(region_id)
        cur_region = {}
        if regions[region_id]['desp'] == "Benign":
            cur_region['desp'] = "1Benign"
        elif regions[region_id]['desp'] == "Uncertain":
            cur_region['desp'] = "2Uncertain"
        elif regions[region_id]['desp'] == "Malignant":
            cur_region['desp'] = "3Malignant"
        else:
            print("Unknown description: {}".format(regions[region_id]['desp']))
            continue
        cur_cnts = regions[region_id]['cnts']
        num_points = len(cur_cnts["h"])
        points_coors = np.zeros((2, num_points), dtype=np.int32)
        for ind in range(num_points):
            points_coors[0, ind] = int(round(cur_cnts['h'][ind] / np.power(2, slide_level)))
            points_coors[1, ind] = int(round(cur_cnts['w'][ind] / np.power(2, slide_level)))
        cur_region['cnts'] = points_coors
        start_h, start_w = np.min(points_coors[0, :]), np.min(points_coors[1, :])
        region_h = np.max(points_coors[0, :]) - start_h + 1
        region_w = np.max(points_coors[1, :]) - start_w + 1
        # read_region takes the top-left location in level-0 coordinates,
        # so scale the level-`slide_level` start point back up
        level0_start = (int(start_w * np.power(2, slide_level)),
                        int(start_h * np.power(2, slide_level)))
        region_img = slide_head.read_region(location=level0_start,
                                            level=slide_level,
                                            size=(int(region_w), int(region_h)))
        region_img = np.asarray(region_img)[:, :, :3]
        cur_region['img'] = region_img
        region_dict[region_name] = cur_region
    dd.io.save(h5_path, region_dict)
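
# Usage sketch (not from the original code): export the annotated regions of a
# single slide to an HDF5 file. All three paths are hypothetical placeholders;
# only the level value matches the default used elsewhere in this module.
def demo_save_wsi_annotation(slide_level=4):
    slide_path = "../data/Slides/example_slide.tiff"
    json_path = "../data/Annotations/example_slide.json"
    h5_path = "../data/RegionH5/example_slide.h5"
    save_wsi_annotation(slide_path, json_path, h5_path, slide_level)
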
def gen_contour_overlay(slides_dir, annotation_dir, overlap_dir, img_level=4,
                        save_roi=False, save_wsi=True):
    """Overlay malignant region contours on the whole-slide image and optionally save per-region crops."""
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")
    filesystem.overwrite_dir(overlap_dir)

    for ele in slide_list:
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        wsi_img = slide_head.read_region(
            (0, 0), img_level, slide_head.level_dimensions[img_level])
        wsi_img = np.ascontiguousarray(np.array(wsi_img)[:, :, :3])
        json_path = os.path.join(annotation_dir, os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("No annotated regions in {}".format(ele))
        for cur_r in region_annos:
            r_desp = region_annos[cur_r]['desp']
            if r_desp != "Malignant":
                continue
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            # contour coordinates are stored at level 0; scale them to img_level
            cnt_arr = np.zeros((2, num_ps), np.float32)
            cnt_arr[0] = np.asarray(cur_cnt['h']) / np.power(2, img_level)
            cnt_arr[1] = np.asarray(cur_cnt['w']) / np.power(2, img_level)
            cv_cnt = cv2_transform.np_arr_to_cv_cnt(cnt_arr).astype(np.int32)
            cv2.drawContours(wsi_img, [cv_cnt], 0, (0, 255, 0), 3)
            if save_roi:
                overlay_roi_path = os.path.join(
                    overlap_dir, os.path.splitext(ele)[0] + "_r" + cur_r + ".png")
                start_h, end_h = int(min(cnt_arr[0])), int(max(cnt_arr[0]))
                start_w, end_w = int(min(cnt_arr[1])), int(max(cnt_arr[1]))
                io.imsave(overlay_roi_path, wsi_img[start_h:end_h, start_w:end_w])
        if save_wsi:
            overlay_path = os.path.join(overlap_dir, os.path.splitext(ele)[0] + ".png")
            io.imsave(overlay_path, wsi_img)
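
# Usage sketch (not from the original code): draw the malignant contours on every
# slide and also save one crop per region. Directory names are hypothetical.
def demo_gen_contour_overlay():
    gen_contour_overlay("../data/Slides", "../data/Annotations",
                        "../data/Overlays", img_level=4,
                        save_roi=True, save_wsi=True)
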
def gen_slide_patches(slide_dir, slide_name, patch_dir, patch_size=256):
    """Cut patches from one slide's annotated regions and save them under patch_dir/<label>."""
    img_path = os.path.join(slide_dir, slide_name + ".png")
    json_path = os.path.join(slide_dir, slide_name + ".json")
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print("File not available")
        return
    img = io.imread(img_path)
    anno_dict = format.json_to_dict(json_path)
    for cur_r in anno_dict:
        cur_anno = anno_dict[cur_r]
        x_coors, y_coors = cur_anno['w'], cur_anno['h']
        cnt_arr = np.zeros((2, len(x_coors)), np.int32)
        cnt_arr[0], cnt_arr[1] = y_coors, x_coors
        poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
        start_x, start_y = min(x_coors), min(y_coors)
        cnt_w = max(x_coors) - start_x + 1
        cnt_h = max(y_coors) - start_y + 1
        coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size,
                                             overlap_flag=True)
        for cur_h, cur_w in coors_arr:
            patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
            patch_center = Point(patch_start_w + patch_size / 2,
                                 patch_start_h + patch_size / 2)
            if patch_center.within(poly_cnt):
                patch_img = img[patch_start_h:patch_start_h + patch_size,
                                patch_start_w:patch_start_w + patch_size, :]
                patch_cat_dir = os.path.join(patch_dir,
                                             str(label_map[cur_anno['label']]))
                if not os.path.exists(patch_cat_dir):
                    os.makedirs(patch_cat_dir)
                patch_path = os.path.join(patch_cat_dir,
                                          str(uuid.uuid4())[:8] + '.png')
                io.imsave(patch_path, patch_img)
def gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256):
    """Save slide images at the given level together with the annotations that contain at least one valid patch."""
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ind, ele in enumerate(slide_list):
        if ind > 0 and ind % 20 == 0:
            print("Processing {:3d}/{}".format(ind, len(slide_list)))
        slide_name = os.path.splitext(ele)[0]
        json_path = os.path.join(annotation_dir, slide_name + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            continue
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        level_dim = slide_head.level_dimensions[level]
        img_w, img_h = level_dim

        new_anno_dict = {}
        for cur_r in region_annos:
            cur_cnt = region_annos[cur_r]['cnts']
            cur_desp = region_annos[cur_r]['desp']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps), np.int32)
            cnt_arr[0] = [coor / np.power(2, level) for coor in cur_cnt['h']]
            cnt_arr[1] = [coor / np.power(2, level) for coor in cur_cnt['w']]
            # skip contours that fall outside the image at this level
            if np.min(cnt_arr[0]) < 0 or np.min(cnt_arr[1]) < 0:
                continue
            if np.max(cnt_arr[0]) > img_h or np.max(cnt_arr[1]) > img_w:
                continue
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            start_h, start_w = np.min(cnt_arr[0]), np.min(cnt_arr[1])
            cnt_h = np.max(cnt_arr[0]) - start_h + 1
            cnt_w = np.max(cnt_arr[1]) - start_w + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, size, overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                patch_center = Point(cur_w + start_w + size / 2,
                                     cur_h + start_h + size / 2)
                # keep the region as soon as one patch center lies inside it
                if patch_center.within(poly_cnt):
                    new_anno_dict[cur_r] = {
                        'label': cur_desp,
                        'h': cnt_arr[0].tolist(),
                        'w': cnt_arr[1].tolist()
                    }
                    break

        if len(new_anno_dict) > 0:
            wsi_img = slide_head.read_region((0, 0), level, level_dim)
            wsi_img = np.array(wsi_img)[:, :, :3]
            io.imsave(os.path.join(level_dir, slide_name + ".png"), wsi_img)
            format.dict_to_json(new_anno_dict,
                                os.path.join(level_dir, slide_name + ".json"))
        else:
            print("---{} has no proper regions---".format(slide_name))
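
# Usage sketch (not from the original code): build level-2 images plus the
# filtered annotation json files that gen_patches / gen_slide_patches consume.
# gen_l2_data does not create the output directory, so the sketch does it first.
# Paths are hypothetical.
def demo_gen_l2_data():
    slides_dir = "../data/Slides"
    annotation_dir = "../data/Annotations"
    level_dir = "../data/L2Imgs"
    if not os.path.exists(level_dir):
        os.makedirs(level_dir)
    gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256)
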
if __name__ == "__main__":
    # extract prepared ground-truth viable tumor burden
    source_slides_dir = "../data/SourceData"
    phase1_path = os.path.join(source_slides_dir, "Phase_1_tumor_burden.csv")
    phase2_path = os.path.join(source_slides_dir, "Phase_2_tumor_burden.csv")
    gt_burden_dict = {}
    phase1_burden_dict = extract_csv_burden(phase1_path, case_num=20)
    gt_burden_dict.update(phase1_burden_dict)
    phase2_burden_dict = extract_csv_burden(phase2_path, case_num=30)
    gt_burden_dict.update(phase2_burden_dict)

    # calculate viable tumor burden
    slides_dir = os.path.join(os.path.dirname(source_slides_dir), "LiverImages")
    cal_train_burden(slides_dir)

    # load calculated burden
    cal_burden_path = os.path.join(source_slides_dir, "calculated_tumor_burden.json")
    cal_burden_dict = format.json_to_dict(cal_burden_path)

    # compare ground-truth and calculated burden
    for ind, key in enumerate(gt_burden_dict):
        if key not in cal_burden_dict:
            print("Error: {}".format(key))
            continue
        gt_burden = gt_burden_dict[key]
        cal_burden = cal_burden_dict[key]
        if np.absolute(gt_burden - cal_burden) > 0.001:
            print("{}/{} {} gt:{:.3f}, cal:{:.3f}".format(
                ind + 1, len(gt_burden_dict), key, gt_burden, cal_burden))