# Imports assumed by these utilities; format, cv2_transform, poly_transform,
# patch, gist, filesystem, combine, label_map, extract_csv_burden and
# cal_train_burden are project-specific helpers assumed to be importable here.
import os
import uuid

import numpy as np
import cv2
import openslide
import deepdish as dd
from skimage import io, transform
from shapely.geometry import Point


def gen_contour_overlay(slides_dir, annotation_dir, overlap_dir, img_level=4):
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ele in slide_list:
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        wsi_img = slide_head.read_region(
            (0, 0), img_level, slide_head.level_dimensions[img_level])
        wsi_img = np.ascontiguousarray(np.array(wsi_img)[:, :, :3])
        json_path = os.path.join(annotation_dir,
                                 os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("No annotated regions in {}".format(ele))

        for cur_r in region_annos:
            r_desp = region_annos[cur_r]['desp']
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps), np.float32)
            # np.asarray guards against the JSON coordinates arriving as lists
            cnt_arr[0] = np.asarray(cur_cnt['h']) / np.power(2, img_level)
            cnt_arr[1] = np.asarray(cur_cnt['w']) / np.power(2, img_level)
            cv_cnt = cv2_transform.np_arr_to_cv_cnt(cnt_arr).astype(np.int32)
            cv2.drawContours(wsi_img, [cv_cnt], 0, (0, 255, 0), 3)
            tl_pos = (int(np.mean(cnt_arr[1])), int(np.mean(cnt_arr[0])))
            cv2.putText(wsi_img, r_desp, tl_pos, cv2.FONT_HERSHEY_SIMPLEX, 3,
                        (0, 255, 0), 3, cv2.LINE_AA)
        overlay_path = os.path.join(overlap_dir,
                                    os.path.splitext(ele)[0] + ".png")
        io.imsave(overlay_path, wsi_img)
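
# Example usage (a sketch; the directory paths are hypothetical and should
# point at the repo's slide, annotation, and output folders):
# gen_contour_overlay("../data/Slides", "../data/Annotations", "../data/Overlays", img_level=4)
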
def gen_gist_features(roi_dir, fea_dir, mode, args):
    fea_dir = os.path.join(fea_dir, args.model_name, mode)
    data_dir = os.path.join(roi_dir, mode)
    img_list = [ele for ele in os.listdir(data_dir) if "png" in ele]

    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))

        cur_img_path = os.path.join(data_dir, ele)
        img_name = os.path.splitext(ele)[0]
        cur_anno_path = os.path.join(data_dir, img_name + ".json")

        if not (os.path.exists(cur_img_path)
                and os.path.exists(cur_anno_path)):
            print("File not available: {}".format(img_name))
            continue

        img = io.imread(cur_img_path)
        anno_dict = format.json_to_dict(cur_anno_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            region_label = str(label_map[cur_anno['label']])
            region_name = "_".join([img_name, 'r' + cur_r])
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h,
                                                 cnt_w,
                                                 args.patch_size,
                                                 overlap_flag=True)

            Feas, BBoxes = [], []
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + args.patch_size / 2,
                                     patch_start_h + args.patch_size / 2)
                if patch_center.within(poly_cnt):
                    patch_img = img[patch_start_h:patch_start_h +
                                    args.patch_size,
                                    patch_start_w:patch_start_w +
                                    args.patch_size, :]
                    patch_desp = gist.extract(patch_img)
                    Feas.append(patch_desp)
                    BBoxes.append([
                        patch_start_h, patch_start_w, args.patch_size,
                        args.patch_size
                    ])
            fea_dict = {'feat': np.asarray(Feas), 'bbox': np.asarray(BBoxes)}

            # save features
            cat_fea_dir = os.path.join(fea_dir, region_label)
            if not os.path.exists(cat_fea_dir):
                os.makedirs(cat_fea_dir)
            dd.io.save(os.path.join(cat_fea_dir, region_name + ".h5"),
                       fea_dict)
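
# Example usage (a sketch; gen_gist_features only reads args.model_name and
# args.patch_size, so a bare Namespace works when not driven by argparse):
# from argparse import Namespace
# gist_args = Namespace(model_name="gist", patch_size=256)
# gen_gist_features("../data/ROIs", "../data/Features", "train", gist_args)
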
def check_contour_valid(slides_dir, annotation_dir):
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ele in slide_list:
        # slide_path = os.path.join(slides_dir, ele)
        json_path = os.path.join(annotation_dir,
                                 os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("No annotated regions in {}".format(ele))

        for cur_r in region_annos:
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps))
            cnt_arr[0] = cur_cnt['h']
            cnt_arr[1] = cur_cnt['w']
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            center_point = Point(np.mean(cnt_arr[1]), np.mean(cnt_arr[0]))
            if not center_point.within(poly_cnt):
                print("{} in {}".format(cur_r, ele))
def gen_patches(imgs_dir, patch_dir, img_list, dset, patch_size=256):
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(imgs_dir, ele)
        img_name = os.path.splitext(ele)[0]
        json_path = os.path.join(imgs_dir, img_name+".json")

        if not (os.path.exists(img_path) and os.path.exists(json_path)):
            print("File not available: {}".format(img_name))
            continue

        img = io.imread(img_path)
        anno_dict = format.json_to_dict(json_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size, overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                patch_start_w, patch_start_h = cur_w+start_x, cur_h + start_y
                patch_center = Point(patch_start_w+patch_size/2, patch_start_h+patch_size/2)
                if patch_center.within(poly_cnt):
                    patch_img = img[patch_start_h:patch_start_h+patch_size, patch_start_w:patch_start_w+patch_size, :]
                    patch_img = transform.resize(patch_img, (256, 256))
                    patch_cat_dir = os.path.join(patch_dir, dset, str(label_map[cur_anno['label']]))
                    if not os.path.exists(patch_cat_dir):
                        os.makedirs(patch_cat_dir)
                    patch_path = os.path.join(patch_cat_dir, str(uuid.uuid4())[:8] + '.png')
                    io.imsave(patch_path, patch_img)
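
# Example usage (a sketch; the image directory, patch directory, and split
# name are hypothetical):
# train_imgs = [ele for ele in os.listdir("../data/L2Imgs") if ele.endswith(".png")]
# gen_patches("../data/L2Imgs", "../data/Patches", train_imgs, "train", patch_size=256)
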
def annotate_images(data_dir, cur_set, cur_cat, slide_level):
    slides_dir = os.path.join(data_dir, "Slides", cur_set, cur_cat)
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]

    annotation_dir = os.path.join(data_dir, "Annotations", cur_set, cur_cat)
    l2_annotate_dir = os.path.join(data_dir, "L4AnnotatedImgs", cur_set, cur_cat)
    filesystem.overwrite_dir(l2_annotate_dir)

    for cur_slide in slide_list:
        slide_name = os.path.splitext(cur_slide)[0]
        slide_head = openslide.OpenSlide(os.path.join(slides_dir, cur_slide))
        slide_img = slide_head.read_region(location=(0, 0), level=slide_level,
                                           size=slide_head.level_dimensions[slide_level])
        # contiguous, writable copy so cv2 can draw on it in place
        slide_img = np.ascontiguousarray(np.asarray(slide_img)[:, :, :3])

        annotate_json_path = os.path.join(annotation_dir, slide_name+".json")
        annotation_dict = format.json_to_dict(annotate_json_path)
        region_dict = annotation_dict['regions']
        for key in region_dict.keys():
            cur_label = region_dict[key]['desp']
            draw_rgb = None
            if cur_label == "Benign":
                draw_rgb = (0, 0, 255)
            elif cur_label == "Uncertain":
                draw_rgb = (0, 255, 0)
            elif cur_label == "Malignant":
                draw_rgb = (255, 0, 0)
            else:
                print("Unknow description: {}".format(cur_label))
                continue

            cur_cnts = region_dict[key]['cnts']
            num_points = len(cur_cnts["h"])
            points_coors = np.zeros((2, num_points), dtype=np.int32)
            for ind in range(num_points):
                points_coors[0, ind] = int(round(cur_cnts['h'][ind] / np.power(2, slide_level)))
                points_coors[1, ind] = int(round(cur_cnts['w'][ind] / np.power(2, slide_level)))
            slide_img = combine.overlay_contour(slide_img, points_coors, draw_rgb, cnt_width=5)
            # cv2.putText expects the origin as (x, y), i.e. (w, h)
            tl_pos = (int(np.mean(points_coors[1])), int(np.mean(points_coors[0])))
            cv2.putText(slide_img, cur_label, tl_pos, cv2.FONT_HERSHEY_SIMPLEX, 3, (148,24,32), 3, cv2.LINE_AA)
        annotate_slide_path = os.path.join(l2_annotate_dir, slide_name+".png")
        io.imsave(annotate_slide_path, slide_img)
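
# Example usage (a sketch; the data root, set, and category names are
# hypothetical and mirror the Slides/Annotations folder layout used above):
# annotate_images("../data", "train", "Malignant", slide_level=4)
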
def save_wsi_annotation(slide_path, json_path, h5_path, slide_level):
    slide_head = openslide.OpenSlide(slide_path)
    region_dict = {}
    annotation_dict = format.json_to_dict(json_path)
    regions = annotation_dict['regions']
    for region_id in regions.keys():
        region_name = 'r' + str(region_id)
        cur_region = {}

        if regions[region_id]['desp'] == "Benign":
            cur_region['desp'] = "1Benign"
        elif regions[region_id]['desp'] == "Uncertain":
            cur_region['desp'] = "2Uncertain"
        elif regions[region_id]['desp'] == "Malignant":
            cur_region['desp'] = "3Malignant"
        else:
            print("Unknow description: {}".format(regions[region_id]['desp']))
            continue

        cur_cnts = regions[region_id]['cnts']
        num_points = len(cur_cnts["h"])
        points_coors = np.zeros((2, num_points), dtype=np.int32)
        for ind in range(num_points):
            points_coors[0, ind] = int(
                round(cur_cnts['h'][ind] / np.power(2, slide_level)))
            points_coors[1, ind] = int(
                round(cur_cnts['w'][ind] / np.power(2, slide_level)))
        cur_region['cnts'] = points_coors
        start_h, start_w = np.min(points_coors[0, :]), np.min(
            points_coors[1, :])
        region_h = np.max(points_coors[0, :]) - start_h + 1
        region_w = np.max(points_coors[1, :]) - start_w + 1
        # read_region expects its location in level-0 coordinates, while
        # points_coors were already scaled down by 2**slide_level above
        level0_w = int(start_w * np.power(2, slide_level))
        level0_h = int(start_h * np.power(2, slide_level))
        region_img = slide_head.read_region(location=(level0_w, level0_h),
                                            level=slide_level,
                                            size=(int(region_w), int(region_h)))
        region_img = np.asarray(region_img)[:, :, :3]
        cur_region['img'] = region_img
        region_dict[region_name] = cur_region
    dd.io.save(h5_path, region_dict)
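
# Example usage (a sketch; the slide, annotation, and output paths are
# hypothetical):
# save_wsi_annotation("../data/Slides/case01.tiff", "../data/Annotations/case01.json",
#                     "../data/Regions/case01.h5", slide_level=2)
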
def gen_contour_overlay(slides_dir, annotation_dir, overlap_dir, img_level=4, save_roi=False, save_wsi=True):
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    filesystem.overwrite_dir(overlap_dir)

    for ele in slide_list:
        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        wsi_img = slide_head.read_region((0, 0), img_level, slide_head.level_dimensions[img_level])
        wsi_img = np.ascontiguousarray(np.array(wsi_img)[:,:,:3])
        json_path = os.path.join(annotation_dir, os.path.splitext(ele)[0] + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            print("No annotated regions in {}".format(ele))

        for cur_r in region_annos:
            r_desp = region_annos[cur_r]['desp']
            if r_desp != "Malignant":
                continue
            cur_cnt = region_annos[cur_r]['cnts']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps), np.float32)
            cnt_arr[0] = np.asarray(cur_cnt['h']) / np.power(2, img_level)
            cnt_arr[1] = np.asarray(cur_cnt['w']) / np.power(2, img_level)
            cv_cnt = cv2_transform.np_arr_to_cv_cnt(cnt_arr).astype(np.int32)
            cv2.drawContours(wsi_img, [cv_cnt], 0, (0, 255, 0), 3)
            if save_roi:
                overlay_roi_path = os.path.join(overlap_dir, os.path.splitext(ele)[0] + "_r" + cur_r + ".png")
                start_h, end_h = int(min(cnt_arr[0])), int(max(cnt_arr[0]))
                start_w, end_w = int(min(cnt_arr[1])), int(max(cnt_arr[1]))
                io.imsave(overlay_roi_path, wsi_img[start_h:end_h, start_w:end_w])

        if save_wsi:
            overlay_path = os.path.join(overlap_dir, os.path.splitext(ele)[0] + ".png")
            io.imsave(overlay_path, wsi_img)
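
# Example usage of the variant above (a sketch; it saves a per-region crop for
# each Malignant contour instead of the full overlay when save_roi is set):
# gen_contour_overlay("../data/Slides", "../data/Annotations", "../data/Overlays",
#                     img_level=4, save_roi=True, save_wsi=False)
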
def gen_slide_patches(slide_dir, slide_name, patch_dir, patch_size=256):
    img_path = os.path.join(slide_dir, slide_name + ".png")
    json_path = os.path.join(slide_dir, slide_name + ".json")
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print("File not available: {}".format(slide_name))
        return

    img = io.imread(img_path)
    anno_dict = format.json_to_dict(json_path)
    for cur_r in anno_dict:
        cur_anno = anno_dict[cur_r]
        x_coors, y_coors = cur_anno['w'], cur_anno['h']
        cnt_arr = np.zeros((2, len(x_coors)), np.int32)
        cnt_arr[0], cnt_arr[1] = y_coors, x_coors
        poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

        start_x, start_y = min(x_coors), min(y_coors)
        cnt_w = max(x_coors) - start_x + 1
        cnt_h = max(y_coors) - start_y + 1
        coors_arr = patch.wsi_coor_splitting(cnt_h,
                                             cnt_w,
                                             patch_size,
                                             overlap_flag=True)
        for cur_h, cur_w in coors_arr:
            patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
            patch_center = Point(patch_start_w + patch_size / 2,
                                 patch_start_h + patch_size / 2)
            if patch_center.within(poly_cnt):
                patch_img = img[patch_start_h:patch_start_h + patch_size,
                                patch_start_w:patch_start_w + patch_size, :]
                patch_cat_dir = os.path.join(patch_dir,
                                             str(label_map[cur_anno['label']]))
                if not os.path.exists(patch_cat_dir):
                    os.makedirs(patch_cat_dir)
                patch_path = os.path.join(patch_cat_dir,
                                          str(uuid.uuid4())[:8] + '.png')
                io.imsave(patch_path, patch_img)
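
# Example usage (a sketch; the slide directory, slide name, and patch directory
# are hypothetical):
# gen_slide_patches("../data/L2Imgs", "case01", "../data/Patches", patch_size=256)
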
def gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256):
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ind, ele in enumerate(slide_list):
        if ind > 0 and ind % 20 == 0:
            print("Processing {:3d}/{}".format(ind, len(slide_list)))
        slide_name = os.path.splitext(ele)[0]
        json_path = os.path.join(annotation_dir, slide_name + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            continue

        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        level_dim = slide_head.level_dimensions[level]
        img_w, img_h = level_dim

        new_anno_dict = {}
        for cur_r in region_annos:
            cur_cnt = region_annos[cur_r]['cnts']
            cur_desp = region_annos[cur_r]['desp']
            num_ps = len(cur_cnt['h'])
            cnt_arr = np.zeros((2, num_ps), np.int32)
            cnt_arr[0] = [coor / np.power(2, level) for coor in cur_cnt['h']]
            cnt_arr[1] = [coor / np.power(2, level) for coor in cur_cnt['w']]
            if np.min(cnt_arr[0]) < 0 or np.min(cnt_arr[1]) < 0:
                continue
            if np.max(cnt_arr[0]) > img_h or np.max(cnt_arr[1]) > img_w:
                continue

            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
            start_h, start_w = np.min(cnt_arr[0]), np.min(cnt_arr[1])
            cnt_h = np.max(cnt_arr[0]) - start_h + 1
            cnt_w = np.max(cnt_arr[1]) - start_w + 1

            coors_arr = patch.wsi_coor_splitting(cnt_h,
                                                 cnt_w,
                                                 size,
                                                 overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                patch_center = Point(cur_w + start_w + size / 2,
                                     cur_h + start_h + size / 2)
                if patch_center.within(poly_cnt):
                    new_anno_dict[cur_r] = {
                        'label': cur_desp,
                        'h': cnt_arr[0].tolist(),
                        'w': cnt_arr[1].tolist()
                    }
                    break
        if len(new_anno_dict) > 0:
            wsi_img = slide_head.read_region((0, 0), level, level_dim)
            wsi_img = np.array(wsi_img)[:, :, :3]
            io.imsave(os.path.join(level_dir, slide_name + ".png"), wsi_img)
            format.dict_to_json(new_anno_dict,
                                os.path.join(level_dir, slide_name + ".json"))
        else:
            print("---{} have no proper regions---".format(slide_name))

if __name__ == "__main__":
    # extract prepared ground truth viable tumor burden
    source_slides_dir = "../data/SourceData"
    phase1_path = os.path.join(source_slides_dir, "Phase_1_tumor_burden.csv")
    phase2_path = os.path.join(source_slides_dir, "Phase_2_tumor_burden.csv")
    gt_burden_dict = {}
    phase1_burden_dict = extract_csv_burden(phase1_path, case_num=20)
    gt_burden_dict.update(phase1_burden_dict)
    phase2_burden_dict = extract_csv_burden(phase2_path, case_num=30)
    gt_burden_dict.update(phase2_burden_dict)

    # calculate viable tumor burden
    slides_dir = os.path.join(os.path.dirname(source_slides_dir), "LiverImages")
    cal_train_burden(slides_dir)

    # load calculated burden
    cal_burden_path = os.path.join(source_slides_dir, "calculated_tumor_burden.json")
    cal_burden_dict = format.json_to_dict(cal_burden_path)

    # compare gt & cal
    for ind, key in enumerate(gt_burden_dict):
        if key not in cal_burden_dict:
            print("Error: {} missing from calculated burdens".format(key))
            continue
        gt_burden = gt_burden_dict[key]
        cal_burden = cal_burden_dict[key]
        if np.absolute(gt_burden-cal_burden) > 0.001:
            print("{}/{} {} gt:{:.3f}, cal:{:.3f}".format(ind+1, len(gt_burden_dict), key,
                                                  gt_burden, cal_burden))