Example #1
0
def gen_patches(imgs_dir, patch_dir, img_list, dset, patch_size=256):
    """Cut annotated regions of each image into patches and save them by label.

    For every file name in ``img_list`` the image and its sibling
    ``<name>.json`` annotation are loaded from ``imgs_dir``. Each annotated
    region is tiled with ``patch.wsi_coor_splitting`` over its bounding box,
    and every tile whose centre lies inside the region polygon is resized to
    256x256 and written to ``patch_dir/dset/<label id>/<random 8 chars>.png``.

    Args:
        imgs_dir: Directory holding the images and their ``.json`` annotations.
        patch_dir: Root directory the patches are written under.
        img_list: Image file names (relative to ``imgs_dir``) to process.
        dset: Sub-directory name inserted between ``patch_dir`` and the label.
        patch_size: Side length, in pixels, of the window cut from the image.

    Images whose image file or annotation file is missing are reported with
    "File not available" and skipped.
    """
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(imgs_dir, ele)
        img_name = os.path.splitext(ele)[0]
        json_path = os.path.join(imgs_dir, img_name + ".json")

        if not (os.path.exists(img_path) and os.path.exists(json_path)):
            print("File not available")
            # Without this the code below would try to read the missing file.
            continue

        img = io.imread(img_path)
        anno_dict = format.json_to_dict(json_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            # Row 0 holds the 'h' (y) coordinates, row 1 the 'w' (x) ones.
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            # Tile the bounding box of the region contour.
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size, overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                # Tile offsets are relative to the bounding box; shift them
                # back into image coordinates.
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + patch_size / 2, patch_start_h + patch_size / 2)
                if not patch_center.within(poly_cnt):
                    continue

                patch_img = img[patch_start_h:patch_start_h + patch_size,
                                patch_start_w:patch_start_w + patch_size, :]
                # NOTE: the output size is fixed at 256x256 regardless of
                # ``patch_size``.
                patch_img = transform.resize(patch_img, (256, 256))
                # ``label_map`` is defined outside this function.
                patch_cat_dir = os.path.join(patch_dir, dset, str(label_map[cur_anno['label']]))
                if not os.path.exists(patch_cat_dir):
                    os.makedirs(patch_cat_dir)
                patch_path = os.path.join(patch_cat_dir, str(uuid.uuid4())[:8] + '.png')
                io.imsave(patch_path, patch_img)
Example #2
0
def gen_gist_features(roi_dir, fea_dir, mode, args):
    """Extract per-patch GIST descriptors for every annotated region.

    Images are read from ``roi_dir/mode``; each one needs a sibling
    ``<name>.json`` annotation. For each annotated region the bounding box is
    tiled with ``patch.wsi_coor_splitting`` and every tile whose centre lies
    inside the region polygon is passed to ``gist.extract``. The descriptors
    and their ``[start_h, start_w, patch_size, patch_size]`` boxes are saved
    with ``dd.io.save`` to
    ``fea_dir/<args.model_name>/<mode>/<label id>/<image>_r<region>.h5``.

    Args:
        roi_dir: Root directory holding one sub-directory per ``mode``.
        fea_dir: Root directory the feature files are written under.
        mode: Name of the sub-directory of ``roi_dir`` to process.
        args: Object providing ``model_name`` and ``patch_size`` attributes.

    Images whose image file or annotation file is missing are reported with
    "File not available" and skipped.
    """
    fea_dir = os.path.join(fea_dir, args.model_name, mode)
    data_dir = os.path.join(roi_dir, mode)
    img_list = [ele for ele in os.listdir(data_dir) if "png" in ele]

    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))

        cur_img_path = os.path.join(data_dir, ele)
        img_name = os.path.splitext(ele)[0]
        cur_anno_path = os.path.join(data_dir, img_name + ".json")

        if not (os.path.exists(cur_img_path)
                and os.path.exists(cur_anno_path)):
            print("File not available")
            # Without this the code below would try to read the missing file.
            continue

        img = io.imread(cur_img_path)
        anno_dict = format.json_to_dict(cur_anno_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            # ``label_map`` is defined outside this function.
            region_label = str(label_map[cur_anno['label']])
            region_name = "_".join([img_name, 'r' + cur_r])
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            # Row 0 holds the 'h' (y) coordinates, row 1 the 'w' (x) ones.
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            # Tile the bounding box of the region contour.
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h,
                                                 cnt_w,
                                                 args.patch_size,
                                                 overlap_flag=True)

            Feas, BBoxes = [], []
            for cur_h, cur_w in coors_arr:
                # Tile offsets are relative to the bounding box; shift them
                # back into image coordinates.
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + args.patch_size / 2,
                                     patch_start_h + args.patch_size / 2)
                if not patch_center.within(poly_cnt):
                    continue

                patch_img = img[patch_start_h:patch_start_h + args.patch_size,
                                patch_start_w:patch_start_w + args.patch_size, :]
                Feas.append(gist.extract(patch_img))
                BBoxes.append([
                    patch_start_h, patch_start_w, args.patch_size,
                    args.patch_size
                ])
            fea_dict = {'feat': np.asarray(Feas), 'bbox': np.asarray(BBoxes)}

            # save features (a file is written even when no tile qualified)
            cat_fea_dir = os.path.join(fea_dir, region_label)
            if not os.path.exists(cat_fea_dir):
                os.makedirs(cat_fea_dir)
            dd.io.save(os.path.join(cat_fea_dir, region_name + ".h5"),
                       fea_dict)
Example #3
0
def gen_patches(img_dir, patch_dir, patch_size=448):
    """Split every jpg in ``img_dir`` into patches sorted by mask coverage.

    Each ``<name>.jpg`` is paired with a ``<name>.png`` mask from the same
    directory. Patches with a background ratio above 0.88 are dropped. Of the
    rest, those with at least 5% non-zero mask pixels go to
    ``patch_dir/1Pos`` and the others to ``patch_dir/0Neg``. When ``img_dir``
    contains the substring "neg", a candidate negative patch is dropped
    whenever the random draw is <= 0.80.

    Args:
        img_dir: Directory holding the jpg images and their png masks.
        patch_dir: Root directory for the "1Pos" and "0Neg" output folders.
        patch_size: Side length, in pixels, of each patch.
    """
    jpg_paths = pydaily.filesystem.find_ext_files(img_dir, "jpg")
    img_list = [os.path.basename(jpg_path) for jpg_path in jpg_paths]

    pos_patch_dir = os.path.join(patch_dir, "1Pos")
    neg_patch_dir = os.path.join(patch_dir, "0Neg")
    for out_dir in (pos_patch_dir, neg_patch_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    pos_num = 0
    neg_num = 0
    for idx, img_file in enumerate(img_list):
        if idx > 0 and idx % 10 == 0:
            print("processing {}/{}".format(idx, len(img_list)))

        stem = os.path.splitext(img_file)[0]
        cur_img = io.imread(os.path.join(img_dir, img_file))
        cur_mask = io.imread(os.path.join(img_dir, stem + ".png"))

        # split coors and save patches
        img_h, img_w = cur_img.shape[0], cur_img.shape[1]
        coors_arr = patch.wsi_coor_splitting(img_h, img_w, patch_size,
                                             overlap_flag=True)
        for coor in coors_arr:
            top, left = coor[0], coor[1]
            rows = slice(top, top + patch_size)
            cols = slice(left, left + patch_size)
            patch_img = cur_img[rows, cols]

            # image background control
            bk_ratio = patch.patch_bk_ratio(patch_img, bk_thresh=0.864)
            if bk_ratio > 0.88:
                continue

            # mask control
            patch_mask = cur_mask[rows, cols]
            pixel_ratio = np.sum(patch_mask > 0) * 1.0 / patch_mask.size

            patch_name = str(uuid.uuid4())[:8]
            if pixel_ratio >= 0.05:
                pos_path = os.path.join(pos_patch_dir, patch_name + ".png")
                io.imsave(pos_path, patch_img)
                pos_num += 1
                continue

            # The random draw happens first, exactly as before, so the RNG
            # stream is consumed for every candidate negative patch.
            if np.random.random_sample() <= 0.80 and "neg" in img_dir:
                continue
            neg_path = os.path.join(neg_patch_dir, patch_name + ".png")
            io.imsave(neg_path, patch_img)
            neg_num += 1

    print("There are {} pos samples and {} neg samples".format(
        pos_num, neg_num))
def gen_slide_patches(slide_dir, slide_name, patch_dir, patch_size=256):
    """Cut the annotated regions of one slide image into patches by label.

    Reads ``slide_dir/<slide_name>.png`` and its ``.json`` annotation. Each
    annotated region's bounding box is tiled with
    ``patch.wsi_coor_splitting``; tiles whose centre lies inside the region
    polygon are written to ``patch_dir/<label id>/<random 8 chars>.png``.

    Args:
        slide_dir: Directory holding the slide image and its annotation.
        slide_name: File name of the slide without extension.
        patch_dir: Root directory the patches are written under.
        patch_size: Side length, in pixels, of each patch.

    If the image or the annotation file is missing, "File not available" is
    printed and nothing is generated.
    """
    img_path = os.path.join(slide_dir, slide_name + ".png")
    json_path = os.path.join(slide_dir, slide_name + ".json")
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print("File not available")
        # Without this the code below would try to read the missing file.
        return

    img = io.imread(img_path)
    anno_dict = format.json_to_dict(json_path)
    for cur_r in anno_dict:
        cur_anno = anno_dict[cur_r]
        x_coors, y_coors = cur_anno['w'], cur_anno['h']
        # Row 0 holds the 'h' (y) coordinates, row 1 the 'w' (x) ones.
        cnt_arr = np.zeros((2, len(x_coors)), np.int32)
        cnt_arr[0], cnt_arr[1] = y_coors, x_coors
        poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

        # Tile the bounding box of the region contour.
        start_x, start_y = min(x_coors), min(y_coors)
        cnt_w = max(x_coors) - start_x + 1
        cnt_h = max(y_coors) - start_y + 1
        coors_arr = patch.wsi_coor_splitting(cnt_h,
                                             cnt_w,
                                             patch_size,
                                             overlap_flag=True)
        for cur_h, cur_w in coors_arr:
            # Tile offsets are relative to the bounding box; shift them back
            # into image coordinates.
            patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
            patch_center = Point(patch_start_w + patch_size / 2,
                                 patch_start_h + patch_size / 2)
            if not patch_center.within(poly_cnt):
                continue

            patch_img = img[patch_start_h:patch_start_h + patch_size,
                            patch_start_w:patch_start_w + patch_size, :]
            # ``label_map`` is defined outside this function.
            patch_cat_dir = os.path.join(patch_dir,
                                         str(label_map[cur_anno['label']]))
            if not os.path.exists(patch_cat_dir):
                os.makedirs(patch_cat_dir)
            patch_path = os.path.join(patch_cat_dir,
                                      str(uuid.uuid4())[:8] + '.png')
            io.imsave(patch_path, patch_img)
def gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256):
    """Export a lower-resolution image and rescaled annotations per slide.

    For each ``*.tiff`` slide in ``slides_dir`` the matching
    ``<name>.json`` in ``annotation_dir`` is read. Region contours are
    divided by ``2 ** level`` and kept only if they lie within the slide
    dimensions at ``level`` and at least one ``size``-pixel tile centre falls
    inside the region polygon. When a slide has such regions, the slide image
    at ``level`` is saved as ``level_dir/<name>.png`` together with
    ``level_dir/<name>.json`` holding the kept contours.

    Args:
        slides_dir: Directory holding the ``tiff`` slides.
        annotation_dir: Directory holding one ``json`` annotation per slide.
        level_dir: Output directory for the exported png/json pairs.
        level: Slide pyramid level to export.
        size: Tile side length used for the "contains a tile" check.

    Raises:
        AssertionError: If the number of slides and annotations differ.
    """
    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ind, ele in enumerate(slide_list):
        if ind > 0 and ind % 20 == 0:
            print("Processing {:3d}/{}".format(ind, len(slide_list)))
        slide_name = os.path.splitext(ele)[0]
        json_path = os.path.join(annotation_dir, slide_name + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            continue

        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        # Close the slide handle even if processing fails, so file handles
        # do not pile up across the slide loop.
        try:
            level_dim = slide_head.level_dimensions[level]
            img_w, img_h = level_dim
            # NOTE(review): assumes each pyramid level halves the previous
            # one -- confirm against the slides being processed.
            scale = np.power(2, level)

            new_anno_dict = {}
            for cur_r in region_annos:
                cur_cnt = region_annos[cur_r]['cnts']
                cur_desp = region_annos[cur_r]['desp']
                num_ps = len(cur_cnt['h'])
                # Row 0 holds the scaled 'h' coordinates, row 1 the 'w' ones;
                # storing into int32 truncates the fractional part.
                cnt_arr = np.zeros((2, num_ps), np.int32)
                cnt_arr[0] = [coor / scale for coor in cur_cnt['h']]
                cnt_arr[1] = [coor / scale for coor in cur_cnt['w']]
                # Drop regions that fall outside the image at this level.
                if np.min(cnt_arr[0]) < 0 or np.min(cnt_arr[1]) < 0:
                    continue
                if np.max(cnt_arr[0]) > img_h or np.max(cnt_arr[1]) > img_w:
                    continue

                poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
                start_h, start_w = np.min(cnt_arr[0]), np.min(cnt_arr[1])
                cnt_h = np.max(cnt_arr[0]) - start_h + 1
                cnt_w = np.max(cnt_arr[1]) - start_w + 1

                coors_arr = patch.wsi_coor_splitting(cnt_h,
                                                     cnt_w,
                                                     size,
                                                     overlap_flag=True)
                for cur_h, cur_w in coors_arr:
                    patch_center = Point(cur_w + start_w + size / 2,
                                         cur_h + start_h + size / 2)
                    if patch_center.within(poly_cnt):
                        # One qualifying tile is enough to keep the region.
                        new_anno_dict[cur_r] = {
                            'label': cur_desp,
                            'h': cnt_arr[0].tolist(),
                            'w': cnt_arr[1].tolist()
                        }
                        break

            if len(new_anno_dict) > 0:
                wsi_img = slide_head.read_region((0, 0), level, level_dim)
                # Keep only the first three channels of the region image.
                wsi_img = np.array(wsi_img)[:, :, :3]
                io.imsave(os.path.join(level_dir, slide_name + ".png"),
                          wsi_img)
                format.dict_to_json(
                    new_anno_dict,
                    os.path.join(level_dir, slide_name + ".json"))
            else:
                print("---{} have no proper regions---".format(slide_name))
        finally:
            slide_head.close()
Example #6
0
def test_wsi_coor_splitting():
    """Call ``patch.wsi_coor_splitting`` for a 1536x2048 extent with length 224 and overlap enabled."""
    # NOTE(review): no assertion on ``coors_arr`` is visible in this view; the
    # function body may continue beyond it -- confirm against the full file.
    coors_arr = patch.wsi_coor_splitting(wsi_h=1536, wsi_w=2048, length=224, overlap_flag=True)