Example #1
def test_patch_bk_ratio():
    img_path = os.path.join(PRJ_PATH, "test/data/Images/3c32efd9.png")
    img = io.imread(img_path)

    bk_ratio = patch.patch_bk_ratio(img, bk_thresh=0.80)
    if bk_ratio > 1 or bk_ratio < 0:
        raise AssertionError("bk_ratio {} is not in the range [0, 1].".format(bk_ratio))
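For reference, `patch.patch_bk_ratio` itself is not shown in this listing. A minimal sketch of such a background-ratio helper, assuming it simply measures the fraction of grayscale pixels brighter than `bk_thresh` (the helper name below is a hypothetical stand-in, not the actual `patch` module API):

import numpy as np
from skimage import color


def approx_patch_bk_ratio(img, bk_thresh=0.80):
    # rgb2gray returns floats in [0, 1], so the resulting ratio is always within [0, 1]
    gray = color.rgb2gray(img)
    return float(np.mean(gray > bk_thresh))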
Example #2
def gen_wsi_feas(patch_model, img_dir, fea_dir, args):
    img_list = [ele for ele in os.listdir(img_dir) if "jpg" in ele]
    for ind, ele in enumerate(img_list):
        img_name = os.path.splitext(ele)[0]
        if ind > 0 and ind % 10 == 0:
            print("processing {:03d}/{:03d}, {}".format(ind, len(img_list), img_name))
        feas_list, probs_list, coor_list = [], [], []

        img_path = os.path.join(img_dir, ele)
        cur_img = io.imread(img_path)
        # split the image into patch coordinates
        coors_arr = wsi_stride_splitting(cur_img.shape[0], cur_img.shape[1], args.patch_len, args.stride_len)
        patch_list = []
        for coor_ind, coor in enumerate(coors_arr):
            start_h, start_w = coor[0], coor[1]
            patch_img = cur_img[start_h:start_h+args.patch_len, start_w:start_w+args.patch_len]
            # image background control
            if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) <= 0.88:
                patch_list.append(patch_img)
                coor_list.append([start_h, start_w, start_h+args.patch_len, start_w+args.patch_len])
            # Run feature extraction batch-wise to avoid huge memory consumption
            if len(patch_list) == 16 or coor_ind + 1 == len(coors_arr):
                patch_arr = np.asarray(patch_list)
                patch_dset = PatchDataset(patch_arr)
                patch_loader = DataLoader(patch_dset, batch_size=16, shuffle=False, num_workers=4, drop_last=False)
                with torch.no_grad():
                    for inputs in patch_loader:
                        batch_tensor = Variable(inputs.cuda())
                        feas, probs = extract_model_feas(patch_model, batch_tensor, args)
                        batch_feas = feas.cpu().data.numpy().tolist()
                        batch_probs = probs.cpu().data.numpy().tolist()
                        feas_list.extend(batch_feas)
                        probs_list.extend(batch_probs)
                patch_list = []

        if len(feas_list) != len(probs_list) or len(feas_list) != len(coor_list):
            print("{} feas/probs/coors not consistent.".format(img_name))
        else:
            all_feas = np.asarray(feas_list).astype(np.float32)
            all_probs = np.asarray(probs_list).astype(np.float32)
            all_coors = np.asarray(coor_list).astype(np.float32)
            # sort patches by the predicted probability of the first class
            sorted_ind = np.argsort(all_probs[:, 0])
            sorted_feas = all_feas[sorted_ind]
            sorted_probs = all_probs[sorted_ind]
            sorted_coors = all_coors[sorted_ind]

            patch_fea_dict = {
                "feas": sorted_feas,
                "probs": sorted_probs,
                "coors": sorted_coors,
            }
            if not os.path.exists(fea_dir):
                os.makedirs(fea_dir)
            dd.io.save(os.path.join(fea_dir, img_name + ".h5"), patch_fea_dict)
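gen_wsi_feas depends on a PatchDataset class that is not part of this example. A minimal sketch, assuming it only wraps the stacked uint8 patch array and yields normalized CHW float tensors (the class name below is hypothetical):

import numpy as np
import torch
from torch.utils.data import Dataset


class PatchDatasetSketch(Dataset):
    def __init__(self, patch_arr):
        self.patch_arr = patch_arr

    def __len__(self):
        return len(self.patch_arr)

    def __getitem__(self, idx):
        # HWC uint8 patch -> CHW float tensor in [0, 1]
        patch_img = self.patch_arr[idx].astype(np.float32) / 255.0
        return torch.from_numpy(patch_img).permute(2, 0, 1)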
Example #3
def gen_wsi_feas(patch_model, img_path, args):
    img_name = os.path.splitext(os.path.basename(img_path))[0]
    feas_list, probs_list, coor_list = [], [], []

    cur_img = io.imread(img_path)
    # split the image into patch coordinates
    coors_arr = wsi_stride_splitting(cur_img.shape[0], cur_img.shape[1],
                                     args.patch_len, args.stride_len)
    patch_list = []
    for ind, coor in enumerate(coors_arr):
        start_h, start_w = coor[0], coor[1]
        patch_img = cur_img[start_h:start_h + args.patch_len,
                            start_w:start_w + args.patch_len]
        # image background control
        if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) <= 0.88:
            patch_list.append(patch_img)
            coor_list.append([
                start_h, start_w, start_h + args.patch_len,
                start_w + args.patch_len
            ])

        # Run feature extraction batch-wise to avoid huge memory consumption
        if len(patch_list) == args.cls_batch_size or ind + 1 == len(coors_arr):
            patch_arr = np.asarray(patch_list)
            patch_dset = ClsPatchDataset(patch_arr)
            patch_loader = DataLoader(patch_dset,
                                      batch_size=args.cls_batch_size,
                                      shuffle=False,
                                      num_workers=0,
                                      drop_last=False)
            with torch.no_grad():
                for inputs in patch_loader:
                    batch_tensor = Variable(inputs.cuda())
                    feas, probs = extract_model_feas(patch_model, batch_tensor,
                                                     args)
                    batch_feas = feas.cpu().data.numpy().tolist()
                    batch_probs = probs.cpu().data.numpy().tolist()
                    feas_list.extend(batch_feas)
                    probs_list.extend(batch_probs)
            patch_list = []

    all_feas = np.asarray(feas_list).astype(np.float32)
    all_probs = np.asarray(probs_list).astype(np.float32)
    sorted_ind = np.argsort(all_probs[:, 0])

    feas_placeholder = np.zeros((args.wsi_patch_num, all_feas.shape[1]),
                                dtype=np.float32)
    test_patch_num = min(len(all_feas), args.wsi_patch_num)
    chosen_total_ind = sorted_ind[:test_patch_num]
    feas_placeholder[:test_patch_num] = all_feas[chosen_total_ind]
    chosen_coors = np.asarray(coor_list)[chosen_total_ind].tolist()
    return feas_placeholder, test_patch_num, chosen_coors
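Both variants of gen_wsi_feas rely on wsi_stride_splitting to enumerate patch origins; its implementation is not included here. A plausible sketch, assuming it walks the image on a fixed stride grid and keeps the last window flush with the image border (the function name is a hypothetical stand-in):

import numpy as np


def stride_splitting_sketch(img_h, img_w, patch_len, stride_len):
    def starts(length):
        if length <= patch_len:
            return [0]
        points = list(range(0, length - patch_len, stride_len))
        # keep the last patch fully inside the image
        points.append(length - patch_len)
        return points

    coors = [(h, w) for h in starts(img_h) for w in starts(img_w)]
    return np.asarray(coors, dtype=np.int64)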
Example #4
def gen_patches(img_dir, patch_dir, patch_size=448):
    img_list = pydaily.filesystem.find_ext_files(img_dir, "jpg")
    img_list = [os.path.basename(ele) for ele in img_list]
    pos_patch_dir = os.path.join(patch_dir, "1Pos")
    if not os.path.exists(pos_patch_dir):
        os.makedirs(pos_patch_dir)
    neg_patch_dir = os.path.join(patch_dir, "0Neg")
    if not os.path.exists(neg_patch_dir):
        os.makedirs(neg_patch_dir)

    pos_num, neg_num = 0, 0
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(img_dir, ele)
        mask_path = os.path.join(img_dir, os.path.splitext(ele)[0] + ".png")
        cur_img = io.imread(img_path)
        cur_mask = io.imread(mask_path)
        # split coors and save patches
        coors_arr = patch.wsi_coor_splitting(cur_img.shape[0],
                                             cur_img.shape[1],
                                             patch_size,
                                             overlap_flag=True)
        for coor in coors_arr:
            start_h, start_w = coor[0], coor[1]
            patch_img = cur_img[start_h:start_h + patch_size,
                                start_w:start_w + patch_size]
            # image background control
            if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) > 0.88:
                continue
            # mask control
            patch_mask = cur_mask[start_h:start_h + patch_size,
                                  start_w:start_w + patch_size]
            pixel_ratio = np.sum(patch_mask > 0) * 1.0 / patch_mask.size

            patch_name = str(uuid.uuid4())[:8]
            if pixel_ratio >= 0.05:
                io.imsave(os.path.join(pos_patch_dir, patch_name + ".png"),
                          patch_img)
                pos_num += 1
            else:
                if np.random.random_sample() <= 0.80 and "neg" in img_dir:
                    continue
                io.imsave(os.path.join(neg_patch_dir, patch_name + ".png"),
                          patch_img)
                neg_num += 1

    print("There are {} pos samples and {} neg samples".format(
        pos_num, neg_num))
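The positive/negative split above hinges on the tumor-pixel ratio of the mask patch (at least 5% non-zero pixels). A small, self-contained illustration of that rule, using a hypothetical helper name:

import numpy as np


def label_patch_sketch(patch_mask, pos_ratio=0.05):
    # a patch counts as positive when >= pos_ratio of its mask pixels are tumor
    pixel_ratio = np.count_nonzero(patch_mask) / patch_mask.size
    return "pos" if pixel_ratio >= pos_ratio else "neg"


# a 448x448 mask with a 100x100 tumor block covers ~4.98% of the area -> "neg"
demo_mask = np.zeros((448, 448), dtype=np.uint8)
demo_mask[:100, :100] = 255
print(label_patch_sketch(demo_mask))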
Example #5
def gen_samples(slides_dir, patch_level, patch_size, tumor_type, slide_list, dset, overlap_mode):
    # prepare saving directory
    patch_path = os.path.join(os.path.dirname(slides_dir), "Patches", tumor_type)
    patch_img_dir = os.path.join(patch_path, dset, "imgs")
    if not os.path.exists(patch_img_dir):
        os.makedirs(patch_img_dir)
    patch_mask_dir = os.path.join(patch_path, dset, "masks")
    if not os.path.exists(patch_mask_dir):
        os.makedirs(patch_mask_dir)

    # processing slide one-by-one
    ttl_patch = 0
    slide_list.sort()
    for ind, ele in enumerate(slide_list):
        print("Processing {} {}/{}".format(ele, ind+1, len(slide_list)))
        cur_slide_path = os.path.join(slides_dir, ele+".svs")

        # locate tissue contours and generate patches based on them
        cnts, d_factor = tl.locate_tissue_cnts(cur_slide_path, max_img_size=2048, smooth_sigma=13,
                                               thresh_val=0.88, min_tissue_size=120000)
        select_level, select_factor = tl.select_slide_level(cur_slide_path, max_size=2048)
        cnts = sorted(cnts, key=lambda x: cv2.contourArea(x), reverse=True)

        # scale contour to slide level 2
        wsi_head = pyramid.load_wsi_head(cur_slide_path)
        cnt_scale = select_factor / int(wsi_head.level_downsamples[patch_level])
        tissue_arr = cv_cnt_to_np_arr(cnts[0] * cnt_scale).astype(np.int32)
        # convert tissue_arr to convex if poly is not valid
        tissue_poly = np_arr_to_poly(tissue_arr)
        if not tissue_poly.is_valid:
            tissue_arr = poly_to_np_arr(tissue_poly.convex_hull).astype(int)

        coors_arr = None
        if overlap_mode == "half_overlap":
            level_w, level_h = wsi_head.level_dimensions[patch_level]
            coors_arr = contour.contour_patch_splitting_half_overlap(tissue_arr, level_h, level_w, patch_size, inside_ratio=0.80)
        elif overlap_mode == "self_overlap":
            coors_arr = contour.contour_patch_splitting_self_overlap(tissue_arr, patch_size, inside_ratio=0.80)
        else:
            raise NotImplementedError("unknown overlapping mode")

        wsi_img = wsi_head.read_region((0, 0), patch_level, wsi_head.level_dimensions[patch_level])
        wsi_img = np.asarray(wsi_img)[:,:,:3]
        mask_path = os.path.join(slides_dir, "_".join([ele, tumor_type+".tif"]))
        mask_img = io.imread(mask_path)
        wsi_mask = (transform.resize(mask_img, wsi_img.shape[:2], order=0) * 255).astype(np.uint8) * 255

        if dset == "val":
            test_slides_dir = os.path.join(os.path.dirname(slides_dir), "TestSlides")
            if not os.path.exists(os.path.join(test_slides_dir, cur_slide_path)):
                shutil.copy(cur_slide_path, test_slides_dir)
            if not os.path.exists(os.path.join(test_slides_dir, mask_path)):
                shutil.copy(mask_path, test_slides_dir)

        for cur_arr in coors_arr:
            cur_h, cur_w = cur_arr[0], cur_arr[1]
            cur_patch = wsi_img[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            if cur_patch.shape[0] != patch_size or cur_patch.shape[1] != patch_size:
                continue
            cur_mask = wsi_mask[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            # skip background patches; bk_thresh 0.864 is the grayscale value of the
            # background RGB (235, 210, 235) weighted by [0.299, 0.587, 0.114], divided by 255
            if patch.patch_bk_ratio(cur_patch, bk_thresh=0.864) > 0.88:
                continue

            if overlap_mode == "half_overlap" and tumor_type == "viable":
                pixel_ratio = np.sum(cur_mask > 0) * 1.0 / cur_mask.size
                if pixel_ratio < 0.05:
                    continue

            patch_name = ele + "_" + str(uuid.uuid1())[:8]
            io.imsave(os.path.join(patch_img_dir, patch_name+".jpg"), cur_patch)
            io.imsave(os.path.join(patch_mask_dir, patch_name+".png"), cur_mask)
            ttl_patch += 1

    print("There are {} patches in total.".format(ttl_patch))