Exemple #1
0
def gen_patches(imgs_dir, patch_dir, img_list, dset, patch_size=256):
    """ Cut the annotated regions of each image into patches, saved per label.

    For every name in img_list the image and its ``<name>.json`` annotation
    are read from imgs_dir. Each annotated region is tiled with
    ``patch.wsi_coor_splitting``; a tile is kept when its centre lies within
    the region polygon, is resized to 256x256 and is written to
    ``patch_dir/dset/<label id>/<8 random chars>.png``. Images whose file or
    annotation is missing are reported and skipped.

    Parameters
    -------
    imgs_dir: str
        directory holding the images and their json annotations
    patch_dir: str
        root directory the patches are written under
    img_list: list
        image file names (with extension) located in imgs_dir
    dset: str
        sub-directory of patch_dir (presumably the dataset split name)
    patch_size: int
        side length, in pixels, of the square tiles cut from the image

    """

    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(imgs_dir, ele)
        img_name = os.path.splitext(ele)[0]
        json_path = os.path.join(imgs_dir, img_name + ".json")

        if not (os.path.exists(img_path) and os.path.exists(json_path)):
            print("File not available")
            # Skip this entry; reading a missing file below would abort the run.
            continue

        img = io.imread(img_path)
        anno_dict = format.json_to_dict(json_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            # Contour array layout: row 0 holds heights (y), row 1 widths (x).
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            # Tile the bounding box of the region.
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h, cnt_w, patch_size, overlap_flag=True)
            for cur_h, cur_w in coors_arr:
                # Tile offsets are relative to the bounding box origin.
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + patch_size / 2, patch_start_h + patch_size / 2)
                if not patch_center.within(poly_cnt):
                    continue

                patch_img = img[patch_start_h:patch_start_h + patch_size,
                                patch_start_w:patch_start_w + patch_size, :]
                patch_img = transform.resize(patch_img, (256, 256))
                patch_cat_dir = os.path.join(patch_dir, dset, str(label_map[cur_anno['label']]))
                os.makedirs(patch_cat_dir, exist_ok=True)
                patch_path = os.path.join(patch_cat_dir, str(uuid.uuid4())[:8] + '.png')
                io.imsave(patch_path, patch_img)
def check_contour_valid(slides_dir, annotation_dir):
    """ Report annotated regions whose mean point falls outside their contour.

    Parameters
    -------
    slides_dir: str
        directory searched for file names containing "tiff"
    annotation_dir: str
        directory searched for file names containing "json"

    Raises
    -------
    AssertionError
        when the number of slides differs from the number of annotations

    """

    slide_names = list(filter(lambda name: "tiff" in name, os.listdir(slides_dir)))
    json_names = list(filter(lambda name: "json" in name, os.listdir(annotation_dir)))
    if len(slide_names) != len(json_names):
        raise AssertionError("Annotation not complete")

    for slide_name in slide_names:
        anno_name = os.path.splitext(slide_name)[0] + ".json"
        annotation = format.json_to_dict(os.path.join(annotation_dir, anno_name))
        regions = annotation["regions"]
        if len(regions) == 0:
            print("Not annotated regions in {}".format(slide_name))

        for region_id in regions:
            contour = regions[region_id]['cnts']
            # Row 0 stores heights, row 1 stores widths.
            contour_arr = np.zeros((2, len(contour['h'])))
            contour_arr[0] = contour['h']
            contour_arr[1] = contour['w']
            region_poly = poly_transform.np_arr_to_poly(contour_arr)
            # Point takes (x, y), i.e. (mean width, mean height).
            mean_w, mean_h = np.mean(contour_arr[1]), np.mean(contour_arr[0])
            if not Point(mean_w, mean_h).within(region_poly):
                print("{} in {}".format(region_id, slide_name))
def gen_gist_features(roi_dir, fea_dir, mode, args):
    """ Extract GIST descriptors for the patches of every annotated region.

    Each png in ``roi_dir/mode`` is paired with its ``<name>.json`` annotation.
    Every annotated region is tiled with ``patch.wsi_coor_splitting``; for the
    tiles whose centre lies within the region polygon a descriptor is computed
    with ``gist.extract``. Descriptors and tile boxes of a region are saved as
    ``fea_dir/<args.model_name>/<mode>/<label id>/<image>_r<region>.h5``.
    Images without annotation file are reported and skipped.

    Parameters
    -------
    roi_dir: str
        root directory of the region images; ``mode`` is appended to it
    fea_dir: str
        root directory the feature files are written under
    mode: str
        sub-directory name used for both input and output
    args: object
        must provide ``model_name`` and ``patch_size`` attributes

    """

    fea_dir = os.path.join(fea_dir, args.model_name, mode)
    data_dir = os.path.join(roi_dir, mode)
    img_list = [ele for ele in os.listdir(data_dir) if "png" in ele]

    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))

        cur_img_path = os.path.join(data_dir, ele)
        img_name = os.path.splitext(ele)[0]
        cur_anno_path = os.path.join(data_dir, img_name + ".json")

        if not (os.path.exists(cur_img_path)
                and os.path.exists(cur_anno_path)):
            print("File not available")
            # Skip this entry; reading a missing file below would abort the run.
            continue

        img = io.imread(cur_img_path)
        anno_dict = format.json_to_dict(cur_anno_path)
        for cur_r in anno_dict:
            cur_anno = anno_dict[cur_r]
            region_label = str(label_map[cur_anno['label']])
            region_name = "_".join([img_name, 'r' + cur_r])
            x_coors, y_coors = cur_anno['w'], cur_anno['h']
            # Contour array layout: row 0 holds heights (y), row 1 widths (x).
            cnt_arr = np.zeros((2, len(x_coors)), np.int32)
            cnt_arr[0], cnt_arr[1] = y_coors, x_coors
            poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

            # Tile the bounding box of the region.
            start_x, start_y = min(x_coors), min(y_coors)
            cnt_w = max(x_coors) - start_x + 1
            cnt_h = max(y_coors) - start_y + 1
            coors_arr = patch.wsi_coor_splitting(cnt_h,
                                                 cnt_w,
                                                 args.patch_size,
                                                 overlap_flag=True)

            Feas, BBoxes = [], []
            for cur_h, cur_w in coors_arr:
                # Tile offsets are relative to the bounding box origin.
                patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
                patch_center = Point(patch_start_w + args.patch_size / 2,
                                     patch_start_h + args.patch_size / 2)
                if not patch_center.within(poly_cnt):
                    continue

                patch_img = img[patch_start_h:patch_start_h + args.patch_size,
                                patch_start_w:patch_start_w + args.patch_size, :]
                Feas.append(gist.extract(patch_img))
                # Box format: [start_h, start_w, height, width].
                BBoxes.append([
                    patch_start_h, patch_start_w, args.patch_size,
                    args.patch_size
                ])
            fea_dict = {'feat': np.asarray(Feas), 'bbox': np.asarray(BBoxes)}

            # save features
            cat_fea_dir = os.path.join(fea_dir, region_label)
            os.makedirs(cat_fea_dir, exist_ok=True)
            dd.io.save(os.path.join(cat_fea_dir, region_name + ".h5"),
                       fea_dict)
def test_poly_transform():
    """ Round-trip a small contour through the polygon conversion helpers.

    Raises
    -------
    AssertionError
        when the two polygon constructions disagree, or when converting the
        polygon back does not reproduce the input array

    """

    contour = np.array([[1., 2., 4., 5., 3.], [1., 3., 4., 2., 0.]])

    # array -> polygon
    poly_from_arr = np_arr_to_poly(contour)
    print("Bounds of poly1 is: ", poly_from_arr.exterior.bounds)

    # array -> point list -> polygon
    poly_from_points = point_list_to_poly(np_arr_to_point_list(contour))
    print("Bounds of poly2 is: ", poly_from_points.exterior.bounds)
    if poly_from_arr != poly_from_points:
        raise AssertionError("Conversion error")

    # bounding box -> polygon (row 0 is height, row 1 is width)
    heights, widths = contour[0], contour[1]
    bbox_poly = bbox_to_poly(np.min(heights), np.min(widths),
                             np.max(heights), np.max(widths))
    print("Bounds of poly3 is: ", bbox_poly.exterior.bounds)

    # polygon -> array
    restored = poly_to_np_arr(poly_from_arr)
    print("Numpy coordinates of poly1 is:")
    if not np.array_equal(contour, restored):
        raise AssertionError("Conversion error")
Exemple #5
0
def contour_valid(cnt_arr):
    """ Tell whether a contour forms a valid polygon.

    Parameters
    -------
    cnt_arr: np.array
        contour with standard numpy 2d array format

    Returns
    -------
    valid: boolean
        True when the polygon built from the contour reports ``is_valid``,
        otherwise False

    """

    return bool(np_arr_to_poly(cnt_arr).is_valid)
def locate_tissue(slides_dir):
    """ Draw the largest tissue contour of every slide on a low-resolution image.

    Slides with extension "svs" or "SVS" are collected from slides_dir. For
    each slide the tissue contours are located, the one with the largest area
    is drawn in green on the slide image of the selected level, and the result
    is saved as ``<slide name>.png`` in ``Visualization/TissueLoc`` next to
    slides_dir. A contour that is not a valid polygon is replaced by its
    convex hull. Slides without any detected contour are reported and skipped.

    Parameters
    -------
    slides_dir: str
        directory containing the slides

    """

    slide_list = []
    slide_list.extend(filesystem.find_ext_files(slides_dir, "svs"))
    slide_list.extend(filesystem.find_ext_files(slides_dir, "SVS"))

    tissue_dir = os.path.join(os.path.dirname(slides_dir), "Visualization/TissueLoc")
    # NOTE(review): presumably (re)creates an empty directory - confirm in filesystem.
    filesystem.overwrite_dir(tissue_dir)
    for ind, slide_path in enumerate(slide_list):
        print("processing {}/{}".format(ind+1, len(slide_list)))
        # locate tissue contours with default parameters
        cnts, _ = tl.locate_tissue_cnts(slide_path, max_img_size=2048, smooth_sigma=13,
                                        thresh_val=0.88, min_tissue_size=120000)
        # largest contour first
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
        if not cnts:
            # Nothing to draw; indexing cnts[0] below would raise IndexError.
            print("No tissue contour found in {}".format(os.path.basename(slide_path)))
            continue

        # load slide
        select_level, _ = tl.select_slide_level(slide_path, max_size=2048)
        wsi_head = pyramid.load_wsi_head(slide_path)
        slide_img = wsi_head.read_region((0, 0), select_level, wsi_head.level_dimensions[select_level])
        # keep the first three channels only and make the buffer writable for OpenCV
        slide_img = np.asarray(slide_img)[:, :, :3]
        slide_img = np.ascontiguousarray(slide_img, dtype=np.uint8)

        # change not valid poly to convex_hull
        cnt_arr = cv_cnt_to_np_arr(cnts[0])
        cnt_poly = np_arr_to_poly(cnt_arr)
        if cnt_poly.is_valid:
            valid_cnt = cnts[0].astype(int)
        else:
            valid_arr = poly_to_np_arr(cnt_poly.convex_hull)
            valid_cnt = np_arr_to_cv_cnt(valid_arr).astype(int)
        cv2.drawContours(slide_img, [valid_cnt], 0, (0, 255, 0), 8)

        # overlay and save
        tissue_save_name = os.path.splitext(os.path.basename(slide_path))[0] + ".png"
        tissue_save_path = os.path.join(tissue_dir, tissue_save_name)
        io.imsave(tissue_save_path, slide_img)
Exemple #7
0
def contour_to_poly_valid(cnt_arr):
    """ Return a contour that forms a valid polygon.

    Parameters
    -------
    cnt_arr: np.array
        contour with standard numpy 2d array format

    Returns
    -------
    cnt_valid_arr: np.array
        the input itself when its polygon is valid, otherwise the contour of
        the polygon's convex hull, in standard numpy 2d array format

    """

    polygon = np_arr_to_poly(cnt_arr)
    if not polygon.is_valid:
        # fall back to the convex hull of the invalid polygon
        return poly_to_np_arr(polygon.convex_hull)
    return cnt_arr
def gen_slide_patches(slide_dir, slide_name, patch_dir, patch_size=256):
    """ Cut the annotated regions of one slide image into patches per label.

    ``<slide_name>.png`` and ``<slide_name>.json`` are read from slide_dir.
    Each annotated region is tiled with ``patch.wsi_coor_splitting``; a tile is
    kept when its centre lies within the region polygon and is written to
    ``patch_dir/<label id>/<8 random chars>.png``. When the image or the
    annotation is missing, a message is printed and nothing is generated.

    Parameters
    -------
    slide_dir: str
        directory holding the slide image and its json annotation
    slide_name: str
        file name of the slide without extension
    patch_dir: str
        root directory the patches are written under
    patch_size: int
        side length, in pixels, of the square tiles cut from the image

    """

    img_path = os.path.join(slide_dir, slide_name + ".png")
    json_path = os.path.join(slide_dir, slide_name + ".json")
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print("File not available")
        # Nothing to process; reading a missing file below would raise.
        return

    img = io.imread(img_path)
    anno_dict = format.json_to_dict(json_path)
    for cur_r in anno_dict:
        cur_anno = anno_dict[cur_r]
        x_coors, y_coors = cur_anno['w'], cur_anno['h']
        # Contour array layout: row 0 holds heights (y), row 1 widths (x).
        cnt_arr = np.zeros((2, len(x_coors)), np.int32)
        cnt_arr[0], cnt_arr[1] = y_coors, x_coors
        poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)

        # Tile the bounding box of the region.
        start_x, start_y = min(x_coors), min(y_coors)
        cnt_w = max(x_coors) - start_x + 1
        cnt_h = max(y_coors) - start_y + 1
        coors_arr = patch.wsi_coor_splitting(cnt_h,
                                             cnt_w,
                                             patch_size,
                                             overlap_flag=True)
        for cur_h, cur_w in coors_arr:
            # Tile offsets are relative to the bounding box origin.
            patch_start_w, patch_start_h = cur_w + start_x, cur_h + start_y
            patch_center = Point(patch_start_w + patch_size / 2,
                                 patch_start_h + patch_size / 2)
            if not patch_center.within(poly_cnt):
                continue

            patch_img = img[patch_start_h:patch_start_h + patch_size,
                            patch_start_w:patch_start_w + patch_size, :]
            patch_cat_dir = os.path.join(patch_dir,
                                         str(label_map[cur_anno['label']]))
            os.makedirs(patch_cat_dir, exist_ok=True)
            patch_path = os.path.join(patch_cat_dir,
                                      str(uuid.uuid4())[:8] + '.png')
            io.imsave(patch_path, patch_img)
def gen_l2_data(slides_dir, annotation_dir, level_dir, level=2, size=256):
    """ Export slide images and region annotations at a given pyramid level.

    For every slide with annotated regions the contour coordinates are divided
    by ``2 ** level``. A region is kept when it lies inside the level image and
    at least one ``size`` x ``size`` tile of its bounding box has its centre
    within the contour. When a slide has kept regions, the whole level image is
    saved as ``<slide>.png`` and the scaled annotations as ``<slide>.json`` in
    level_dir; otherwise a message is printed.

    Parameters
    -------
    slides_dir: str
        directory searched for file names containing "tiff"
    annotation_dir: str
        directory searched for file names containing "json"
    level_dir: str
        output directory, created when missing before the first save
    level: int
        slide pyramid level to export
    size: int
        side length of the tiles used to test a region

    Raises
    -------
    AssertionError
        when the number of slides differs from the number of annotations

    """

    slide_list = [ele for ele in os.listdir(slides_dir) if "tiff" in ele]
    json_list = [ele for ele in os.listdir(annotation_dir) if "json" in ele]
    if len(slide_list) != len(json_list):
        raise AssertionError("Annotation not complete")

    for ind, ele in enumerate(slide_list):
        if ind > 0 and ind % 20 == 0:
            print("Processing {:3d}/{}".format(ind, len(slide_list)))
        slide_name = os.path.splitext(ele)[0]
        json_path = os.path.join(annotation_dir, slide_name + ".json")
        anno_dict = format.json_to_dict(json_path)
        region_annos = anno_dict["regions"]
        if len(region_annos) <= 0:
            continue

        slide_path = os.path.join(slides_dir, ele)
        slide_head = openslide.OpenSlide(slide_path)
        try:
            level_dim = slide_head.level_dimensions[level]
            img_w, img_h = level_dim
            scale = np.power(2, level)

            new_anno_dict = {}
            for cur_r in region_annos:
                cur_cnt = region_annos[cur_r]['cnts']
                cur_desp = region_annos[cur_r]['desp']
                num_ps = len(cur_cnt['h'])
                # Row 0 holds heights, row 1 widths; int32 storage truncates
                # the scaled coordinates.
                cnt_arr = np.zeros((2, num_ps), np.int32)
                cnt_arr[0] = [coor / scale for coor in cur_cnt['h']]
                cnt_arr[1] = [coor / scale for coor in cur_cnt['w']]
                # Drop regions that reach outside the level image.
                if np.min(cnt_arr[0]) < 0 or np.min(cnt_arr[1]) < 0:
                    continue
                if np.max(cnt_arr[0]) > img_h or np.max(cnt_arr[1]) > img_w:
                    continue

                poly_cnt = poly_transform.np_arr_to_poly(cnt_arr)
                start_h, start_w = np.min(cnt_arr[0]), np.min(cnt_arr[1])
                cnt_h = np.max(cnt_arr[0]) - start_h + 1
                cnt_w = np.max(cnt_arr[1]) - start_w + 1

                coors_arr = patch.wsi_coor_splitting(cnt_h,
                                                     cnt_w,
                                                     size,
                                                     overlap_flag=True)
                for cur_h, cur_w in coors_arr:
                    patch_center = Point(cur_w + start_w + size / 2,
                                         cur_h + start_h + size / 2)
                    if patch_center.within(poly_cnt):
                        # One qualifying tile is enough to keep the region.
                        new_anno_dict[cur_r] = {
                            'label': cur_desp,
                            'h': cnt_arr[0].tolist(),
                            'w': cnt_arr[1].tolist()
                        }
                        break

            if len(new_anno_dict) > 0:
                wsi_img = slide_head.read_region((0, 0), level, level_dim)
                wsi_img = np.array(wsi_img)[:, :, :3]
                os.makedirs(level_dir, exist_ok=True)
                io.imsave(os.path.join(level_dir, slide_name + ".png"), wsi_img)
                format.dict_to_json(
                    new_anno_dict,
                    os.path.join(level_dir, slide_name + ".json"))
            else:
                print("---{} have no proper regions---".format(slide_name))
        finally:
            # Release the slide handle even when a region fails to process.
            slide_head.close()
def gen_patches(slide_path, annotation_dict, args):
    """ Generate patch images and masks based on annotations as well as slide information.

    For every annotated region up to 10 random crop positions are drawn inside
    the region's bounding box. A draw is used when the crop fits in the slide
    level and the crop centre lies within the region polygon; the crop and its
    mask are then resized to ``args.save_size`` and saved under the same random
    file name in the image and mask directories from ``get_save_dirs(args)``.
    Every draw, used or not, counts as one of the 10 attempts.

    Parameters
    -------
    slide_path: str
        path of the slide opened with openslide
    annotation_dict: dict
        region id -> coordinate array; judging by the transpose and swap below
        each row is (w, h) at level 0 - TODO confirm against the caller
    args: object
        must provide ``slide_level``, ``crop_size`` and ``save_size``

    """

    # load slide header information
    slide_head = openslide.OpenSlide(slide_path)
    slide_name = os.path.basename(slide_path)  # slide id
    # level_count is the number of levels in the slide; levels are numbered
    # from 0 (highest resolution) to level_count - 1 (lowest resolution).
    if args.slide_level < 0 or args.slide_level >= slide_head.level_count:
        print("level {} not availabel in {}".format(args.slide_level,
                                                    slide_name))
        sys.exit()

    img_save_dir, mask_save_dir = get_save_dirs(args)
    level_w, level_h = slide_head.level_dimensions[args.slide_level]

    for cur_reg in annotation_dict:
        # Scale the original (w, h) coordinates to the requested level.
        coords = (annotation_dict[cur_reg] /
                  slide_head.level_downsamples[args.slide_level]).astype(
                      np.int32)
        coords = np.transpose(np.array(coords))
        coords[[0, 1]] = coords[[1, 0]]  # swap width and height
        min_h, max_h = np.min(coords[0, :]), np.max(coords[0, :])  # 0 is height
        min_w, max_w = np.min(coords[1, :]), np.max(coords[1, :])  # 1 is width

        num = 0

        # Build the polygon from the 2d array; note that height is the first
        # row ([0]) and width the second ([1]).
        cur_poly = poly_transform.np_arr_to_poly(np.asarray(coords))

        # at most 10 sampling attempts per region
        while num < 10:
            rand_h = np.random.randint(min_h, max_h)
            rand_w = np.random.randint(min_w, max_w)

            # level_dimensions is (width, height)
            h_over_flag = rand_h + args.crop_size >= level_h
            w_over_flag = rand_w + args.crop_size >= level_w
            if h_over_flag or w_over_flag:
                # Count the draw as an attempt; otherwise a region whose every
                # position overflows the slide would loop forever.
                num += 1
                continue

            cen_h = int(rand_h + args.crop_size / 2)
            cen_w = int(rand_w + args.crop_size / 2)
            cen_point = Point(cen_w, cen_h)  # centre point of the crop

            # is the crop centre inside the region polygon?
            patch_in_flag = cen_point.within(cur_poly)

            if not patch_in_flag:
                num += 1
                continue

            # crop a crop_size x crop_size patch at the random position
            cur_patch = slide_head.read_region(
                (rand_w, rand_h), args.slide_level,
                (args.crop_size, args.crop_size))
            cur_patch = np.asarray(cur_patch)[:, :, :3]  # keep three channels

            # correct patch mask on ignore pixels
            ## very slow (crop_size * crop_size point tests), need to speed
            ## this ignore part
            patch_mask = gen_patch_mask(args)

            for pw in range(rand_w, rand_w + args.crop_size):
                for ph in range(rand_h, rand_h + args.crop_size):
                    cur_p = Point(pw, ph)  # current pixel position
                    # pixels outside the polygon are set to 0 in the mask
                    if not cur_p.within(cur_poly):
                        patch_mask[ph - rand_h, pw - rand_w] = 0

            save_img = transform.resize(
                cur_patch, (args.save_size, args.save_size))
            save_mask = transform.resize(
                patch_mask, (args.save_size, args.save_size),
                order=0)  # order=0: Nearest-neighbor interpolation

            save_mask = (save_mask * 255).astype(np.uint8)

            # random 8-character id shared by image and mask
            img_fullname = str(uuid.uuid4())[:8] + ".png"
            save_img_path = os.path.join(img_save_dir, img_fullname)
            save_mask_path = os.path.join(mask_save_dir, img_fullname)
            io.imsave(save_img_path, save_img)
            io.imsave(save_mask_path, save_mask)
            num = num + 1
def _slide_patch_generator(slide_path, annotation_dict, annotation_label):
    """
    yields image patches and their ground-truth masks sampled from a slide.

    For every annotated region up to ``opt.patch_gen_try_num`` random crop
    positions are drawn inside the region's bounding box. A draw yields a patch
    when the crop fits in the slide level and the crop centre lies within the
    region polygon. Every draw, yielding or not, counts as one attempt.
    Regions whose coordinates cannot be turned into a polygon are logged and
    skipped.

    :param slide_path: path of the slide opened with openslide
    :param annotation_dict: region id -> coordinate array; judging by the
        transpose and swap below each row is (w, h) at level 0 - TODO confirm
    :param annotation_label: key into ``annotation_label_dict`` selecting the
        pixel value used for the ground truth
    :return: generator of (patch, ground truth) pairs, both uint8 arrays
        resized to ``opt.save_size``
    :raises Exception: when ``opt.slide_level`` is not available in the slide
        or ``annotation_label`` has no configured value
    """

    slide = openslide.OpenSlide(slide_path)
    slide_name = os.path.basename(slide_path)

    if opt.slide_level < 0 or opt.slide_level >= slide.level_count:
        raise Exception(
            f'level {opt.slide_level} is not available in the {slide_name}')

    if annotation_label not in annotation_label_dict:
        raise Exception(
            f"a value of an annotation '{annotation_label}' is not set")

    # level_dimensions is (width, height)
    level_w, level_h = slide.level_dimensions[opt.slide_level]

    for curr_annotation_region in annotation_dict:
        annotation_coords = \
            (annotation_dict[curr_annotation_region] / slide.level_downsamples[opt.slide_level]).astype(np.int32)
        annotation_coords = np.transpose(np.array(annotation_coords))
        # swap width and height
        annotation_coords[[0, 1]] = annotation_coords[[1, 0]]
        min_h, max_h = np.min(annotation_coords[0, :]), np.max(
            annotation_coords[0, :])
        min_w, max_w = np.min(annotation_coords[1, :]), np.max(
            annotation_coords[1, :])

        try:
            annotation_polygon = poly_transform.np_arr_to_poly(
                np.asarray(annotation_coords))
        except ValueError as e:
            logging.error(
                f'Failed to transform coordinates to polygon, this will be skipped,'
                f' slide_name: {slide_name}, region: {curr_annotation_region}')
            logging.error(f'Exception: {e}')
            continue

        patch_generate_try_cnt = 0
        while patch_generate_try_cnt < opt.patch_gen_try_num:
            rand_h = np.random.randint(min_h, max_h)
            rand_w = np.random.randint(min_w, max_w)
            is_height_exceeds = rand_h + opt.crop_size >= level_h
            is_width_exceeds = rand_w + opt.crop_size >= level_w
            if is_width_exceeds or is_height_exceeds:
                # Count the draw as an attempt; otherwise a region whose every
                # position overflows the slide would loop forever.
                patch_generate_try_cnt += 1
                continue

            patch_center_coord_h = int(rand_h + opt.crop_size / 2)
            patch_center_coord_w = int(rand_w + opt.crop_size / 2)
            patch_center_point = Point(patch_center_coord_w,
                                       patch_center_coord_h)
            if not patch_center_point.within(annotation_polygon):
                patch_generate_try_cnt += 1
                continue

            curr_patch = slide.read_region((rand_w, rand_h), opt.slide_level,
                                           (opt.crop_size, opt.crop_size))
            # keep the first three channels only
            curr_patch = np.asarray(curr_patch)[:, :, :3]
            curr_patch_ground_truth = _generate_ground_truth(
                rand_w,
                rand_h,
                crop_size=opt.crop_size,
                annotation_polygon=annotation_polygon,
                pixel_annotation_value=annotation_label_dict[annotation_label])

            curr_patch = transform.resize(curr_patch,
                                          (opt.save_size, opt.save_size))

            # order=0: Nearest-neighbor interpolation
            curr_patch_ground_truth = transform.resize(
                curr_patch_ground_truth, (opt.save_size, opt.save_size),
                order=0,
                anti_aliasing=False)

            # the resized patch is scaled by 255 before the uint8 cast
            curr_patch = (curr_patch * 255).astype(np.uint8)
            curr_patch_ground_truth = curr_patch_ground_truth.astype(np.uint8)

            yield curr_patch, curr_patch_ground_truth

            patch_generate_try_cnt += 1
Exemple #12
0
def gen_patches(slide_path, annotation_dict, args):
    """ Generate patch images and masks based on annotations as well as slide information.

    For every annotated region up to 10 random crop positions are drawn inside
    the region's bounding box. A draw is used when the crop fits in the slide
    level and the crop centre lies within the region polygon; the crop and its
    mask are then resized to ``args.save_size`` and saved under the same random
    file name in the image and mask directories from ``get_save_dirs(args)``.
    Every draw, used or not, counts as one of the 10 attempts.

    Parameters
    -------
    slide_path: str
        path of the slide opened with openslide
    annotation_dict: dict
        region id -> coordinate array; judging by the transpose and swap below
        each row is (w, h) at level 0 - TODO confirm against the caller
    args: object
        must provide ``slide_level``, ``crop_size`` and ``save_size``

    """

    # load slide header information
    slide_head = openslide.OpenSlide(slide_path)
    slide_name = os.path.basename(slide_path)
    if args.slide_level < 0 or args.slide_level >= slide_head.level_count:
        print("level {} not availabel in {}".format(args.slide_level,
                                                    slide_name))
        sys.exit()

    img_save_dir, mask_save_dir = get_save_dirs(args)
    # level_dimensions is (width, height)
    level_w, level_h = slide_head.level_dimensions[args.slide_level]

    for cur_reg in annotation_dict:
        # Scale the annotation coordinates to the requested level.
        coords = (annotation_dict[cur_reg] /
                  slide_head.level_downsamples[args.slide_level]).astype(
                      np.int32)
        coords = np.transpose(np.array(coords))
        coords[[0, 1]] = coords[[1, 0]]  # swap width and height
        min_h, max_h = np.min(coords[0, :]), np.max(coords[0, :])
        min_w, max_w = np.min(coords[1, :]), np.max(coords[1, :])

        num = 0
        cur_poly = poly_transform.np_arr_to_poly(np.asarray(coords))
        while num < 10:
            rand_h = np.random.randint(min_h, max_h)
            rand_w = np.random.randint(min_w, max_w)
            h_over_flag = rand_h + args.crop_size >= level_h
            w_over_flag = rand_w + args.crop_size >= level_w
            if h_over_flag or w_over_flag:
                # Count the draw as an attempt; otherwise a region whose every
                # position overflows the slide would loop forever.
                num += 1
                continue

            cen_h = int(rand_h + args.crop_size / 2)
            cen_w = int(rand_w + args.crop_size / 2)
            cen_point = Point(cen_w, cen_h)
            patch_in_flag = cen_point.within(cur_poly)
            if not patch_in_flag:
                num += 1
                continue

            # crop patch image
            cur_patch = slide_head.read_region(
                (rand_w, rand_h), args.slide_level,
                (args.crop_size, args.crop_size))
            cur_patch = np.asarray(cur_patch)[:, :, :3]

            # correct patch mask on ignore pixels
            ## very slow, need to speed this ignore part
            patch_mask = gen_patch_mask(args)
            for pw in range(rand_w, rand_w + args.crop_size):
                for ph in range(rand_h, rand_h + args.crop_size):
                    cur_p = Point(pw, ph)
                    # pixels outside the polygon are set to 0 in the mask
                    if not cur_p.within(cur_poly):
                        patch_mask[ph - rand_h, pw - rand_w] = 0

            save_img = transform.resize(cur_patch,
                                        (args.save_size, args.save_size))
            save_mask = transform.resize(
                patch_mask, (args.save_size, args.save_size),
                order=0)  # order=0: Nearest-neighbor interpolation
            save_mask = (save_mask * 255).astype(np.uint8)

            # image and mask share the same random file name
            img_fullname = str(uuid.uuid4())[:8] + ".png"
            save_img_path = os.path.join(img_save_dir, img_fullname)
            save_mask_path = os.path.join(mask_save_dir, img_fullname)
            io.imsave(save_img_path, save_img)
            io.imsave(save_mask_path, save_mask)
            num = num + 1
Exemple #13
0
def gen_samples(slides_dir, patch_level, patch_size, tumor_type, slide_list, dset, overlap_mode):
    """ Cut tissue patches and matching mask patches from a list of slides.

    For each slide the largest tissue contour is located, scaled to
    patch_level and split into patch positions according to overlap_mode.
    Patches that are incomplete or mostly background are dropped; in
    "half_overlap" mode with tumor_type "viable", patches whose mask has fewer
    than 5% positive pixels are dropped as well. Images are saved as jpg in
    ``Patches/<tumor_type>/<dset>/imgs`` and masks as png in ``.../masks``,
    both next to slides_dir. For the "val" set the slide and its mask are also
    copied to ``TestSlides`` next to slides_dir.

    Parameters
    -------
    slides_dir: str
        directory holding ``<name>.svs`` slides and ``<name>_<tumor_type>.tif``
        masks
    patch_level: int
        slide pyramid level the patches are cut from
    patch_size: int
        side length, in pixels, of the square patches
    tumor_type: str
        mask suffix and output sub-directory name
    slide_list: list
        slide names without extension; sorted in place
    dset: str
        output sub-directory name; "val" additionally triggers the copy
    overlap_mode: str
        "half_overlap" or "self_overlap"

    Raises
    -------
    NotImplementedError
        when overlap_mode is not one of the supported modes

    """

    # prepare saving directory
    patch_path = os.path.join(os.path.dirname(slides_dir), "Patches", tumor_type)
    patch_img_dir = os.path.join(patch_path, dset, "imgs")
    os.makedirs(patch_img_dir, exist_ok=True)
    patch_mask_dir = os.path.join(patch_path, dset, "masks")
    os.makedirs(patch_mask_dir, exist_ok=True)

    # processing slide one-by-one
    ttl_patch = 0
    slide_list.sort()
    for ind, ele in enumerate(slide_list):
        print("Processing {} {}/{}".format(ele, ind+1, len(slide_list)))
        cur_slide_path = os.path.join(slides_dir, ele+".svs")
        if not os.path.exists(cur_slide_path):
            # NOTE(review): the original check was a no-op; falling back to the
            # upper-case extension is the presumed intent - confirm.
            upper_slide_path = os.path.join(slides_dir, ele+".SVS")
            if os.path.exists(upper_slide_path):
                cur_slide_path = upper_slide_path

        # locate contours and generate batches based on tissue contours
        cnts, _ = tl.locate_tissue_cnts(cur_slide_path, max_img_size=2048, smooth_sigma=13,
                                        thresh_val=0.88, min_tissue_size=120000)
        select_level, select_factor = tl.select_slide_level(cur_slide_path, max_size=2048)
        # largest contour first
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
        if not cnts:
            # Nothing to split; indexing cnts[0] below would raise IndexError.
            print("No tissue contour found in {}".format(ele))
            continue

        # scale contour to the requested patch level
        wsi_head = pyramid.load_wsi_head(cur_slide_path)
        cnt_scale = select_factor / int(wsi_head.level_downsamples[patch_level])
        tissue_arr = cv_cnt_to_np_arr(cnts[0] * cnt_scale).astype(np.int32)
        # convert tissue_arr to convex if poly is not valid
        tissue_poly = np_arr_to_poly(tissue_arr)
        if not tissue_poly.is_valid:
            tissue_arr = poly_to_np_arr(tissue_poly.convex_hull).astype(int)

        coors_arr = None
        if overlap_mode == "half_overlap":
            level_w, level_h = wsi_head.level_dimensions[patch_level]
            coors_arr = contour.contour_patch_splitting_half_overlap(tissue_arr, level_h, level_w, patch_size, inside_ratio=0.80)
        elif overlap_mode == "self_overlap":
            coors_arr = contour.contour_patch_splitting_self_overlap(tissue_arr, patch_size, inside_ratio=0.80)
        else:
            raise NotImplementedError("unknown overlapping mode")

        wsi_img = wsi_head.read_region((0, 0), patch_level, wsi_head.level_dimensions[patch_level])
        wsi_img = np.asarray(wsi_img)[:, :, :3]
        mask_path = os.path.join(slides_dir, "_".join([ele, tumor_type+".tif"]))
        mask_img = io.imread(mask_path)
        # NOTE(review): the second "* 255" is applied to uint8 data and wraps
        # around; presumably this maps a 0/1 mask to 0/255 - confirm before
        # changing.
        wsi_mask = (transform.resize(mask_img, wsi_img.shape[:2], order=0) * 255).astype(np.uint8) * 255

        if dset == "val":
            test_slides_dir = os.path.join(os.path.dirname(slides_dir), "TestSlides")
            os.makedirs(test_slides_dir, exist_ok=True)
            for src_path in (cur_slide_path, mask_path):
                # Compare against the file name inside the target directory;
                # joining a full source path would test the source itself.
                dst_path = os.path.join(test_slides_dir, os.path.basename(src_path))
                if not os.path.exists(dst_path):
                    shutil.copy(src_path, test_slides_dir)

        for cur_arr in coors_arr:
            cur_h, cur_w = cur_arr[0], cur_arr[1]
            cur_patch = wsi_img[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            # drop patches truncated by the image border
            if cur_patch.shape[0] != patch_size or cur_patch.shape[1] != patch_size:
                continue
            cur_mask = wsi_mask[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            # background RGB (235, 210, 235) * [0.299, 0.587, 0.114]
            if patch.patch_bk_ratio(cur_patch, bk_thresh=0.864) > 0.88:
                continue

            if overlap_mode == "half_overlap" and tumor_type == "viable":
                pixel_ratio = np.sum(cur_mask > 0) * 1.0 / cur_mask.size
                if pixel_ratio < 0.05:
                    continue

            patch_name = ele + "_" + str(uuid.uuid1())[:8]
            io.imsave(os.path.join(patch_img_dir, patch_name+".jpg"), cur_patch)
            io.imsave(os.path.join(patch_mask_dir, patch_name+".png"), cur_mask)
            ttl_patch += 1

    print("There are {} patches in total.".format(ttl_patch))