Ejemplo n.º 1
0
    def cut_cells(self, tifname, new_dict, save_path):
        """Crop every predicted box out of a whole-slide image and save it as JPEG.

        Each crop is written to
        ``save_path/<box[2]>/<basename>_x{x}_y{y}_w{w}_h{h}.jpg`` where ``x`` and
        ``y`` are relative to the whole slide (patch origin + box offset).

        :param tifname: full path name of .tif/.kfb file
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]};
            ``x_y`` holds the patch origin and ``(x,y,w,h)`` is added to it
        :param save_path: image saving path (note: image is saved under class_i,
            i.e. ``str(box[2])``)
        """
        try:
            slide = openslide.OpenSlide(tifname)
        except Exception:
            # OpenSlide could not open the file; fall back to the TSlide reader.
            slide = TSlide(tifname)

        basename = os.path.splitext(os.path.basename(tifname))[0]
        try:
            for x_y, boxes in new_dict.items():
                # The patch origin is identical for every box stored under this key.
                origin = x_y.split('_')
                x0, y0 = int(origin[0]), int(origin[1])

                for box in boxes:
                    rect = box[4]
                    x = x0 + int(rect[0])
                    y = y0 + int(rect[1])
                    w = int(rect[2])
                    h = int(rect[3])

                    # image naming: tifname_x_y_w_h.jpg
                    image_name = "{}_x{}_y{}_w{}_h{}.jpg".format(
                        basename, x, y, w, h)
                    save_path_i = os.path.join(save_path, str(box[2]))
                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)

                    region = slide.read_region((x, y), 0, (w, h))
                    region.convert("RGB").save(image_fullname)
        finally:
            # Release the slide handle even when a crop or a save fails.
            slide.close()
Ejemplo n.º 2
0
def cut_cells(filename, labels_csv, save_path, factor, N):
    """Crop the labelled boxes of a slide, enlarged N times, and save them as JPEG.

    Boxes come from ``read_labels_csv(labels_csv)``; manual annotations come
    from the ``.xml`` file that sits next to ``filename``. Crops are written to
    ``save_path/<parent dir of filename>/<basename>/<label>/`` and the file name
    carries ``markedAs_<class>`` when ``is_overlapped`` returns a truthy value.

    :param filename: full path name of the slide file
    :param labels_csv: path handed to ``read_labels_csv``; every row is
        unpacked as ``(x, y, w, h, p, label)``
    :param save_path: root directory for the crops
    :param factor: threshold forwarded to ``is_overlapped``
    :param N: scale of the crop; the saved region is ``N*w`` by ``N*h`` and is
        centred on the box
    """
    labels = read_labels_csv(labels_csv)
    marked_boxes = read_labels_xml(os.path.splitext(filename)[0] + ".xml")

    try:
        slide = openslide.OpenSlide(filename)
    except Exception:
        # OpenSlide could not open the file; fall back to the TSlide reader.
        slide = TSlide(filename)

    basename = os.path.splitext(os.path.basename(filename))[0]
    parent_d = os.path.basename(os.path.dirname(filename))
    save_path = os.path.join(save_path, parent_d, basename)

    try:
        for box in labels:
            x, y, w, h, p, label = box
            marked_class_i = is_overlapped(marked_boxes, box, factor)

            # NOTE(review): the prefix reads "1-p" but the value written is p
            # exactly as it comes from the CSV - confirm the CSV stores 1 - p.
            if marked_class_i:
                image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    p, marked_class_i, basename, x, y, w, h, N)
            else:
                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    p, basename, x, y, w, h, N)

            save_path_i = os.path.join(save_path, label)
            os.makedirs(save_path_i, exist_ok=True)
            image_fullname = os.path.join(save_path_i, image_name)

            # Grow the box N times around its centre: shift the origin by
            # (1 - N) / 2 of each side and scale the sides by N.
            x_N, y_N = int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)
            w_N, h_N = int(N * w), int(N * h)
            region = slide.read_region((x_N, y_N), 0, (w_N, h_N))
            region.convert("RGB").save(image_fullname)
    finally:
        # Release the slide handle even when a crop or a save fails.
        slide.close()
Ejemplo n.º 3
0
def worker(image, start_x, start_y, height, patch_width, patch_height, delta,
           patch_save_path):
    """Cut patches out of a slide at a fixed ``start_x`` and save the kept ones.

    Starting at ``(start_x, start_y)`` the function reads one patch, then moves
    ``start_y`` forward by ``delta`` until it reaches ``height``. A patch is
    written to disk only when the variance of its Laplacian exceeds
    ``cfg.slice.THRESH``.

    :param image: path of the slide file
    :param start_x: x coordinate of every patch read by this worker
    :param start_y: y coordinate of the first patch
    :param height: upper bound (exclusive) for ``start_y``
    :param patch_width: patch width
    :param patch_height: patch height
    :param delta: step added to ``start_y`` after each patch
    :param patch_save_path: directory receiving ``<start_x>_<start_y>.jpg``
    :return: list with the path of every patch that was saved
    """
    # Paths of the patches that passed the filter.
    saved_paths = []

    try:
        slide = openslide.OpenSlide(image)
    except Exception:
        # OpenSlide could not open the file; fall back to the TSlide reader.
        slide = TSlide(image)

    try:
        while start_y < height:
            # Read the patch.
            patch = slide.read_region((start_x, start_y), 0,
                                      (patch_width, patch_height))

            # Convert the RGBA image returned by the reader to OpenCV BGR.
            patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR)

            # Filter on the variance of the Laplacian of the grayscale patch.
            patch_gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
            if cv2.Laplacian(patch_gray, cv2.CV_64F).var() > cfg.slice.THRESH:
                save_path = os.path.join(patch_save_path,
                                         "%s_%s.jpg" % (start_x, start_y))
                cv2.imwrite(save_path, patch)
                # NOTE(review): second copy goes to a hard-coded directory;
                # this looks like a debugging leftover - confirm it is needed.
                cv2.imwrite(
                    '/tmp/metadata/cells/1/%s_%s.jpg' % (start_x, start_y),
                    patch)

                saved_paths.append(save_path)

            start_y += delta
    finally:
        # The slide used to stay open for the life of the process.
        slide.close()

    return saved_paths
Ejemplo n.º 4
0
    def gen_np_array_mem(self,
                         results,
                         classes=cfg.darknet.classes,
                         det=cfg.xception.det1,
                         size=cfg.xception.size):
        """Read the selected boxes from ``self.input_file`` into memory.

        :param results: dict generated after running darknet predict: {x_y: [(class_i, det, (x,y,w,h)),]}
        :param classes: [class_i,]; only boxes whose class is listed are used
        :param det: the threshold of det to use certain box or not, from darknet prediction
            (a box is kept when its det is strictly greater)
        :param size: image size to cut, default to 299, which is used in Xception/inception
        :return:
            cell_list: list of numpy arrays, one ``size`` x ``size`` image per kept cell, in order
            cell_index: {index: [x_y, [class_i, det, (x,y,w,h)]]},
                        index is index in cell_list,
                        x_y is jpg image name, it represents cell source,
                        box = [class_i, det, (x,y,w,h)] is cell info from darknet
        """
        def resize_img(img, size):
            # Centre the image on a size x size canvas: a positive margin pads
            # the short side (zero padding, per the original comment), a
            # negative margin crops.
            pad_x = (size - img.size[0]) / 2
            pad_y = (size - img.size[1]) / 2
            img_croped = img.crop(
                (-pad_x, -pad_y, img.size[0] + pad_x, img.size[1] + pad_y))
            # now, yolo output is square, only need resize
            return img_croped.resize((size, size))

        try:
            slide = openslide.OpenSlide(self.input_file)
        except Exception:
            # OpenSlide could not open the file; fall back to the TSlide reader.
            slide = TSlide(self.input_file)

        cell_list = []
        cell_index = {}
        index = 0
        try:
            for x_y, boxes in results.items():
                for box in boxes:
                    if box[0] not in classes or not box[1] > det:
                        continue

                    # Box coordinates are relative to the patch named by x_y.
                    x = int(x_y.split('_')[0]) + int(box[2][0])
                    y = int(x_y.split('_')[1]) + int(box[2][1])
                    w = int(box[2][2])
                    h = int(box[2][3])

                    cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                    cell_list.append(np.array(resize_img(cell, size)))
                    cell_index[index] = [x_y, list(box)]
                    index += 1
        finally:
            # Release the slide handle even when a read fails.
            slide.close()

        return cell_list, cell_index
Ejemplo n.º 5
0
def worker(tiff_path, keys, points_dict, save_path, N):
    """Crop the boxes listed under ``keys``, enlarged N times, and save them.

    Crops are written to
    ``save_path/<box[2]>/1-p{1 - box[3]:.4f}_<basename>_x.._y.._w.._h.._{N}x.jpg``
    where ``basename`` is the slide file name without extension and with
    spaces replaced by dashes.

    :param tiff_path: path of the slide file
    :param keys: iterable of ``"x_y"`` keys of ``points_dict`` to process
    :param points_dict: {x_y: [box,]}; this function reads ``box[2]`` (save
        sub-directory), ``box[3]`` (value used in the file name) and
        ``box[4]`` = (x, y, w, h) relative to the ``x_y`` origin
    :param save_path: root directory for the crops
    :param N: scale of the crop; the saved region is ``N*w`` by ``N*h`` and is
        centred on the box
    :return: number of crops written
    """
    basename = os.path.splitext(os.path.basename(tiff_path))[0].replace(
        " ", "-")

    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        # OpenSlide could not open the file; fall back to the TSlide reader.
        slide = TSlide(tiff_path)

    cell_count = 0
    try:
        for x_y in keys:
            # The patch origin is identical for every box stored under this key.
            x0, y0 = x_y.split('_')
            x0, y0 = int(x0), int(y0)

            for box in points_dict[x_y]:
                rect = box[4]
                x = x0 + int(rect[0])
                y = y0 + int(rect[1])
                w = int(rect[2])
                h = int(rect[3])

                # make save dir
                cell_save_dir = os.path.join(save_path, box[2])
                os.makedirs(cell_save_dir, exist_ok=True)

                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], basename, x, y, w, h, N)
                cell_save_path = os.path.join(cell_save_dir, image_name)

                # Grow the box N times around its centre.
                origin = (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2))
                extent = (int(N * w), int(N * h))
                slide.read_region(origin, 0,
                                  extent).convert("RGB").save(cell_save_path)

                cell_count += 1
    finally:
        # Release the slide handle even when a crop or a save fails.
        slide.close()

    return cell_count
Ejemplo n.º 6
0
def worker_in_memory(image, start_x, start_y, height, patch_width,
                     patch_height, delta):
    """Cut patches out of a slide at a fixed ``start_x`` and keep them in memory.

    Starting at ``(start_x, start_y)`` the function reads one patch, then moves
    ``start_y`` forward by ``delta`` until it reaches ``height``. Nothing is
    written to disk and no patch is filtered out.

    :param image: path of the slide file
    :param start_x: x coordinate of every patch read by this worker
    :param start_y: y coordinate of the first patch
    :param height: upper bound (exclusive) for ``start_y``
    :param patch_width: patch width
    :param patch_height: patch height
    :param delta: step added to ``start_y`` after each patch
    :return: dict mapping ``"<start_x>_<start_y>"`` to the patch as a BGR array
    """
    # Patches keyed by their origin.
    patches = {}

    try:
        slide = openslide.OpenSlide(image)
    except Exception:
        # OpenSlide could not open the file; fall back to the TSlide reader.
        slide = TSlide(image)

    try:
        while start_y < height:
            # Read the patch.
            patch = slide.read_region((start_x, start_y), 0,
                                      (patch_width, patch_height))

            # Convert the RGBA image returned by the reader to OpenCV BGR.
            key = '%s_%s' % (start_x, start_y)
            patches[key] = cv2.cvtColor(np.asarray(patch),
                                        cv2.COLOR_RGBA2BGR)

            start_y += delta
    finally:
        # The slide used to stay open for the life of the process.
        slide.close()

    return patches
Ejemplo n.º 7
0
    def cut_cells_p_marked_(self,
                            tifname,
                            new_dict,
                            save_path,
                            factor=0.3,
                            N=4):
        """Crop predicted boxes, enlarged N times, and flag manually marked ones.

        The slide path is looked up in ``get_tiff_dict()`` under ``tifname``.
        Crops are written to
        ``save_path/<parent dir of tifname>/<basename>/<box[2]>[/marked]/``;
        the ``marked`` sub-directory and the ``markedAs_<class>`` file-name
        part are used when the box overlaps an annotation from the ``.xml``
        file named after ``tifname``.

        :param tifname: key of the slide in ``get_tiff_dict()``; also used to
            derive the output directory and the ``.xml`` path
        :param new_dict: {x_y: [box,]}; this method reads ``box[2]`` (save
            sub-directory), ``box[3]`` (value used in the file name) and
            ``box[4]`` = (x, y, w, h) relative to the origin parsed from ``x_y``
        :param save_path: root directory for the crops
        :param factor: minimum intersection-over-union for a box to count as marked
        :param N: scale of the crop; the saved region is ``N*w`` by ``N*h`` and
            is centred on the box
        :raises Exception: when ``tifname`` is not a key of ``get_tiff_dict()``
        """
        def get_labels(xmlname):
            """Collect annotations as [[class_i, [(xi, yi),]],]; [] when the file is missing."""
            if not os.path.isfile(xmlname):
                return []

            # Annotation colour -> class name; other colours are ignored.
            classes = {
                "#aa0000": "HSIL",
                "#aa007f": "ASCH",
                "#005500": "LSIL",
                "#00557f": "ASCUS",
                "#0055ff": "SCC",
                "#aa55ff": "EC",
                "#ff5500": "AGC",
                "#00aa00": "FUNGI",
                "#00aa7f": "TRI",
                "#00aaff": "CC",
                "#55aa00": "ACTINO",
                "#55aa7f": "VIRUS",
                "#ffffff": "NORMAL",
                "#000000": "MC",
                "#aa00ff": "SC",
                "#ff0000": "RC",
                "#aa5500": "GEC"
            }
            DOMTree = xml.dom.minidom.parse(xmlname)
            collection = DOMTree.documentElement
            marked_boxes = []
            for annotation in collection.getElementsByTagName("Annotation"):
                colorCode = annotation.getAttribute("Color")
                if colorCode not in classes:
                    continue
                coordinates = annotation.getElementsByTagName("Coordinate")
                points = [(float(coordinate.getAttribute('X')),
                           float(coordinate.getAttribute('Y')))
                          for coordinate in coordinates]
                marked_boxes.append([classes[colorCode], points])
            return marked_boxes

        def is_overlapped(marked_boxes, predicted_box, factor):
            """Return the class of the first annotation whose IoU with the box is >= factor, else ""."""
            # The predicted rectangle does not change between annotations.
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            for marked_box in marked_boxes:
                marked_box_obj = geometry.Polygon(marked_box[1])
                overlap = marked_box_obj.intersection(predicted_box_obj).area
                union = marked_box_obj.area + predicted_box_obj.area - overlap
                # Two zero-area shapes have no meaningful IoU; skip them
                # instead of dividing by zero.
                if union > 0 and overlap / union >= factor:
                    return marked_box[0]
            return ""

        tiff_dict = get_tiff_dict()
        if tifname not in tiff_dict:
            raise Exception("XCEPTION POSTPROCESS %s NOT FOUND" % tifname)

        try:
            slide = openslide.OpenSlide(tiff_dict[tifname])
        except Exception:
            # OpenSlide could not open the file; fall back to the TSlide reader.
            slide = TSlide(tiff_dict[tifname])

        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            parent_d = os.path.basename(os.path.dirname(tifname))
            save_path = os.path.join(save_path, parent_d, basename)
            marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")

            for x_y, boxes in new_dict.items():
                # NOTE(review): `pattern` is not defined in this method; it
                # must be a module-level name - confirm it exists.
                # The patch origin is identical for every box under this key.
                _, x0, y0 = re.findall(pattern, x_y)[0]
                x0, y0 = int(x0), int(y0)

                for box in boxes:
                    rect = box[4]
                    x = x0 + int(rect[0])
                    y = y0 + int(rect[1])
                    w = int(rect[2])
                    h = int(rect[3])

                    marked_class_i = is_overlapped(marked_boxes, (x, y, w, h),
                                                   factor)
                    # image naming: tifname_x_y_w_h_p.jpg
                    if marked_class_i:
                        image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], marked_class_i, basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2], "marked")
                    else:
                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2])

                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)

                    # Grow the box N times around its centre.
                    origin = (int(x + (1 - N) * w / 2),
                              int(y + (1 - N) * h / 2))
                    extent = (int(N * w), int(N * h))
                    slide.read_region(origin, 0,
                                      extent).convert("RGB").save(image_fullname)
        finally:
            # Release the slide handle even when parsing, cropping or saving fails.
            slide.close()
Ejemplo n.º 8
0
    def cut_cells_p_marked(self,
                           tifname,
                           new_dict,
                           save_path,
                           factor=0.3,
                           N=4):
        """Crop predicted boxes, enlarged N times, and flag manually marked ones.

        :param tifname: full path name of .tif/.kfb file
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
        :param save_path: image saving path (note: image is saved under class_i)
        :param factor: overlapping threshold, added marked info to image filename if overlapped
        :param N: scale of the crop; the saved region is ``N*w`` by ``N*h`` and
            is centred on the box
        :output format: save_path/<parent dir of tifname>/tifbasename/class_i[/marked]/
                        1-p{p}[_markedAs_{class}]_tifbasename_x_y_w_h_{N}x.jpg
                        (note: x, y here is relative to wsi.
                               the value written after "1-p" is 1 minus the second det.
                               image size is N times the annotation box.
                               check if the cell is marked, add marked if so.)
        """

        # https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely
        def get_labels(xmlname):
            """collect labeled boxes from asap xml
            :param xmlname: full path name of .xml file, got from .tif/.kfb file
            :output format: [[class_i, [(xi,yi),]],] ([] when the file is missing)
            """
            if not os.path.isfile(xmlname):
                return []

            # Annotation colour -> class name; other colours are ignored.
            classes = {
                "#aa0000": "HSIL",
                "#aa007f": "ASCH",
                "#005500": "LSIL",
                "#00557f": "ASCUS",
                "#0055ff": "SCC",
                "#aa557f": "ADC",
                "#aa55ff": "EC",
                "#ff5500": "AGC1",
                "#ff557f": "AGC2",
                "#ff55ff": "AGC3",
                "#00aa00": "FUNGI",
                "#00aa7f": "TRI",
                "#00aaff": "CC",
                "#55aa00": "ACTINO",
                "#55aa7f": "VIRUS",
                "#ffffff": "NORMAL",
                "#000000": "MC",
                "#aa00ff": "SC",
                "#ff0000": "RC",
                "#aa5500": "GEC"
            }
            DOMTree = xml.dom.minidom.parse(xmlname)
            collection = DOMTree.documentElement
            marked_boxes = []
            for annotation in collection.getElementsByTagName("Annotation"):
                colorCode = annotation.getAttribute("Color")
                if colorCode not in classes:
                    continue
                coordinates = annotation.getElementsByTagName("Coordinate")
                points = [(float(coordinate.getAttribute('X')),
                           float(coordinate.getAttribute('Y')))
                          for coordinate in coordinates]
                marked_boxes.append([classes[colorCode], points])
            return marked_boxes

        def is_overlapped(marked_boxes, predicted_box, factor):
            """check if predicted box is marked already
            :param marked_boxes: [[class_i, [(xi,yi),]],]
            :param predicted_box: (x, y, w, h)
            :param factor: overlapping threshold (intersection over union)
            :return: class of the first annotation reaching the threshold, else ""
            """
            # The predicted rectangle does not change between annotations.
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            for marked_box in marked_boxes:
                marked_box_obj = geometry.Polygon(marked_box[1])
                overlap = marked_box_obj.intersection(predicted_box_obj).area
                union = marked_box_obj.area + predicted_box_obj.area - overlap
                # Two zero-area shapes have no meaningful IoU; skip them
                # instead of dividing by zero.
                if union > 0 and overlap / union >= factor:
                    return marked_box[0]
            return ""

        try:
            slide = openslide.OpenSlide(tifname)
        except Exception:
            # OpenSlide could not open the file; fall back to the TSlide reader.
            slide = TSlide(tifname)

        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            parent_d = os.path.basename(os.path.dirname(tifname))
            save_path = os.path.join(save_path, parent_d, basename)
            marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")

            for x_y, boxes in new_dict.items():
                # The patch origin is identical for every box under this key.
                origin = x_y.split('_')
                x0, y0 = int(origin[0]), int(origin[1])

                for box in boxes:
                    rect = box[4]
                    x = x0 + int(rect[0])
                    y = y0 + int(rect[1])
                    w = int(rect[2])
                    h = int(rect[3])

                    marked_class_i = is_overlapped(marked_boxes, (x, y, w, h),
                                                   factor)
                    # image naming: tifname_x_y_w_h_p.jpg
                    if marked_class_i:
                        image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], marked_class_i, basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2], "marked")
                    else:
                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], basename, x, y, w, h, N)
                        save_path_i = os.path.join(save_path, box[2])

                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)

                    # Grow the box N times around its centre.
                    crop_origin = (int(x + (1 - N) * w / 2),
                                   int(y + (1 - N) * h / 2))
                    crop_size = (int(N * w), int(N * h))
                    slide.read_region(crop_origin, 0,
                                      crop_size).convert("RGB").save(image_fullname)
        finally:
            # Release the slide handle even when parsing, cropping or saving fails.
            slide.close()
Ejemplo n.º 9
0
    def run(self):
        """Slice, detect and crop cells for every slide in ``self.tiff_lst``.

        For each slide the method:
          1. slices it with ``ImageSlice`` unless the slice directory
             ``self.slice_dir_path/<basename>`` already exists;
          2. runs ``yolo_predict`` over the slice images in a process pool of
             ``GPU_NUM`` workers;
          3. crops every detected box from the slide and saves it to
             ``self.cells_path/<basename>/<label>/``.

        ``<basename>`` is the slide file name without extension, spaces
        replaced by dashes. Nothing is returned.
        """
        print("Initial DARKNET and XCEPTION model ...")

        total = len(self.tiff_lst)
        for index, tiff in enumerate(self.tiff_lst):
            # Slide file name without extension.
            tiff_basename, _ = os.path.splitext(os.path.basename(tiff))
            tiff_basename = tiff_basename.replace(" ", "-")
            print('Process %s / %s %s ...' % (index + 1, total, tiff_basename))

            # Directory holding the slices of this slide.
            slice_save_path = os.path.join(self.slice_dir_path, tiff_basename)

            t0 = datetime.datetime.now()
            # Slice only when no slice directory exists yet.
            if not os.path.exists(slice_save_path):
                ImageSlice(tiff, self.slice_dir_path).get_slices()

            # Collect the slice image paths.
            tif_images = FilesScanner(slice_save_path, ['.jpg']).get_files()
            t1 = datetime.datetime.now()
            print('TIFF SLICE COST: %s' % (t1 - t0))

            seg_results = {}
            # The context manager shuts the pool down (waiting for workers)
            # even when a prediction raises.
            with ProcessPoolExecutor(max_workers=GPU_NUM) as executor:
                tasks = []
                if len(tif_images) < cfg.darknet.min_job_length:
                    tasks.append(executor.submit(yolo_predict, '0', tif_images))
                else:
                    # Ceiling division: rounding to nearest could yield more
                    # than GPU_NUM chunks and therefore GPU indices that do
                    # not exist. max(1, ...) keeps the range step valid.
                    n = max(1, -(-len(tif_images) // GPU_NUM))
                    patches = [tif_images[i: i + n]
                               for i in range(0, len(tif_images), n)]

                    for gpu_index, patch in enumerate(patches):
                        tasks.append(
                            executor.submit(yolo_predict, str(gpu_index), patch))

                for future in as_completed(tasks):
                    seg_results.update(future.result())

            try:
                slide = openslide.OpenSlide(tiff)
            except Exception:
                # OpenSlide could not open the file; fall back to TSlide.
                slide = TSlide(tiff)

            try:
                for key, lst in seg_results.items():
                    # Key is the "x_y" origin of the slice the boxes belong to.
                    x0, y0 = key.split('_')
                    x0, y0 = int(x0), int(y0)

                    for item in lst:
                        label, accuracy, (x, y, w, h) = item
                        accuracy = float(accuracy)
                        w, h = int(w), int(h)
                        # Box coordinates become relative to the whole slide.
                        x, y = x0 + int(x), y0 + int(y)

                        save_path = os.path.join(self.cells_path, tiff_basename, label)
                        os.makedirs(save_path, exist_ok=True)

                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}.jpg".format(1 - accuracy, tiff_basename, x, y, w, h)
                        cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                        cell.save(os.path.join(save_path, image_name))
            finally:
                # One slide is opened per loop iteration; close each of them.
                slide.close()
Ejemplo n.º 10
0
    def gen_np_array_mem_(self,
                          results,
                          classes=cfg.darknet.classes,
                          det=cfg.xception.det1,
                          size=cfg.xception.size):
        """Read the selected boxes of the slide registered as ``self.input_file``.

        The slide path is looked up in ``get_tiff_dict()`` under
        ``self.input_file``. The patch origin of each key is extracted with
        ``self.pattern``, whose first match must yield three groups, the last
        two being x and y.

        :param results: {x_y: [(class_i, det, (x,y,w,h)),]}
        :param classes: only boxes whose ``box[0]`` is in this collection are used
        :param det: a box is used only when ``box[1]`` is strictly greater
        :param size: side length of the square image produced for each cell
        :return: ``(cell_list, cell_index)`` where ``cell_list`` is a list of
            numpy arrays in reading order and ``cell_index`` maps the position
            in that list to ``[x_y, list(box)]``
        :raises Exception: when ``self.input_file`` is not registered, or when
            neither OpenSlide nor TSlide can open the file
        """
        def resize_img(img, size):
            # Centre the image on a size x size canvas (positive margin pads,
            # negative margin crops), then force the exact output size.
            pad_x = (size - img.size[0]) / 2
            pad_y = (size - img.size[1]) / 2
            img_croped = img.crop(
                (-pad_x, -pad_y, img.size[0] + pad_x, img.size[1] + pad_y))
            return img_croped.resize((size, size))

        tiff_dict = get_tiff_dict()
        if self.input_file not in tiff_dict:
            raise Exception("XCEPTION PREPROCESS %s NOT FOUND" %
                            self.input_file)

        tiff_path = tiff_dict[self.input_file]
        try:
            slide = openslide.OpenSlide(tiff_path)
        except Exception:
            try:
                # OpenSlide could not open the file; fall back to TSlide.
                slide = TSlide(tiff_path)
            except Exception as err:
                # Keep the underlying failure attached as the cause.
                raise Exception('TIFF FILE OPEN FAILED => %s' %
                                self.input_file) from err

        cell_list = []
        cell_index = {}
        index = 0
        try:
            for x_y, boxes in results.items():
                for box in boxes:
                    if box[0] not in classes or not box[1] > det:
                        continue

                    # Box coordinates are relative to the origin in the key.
                    _, x, y = re.findall(self.pattern, x_y)[0]
                    x = int(x) + int(box[2][0])
                    y = int(y) + int(box[2][1])
                    w = int(box[2][2])
                    h = int(box[2][3])

                    cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                    cell_list.append(np.array(resize_img(cell, size)))
                    cell_index[index] = [x_y, list(box)]
                    index += 1
        finally:
            # Release the slide handle even when a read fails.
            slide.close()

        return cell_list, cell_index