def cut_cells(filename, labels_csv, save_path, factor, N):
    """Crop every labelled box out of a slide and save each crop as a JPEG.

    Each crop is centred on its box and enlarged ``N`` times in both
    dimensions. Crops are written to
    ``save_path/<parent dir of filename>/<slide basename>/<label>/``.
    When ``is_overlapped`` returns a truthy class name for a box, that name
    is embedded in the file name as ``markedAs_<class>``.

    :param filename: path of the slide file; annotations are read from the
                     file with the same stem and a ``.xml`` extension
    :param labels_csv: forwarded to ``read_labels_csv``; every row it yields
                       is unpacked as ``(x, y, w, h, p, label)``
    :param save_path: root directory for the saved crops
    :param factor: threshold forwarded to ``is_overlapped``
    :param N: enlargement factor applied to the box width and height
    """
    labels = read_labels_csv(labels_csv)
    marked_boxes = read_labels_xml(os.path.splitext(filename)[0] + ".xml")

    try:
        slide = openslide.OpenSlide(filename)
    except Exception:
        # OpenSlide could not open the file; fall back to TSlide.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        slide = TSlide(filename)

    # try/finally guarantees the slide handle is released even when a read
    # or a save fails half-way through the loop.
    try:
        basename = os.path.splitext(os.path.basename(filename))[0]
        parent_d = os.path.basename(os.path.dirname(filename))
        slide_dir = os.path.join(save_path, parent_d, basename)

        for box in labels:
            x, y, w, h, p, label = box
            marked_class_i = is_overlapped(marked_boxes, box, factor)
            if marked_class_i:
                image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    p, marked_class_i, basename, x, y, w, h, N)
            else:
                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    p, basename, x, y, w, h, N)

            save_path_i = os.path.join(slide_dir, label)
            os.makedirs(save_path_i, exist_ok=True)
            image_fullname = os.path.join(save_path_i, image_name)

            # Shift the origin by half of the extra size so the enlarged
            # window stays centred on the original box.
            x_N, y_N = int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)
            w_N, h_N = int(N * w), int(N * h)
            region = slide.read_region((x_N, y_N), 0, (w_N, h_N))
            region.convert("RGB").save(image_fullname)
    finally:
        slide.close()
Esempio n. 2
0
    def cut_cells(self, tifname, new_dict, save_path):
        """Crop every box in ``new_dict`` out of the slide and save it as JPEG.

        :param tifname: full path name of .tif/.kfb file
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]};
                         ``x_y`` is split on ``_`` and its first two parts
                         are added to the box x and y
        :param save_path: image saving path (note: image is saved under the
                          second class_i, i.e. ``box[2]``)
        :output format: save_path/class_i/tifbasename_x{x}_y{y}_w{w}_h{h}.jpg
                        (note: x, y here is relative to wsi)
        """
        try:
            slide = openslide.OpenSlide(tifname)
        except Exception:
            # OpenSlide could not open the file; fall back to TSlide.
            # ``Exception`` (not a bare except) lets Ctrl-C through.
            slide = TSlide(tifname)

        # try/finally releases the slide handle even if a read/save fails.
        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            for x_y, boxes in new_dict.items():
                for box in boxes:
                    # Split once instead of once per coordinate.
                    origin = x_y.split('_')
                    x = int(origin[0]) + int(box[4][0])
                    y = int(origin[1]) + int(box[4][1])
                    w = int(box[4][2])
                    h = int(box[4][3])

                    # image naming: tifname_x_y_w_h.jpg
                    image_name = "{}_x{}_y{}_w{}_h{}.jpg".format(
                        basename, x, y, w, h)
                    save_path_i = os.path.join(save_path, str(box[2]))
                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)

                    region = slide.read_region((x, y), 0, (w, h))
                    region.convert("RGB").save(image_fullname)
        finally:
            slide.close()
Esempio n. 3
0
    def gen_np_array_mem(self,
                         results,
                         classes=cfg.darknet.classes,
                         det=cfg.xception.det1,
                         size=cfg.xception.size):
        """Read the selected cells from ``self.input_file`` into memory.

        :param results: dict generated after running darknet predict:
                        {x_y: [(class_i, det, (x,y,w,h)),]}
        :param classes: [class_i,]; only boxes whose class is listed are used
        :param det: the threshold of det to use certain box or not, from
                    darknet prediction (a box is kept when its det is
                    strictly greater)
        :param size: side length of the produced square image, used in
                     Xception/inception
        :return:
            cell_list: list of numpy arrays, one per kept cell, in order
            cell_index: {index: [x_y, [class_i, det, (x,y,w,h)]]},
                        index is the position in cell_list,
                        x_y is the key the box came from in ``results``,
                        box = [class_i, det, (x,y,w,h)] is cell info from darknet
        """
        def resize_img(img, size):
            # Crop box centred on the image with side ``size``: where it
            # extends past the image the short side is zero-padded, where it
            # is smaller than the image the centre is kept.
            pad_x = (size - img.size[0]) / 2
            pad_y = (size - img.size[1]) / 2
            img_croped = img.crop((-pad_x, -pad_y,
                                   img.size[0] + pad_x,
                                   img.size[1] + pad_y))
            # now, yolo output is square, only need resize
            return img_croped.resize((size, size))

        try:
            slide = openslide.OpenSlide(self.input_file)
        except Exception:
            # OpenSlide could not open the file; fall back to TSlide.
            # ``Exception`` (not a bare except) lets Ctrl-C through.
            slide = TSlide(self.input_file)

        cell_list = []
        cell_index = {}
        index = 0
        # try/finally releases the slide handle even if a region read fails.
        try:
            for x_y, boxes in results.items():
                for box in boxes:
                    if box[0] not in classes or box[1] <= det:
                        continue
                    origin = x_y.split('_')
                    x = int(origin[0]) + int(box[2][0])
                    y = int(origin[1]) + int(box[2][1])
                    w = int(box[2][2])
                    h = int(box[2][3])
                    cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                    cell_list.append(np.array(resize_img(cell, size)))
                    cell_index[index] = [x_y, list(box)]
                    index += 1
        finally:
            slide.close()

        # A plain list is returned on purpose; callers stack it if needed.
        return cell_list, cell_index
Esempio n. 4
0
def worker(tiff_path, keys, points_dict, save_path, N):
    """Crop the boxes stored under ``keys`` from a slide and save them as JPEG.

    Each crop is centred on its box and enlarged ``N`` times in both
    dimensions. Crops are written to ``save_path/<box[2]>/``.

    :param tiff_path: path of the slide file; spaces in its base name are
                      replaced by ``-`` in the saved file names
    :param keys: iterable of ``"x_y"`` keys of ``points_dict`` to process
    :param points_dict: {x_y: [box, ...]} where ``box[2]`` is the directory
                        name, ``box[3]`` a score (``1 - box[3]`` is written
                        into the file name) and ``box[4]`` is ``(x, y, w, h)``
                        relative to the ``x_y`` origin
    :param save_path: root directory for the saved crops
    :param N: enlargement factor applied to the box width and height
    :return: number of crops written
    """
    basename = os.path.splitext(os.path.basename(tiff_path))[0].replace(
        " ", "-")

    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        # OpenSlide could not open the file; fall back to TSlide.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        slide = TSlide(tiff_path)

    cell_count = 0
    # try/finally releases the slide handle even if a read/save fails.
    try:
        for x_y in keys:
            for box in points_dict[x_y]:
                x0, y0 = x_y.split('_')
                x = int(x0) + int(box[4][0])
                y = int(y0) + int(box[4][1])
                w = int(box[4][2])
                h = int(box[4][3])

                # make save dir
                cell_save_dir = os.path.join(save_path, box[2])
                os.makedirs(cell_save_dir, exist_ok=True)

                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], basename, x, y, w, h, N)
                cell_save_path = os.path.join(cell_save_dir, image_name)

                # Shift the origin by half of the extra size so the enlarged
                # window stays centred on the original box.
                location = (int(x + (1 - N) * w / 2),
                            int(y + (1 - N) * h / 2))
                region_size = (int(N * w), int(N * h))
                region = slide.read_region(location, 0, region_size)
                region.convert("RGB").save(cell_save_path)

                cell_count += 1
    finally:
        slide.close()

    return cell_count
Esempio n. 5
0
    def cut_cells_p_marked_(self,
                            tifname,
                            new_dict,
                            save_path,
                            factor=0.3,
                            N=4):
        """Crop predicted cells from a slide, flagging those already annotated.

        The slide path is looked up in ``get_tiff_dict()`` under ``tifname``.
        Each crop is centred on its box and enlarged ``N`` times.

        :param tifname: key of the slide in ``get_tiff_dict()``; also used to
                        derive the output sub-directories and the ``.xml``
                        annotation file name
        :param new_dict: {x_y: [box, ...]} where ``box[2]`` is the directory
                         name, ``box[3]`` a score (``1 - box[3]`` is written
                         into the file name) and ``box[4]`` is ``(x, y, w, h)``
                         relative to the origin parsed from ``x_y``
        :param save_path: root directory for the saved crops
        :param factor: overlap threshold (intersection over union) above
                       which a box counts as already annotated
        :param N: enlargement factor applied to the box width and height
        :output format: save_path/<parent dir>/<basename>/<box[2]>[/marked]/
                        1-p{p}_[markedAs_{class}_]{basename}_x{x}_y{y}_w{w}_h{h}_{N}x.jpg
        :raises Exception: when ``tifname`` is not in ``get_tiff_dict()``
        """
        def get_labels(xmlname):
            # Collect [[class_name, [(x, y), ...]], ...] from the
            # "Annotation" elements of the xml file; annotations whose
            # Color is not in ``classes`` are ignored.
            if not os.path.isfile(xmlname):
                return []

            classes = {
                "#aa0000": "HSIL",
                "#aa007f": "ASCH",
                "#005500": "LSIL",
                "#00557f": "ASCUS",
                "#0055ff": "SCC",
                "#aa55ff": "EC",
                "#ff5500": "AGC",
                "#00aa00": "FUNGI",
                "#00aa7f": "TRI",
                "#00aaff": "CC",
                "#55aa00": "ACTINO",
                "#55aa7f": "VIRUS",
                "#ffffff": "NORMAL",
                "#000000": "MC",
                "#aa00ff": "SC",
                "#ff0000": "RC",
                "#aa5500": "GEC"
            }
            DOMTree = xml.dom.minidom.parse(xmlname)
            collection = DOMTree.documentElement
            annotations = collection.getElementsByTagName("Annotation")
            marked_boxes = []
            for annotation in annotations:
                colorCode = annotation.getAttribute("Color")
                if colorCode not in classes:
                    continue
                coordinates = annotation.getElementsByTagName("Coordinate")
                points = [(float(coordinate.getAttribute('X')),
                           float(coordinate.getAttribute('Y')))
                          for coordinate in coordinates]
                marked_boxes.append([classes[colorCode], points])
            return marked_boxes

        def is_overlapped(marked_boxes, predicted_box, factor):
            # Return the class of the first annotation whose IoU with the
            # predicted (x, y, w, h) box reaches ``factor``, else "".
            if not marked_boxes:
                return ""
            # The predicted box does not change per annotation: build once.
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            for marked_box in marked_boxes:
                marked_box_obj = geometry.Polygon(marked_box[1])
                # Compute the (costly) intersection only once.
                overlap = marked_box_obj.intersection(predicted_box_obj).area
                union = marked_box_obj.area + predicted_box_obj.area - overlap
                if union == 0:
                    # Both shapes are degenerate; nothing can overlap and
                    # dividing would raise ZeroDivisionError.
                    continue
                if overlap / union >= factor:
                    return marked_box[0]
            return ""

        tiff_dict = get_tiff_dict()
        if tifname not in tiff_dict:
            raise Exception("XCEPTION POSTPROCESS %s NOT FOUND" % tifname)

        try:
            slide = openslide.OpenSlide(tiff_dict[tifname])
        except Exception:
            # OpenSlide could not open the file; fall back to TSlide.
            # ``Exception`` (not a bare except) lets Ctrl-C through.
            slide = TSlide(tiff_dict[tifname])

        # try/finally releases the slide handle even if parsing the xml,
        # reading a region or saving a crop fails.
        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            parent_d = os.path.basename(os.path.dirname(tifname))
            slide_dir = os.path.join(save_path, parent_d, basename)
            marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")
            for x_y, boxes in new_dict.items():
                for box in boxes:
                    # image naming: tifname_x_y_w_h_p.jpg
                    # NOTE(review): ``pattern`` is not defined in this method,
                    # so it must exist at module level (a sibling method uses
                    # ``self.pattern``) -- confirm which one is intended.
                    _, x, y = re.findall(pattern, x_y)[0]
                    x = int(x) + int(box[4][0])
                    y = int(y) + int(box[4][1])
                    w = int(box[4][2])
                    h = int(box[4][3])

                    marked_class_i = is_overlapped(marked_boxes, (x, y, w, h),
                                                   factor)
                    if marked_class_i:
                        image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], marked_class_i, basename, x, y, w, h, N)
                        save_path_i = os.path.join(slide_dir, box[2], "marked")
                    else:
                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], basename, x, y, w, h, N)
                        save_path_i = os.path.join(slide_dir, box[2])

                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)

                    # Shift the origin by half of the extra size so the
                    # enlarged window stays centred on the original box.
                    location = (int(x + (1 - N) * w / 2),
                                int(y + (1 - N) * h / 2))
                    region_size = (int(N * w), int(N * h))
                    region = slide.read_region(location, 0, region_size)
                    region.convert("RGB").save(image_fullname)
        finally:
            slide.close()
Esempio n. 6
0
    def cut_cells_p_marked(self,
                           tifname,
                           new_dict,
                           save_path,
                           factor=0.3,
                           N=4):
        """Crop predicted cells from a slide, flagging those already annotated.

        :param tifname: full path name of .tif/.kfb file
        :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
        :param save_path: image saving path (note: image is saved under the
                          second class_i, i.e. ``box[2]``)
        :param factor: overlapping threshold (intersection over union),
                       added marked info to image filename if overlapped
        :param N: enlargement factor; the saved image is N times the
                  annotation box in width and height, centred on the box
        :output format: save_path/<parent dir>/<tifbasename>/class_i[/marked]/
                        1-p{p}_[markedAs_{class}_]{tifbasename}_x{x}_y{y}_w{w}_h{h}_{N}x.jpg
                        (note: x, y here is relative to wsi.
                               p is the value of the second det.
                               check if the cell is marked, add marked if so.)
        """

        # https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely
        def get_labels(xmlname):
            """collect labeled boxes from asap xml
            :param xmlname: full path name of .xml file, got from .tif/.kfb file
            :output format: [[class_i, [(xi,yi),]],]
            """
            if not os.path.isfile(xmlname):
                return []
            classes = {
                "#aa0000": "HSIL",
                "#aa007f": "ASCH",
                "#005500": "LSIL",
                "#00557f": "ASCUS",
                "#0055ff": "SCC",
                "#aa557f": "ADC",
                "#aa55ff": "EC",
                "#ff5500": "AGC1",
                "#ff557f": "AGC2",
                "#ff55ff": "AGC3",
                "#00aa00": "FUNGI",
                "#00aa7f": "TRI",
                "#00aaff": "CC",
                "#55aa00": "ACTINO",
                "#55aa7f": "VIRUS",
                "#ffffff": "NORMAL",
                "#000000": "MC",
                "#aa00ff": "SC",
                "#ff0000": "RC",
                "#aa5500": "GEC"
            }
            DOMTree = xml.dom.minidom.parse(xmlname)
            collection = DOMTree.documentElement
            annotations = collection.getElementsByTagName("Annotation")
            marked_boxes = []
            for annotation in annotations:
                colorCode = annotation.getAttribute("Color")
                # Annotations drawn in an unknown colour are ignored.
                if colorCode not in classes:
                    continue
                coordinates = annotation.getElementsByTagName("Coordinate")
                points = [(float(coordinate.getAttribute('X')),
                           float(coordinate.getAttribute('Y')))
                          for coordinate in coordinates]
                marked_boxes.append([classes[colorCode], points])
            return marked_boxes

        def is_overlapped(marked_boxes, predicted_box, factor):
            """check if predicted box is marked already
            :param marked_boxes: [[class_i, [(xi,yi),]],]
            :param predicted_box: (x, y, w, h)
            :param factor: overlapping threshold, added marked info to image filename if overlapped
            :return: class_i of the first marked box whose intersection over
                     union with the predicted box is >= factor, else ""
            """
            if not marked_boxes:
                return ""
            # The predicted box does not change per annotation: build once.
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            for marked_box in marked_boxes:
                marked_box_obj = geometry.Polygon(marked_box[1])
                # Compute the (costly) intersection only once.
                overlap = marked_box_obj.intersection(predicted_box_obj).area
                union = marked_box_obj.area + predicted_box_obj.area - overlap
                if union == 0:
                    # Both shapes are degenerate; nothing can overlap and
                    # dividing would raise ZeroDivisionError.
                    continue
                if overlap / union >= factor:
                    return marked_box[0]
            return ""

        try:
            slide = openslide.OpenSlide(tifname)
        except Exception:
            # OpenSlide could not open the file; fall back to TSlide.
            # ``Exception`` (not a bare except) lets Ctrl-C through.
            slide = TSlide(tifname)

        # try/finally releases the slide handle even if parsing the xml,
        # reading a region or saving a crop fails.
        try:
            basename = os.path.splitext(os.path.basename(tifname))[0]
            parent_d = os.path.basename(os.path.dirname(tifname))
            slide_dir = os.path.join(save_path, parent_d, basename)
            marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")
            for x_y, boxes in new_dict.items():
                for box in boxes:
                    # image naming: tifname_x_y_w_h_p.jpg
                    origin = x_y.split('_')
                    x = int(origin[0]) + int(box[4][0])
                    y = int(origin[1]) + int(box[4][1])
                    w = int(box[4][2])
                    h = int(box[4][3])

                    marked_class_i = is_overlapped(marked_boxes, (x, y, w, h),
                                                   factor)
                    if marked_class_i:
                        image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], marked_class_i, basename, x, y, w, h, N)
                        save_path_i = os.path.join(slide_dir, box[2], "marked")
                    else:
                        image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                            1 - box[3], basename, x, y, w, h, N)
                        save_path_i = os.path.join(slide_dir, box[2])
                    os.makedirs(save_path_i, exist_ok=True)
                    image_fullname = os.path.join(save_path_i, image_name)

                    # Shift the origin by half of the extra size so the
                    # enlarged window stays centred on the original box.
                    location = (int(x + (1 - N) * w / 2),
                                int(y + (1 - N) * h / 2))
                    region_size = (int(N * w), int(N * h))
                    region = slide.read_region(location, 0, region_size)
                    region.convert("RGB").save(image_fullname)
        finally:
            slide.close()
Esempio n. 7
0
    def gen_np_array_mem_(self,
                          results,
                          classes=cfg.darknet.classes,
                          det=cfg.xception.det1,
                          size=cfg.xception.size):
        """Read the selected cells of ``self.input_file`` into memory.

        The slide path is looked up in ``get_tiff_dict()`` under
        ``self.input_file``; the tile origin of each box is parsed from its
        ``x_y`` key with ``self.pattern``.

        :param results: {x_y: [(class_i, det, (x,y,w,h)),]}
        :param classes: [class_i,]; only boxes whose class is listed are used
        :param det: a box is kept when its det is strictly greater than this
        :param size: side length of the produced square image
        :return:
            cell_list: list of numpy arrays, one per kept cell, in order
            cell_index: {index: [x_y, [class_i, det, (x,y,w,h)]]},
                        index is the position in cell_list
        :raises Exception: when the slide is not in ``get_tiff_dict()`` or
                           cannot be opened by either reader
        """
        def resize_img(img, size):
            # Crop box centred on the image with side ``size`` (extends past
            # the image when the image is smaller), then resize to
            # (size, size).
            pad_x = (size - img.size[0]) / 2
            pad_y = (size - img.size[1]) / 2
            img_croped = img.crop((-pad_x, -pad_y,
                                   img.size[0] + pad_x,
                                   img.size[1] + pad_y))
            return img_croped.resize((size, size))

        tiff_dict = get_tiff_dict()
        if self.input_file not in tiff_dict:
            raise Exception("XCEPTION PREPROCESS %s NOT FOUND" %
                            self.input_file)

        slide_path = tiff_dict[self.input_file]
        try:
            slide = openslide.OpenSlide(slide_path)
        except Exception:
            # OpenSlide could not open the file; fall back to TSlide.
            # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit
            # from being turned into a generic failure.
            try:
                slide = TSlide(slide_path)
            except Exception as err:
                # Keep the original cause attached for debugging.
                raise Exception('TIFF FILE OPEN FAILED => %s' %
                                self.input_file) from err

        cell_list = []
        cell_index = {}
        index = 0
        # try/finally releases the slide handle even if a region read fails.
        try:
            for x_y, boxes in results.items():
                for box in boxes:
                    if box[0] not in classes or box[1] <= det:
                        continue
                    # The first match of self.pattern must yield three
                    # groups; the last two are the tile origin.
                    _, x, y = re.findall(self.pattern, x_y)[0]
                    x = int(x) + int(box[2][0])
                    y = int(y) + int(box[2][1])
                    w = int(box[2][2])
                    h = int(box[2][3])

                    # The box is read as-is; resize_img brings it to
                    # (size, size).
                    cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                    cell_list.append(np.array(resize_img(cell, size)))
                    cell_index[index] = [x_y, list(box)]
                    index += 1
        finally:
            slide.close()

        # A plain list is returned on purpose; callers stack it if needed.
        return cell_list, cell_index