Ejemplo n.º 1
0
def worker(input_file_path, start_x, start_y, height, patch_save_path):
    """
    Cut one vertical column of patches out of a slide and save them as JPEGs.

    Starting at (start_x, start_y), a PATCH_SIZE x PATCH_SIZE region is read
    at level 0, converted from RGBA to BGR and written under a random file
    name; the y coordinate then advances by DELTA until it reaches ``height``.

    :param input_file_path: path of the slide file to read
    :param start_x: x coordinate shared by every patch of the column
    :param start_y: y coordinate of the first patch
    :param height: exclusive upper bound for the patch y coordinate
    :param patch_save_path: directory the patch images are written to
    :return: number of patches processed
    """
    count = 0
    # Fall back to TSlide when OpenSlide cannot open the file.
    try:
        slide = openslide.OpenSlide(input_file_path)
    except Exception:
        slide = TSlide(input_file_path)

    try:
        while start_y < height:
            # Read the patch at full resolution (level 0).
            patch = slide.read_region((start_x, start_y), 0,
                                      (PATCH_SIZE, PATCH_SIZE))

            # Convert the RGBA image into the BGR layout OpenCV writes.
            patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR)

            # Build the output path from a random name and write the file.
            name = get_random_string()
            save_path = os.path.join(patch_save_path, "%s.jpg" % name)
            cv2.imwrite(save_path, patch)

            start_y += DELTA
            count += 1
    finally:
        # Release the slide handle even when a read or write fails.
        slide.close()

    return count
Ejemplo n.º 2
0
def cut_fixed_size(tif_dir, positions, size, save_path):
    """
    Cut a fixed-size square centred on every labelled box and save it as JPEG.

    Images are written to ``<save_path>/<class_i>/<tif>_<x>_<y>.jpg`` where
    (x, y) is the top-left corner of the square that was read.

    :param tif_dir: folder holding the slides (``<tif>.tif`` or ``<tif>.kfb``)
    :param positions: {tif: [(class_i, x, y, w, h), ...]}
    :param size: edge length of the square region to cut
    :param save_path: root folder for the output images
    """
    def get_x_y(box, size):
        """Return the top-left corner of the size x size square centred on box."""
        x_center = box[1] + box[3] / 2.0
        y_center = box[2] + box[4] / 2.0
        x = int(x_center - size / 2.0)
        y = int(y_center - size / 2.0)
        return (x, y)

    for tif, boxes in positions.items():
        # Try the .tif with OpenSlide first, then the .kfb with TSlide.
        try:
            slide = openslide.OpenSlide(os.path.join(tif_dir, tif + ".tif"))
        except Exception:
            slide = TSlide(os.path.join(tif_dir, tif + ".kfb"))
        try:
            for box in boxes:
                save_path_i = os.path.join(save_path, box[0])
                os.makedirs(save_path_i, exist_ok=True)
                x, y = get_x_y(box, size)
                cell = slide.read_region((x, y), 0,
                                         (size, size)).convert("RGB")
                # Save through PIL directly: scipy.misc.imsave no longer
                # exists in current SciPy releases.
                cell.save(
                    os.path.join(save_path_i,
                                 "{}_{}_{}.jpg".format(tif, x, y)))
        finally:
            # Release the handle even if one of the boxes fails.
            slide.close()
        print("processed: {}".format(tif))
Ejemplo n.º 3
0
def matting_job(points_lst, slide_path, output):
    """
    Save, for every point, the supplied cell image and a crop read from the slide.

    Both files go to ``<output>/<slide basename>/<xcp_cell_class>/``. Their
    names are built from the same fields; the supplied image carries an extra
    trailing ``_`` before ``.jpg`` so the two files do not collide.

    :param points_lst: iterable of dicts with the keys ``yolo_cell_class``,
        ``yolo_cell_det``, ``xcp_cell_class``, ``xcp_cell_class_det``, ``x``,
        ``y``, ``w``, ``h`` and ``cell_image`` (an object exposing ``save``)
    :param slide_path: path of the slide file
    :param output: root output directory
    """
    try:
        slide = openslide.OpenSlide(slide_path)
    except Exception:
        slide = TSlide(slide_path)

    # The slide name is identical for every point, so derive it once.
    basename, _ = os.path.splitext(os.path.basename(slide_path))

    try:
        for point in points_lst:
            x, y, w, h = point['x'], point['y'], point['w'], point['h']
            name_fields = (point['xcp_cell_class_det'],
                           point['yolo_cell_class'], point['yolo_cell_det'],
                           x, y, w, h)

            save_path = os.path.join(output, basename,
                                     point['xcp_cell_class'])
            os.makedirs(save_path, exist_ok=True)

            # Image handed over with the point (note the trailing "_").
            point['cell_image'].save(
                os.path.join(save_path,
                             "%.4f_%s_%.4f_%s_%s_%s_%s_.jpg" % name_fields))

            # Same region read straight from the slide at level 0.
            slide.read_region((x, y), 0, (w, h)).convert("RGB").save(
                os.path.join(save_path,
                             "%.4f_%s_%.4f_%s_%s_%s_%s.jpg" % name_fields))
    finally:
        # The original never released the slide handle.
        slide.close()
Ejemplo n.º 4
0
def cell_sampling(xml_file, save_path, size):
    """
    Cut size x size windows around the labels of one slide and emit new xml files.

    The slide is looked up next to ``xml_file`` (same stem, ``.tif`` or
    ``.kfb``). Every window returned by ``get_windows`` is saved as
    ``<save_path>/<stem>_<x>_<y>.jpg``; ``Xml(...).gen_xml()`` is then run
    for the same windows.

    :param xml_file: annotation file; also determines the slide location
    :param save_path: output directory for jpg and xml files
    :param size: edge length of each window
    """
    labels = get_labels(xml_file)

    stem = os.path.splitext(xml_file)[0]
    tif_file = stem + ".tif"
    kfb_file = stem + ".kfb"
    # Nothing to do when neither slide variant is present.
    if not (os.path.isfile(tif_file) or os.path.isfile(kfb_file)):
        print(stem + " doesn't exist")
        return

    try:
        slide = openslide.OpenSlide(tif_file)
    except:
        slide = TSlide(kfb_file)

    size_x, size_y = slide.dimensions
    points_xy = get_windows(labels, size_x, size_y, size)

    # Write one jpg per window.
    slide_name = os.path.basename(stem)
    for (x, y) in points_xy:
        window = slide.read_region((x, y), 0, (size, size))
        window = window.convert("RGB")
        window.save(save_path + "/" + slide_name + "_" + str(x) + "_" +
                    str(y) + ".jpg")

    slide.close()

    # Write the matching xml files.
    Xml(slide_name, save_path, points_xy, labels, size).gen_xml()

    print("processed ", xml_file)
Ejemplo n.º 5
0
def worker(tiff_file_path,
           patch_save_path,
           patch_range=(0, 1),
           patch_num_need=PATCH_NUM_NEED,
           path_size=(PATCH_SIZE, PATCH_SIZE)):
    """
    Cut randomly positioned patches out of one slide.

    Patches are saved as
    ``<patch_save_path>/<basename>/<basename>_x.._y.._w.._h.._s<index>.jpg``.

    :param tiff_file_path: path of the slide file
    :param patch_save_path: root directory for the patches
    :param patch_range: (low, high) fractions of the slide width/height from
        which the top-left corners are drawn; (0, 1) covers the whole slide
    :param patch_num_need: number of random patches to cut
    :param path_size: (width, height) of each patch
    :return: ``(0, None)`` on success, ``(1, tiff_file_path)`` when the slide
        cannot be opened
    """
    # Open the slide, falling back to TSlide when OpenSlide fails.
    try:
        try:
            slide = openslide.OpenSlide(tiff_file_path)
        except Exception:
            slide = TSlide(tiff_file_path)
    except Exception:
        print("TIFF OPEN FAILED => %s" % tiff_file_path)
        return 1, tiff_file_path

    try:
        width, height = slide.dimensions

        # Draw all x coordinates first, then all y coordinates, so a seeded
        # random generator yields the same points as before.
        x_lst = [
            int(random.uniform(patch_range[0], patch_range[1]) * width)
            for _ in range(patch_num_need)
        ]
        y_lst = [
            int(random.uniform(patch_range[0], patch_range[1]) * height)
            for _ in range(patch_num_need)
        ]

        basename, _ = os.path.splitext(os.path.basename(tiff_file_path))
        w, h = path_size

        # The target directory does not depend on the patch: create it once
        # instead of re-checking it on every iteration.
        save_path = os.path.join(patch_save_path, basename)
        os.makedirs(save_path, exist_ok=True)

        for index, (x, y) in enumerate(zip(x_lst, y_lst)):
            image_name = "%s_x%s_y%s_w%s_h%s_s%s.jpg" % (basename, x, y, w, h,
                                                         index)
            slide.read_region(
                (x, y), 0,
                (w, h)).convert("RGB").save(os.path.join(save_path,
                                                         image_name))
    finally:
        # The original never released the slide handle.
        slide.close()

    return 0, None
Ejemplo n.º 6
0
def cut_cells(xml_file, save_path):
    """
    Cut every annotated region of a slide at twice its size, plus a yolo label.

    The slide is expected next to ``xml_file`` (same stem, ``.tif`` or
    ``.kfb``). For each ``Annotation`` element the bounding rectangle of its
    ``Coordinate`` points is computed; a region twice as wide and twice as
    high, centred on that rectangle, is saved as
    ``<basename>_x.._y.._w.._h...bmp`` (the numbers describe the original
    rectangle). A ``.txt`` with the same stem receives one yolo line.

    :param xml_file: annotation file; also determines the slide location
    :param save_path: directory receiving the .bmp and .txt files
    """
    # From the .xml file name derive the .tif/.kfb file name.
    filename = os.path.splitext(xml_file)[0]
    try:
        slide = openslide.OpenSlide(filename + ".tif")
    except Exception:
        slide = TSlide(filename + ".kfb")

    basename = os.path.basename(filename)

    try:
        collection = xml.dom.minidom.parse(xml_file).documentElement
        for annotation in collection.getElementsByTagName("Annotation"):
            coordinates = annotation.getElementsByTagName("Coordinate")
            x_coords = [float(c.getAttribute("X")) for c in coordinates]
            y_coords = [float(c.getAttribute("Y")) for c in coordinates]

            # An annotation without points has no bounding box; skip it
            # instead of failing in min()/max() on an empty list.
            if not x_coords:
                continue

            x_min, x_max = min(x_coords), max(x_coords)
            y_min, y_max = min(y_coords), max(y_coords)

            # Region twice the size of the marked rectangle: the origin moves
            # half a width/height up-left, so the rectangle stays centred.
            x = int(1.5 * x_min - 0.5 * x_max)
            y = int(1.5 * y_min - 0.5 * y_max)
            x_size = int(2 * (x_max - x_min))
            y_size = int(2 * (y_max - y_min))

            cell_name = "{}_x{}_y{}_w{}_h{}.bmp".format(
                basename, int(x_min), int(y_min), int(x_max - x_min),
                int(y_max - y_min))
            cell_path_name = os.path.join(save_path, cell_name)
            cell = slide.read_region((x, y), 0,
                                     (x_size, y_size)).convert("RGB")
            cell.save(cell_path_name)

            # yolo label: class 0, centred, covering half of each dimension,
            # which matches the centred 2x crop above.
            txt_path_name = os.path.splitext(cell_path_name)[0] + ".txt"
            with open(txt_path_name, 'w') as f:
                f.write("0 0.50 0.50 0.50 0.50\n")
    finally:
        # Release the handle even when parsing or a crop fails.
        slide.close()
Ejemplo n.º 7
0
def cell_sampling(xml_path, tif_path, save_path):
    """
    Cut one half-resolution window per label and write its coordinate file.

    For each label the window (x_win, y_win, w_win, h_win) is read at level
    0, converted to BGR, halved with ``cv2.pyrDown`` and stored as
    ``<save_path>/<class_i>/<xml stem>_x<x>_y<y>.bmp``. A ``.txt`` with the
    same stem lists one line per entry of ``add_labels``, with offsets and
    sizes divided by two.

    :param xml_path: annotation file passed to ``get_labels``
    :param tif_path: slide file
    :param save_path: root output directory
    """
    labels = get_labels(xml_path)

    # Nothing to cut.
    if not labels:
        return

    try:
        try:
            slide = openslide.OpenSlide(tif_path)
        except:
            slide = TSlide(tif_path)
    except:
        print("ERROR: can not open pic ", tif_path)
        exit()

    xml_stem = os.path.splitext(os.path.basename(xml_path))[0]
    for label in labels:
        origin = (label["x_win"], label["y_win"])
        extent = (label["w_win"], label["h_win"])
        region = slide.read_region(origin, 0, extent).convert("RGB")
        pixels = cv2.cvtColor(np.asarray(region), cv2.COLOR_RGB2BGR)
        pixels = cv2.pyrDown(pixels)

        file_stem = "{}_x{}_y{}".format(xml_stem, label["x"], label["y"])

        # Save the image.
        bmp_file = os.path.join(save_path, label["class_i"],
                                file_stem + ".bmp")
        os.makedirs(os.path.dirname(bmp_file), exist_ok=True)
        cv2.imwrite(bmp_file, pixels)

        # Save the coordinates, halved to match the downscaled image.
        txt_file = os.path.join(save_path, label["class_i"],
                                file_stem + ".txt")
        rows = [[
            extra["class_i"], extra["dx"] / 2, extra["dy"] / 2,
            (extra["x_max"] - extra["x_min"]) / 2,
            (extra["y_max"] - extra["y_min"]) / 2
        ] for extra in label["add_labels"]]
        with open(txt_file, 'w') as out:
            for row in rows:
                out.write(' '.join([str(item) for item in row]) + '\n')

    slide.close()

    print("finished cutting {}, # cells: {}".format(tif_path, len(labels)))
Ejemplo n.º 8
0
def cut_cells(xml_file, save_path, size, position):
    """
    Cut a size x size image for every annotation whose colour is in ``colors``.

    The slide is looked up next to ``xml_file`` (same stem, ``.tif`` or
    ``.kfb``). The read origin comes from ``get_xy`` applied to the
    annotation's bounding box, ``size`` and ``position``. Images are saved to
    ``<save_path>/<basename>/<colors[colour]>/``.

    :param xml_file: annotation file; also determines the slide location
    :param save_path: root output directory
    :param size: edge length of the images
    :param position: two-element value forwarded to ``get_xy`` and embedded
        in the file name as ``px``/``py``
    """
    # Base name without extension.
    basename = os.path.splitext(os.path.basename(xml_file))[0]
    wsi_name = os.path.splitext(xml_file)[0]
    tif_exists = os.path.isfile(wsi_name + ".tif")
    if not tif_exists and not os.path.isfile(wsi_name + ".kfb"):
        print(wsi_name + " doesn't exist")
        return
    try:
        slide = openslide.OpenSlide(wsi_name + ".tif")
    except:
        slide = TSlide(wsi_name + ".kfb")

    # Walk the annotations of the xml document.
    root = xml.dom.minidom.parse(xml_file).documentElement
    count = 0
    for annotation in root.getElementsByTagName("Annotation"):
        if annotation.getAttribute("Color") not in colors:
            continue

        nodes = annotation.getElementsByTagName("Coordinate")
        x_coords = [float(node.getAttribute("X")) for node in nodes]
        y_coords = [float(node.getAttribute("Y")) for node in nodes]

        # Bounding box of the annotation, computed once.
        x_min, y_min = min(x_coords), min(y_coords)
        x_max, y_max = max(x_coords), max(y_coords)

        # Origin for read_region().
        x, y = get_xy((x_min, y_min, x_max, y_max), size, position)

        class_dir = os.path.join(save_path, basename,
                                 colors[annotation.getAttribute("Color")])
        os.makedirs(class_dir, exist_ok=True)

        cell = slide.read_region((x, y), 0, (size, size)).convert("RGB")
        cell_name = "{}_x{}_y{}_px{}_py{}.jpg".format(
            basename, int(x_min), int(y_min), position[0], position[1])
        cell.save(os.path.join(class_dir, cell_name))
        count += 1

    slide.close()
    print("{}: number of cells {}. image size {}, position {}".format(
        basename, count, size, position))
Ejemplo n.º 9
0
 def batch_process(self, images_pre):
     """ Cut a batch of images out of ``self.wsi_file``.

     :param images_pre: [[class_i,(xmin,ymin,xmax,ymax),((xmin_z,ymin_z),(xmax_z,ymax_z))],]
         The third element is passed to ``read_region`` as (location, size).
     :return: [[class_i,(xmin,ymin,xmax,ymax),image],]
     """
     try:
         slide = TSlide(self.wsi_file)
     except:
         slide = openslide.OpenSlide(self.wsi_file)

     def crop(entry):
         # Read the region first, then pair it with the entry's metadata.
         location, extent = entry[2][0], entry[2][1]
         region = slide.read_region(location, 0, extent).convert("RGB")
         return [entry[0], entry[1], region]

     images = [crop(entry) for entry in images_pre]
     slide.close()
     return images
Ejemplo n.º 10
0
 def get_cell_by_N(self, box, N):
     """ Cut a single image: the given box scaled by N around its centre.

     :param box: (xmin, ymin, xmax, ymax)
     :param N: size scaler applied to the box width and height
     :return: RGB image read from ``self.wsi_file`` at level 0
     """
     left, top = box[0], box[1]
     width, height = box[2] - box[0], box[3] - box[1]
     # Shift the origin by half of the size change so the centre is kept.
     origin = (int(left + (1 - N) * width / 2),
               int(top + (1 - N) * height / 2))
     extent = (int(N * width), int(N * height))

     try:
         slide = TSlide(self.wsi_file)
     except:
         slide = openslide.OpenSlide(self.wsi_file)
     image = slide.read_region(origin, 0, extent).convert("RGB")
     slide.close()
     return image
Ejemplo n.º 11
0
def generate_classified_cell_images(key, classified_data_save_path):
    """
    Cut the cells listed in the ``.txt`` files of one slide.

    The slide path comes from ``tiff_dict[key]``. Every ``.txt`` file in
    ``<classified_data_save_path>/<key>/`` holds comma-separated lines, either
    ``label,label_,x,y,w,h`` or ``label,x,y,w,h``. Each line becomes a jpg in
    ``<txt dir>/<txt basename>/<label>/``.

    :param key: slide key, also the name of the sub-directory with the txt files
    :param classified_data_save_path: root directory of the txt files
    :return: ``0`` on success, ``(1, key)`` when processing any line fails
    :raises Exception: when the slide cannot be opened
    """
    tiff_path = tiff_dict[key]

    try:
        try:
            slide = openslide.OpenSlide(tiff_path)
        except Exception:
            slide = TSlide(tiff_path)
    except Exception as err:
        # Keep the original cause attached for debugging.
        raise Exception('TIFF FILE OPEN FAILED => %s' % tiff_path) from err

    txt_data_path = os.path.join(classified_data_save_path, key)

    try:
        txts = os.listdir(txt_data_path)
        try:
            for txt in txts:
                if not txt.endswith('.txt'):
                    continue

                basename, _ = os.path.splitext(os.path.basename(txt))
                # Read everything first so the file is not held open while
                # the slide is being cut.
                with open(os.path.join(txt_data_path, txt)) as f:
                    lines = f.readlines()

                image_save_path = os.path.join(txt_data_path, basename)
                for line in lines:
                    # A blank line carries no record; skip it instead of
                    # failing the whole slide on the unpack below.
                    if not line.strip():
                        continue

                    items = line.replace('\n', '').split(',')
                    if len(items) == 6:
                        label, label_, x, y, w, h = items
                        image_name = "%s_%s_x%s_y%s_w%s_h%s.jpg" % (
                            key, label_, x, y, w, h)
                    else:
                        label, x, y, w, h = items
                        image_name = "%s_x%s_y%s_w%s_h%s.jpg" % (key, x, y,
                                                                 w, h)

                    save_path = os.path.join(image_save_path, label)
                    os.makedirs(save_path, exist_ok=True)

                    x, y, w, h = int(x), int(y), int(w), int(h)
                    patch = slide.read_region((x, y), 0,
                                              (w, h)).convert("RGB")
                    patch.save(os.path.join(save_path, image_name))
        except Exception as err:
            # Report the reason instead of swallowing it silently.
            print("CELL IMAGE GENERATION FAILED => %s: %s" % (key, err))
            return 1, key
    finally:
        # The original never released the slide handle.
        slide.close()

    return 0
Ejemplo n.º 12
0
    def read_label(self, wsi_name):
        """
        Return the label image of a slide, or None when it cannot be read.

        When ``self.os == "linux"`` the slide is opened with TSlide and its
        associated ``"label"`` image is returned converted to RGB. Otherwise
        ``./lib/ImageOperationLib.dll`` is loaded through ``WinDLL`` and
        ``GetLableInfoPathFunc`` is asked for the encoded label bytes, which
        are opened with ``Image.open``.

        :param wsi_name: path of the slide file
        :return: an image object, or None if any exception is raised (a
            message is printed) or the DLL call returns a falsy result
        """
        try:
            if self.os == "linux":
                # Imported lazily, only on the branch that needs it.
                from tslide.tslide import TSlide
                slide = TSlide(wsi_name)
                # print(slide.associated_images.items())
                image = slide.associated_images["label"].convert(
                    "RGB")  # RGB Image instance
                # w, h = image.size
                # image.crop((0, 0, w, h//2)).save("./res/half.jpg")
                # image.save("./res/label.jpg")
                slide.close()
                return image
            else:
                # Path is relative to the current working directory.
                _lib = WinDLL("./lib/ImageOperationLib.dll")

                # Declared signature: (char* path, ubyte** data, int* length,
                # int* width, int* height) -> int
                _lib.GetLableInfoPathFunc.argtypes = [
                    c_char_p,
                    POINTER(POINTER(c_ubyte)),
                    POINTER(c_int),
                    POINTER(c_int),
                    POINTER(c_int)
                ]
                _lib.GetLableInfoPathFunc.restype = c_int
                path = c_char_p(wsi_name.encode("utf-8"))
                # Output arguments filled in by the DLL call below.
                imageData = POINTER(c_ubyte)()
                length = c_int()
                width = c_int()
                height = c_int()
                # This initial value is overwritten by the call result.
                res = c_int()
                res = _lib.GetLableInfoPathFunc(path, byref(imageData),
                                                byref(length), byref(width),
                                                byref(height))
                if res:
                    # with open("./res/label_win.jpg", "wb") as f:
                    #     f.write(string_at(imageData, length.value))
                    import numpy as np
                    # View the first `length` bytes behind the returned
                    # pointer as a 1-D array.
                    narray = np.ctypeslib.as_array(imageData,
                                                   shape=(length.value, ))
                    from io import BytesIO
                    buf = BytesIO(narray)
                    # Image.open(buf).save("./res/label_win.jpg")
                    return Image.open(buf)
                # A falsy result falls through to the final `return None`.
        except:
            print(wsi_name + " cannot be processed")
        return None
Ejemplo n.º 13
0
def asap_to_image(input_file_path, output_file_path):
    """
    Cut the central part of a slide into patches using a process pool.

    The region between AVAILABLE_PATCH_START_RATIO and
    AVAILABLE_PATCH_END_RATIO of the slide width/height is split into columns
    DELTA apart; each column is handed to ``worker`` in a separate process.
    Patches are written to ``<output_file_path>/<image name>/``.

    :param input_file_path: path of the slide file
    :param output_file_path: root output directory
    :return: None
    """
    t0 = datetime.datetime.now()

    try:
        slide = openslide.OpenSlide(input_file_path)
    except Exception:
        slide = TSlide(input_file_path)

    if not slide:
        return

    try:
        img_name = os.path.basename(input_file_path).split(".")[0]
        print("Process %s ..." % img_name)

        width, height = slide.dimensions
    finally:
        # Only the dimensions are needed here; every worker opens the file
        # itself, so release this handle right away.
        slide.close()

    # Read column by column, restricted to the configured central area.
    x, y, width, height = int(width * AVAILABLE_PATCH_START_RATIO), \
                          int(height * AVAILABLE_PATCH_START_RATIO), \
                          int(width * AVAILABLE_PATCH_END_RATIO), \
                          int(height * AVAILABLE_PATCH_END_RATIO)
    patch_num = get_patch_num(width - x, height - y)

    output_path = os.path.join(output_file_path, img_name)
    os.makedirs(output_path, exist_ok=True)

    tasks = []

    # Process pool; the context manager shuts it down when the jobs are done.
    with ProcessPoolExecutor(max_workers=MATTING_PROCESS_NUM) as executor:
        t00 = datetime.datetime.now()
        print("Adding Job to Pool...")
        # One job per column.
        while x < width:
            tasks.append(
                executor.submit(worker, input_file_path, x, y, height,
                                output_path))
            x += DELTA
        t01 = datetime.datetime.now()
        print("Done, cost: %s" % (t01 - t00))

        print("Total Job Count: %s, Worker Count: %s" %
              (len(tasks), MATTING_PROCESS_NUM))
        job_count = len(tasks)
        patch_count = 0
        for future in as_completed(tasks):
            count = future.result()
            patch_count += count
            job_count -= 1
            print("One Job Done, Got %s patches, last Job Count: %s" %
                  (count, job_count))

    t1 = datetime.datetime.now()
    print("File - %s, Size: (%s, %s), Calculate Patch Num %s, Got Patch Num %s, Total cost time: %s" % (
        img_name, width, height, patch_num, patch_count, t1 - t0))
Ejemplo n.º 14
0
def cut_cells(tiff_path, points_lst, output_dir, N=1):
    """
    Cut every point of a slide, scaled by N around its centre, as a jpg.

    Images are saved to ``<output_dir>/<slide basename>/<label>/``; the file
    name embeds ``1 - accuracy``, the slide name, the original box and N.

    :param tiff_path: path of the slide file
    :param points_lst: iterable of dicts with the keys ``label``,
        ``accuracy``, ``x``, ``y``, ``w`` and ``h``
    :param output_dir: root output directory
    :param N: size scaler applied to the box width and height
    """
    basename, _ = os.path.splitext(os.path.basename(tiff_path))

    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        slide = TSlide(tiff_path)

    try:
        for point in points_lst:
            cell_save_dir = os.path.join(output_dir, basename,
                                         point['label'])
            os.makedirs(cell_save_dir, exist_ok=True)

            x, y, w, h = point['x'], point['y'], point['w'], point['h']
            image_name = "1-p{:.10f}_{}_x{}_y{}_w{}_h{}_{}X.jpg".format(
                1 - point['accuracy'], basename, x, y, w, h, N)
            cell_save_path = os.path.join(cell_save_dir, image_name)

            # Move the origin by half of the size change so the N-times
            # region stays centred on the original box.
            slide.read_region(
                (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                (int(N * w), int(N * h))).convert("RGB").save(cell_save_path)
    finally:
        # The original never released the slide handle.
        slide.close()
Ejemplo n.º 15
0
def patch_worker(input_image_path, start_x, start_y, height, patch_save_path,
                 in_queue):
    """
    Cut one column of a slide into patches of PATCH_SIZE with a step of DELTA.

    Each patch is saved as ``<patch_save_path>/<start_x>_<y>.jpg`` and a ``1``
    is put on ``in_queue`` for every patch.

    :param input_image_path: path of the slide file
    :param start_x: x coordinate shared by every patch of the column
    :param start_y: y coordinate of the first patch
    :param height: exclusive upper bound for the patch y coordinate
    :param patch_save_path: directory the patches are written to
    :param in_queue: queue used to count the patches
    :return: None
    """
    # Open the slide, falling back to TSlide when OpenSlide fails.
    try:
        img_data = openslide.OpenSlide(input_image_path)
    except Exception:
        img_data = TSlide(input_image_path)

    try:
        while start_y < height:
            # Read the patch at full resolution (level 0).
            patch = img_data.read_region((start_x, start_y), 0,
                                         (PATCH_SIZE, PATCH_SIZE))

            # Convert the RGBA image into the BGR layout OpenCV writes.
            patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR)
            in_queue.put(1)

            # The file name encodes the patch origin.
            save_path = os.path.join(patch_save_path,
                                     "{}_{}.jpg".format(start_x, start_y))
            cv2.imwrite(save_path, patch)

            start_y += DELTA
    finally:
        # Close the handle on failure too, not only after a clean run.
        img_data.close()
Ejemplo n.º 16
0
def cut_same_size(tif_dir, positions, save_path):
    """
    Cut every labelled box at its own size and save it as a jpg.

    :param tif_dir: folder holding the slides (``<tif>.tif`` or ``<tif>.kfb``)
    :param positions: {tif: [(class_i, x, y, w, h),]}
    :param save_path: root output folder; images are stored per class as
        ``<tif>_x.._y.._w.._h...jpg``
    """
    for tif, boxes in positions.items():
        # Try the .tif with OpenSlide first, then the .kfb with TSlide.
        try:
            slide = openslide.OpenSlide(os.path.join(tif_dir, tif + ".tif"))
        except:
            slide = TSlide(os.path.join(tif_dir, tif + ".kfb"))

        for box in boxes:
            class_dir = os.path.join(save_path, box[0])
            os.makedirs(class_dir, exist_ok=True)

            x, y, w, h = box[1], box[2], box[3], box[4]
            image_name = "{}_x{}_y{}_w{}_h{}.jpg".format(tif, x, y, w, h)
            region = slide.read_region((x, y), 0, (w, h))
            region.convert("RGB").save(os.path.join(class_dir, image_name))

        slide.close()
        print("processed: {}".format(tif))
Ejemplo n.º 17
0
 def read_meta(self):
     """ Read size information and a thumbnail of ``self.wsi_file`` into ``self.meta``.

     Keys written: ``m``/``n`` (level 0 dimensions), ``mtop``/``ntop``
     (dimensions of the last level), ``level_downsamples`` (downsample of the
     last level), ``thumbnail`` and ``thumbnail_blur`` (the thumbnail after a
     Gaussian blur of radius 16).
     """
     try:
         slide = TSlide(self.wsi_file)
     except:
         slide = openslide.OpenSlide(self.wsi_file)

     top_level = slide.level_count - 1
     self.meta['m'], self.meta['n'] = slide.level_dimensions[0]
     self.meta['mtop'], self.meta['ntop'] = slide.level_dimensions[top_level]
     self.meta['level_downsamples'] = slide.level_downsamples[top_level]

     # Prefer get_thumbnail(); fall back to reading the whole last level.
     try:
         top_size = (self.meta['mtop'], self.meta['ntop'])
         self.meta['thumbnail'] = slide.get_thumbnail(top_size)
     except:
         top_size = (self.meta['mtop'], self.meta['ntop'])
         self.meta['thumbnail'] = slide.read_region((0, 0), top_level,
                                                    top_size)

     blur = ImageFilter.GaussianBlur(radius=16)
     self.meta['thumbnail_blur'] = self.meta['thumbnail'].filter(blur)
     slide.close()
def cell_sampling(xml_file, tiff_path, save_path, size, scale):
    """
    Cut size x size windows around the labels of a slide, rescale and save them.

    Windows come from ``get_windows_new``. Each one is read at level 0,
    resized to ``int(size * scale + 0.5)`` per side and saved as
    ``<save_path>/<slide basename>_<x>_<y>.jpg``. ``Xml(...).gen_xml()`` is
    then run for the same windows.

    :param xml_file: annotation file passed to ``get_labels``
    :param tiff_path: path of the slide file
    :param save_path: output directory for jpg and xml files
    :param size: edge length of the windows read from the slide
    :param scale: factor applied to ``size`` for the saved images
    """
    labels = get_labels(xml_file)

    print("PROCESSING %s ..." % tiff_path)

    try:
        try:
            slide = openslide.OpenSlide(tiff_path)
        except Exception:
            slide = TSlide(tiff_path)
    except Exception:
        print("ERROR #", "can not open pic ", tiff_path)
        exit()

    # Derived before the loop: it is needed after the loop as well, and used
    # to be unbound whenever there were no windows.
    filename, _ = os.path.splitext(os.path.basename(tiff_path))
    scaled_edge = int(size * scale + 0.5)

    try:
        size_x, size_y = slide.dimensions
        points_xy = get_windows_new(labels, size_x, size_y, size, tiff_path)

        # Generate the jpg files.
        points_num = len(points_xy)
        for i, (x, y) in enumerate(points_xy):
            if ((i % 100) == 0):
                print("# process # ", i, '/', points_num - 1, x, y, size,
                      size)
            cell = slide.read_region((x, y), 0, (size, size)).convert("RGB")

            image_file_name = save_path + "/" + filename + "_" + str(
                x) + "_" + str(y) + ".jpg"

            # Scale the window down (or up) before saving.
            cell = cell.resize((scaled_edge, scaled_edge))
            cell.save(image_file_name)
    finally:
        # Release the handle even when a window fails.
        slide.close()

    # Generate the xml files.
    new_xmls = Xml(os.path.basename(filename), save_path, points_xy, labels,
                   size, scale)
    new_xmls.gen_xml()

    # Report the number of windows; the loop index was one too low and
    # undefined when there were none.
    print("INFO# ", "small pics num is ", points_num)

    print("INFO# ", "processed ", xml_file)
Ejemplo n.º 19
0
def main(input_file_path, task_id, output_file_path="output"):
    """
    Cut a slide into patches, run cell segmentation on them and return the result.

    The central part of the slide (between AVAILABLE_PATCH_START_RATIO and
    AVAILABLE_PATCH_END_RATIO) is cut column by column in a process pool. The
    patch files are then fed to ``init_cell_seg`` / ``cell_segmentation`` in
    batches of 200, and the patch directory is deleted afterwards.

    :param input_file_path: path of the slide file
    :param task_id: currently unused; only referenced by a disabled progress
        update
    :param output_file_path: output directory for the patches
    :return: ``(get_result_tag(results), results)``, or None when the opened
        slide object is falsy
    """
    import shutil

    # Create the output directory.
    os.makedirs(os.path.join(CURRENT_WORKING_PATH, output_file_path),
                exist_ok=True)

    t0 = datetime.datetime.now()
    try:
        slide = openslide.OpenSlide(input_file_path)
    except Exception:
        slide = TSlide(input_file_path)

    if not slide:
        return None

    try:
        img_name = os.path.basename(input_file_path).split(".")[0]
        print("Process %s ..." % img_name)

        width, height = slide.dimensions
    finally:
        # Only the dimensions are needed here; every worker opens the file
        # itself, so release this handle right away.
        slide.close()

    # Read column by column, restricted to the configured central area.
    x, y, width, height = int(width * AVAILABLE_PATCH_START_RATIO), \
                          int(height * AVAILABLE_PATCH_START_RATIO), \
                          int(width * AVAILABLE_PATCH_END_RATIO), \
                          int(height * AVAILABLE_PATCH_END_RATIO)
    patch_num = get_patch_num(width - x, height - y)

    output_path = os.path.join(output_file_path, img_name)
    os.makedirs(output_path, exist_ok=True)

    tasks = []

    # Process pool; the context manager shuts it down when the jobs are done.
    with ProcessPoolExecutor(max_workers=MATTING_PROCESS_NUM) as executor:
        t00 = datetime.datetime.now()
        print("Adding Job to Pool...")
        # One job per column.
        while x < width:
            tasks.append(
                executor.submit(worker, input_file_path, x, y, height,
                                output_path))
            x += DELTA
        t01 = datetime.datetime.now()
        print("Done, cost: %s" % (t01 - t00))

        print("Total Job Count: %s, Worker Count: %s" %
              (len(tasks), MATTING_PROCESS_NUM))
        job_count = len(tasks)
        patch_count = 0
        for future in as_completed(tasks):
            count = future.result()
            patch_count += count
            job_count -= 1
            print("One Job Done, Got %s patches, rest Job Count: %s" %
                  (count, job_count))

    t1 = datetime.datetime.now()
    print(
        "File - %s, Size: (%s, %s), Calculate Patch Num %s, Got Patch Num %s, Total cost time: %s"
        % (img_name, width, height, patch_num, patch_count, t1 - t0))
    print("Algorithm Analysing Engine Start...")
    patch_lst = FilesScanner(output_path).get_files()
    delta = 200
    batches = [
        patch_lst[i:i + delta] for i in range(0, len(patch_lst), delta)
    ]

    results = {}
    t = len(batches)
    for i, batch in enumerate(batches):
        print("init_cell_seg...")
        init_cell_seg(batch)
        print("cell_segmentation...")
        result = cell_segmentation(det_thresh=0.1)
        # Merge the per-batch results key by key.
        for key, value in result.items():
            if key in results:
                results[key].extend(value)
            else:
                results[key] = value

        progress = "%.2f" % ((i + 1) / t)
        print(progress)

    # Remove the temporary patches without going through a shell: the old
    # "rm -rf" string only escaped spaces and was not portable.
    shutil.rmtree(output_path, ignore_errors=True)

    return get_result_tag(results), results
def generate_image_from_xml(xml_path, cell_save_path, tiff_dict):
    """
    Parse the annotated cell boxes of an xml file and cut them out of the slide.

    Every ``Annotation`` element supplies a ``Cell`` with X/Y/W/H and Type.
    The box is read at level 0 and saved as
    ``<cell_save_path>/<Type>/<xml name>_x.._y.._w.._h...bmp``. Boxes that
    fail are reported and skipped.

    :param xml_path: path of the xml file
    :param cell_save_path: root directory for the cell images
    :param tiff_dict: mapping from xml base name to slide path; the process
        exits when the xml name is missing
    :return: None on completion, the slide path when the slide cannot be opened
    """
    DOMTree = xml.dom.minidom.parse(xml_path)
    collection = DOMTree.documentElement

    # Kept from the original: looks up the "Annotations" element and reads
    # its Name attribute, although the value is not used further.
    parent = collection.getElementsByTagName("Annotations")[0]
    tiff_file_name = parent.getAttribute("Name")

    xml_name, _ = os.path.splitext(os.path.basename(xml_path))
    if xml_name not in tiff_dict:
        print(xml_name, 'NOT FOUND!')
        exit()

    tiff_file_path = tiff_dict[xml_name]

    annotations = collection.getElementsByTagName("Annotation")

    # Open the slide, falling back to TSlide when OpenSlide fails.
    try:
        try:
            slide = openslide.OpenSlide(tiff_file_path)
        except Exception:
            slide = TSlide(tiff_file_path)
    except Exception:
        print("TIFF OPEN FAILED => %s" % tiff_file_path)
        return tiff_file_path

    try:
        # NOTE(review): this tests the xml path for ".kfb"; it looks like it
        # was meant for tiff_file_path - confirm before changing it.
        halve = False
        if xml_path.endswith(".kfb"):
            mpp = float(slide.properties['openslide.mpp-x'])
            # Closer to 0.25 than to 0.5 is treated as 40x, which is halved
            # below; ties and everything else are left untouched.
            halve = abs(mpp - 0.25) < abs(mpp - 0.5)

        for annotation in annotations:
            cell = annotation.getElementsByTagName("Cell")[0]
            x_ = int(cell.getAttribute("X"))
            y_ = int(cell.getAttribute("Y"))
            w_ = int(cell.getAttribute("W"))
            h_ = int(cell.getAttribute("H"))

            class_type = cell.getAttribute("Type")

            save_path = os.path.join(cell_save_path, class_type)
            os.makedirs(save_path, exist_ok=True)

            image_name = "%s_x%s_y%s_w%s_h%s.bmp" % (xml_name, x_, y_, w_,
                                                     h_)
            try:
                patch = slide.read_region((x_, y_), 0,
                                          (w_, h_)).convert("RGB")

                if halve:
                    # The original referenced an undefined `image` here, so
                    # this branch always failed and nothing was saved.
                    patch = Image.fromarray(cv2.pyrDown(np.asarray(patch)))

                patch.save(os.path.join(save_path, image_name))
            except Exception as e:
                print(e)
                print(x_, y_, w_, h_)
                print(slide.dimensions)
                continue
    finally:
        # The original never released the slide handle.
        slide.close()

    return None
def generate_image_from_xml(xml_path, cell_save_path, tiff_dict):
    """
    Parse the annotation boxes of an xml file and cut one cell image per box.

    Every ``Annotation`` element must contain a ``Cell`` element carrying the
    attributes ``X``, ``Y``, ``W``, ``H`` and ``Type``.  The annotated box is
    replaced by a fixed 608 x 608 window that shares the box centre; the
    window is read from level 0 of the slide, converted to RGB and saved as
    ``<cell_save_path>/<Type>/<xml_name>_x.._y.._w.._h...bmp``.

    :param xml_path: path of the xml annotation file
    :param cell_save_path: directory under which one folder per class is created
    :param tiff_dict: mapping from xml base name (no extension) to slide path
    :return: the slide path when the slide cannot be opened, otherwise None
    :raises SystemExit: when the xml base name is missing from ``tiff_dict``
    """
    # Side length of the square window cut around every annotation centre.
    size = 608

    collection = xml.dom.minidom.parse(xml_path).documentElement

    xml_name, _ = os.path.splitext(os.path.basename(xml_path))
    if xml_name not in tiff_dict:
        print(xml_name, 'NOT FOUND!')
        # Same effect as the former ``exit()`` call, without relying on the
        # ``site`` module having injected that helper.
        raise SystemExit

    tiff_file_path = tiff_dict[xml_name]
    annotations = collection.getElementsByTagName("Annotation")

    # Try OpenSlide first and fall back to TSlide for files it rejects.
    try:
        try:
            slide = openslide.OpenSlide(tiff_file_path)
        except Exception:
            slide = TSlide(tiff_file_path)
    except Exception:
        print("TIFF OPEN FAILED => %s" % tiff_file_path)
        return tiff_file_path

    try:
        for annotation in annotations:
            cell = annotation.getElementsByTagName("Cell")[0]
            x = int(cell.getAttribute("X"))
            y = int(cell.getAttribute("Y"))
            w = int(cell.getAttribute("W"))
            h = int(cell.getAttribute("H"))

            # Top-left corner of the size x size window centred on the box.
            x_ = int(x + w / 2 - size / 2)
            y_ = int(y + h / 2 - size / 2)
            w_, h_ = int(size), int(size)

            class_type = cell.getAttribute("Type")
            save_path = os.path.join(cell_save_path, class_type)
            os.makedirs(save_path, exist_ok=True)

            image_name = "%s_x%s_y%s_w%s_h%s.bmp" % (xml_name, x_, y_, w_, h_)
            try:
                patch = slide.read_region((x_, y_), 0, (w_, h_))
                patch = patch.convert("RGB")
                patch.save(os.path.join(save_path, image_name))
            except Exception as e:
                # Best effort: report the failing window and keep going.
                print(e)
                print(x_, y_, w_, h_)
                print(slide.dimensions)
                continue
    finally:
        # Release the slide handle even when an annotation is malformed.
        slide.close()

    return None
Ejemplo n.º 22
0
        image_data = point['cell_image']

        basename, _ = os.path.splitext(os.path.basename(slide_path))
        save_path = os.path.join(output, basename, xcp_label)
        os.makedirs(save_path, exist_ok=True)
        image_name = "%.4f_%s_%.4f_%s_%s_%s_%s_.jpg" % (xcp_accu, yolo_label,
                                                        yolo_accu, x, y, w, h)

        # get image data program
        image_data.save(os.path.join(save_path, image_name))

        # get image from slide
        image_name = "%.4f_%s_%.4f_%s_%s_%s_%s.jpg" % (xcp_accu, yolo_label,
                                                       yolo_accu, x, y, w, h)
        slide.read_region((point['x'], point['y']), 0,
                          (point['w'], point['h'])).convert("RGB").save(
                              os.path.join(save_path, image_name))


if __name__ == '__main__':
    # Example invocation of matting_job, kept for reference:
    # points_lst = []
    # slide_path = ""
    # output = ""
    # matting_job(points_lst, slide_path, output)

    # Manual check: open the slide at `path` with OpenSlide and fall back to
    # TSlide when OpenSlide rejects the file.
    path = ''
    try:
        slide = openslide.OpenSlide(path)
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still interrupts.
        slide = TSlide(path)
Ejemplo n.º 23
0
def remove_repeat_cells(key, csv_file_path):
    """
    Cut every cell listed in a csv file out of its slide and drop near-duplicates.

    Each csv row describes a box relative to a patch; the patch origin is
    recovered from the row's ``name`` with the module-level ``pattern`` regex
    and added to the box to obtain slide coordinates.  Every box is saved under
    ``<TEST_IMAGE_SAVE_PATH>/<key>/origin/<label02>/``.  A box is additionally
    saved under ``.../removal/<label02>/`` and kept in the result only when no
    previously kept box with the same label overlaps it with an IOU above 0.7.

    :param key: slide key looked up in the module-level ``tiff_dict``
    :param csv_file_path: csv file with a header row followed by rows of
        ``name, label01, accu01, label02, accu02, xmin, ymin, xmax, ymax``
    :return: list of ``(label, x, y, w, h)`` tuples kept after de-duplication
    :raises Exception: when ``key`` is unknown or the slide cannot be opened
    """
    if key not in tiff_dict:
        raise Exception("XCEPTION PREPROCESS %s NOT FOUND" % key)

    tiff_path = tiff_dict[key]

    # Try OpenSlide first and fall back to TSlide for files it rejects.
    try:
        try:
            slide = openslide.OpenSlide(tiff_path)
        except Exception:
            slide = TSlide(tiff_path)
    except Exception as err:
        # Keep the original cause attached for debugging.
        raise Exception('TIFF FILE OPEN FAILED => %s' % tiff_path) from err

    save_path = os.path.join(TEST_IMAGE_SAVE_PATH, key)
    unique_cells_collection = []
    count = 0

    try:
        # newline='' is what the csv module expects from the underlying file.
        with open(csv_file_path, newline='') as f:
            lines = csv.reader(f)
            next(lines, None)  # skip the header row

            for line in lines:
                (name, label01, accu01, label02, accu02,
                 xmin, ymin, xmax, ymax) = line
                xmin, ymin = float(xmin), float(ymin)
                xmax, ymax = float(xmax), float(ymax)
                w, h = int(xmax - xmin + 0.5), int(ymax - ymin + 0.5)

                # Patch origin encoded in the row name -> slide coordinates.
                _, start_x, start_y = re.findall(pattern, name)[0]
                x = int(int(start_x) + xmin)
                y = int(int(start_y) + ymin)

                origin_save_path = os.path.join(save_path, "origin", label02)
                removal_save_path = os.path.join(save_path, "removal", label02)
                os.makedirs(origin_save_path, exist_ok=True)
                os.makedirs(removal_save_path, exist_ok=True)

                patch = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                image_name = "%s_x%s_y%s_w%s_h%s.jpg" % (key, x, y, w, h)
                patch.save(os.path.join(origin_save_path, image_name))

                # Compare labels first so cal_IOU only runs for same-label boxes.
                is_duplicate = any(
                    label == label02
                    and cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.7
                    for label, x_, y_, w_, h_ in unique_cells_collection
                )
                if not is_duplicate:
                    unique_cells_collection.append((label02, x, y, w, h))
                    patch.save(os.path.join(removal_save_path, image_name))

                count += 1
    finally:
        # Release the slide handle even when a row is malformed.
        slide.close()

    print("ORIGIN POINTS COLLECTION LENGTH: %s" % count)
    print("AFTER DUPLICATE REMOVAL COLLECTION LENGTH: %s" %
          len(unique_cells_collection))

    return unique_cells_collection
Ejemplo n.º 24
0
def _cut_slide_patches(slide, tif, output_file_path):
    """Fan the column-wise patch cutting of one opened slide out to a process pool."""
    t1 = datetime.datetime.now()
    img_name = os.path.basename(tif).split(".")[0]
    print("Process %s ..." % img_name)

    # Only the region between the START and END ratios of each axis is cut.
    full_width, full_height = slide.dimensions
    x = int(full_width * AVAILABLE_PATCH_START_RATIO)
    y = int(full_height * AVAILABLE_PATCH_START_RATIO)
    x_end = int(full_width * AVAILABLE_PATCH_END_RATIO)
    y_end = int(full_height * AVAILABLE_PATCH_END_RATIO)
    patch_num = get_patch_num(x_end - x, y_end - y)

    output_path = os.path.join(output_file_path, img_name)
    os.makedirs(output_path, exist_ok=True)

    # Process pool; the number of workers defaults to the CPU count.
    pool = Pool()
    # Progress queue handed to every worker; presumably patch_worker puts one
    # item per written patch -- verify against patch_worker.
    in_queue = Manager().Queue()
    jobs = []
    try:
        # One job per column, each walking down from y to y_end.
        while x < x_end:
            jobs.append(
                pool.apply_async(
                    patch_worker,
                    (tif, x, y, y_end, output_path, in_queue)))
            x += DELTA

        while in_queue.qsize() + 10 < patch_num:
            # Stop polling once every job has finished; otherwise a worker
            # that failed without reporting would keep this loop alive forever.
            if all(job.ready() for job in jobs):
                break
            sleep(3)
            print("%s / %s" % (in_queue.qsize(), patch_num))
    finally:
        pool.close()
        pool.join()

    print("Calculate Patch Num %s, InCome Patch Num %s" %
          (patch_num, in_queue.qsize()))

    t2 = datetime.datetime.now()
    # NOTE: "Size" reports the end coordinates of the cut region, exactly as
    # the previous implementation did.
    print("File - %s, Size: (%s, %s), Total cost time: %s" %
          (img_name, x_end, y_end, t2 - t1))


def control_center(tifs, output_file_path):
    """
    Cut every slide in ``tifs`` into patches using a pool of worker processes.

    Slides that cannot be opened, or whose processing raises, are reported on
    stdout and skipped so the remaining slides are still handled.

    :param tifs: paths of the slide files to cut
    :param output_file_path: directory that receives one sub-folder of patches
        per slide, named after the slide file
    :return: None
    """
    t0 = datetime.datetime.now()
    for tif in tifs:
        # Try OpenSlide first and fall back to TSlide for files it rejects.
        try:
            try:
                slide = openslide.OpenSlide(tif)
            except Exception:
                slide = TSlide(tif)
        except Exception as e:
            # Nothing was opened, so there is no handle to close.
            print(str(e))
            continue

        try:
            if slide:
                _cut_slide_patches(slide, tif, output_file_path)
        except Exception as e:
            print(str(e))
        finally:
            # Always release the handle of the slide opened in this iteration.
            slide.close()

    t3 = datetime.datetime.now()
    print("TIF FILES NUM %s, TOTAL TIME COST %s" % (len(tifs), (t3 - t0)))