def worker(input_file_path, start_x, start_y, height, patch_save_path):
    """
    Cut one vertical strip of a slide into patches and save them as JPEG files.

    Starting at ``(start_x, start_y)`` a ``PATCH_SIZE`` x ``PATCH_SIZE`` region
    is read, converted from RGBA to BGR and written under a random file name;
    ``start_y`` then advances by ``DELTA`` until it reaches ``height``.

    :param input_file_path: path of the slide file to read
    :param start_x: x coordinate of the strip
    :param start_y: y coordinate of the first patch
    :param height: exclusive upper bound for the patch y coordinate
    :param patch_save_path: directory the patches are written to
    :return: number of patches processed
    """
    # Try openslide first and fall back to TSlide for files it cannot open.
    try:
        slide = openslide.OpenSlide(input_file_path)
    except Exception:
        slide = TSlide(input_file_path)

    count = 0
    try:
        while start_y < height:
            # Read the patch.
            patch = slide.read_region((start_x, start_y), 0,
                                      (PATCH_SIZE, PATCH_SIZE))
            # Convert the image format (RGBA -> BGR) for OpenCV.
            patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR)
            # Build the file name and the save path.
            name = get_random_string()
            save_path = os.path.join(patch_save_path, "%s.jpg" % name)
            # Write the file.
            cv2.imwrite(save_path, patch)
            start_y += DELTA
            count += 1
    finally:
        # Release the slide handle even when reading or writing fails.
        slide.close()
    return count
def cut_fixed_size(tif_dir, positions, size, save_path):
    """
    Cut fixed-size square images centred on each annotated box.

    :param tif_dir: folder containing the ``<tif>.tif`` / ``<tif>.kfb`` files
    :param positions: ``{tif: [(class_i, x, y, w, h), ...]}``
    :param size: edge length of the square image to cut
    :param save_path: target folder; images go to ``save_path/<class_i>/``
    """

    def get_x_y(box, size):
        # Top-left corner of a size x size window centred on the box.
        x_center = box[1] + box[3] / 2.0
        y_center = box[2] + box[4] / 2.0
        return int(x_center - size / 2.0), int(y_center - size / 2.0)

    for tif, boxes in positions.items():
        # Try the .tif with openslide, fall back to the .kfb with TSlide.
        try:
            slide = openslide.OpenSlide(os.path.join(tif_dir, tif + ".tif"))
        except Exception:
            slide = TSlide(os.path.join(tif_dir, tif + ".kfb"))

        try:
            for box in boxes:
                save_path_i = os.path.join(save_path, box[0])
                os.makedirs(save_path_i, exist_ok=True)
                x, y = get_x_y(box, size)
                cell = slide.read_region((x, y), 0, (size, size)).convert("RGB")
                # scipy.misc.imsave was removed from SciPy; the region is
                # already an image object, so save it directly.
                cell.save(
                    os.path.join(save_path_i,
                                 "{}_{}_{}.jpg".format(tif, x, y)))
        finally:
            # Release the slide handle even when a read or save fails.
            slide.close()
        print("processed: {}".format(tif))
def matting_job(points_lst, slide_path, output):
    """
    Save, for every detected point, both the supplied cell image and the
    same region re-read from the slide.

    Files go to ``output/<slide basename>/<xcp_cell_class>/``. The supplied
    image gets a name ending in ``_.jpg``; the region read from the slide
    gets the same name ending in ``.jpg``.

    :param points_lst: list of dicts with the keys ``yolo_cell_class``,
        ``yolo_cell_det``, ``xcp_cell_class``, ``xcp_cell_class_det``,
        ``x``, ``y``, ``w``, ``h`` and ``cell_image``
    :param slide_path: path of the slide file
    :param output: root output directory
    """
    # Try openslide first and fall back to TSlide for files it cannot open.
    try:
        slide = openslide.OpenSlide(slide_path)
    except Exception:
        slide = TSlide(slide_path)

    # The slide basename does not change per point, so compute it once.
    basename, _ = os.path.splitext(os.path.basename(slide_path))

    try:
        for point in points_lst:
            yolo_label = point['yolo_cell_class']
            yolo_accu = point['yolo_cell_det']
            xcp_label = point['xcp_cell_class']
            xcp_accu = point['xcp_cell_class_det']
            x, y, w, h = point['x'], point['y'], point['w'], point['h']
            image_data = point['cell_image']

            save_path = os.path.join(output, basename, xcp_label)
            os.makedirs(save_path, exist_ok=True)

            # Image supplied by the detection program (trailing underscore).
            image_name = "%.4f_%s_%.4f_%s_%s_%s_%s_.jpg" % (
                xcp_accu, yolo_label, yolo_accu, x, y, w, h)
            image_data.save(os.path.join(save_path, image_name))

            # Same region read directly from the slide.
            image_name = "%.4f_%s_%.4f_%s_%s_%s_%s.jpg" % (
                xcp_accu, yolo_label, yolo_accu, x, y, w, h)
            slide.read_region((x, y), 0, (w, h)).convert("RGB").save(
                os.path.join(save_path, image_name))
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()
def cell_sampling(xml_file, save_path, size):
    """
    Cut ``size`` x ``size`` windows around the labels of one annotation file
    and generate the matching xml files.

    The slide is expected next to the xml file, with the same name and a
    ``.tif`` or ``.kfb`` extension. If neither exists a message is printed
    and nothing is done.

    :param xml_file: path of the annotation xml file
    :param save_path: directory the jpg and xml files are written to
    :param size: edge length of the square windows
    """
    labels = get_labels(xml_file)

    filename = os.path.splitext(xml_file)[0]
    if (not os.path.isfile(filename + ".tif")) and (
            not os.path.isfile(filename + ".kfb")):
        print(filename + " doesn't exist")
        return

    # Try the .tif with openslide, fall back to the .kfb with TSlide.
    try:
        slide = openslide.OpenSlide(filename + ".tif")
    except Exception:
        slide = TSlide(filename + ".kfb")

    basename = os.path.basename(filename)
    try:
        size_x, size_y = slide.dimensions
        points_xy = get_windows(labels, size_x, size_y, size)

        # Generate jpg files.
        for (x, y) in points_xy:
            cell = slide.read_region((x, y), 0, (size, size)).convert("RGB")
            cell.save(save_path + "/" + basename + "_" + str(x) + "_" +
                      str(y) + ".jpg")
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()

    # Generate xml files.
    new_xmls = Xml(basename, save_path, points_xy, labels, size)
    new_xmls.gen_xml()

    print("processed ", xml_file)
def worker(tiff_file_path, patch_save_path, patch_range=(0, 1),
           patch_num_need=PATCH_NUM_NEED, path_size=(PATCH_SIZE, PATCH_SIZE)):
    """
    Random patch-cutting worker.

    Picks ``patch_num_need`` random top-left corners inside the requested
    fraction of the slide and saves one patch per corner under
    ``patch_save_path/<slide basename>/``.

    :param tiff_file_path: path of the TIFF file
    :param patch_save_path: directory the patches are stored in
    :param patch_range: fraction of the image to sample from; the default
        (0, 1) covers the whole image
    :param patch_num_need: number of random patches to cut
    :param path_size: patch size as (width, height), e.g. (1024, 1024)
    :return: ``(0, None)`` on success, ``(1, tiff_file_path)`` when the file
        cannot be opened
    """
    # Get the slide handle: openslide first, TSlide as fallback.
    try:
        slide = openslide.OpenSlide(tiff_file_path)
    except Exception:
        try:
            slide = TSlide(tiff_file_path)
        except Exception:
            print("TIFF OPEN FAILED => %s" % tiff_file_path)
            return 1, tiff_file_path

    try:
        width, height = slide.dimensions

        low, high = patch_range[0], patch_range[1]
        # Random x coordinates first, then random y coordinates; the order
        # of the random draws is kept so seeded runs stay reproducible.
        x_lst = [
            int(random.uniform(low, high) * width)
            for _ in range(patch_num_need)
        ]
        y_lst = [
            int(random.uniform(low, high) * height)
            for _ in range(patch_num_need)
        ]

        # Base name of the slide file, used for the folder and file names.
        basename, _ = os.path.splitext(os.path.basename(tiff_file_path))
        # Patch size.
        w, h = path_size

        # The target folder is the same for every patch: create it once.
        save_path = os.path.join(patch_save_path, basename)
        os.makedirs(save_path, exist_ok=True)

        for index, (x, y) in enumerate(zip(x_lst, y_lst)):
            # Save the patch.
            image_name = "%s_x%s_y%s_w%s_h%s_s%s.jpg" % (basename, x, y, w,
                                                         h, index)
            slide.read_region((x, y), 0, (w, h)).convert("RGB").save(
                os.path.join(save_path, image_name))
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()

    return 0, None
def cut_cells(xml_file, save_path):
    """
    Cut every annotated region of a slide at twice its bounding-box size and
    write a fixed yolo label file next to each image.

    The slide is looked up next to the xml file (same name, ``.tif`` via
    openslide or ``.kfb`` via TSlide). The annotation is centred in each
    saved image, which is why the yolo line is always
    ``0 0.50 0.50 0.50 0.50``.

    :param xml_file: path of the annotation xml file
    :param save_path: directory the .bmp and .txt files are written to
    """
    # From the .xml file name, derive the .tif/.kfb file name.
    filename = os.path.splitext(xml_file)[0]
    try:
        slide = openslide.OpenSlide(filename + ".tif")
    except Exception:
        slide = TSlide(filename + ".kfb")

    try:
        basename = os.path.basename(filename)

        # Open the .xml file.
        collection = xml.dom.minidom.parse(xml_file).documentElement
        annotations = collection.getElementsByTagName("Annotation")

        for annotation in annotations:
            coordinates = annotation.getElementsByTagName("Coordinate")
            if not coordinates:
                # Nothing to bound: min()/max() would raise on empty input.
                continue

            # Read the (x, y) coordinates.
            x_coords = [float(c.getAttribute("X")) for c in coordinates]
            y_coords = [float(c.getAttribute("Y")) for c in coordinates]

            # Axis-aligned bounding rectangle of the annotation.
            x_min, x_max = min(x_coords), max(x_coords)
            y_min, y_max = min(y_coords), max(y_coords)

            # Twice the size of the marked region, same centre.
            x = int(1.5 * x_min - 0.5 * x_max)
            y = int(1.5 * y_min - 0.5 * y_max)
            x_size = int(2 * (x_max - x_min))
            y_size = int(2 * (y_max - y_min))

            cell_name = "{}_x{}_y{}_w{}_h{}.bmp".format(
                basename, int(x_min), int(y_min), int(x_max - x_min),
                int(y_max - y_min))
            cell_path_name = os.path.join(save_path, cell_name)

            cell = slide.read_region((x, y), 0,
                                     (x_size, y_size)).convert("RGB")
            cell.save(cell_path_name)

            # Save the yolo txt file.
            txt_path_name = os.path.splitext(cell_path_name)[0] + ".txt"
            with open(txt_path_name, 'w') as f:
                f.write("0 0.50 0.50 0.50 0.50\n")
    finally:
        # Release the slide handle even when parsing or saving fails.
        slide.close()
def cell_sampling(xml_path, tif_path, save_path):
    """
    Cut one half-resolution window per label and write the coordinates of
    the labels inside each window to a text file.

    Each window is read from the slide, downsampled once with
    ``cv2.pyrDown`` and saved as ``save_path/<class_i>/<name>.bmp``. The
    matching ``.txt`` file holds one line per entry of
    ``label["add_labels"]``: class, dx, dy, width, height, with the numeric
    values halved to match the downsampled window.

    :param xml_path: path of the annotation xml file
    :param tif_path: path of the slide file
    :param save_path: root output directory
    """
    labels = get_labels(xml_path)
    # Return if there are no cells to cut.
    if not labels:
        return

    # openslide first, TSlide as fallback; give up if both fail.
    try:
        slide = openslide.OpenSlide(tif_path)
    except Exception:
        try:
            slide = TSlide(tif_path)
        except Exception:
            print("ERROR: can not open pic ", tif_path)
            # NOTE(review): terminates the whole process, as the original did.
            exit()

    try:
        basename = os.path.splitext(os.path.basename(xml_path))[0]

        for label in labels:
            window = slide.read_region(
                (label["x_win"], label["y_win"]), 0,
                (label["w_win"], label["h_win"])).convert("RGB")
            window = cv2.cvtColor(np.asarray(window), cv2.COLOR_RGB2BGR)
            window = cv2.pyrDown(window)

            class_dir = os.path.join(save_path, label["class_i"])
            basename_new = "{}_x{}_y{}".format(basename, label["x"],
                                               label["y"])

            # Save the image.
            os.makedirs(class_dir, exist_ok=True)
            cv2.imwrite(os.path.join(class_dir, basename_new + ".bmp"),
                        window)

            # Save the coordinates info, halved like the image.
            values = [[
                add_label["class_i"],
                add_label["dx"] / 2,
                add_label["dy"] / 2,
                (add_label["x_max"] - add_label["x_min"]) / 2,
                (add_label["y_max"] - add_label["y_min"]) / 2
            ] for add_label in label["add_labels"]]

            txt_path = os.path.join(class_dir, basename_new + ".txt")
            with open(txt_path, 'w') as f:
                for value in values:
                    f.write(' '.join([str(a) for a in value]) + '\n')
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()

    print("finished cutting {}, # cells: {}".format(tif_path, len(labels)))
def cut_cells(xml_file, save_path, size, position):
    """
    Cut one ``size`` x ``size`` image per annotation whose colour is a key
    of ``colors``.

    Images are written to ``save_path/<xml basename>/<colors[color]>/``.
    The slide is looked up next to the xml file (``.tif`` via openslide,
    ``.kfb`` via TSlide); if neither file exists a message is printed and
    nothing is done.

    :param xml_file: path of the annotation xml file
    :param save_path: root output directory
    :param size: edge length of the square image to cut
    :param position: two-element value passed to ``get_xy`` to place the
        annotation inside the window; also used in the file name
    """
    # Get the basename, without extension.
    basename = os.path.splitext(os.path.basename(xml_file))[0]
    wsi_name = os.path.splitext(xml_file)[0]
    if (not os.path.isfile(wsi_name + ".tif")) and (
            not os.path.isfile(wsi_name + ".kfb")):
        print(wsi_name + " doesn't exist")
        return

    try:
        slide = openslide.OpenSlide(wsi_name + ".tif")
    except Exception:
        slide = TSlide(wsi_name + ".kfb")

    count = 0
    try:
        # Open the .xml file.
        collection = xml.dom.minidom.parse(xml_file).documentElement
        annotations = collection.getElementsByTagName("Annotation")

        for annotation in annotations:
            color = annotation.getAttribute("Color")
            if color not in colors:
                continue

            coordinates = annotation.getElementsByTagName("Coordinate")
            if not coordinates:
                # Nothing to bound: min()/max() would raise on empty input.
                continue

            # Read the (x, y) coordinates.
            x_coords = [float(c.getAttribute("X")) for c in coordinates]
            y_coords = [float(c.getAttribute("Y")) for c in coordinates]
            x_min, y_min = min(x_coords), min(y_coords)
            x_max, y_max = max(x_coords), max(y_coords)

            # Get the (x, y) coordinates for read_region().
            x, y = get_xy((x_min, y_min, x_max, y_max), size, position)

            save_path_i = os.path.join(save_path, basename, colors[color])
            os.makedirs(save_path_i, exist_ok=True)

            cell = slide.read_region((x, y), 0, (size, size)).convert("RGB")
            cell.save(
                os.path.join(
                    save_path_i, "{}_x{}_y{}_px{}_py{}.jpg".format(
                        basename, int(x_min), int(y_min), position[0],
                        position[1])))
            count += 1
    finally:
        # Release the slide handle even when parsing or saving fails.
        slide.close()

    print("{}: number of cells {}. image size {}, position {}".format(
        basename, count, size, position))
def batch_process(self, images_pre):
    """
    Cut a batch of images from the slide in ``self.wsi_file``.

    :param images_pre: [[class_i, (xmin, ymin, xmax, ymax),
        ((xmin_z, ymin_z), (xmax_z, ymax_z))], ...]; the last element is
        passed to ``read_region`` as (location, size)
    :return: [[class_i, (xmin, ymin, xmax, ymax), image], ...]
    """
    # TSlide first here, openslide as the fallback.
    try:
        slide = TSlide(self.wsi_file)
    except Exception:
        slide = openslide.OpenSlide(self.wsi_file)

    try:
        return [[
            class_i, box,
            slide.read_region(region[0], 0, region[1]).convert("RGB")
        ] for class_i, box, region in
                (item[:3] for item in images_pre)]
    finally:
        # Release the slide handle even when a read fails.
        slide.close()
def get_cell_by_N(self, box, N):
    """
    Cut a single image: the given box scaled by ``N`` around its centre.

    :param box: (xmin, ymin, xmax, ymax) of the cell
    :param N: size scale factor; 1 cuts exactly the box
    :return: the cut region converted to RGB
    """
    # Calculate the scaled box, keeping the centre of the original box.
    x, y = box[0], box[1]
    w, h = box[2] - box[0], box[3] - box[1]
    x_cut, y_cut = int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)
    w_cut, h_cut = int(N * w), int(N * h)

    # TSlide first here, openslide as the fallback.
    try:
        slide = TSlide(self.wsi_file)
    except Exception:
        slide = openslide.OpenSlide(self.wsi_file)

    # Cut the Nx sized image.
    try:
        return slide.read_region((x_cut, y_cut), 0,
                                 (w_cut, h_cut)).convert("RGB")
    finally:
        # Release the slide handle even when the read fails.
        slide.close()
def generate_classified_cell_images(key, classified_data_save_path):
    """
    Cut the cell images listed in the ``.txt`` files of one slide.

    Every ``.txt`` file in ``classified_data_save_path/<key>/`` holds
    comma-separated lines, either ``label,label_,x,y,w,h`` or
    ``label,x,y,w,h``. Each line produces one jpg in
    ``<txt folder>/<txt basename>/<label>/``.

    :param key: key of the slide in ``tiff_dict``
    :param classified_data_save_path: root folder of the classified data
    :return: ``0`` on success, ``(1, key)`` when processing a file fails
    :raises Exception: when the slide cannot be opened
    """
    tiff_path = tiff_dict[key]

    # openslide first, TSlide as fallback; raise if both fail.
    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        try:
            slide = TSlide(tiff_path)
        except Exception as err:
            raise Exception('TIFF FILE OPEN FAILED => %s' % tiff_path) from err

    try:
        txt_data_path = os.path.join(classified_data_save_path, key)
        txts = os.listdir(txt_data_path)

        try:
            for txt in txts:
                if not txt.endswith('.txt'):
                    continue

                basename, _ = os.path.splitext(os.path.basename(txt))
                with open(os.path.join(txt_data_path, txt)) as f:
                    lines = f.readlines()

                image_save_path = os.path.join(txt_data_path, basename)
                for line in lines:
                    items = line.replace('\n', '').split(',')
                    if len(items) == 6:
                        label, label_, x, y, w, h = items
                        image_name = "%s_%s_x%s_y%s_w%s_h%s.jpg" % (
                            key, label_, x, y, w, h)
                    else:
                        label, x, y, w, h = items
                        image_name = "%s_x%s_y%s_w%s_h%s.jpg" % (key, x, y,
                                                                 w, h)

                    save_path = os.path.join(image_save_path, label)
                    os.makedirs(save_path, exist_ok=True)

                    x, y, w, h = int(x), int(y), int(w), int(h)
                    patch = slide.read_region((x, y), 0,
                                              (w, h)).convert("RGB")
                    patch.save(os.path.join(save_path, image_name))
        except Exception:
            # Best effort: report the failing slide to the caller.
            return 1, key
        return 0
    finally:
        # Release the slide handle on every exit path.
        slide.close()
def read_label(self, wsi_name):
    """
    Read the label image of a slide file.

    When ``self.os`` is "linux" the slide is opened with TSlide and its
    ``"label"`` associated image is returned, converted to RGB. Otherwise
    the function ``GetLableInfoPathFunc`` of ``./lib/ImageOperationLib.dll``
    is called and, when it returns a non-zero value, the returned byte
    buffer is opened with ``Image.open``.

    :param wsi_name: path of the slide file
    :return: the label image, or None when the DLL call returns a zero
        value or when any exception is raised (a message is printed then)
    """
    try:
        if self.os == "linux":
            from tslide.tslide import TSlide
            slide = TSlide(wsi_name)
            # print(slide.associated_images.items())
            image = slide.associated_images["label"].convert(
                "RGB")  # RGB Image instance
            # w, h = image.size
            # image.crop((0, 0, w, h//2)).save("./res/half.jpg")
            # image.save("./res/label.jpg")
            # NOTE(review): the slide is not closed if the lookup or the
            # conversion above raises.
            slide.close()
            return image
        else:
            _lib = WinDLL("./lib/ImageOperationLib.dll")
            # Signature: path in; data pointer, length, width, height out.
            _lib.GetLableInfoPathFunc.argtypes = [
                c_char_p,
                POINTER(POINTER(c_ubyte)),
                POINTER(c_int),
                POINTER(c_int),
                POINTER(c_int)
            ]
            _lib.GetLableInfoPathFunc.restype = c_int
            path = c_char_p(wsi_name.encode("utf-8"))
            # Output arguments filled in by the DLL call.
            imageData = POINTER(c_ubyte)()
            length = c_int()
            width = c_int()
            height = c_int()
            res = c_int()
            res = _lib.GetLableInfoPathFunc(path, byref(imageData),
                                            byref(length), byref(width),
                                            byref(height))
            if res:
                # with open("./res/label_win.jpg", "wb") as f:
                #     f.write(string_at(imageData, length.value))
                import numpy as np
                # View the returned buffer as an array of `length` bytes.
                narray = np.ctypeslib.as_array(imageData,
                                               shape=(length.value, ))
                from io import BytesIO
                buf = BytesIO(narray)
                # Image.open(buf).save("./res/label_win.jpg")
                # presumably the buffer holds an encoded image file
                # (the commented-out code writes it as .jpg) - TODO confirm
                return Image.open(buf)
    except:
        # Any failure (including a missing "label" image) ends up here.
        print(wsi_name + " cannot be processed")
        return None
def asap_to_image(input_file_path, output_file_path):
    """
    Cut the central area of a slide into patches using a process pool.

    One ``worker`` job is submitted per column, with columns spaced
    ``DELTA`` apart; the area is bounded by ``AVAILABLE_PATCH_START_RATIO``
    and ``AVAILABLE_PATCH_END_RATIO`` of the slide dimensions. Patches are
    written to ``output_file_path/<slide name>/``.

    :param input_file_path: path of the input slide file
    :param output_file_path: root output directory
    :return: None
    """
    t0 = datetime.datetime.now()

    # Try openslide first and fall back to TSlide for files it cannot open.
    try:
        slide = openslide.OpenSlide(input_file_path)
    except Exception:
        slide = TSlide(input_file_path)

    if not slide:
        return

    img_name = os.path.basename(input_file_path).split(".")[0]
    print("Process %s ..." % img_name)

    # Only the dimensions are needed here: every worker reopens the file
    # itself, so the handle is released right away.
    try:
        width, height = slide.dimensions
    finally:
        slide.close()

    # Read column by column, only the middle (configured ratio) of the image.
    x, y, width, height = int(width * AVAILABLE_PATCH_START_RATIO), \
                          int(height * AVAILABLE_PATCH_START_RATIO), \
                          int(width * AVAILABLE_PATCH_END_RATIO), \
                          int(height * AVAILABLE_PATCH_END_RATIO)

    patch_num = get_patch_num(width - x, height - y)

    output_path = os.path.join(output_file_path, img_name)
    os.makedirs(output_path, exist_ok=True)

    # Create the process pool; the with-block shuts it down when done.
    with ProcessPoolExecutor(max_workers=MATTING_PROCESS_NUM) as executor:
        t00 = datetime.datetime.now()
        print("Adding Job to Pool...")

        # Submit one cutting job per column.
        tasks = []
        while x < width:
            tasks.append(
                executor.submit(worker, input_file_path, x, y, height,
                                output_path))
            x += DELTA

        t01 = datetime.datetime.now()
        print("Done, cost: %s" % (t01 - t00))
        print("Total Job Count: %s, Worker Count: %s" %
              (len(tasks), MATTING_PROCESS_NUM))

        job_count = len(tasks)
        patch_count = 0
        for future in as_completed(tasks):
            count = future.result()
            patch_count += count
            job_count -= 1
            print("One Job Done, Got %s patches, last Job Count: %s" %
                  (count, job_count))

    t1 = datetime.datetime.now()
    print("File - %s, Size: (%s, %s), Calculate Patch Num %s, Got Patch Num %s, Total cost time: %s" % (
        img_name, width, height, patch_num, patch_count, t1 - t0))
def cut_cells(tiff_path, points_lst, output_dir, N=1):
    """
    Cut one image per point, scaled by ``N`` around the centre of its box.

    Images are written to ``output_dir/<slide basename>/<label>/``. The file
    name starts with ``1-p`` followed by ``1 - accuracy``.

    :param tiff_path: path of the slide file
    :param points_lst: list of dicts with the keys ``label``, ``accuracy``,
        ``x``, ``y``, ``w`` and ``h``
    :param output_dir: root output directory
    :param N: size scale factor; 1 cuts exactly the box
    """
    basename, _ = os.path.splitext(os.path.basename(tiff_path))

    # Try openslide first and fall back to TSlide for files it cannot open.
    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        slide = TSlide(tiff_path)

    try:
        for point in points_lst:
            cell_save_dir = os.path.join(output_dir, basename, point['label'])
            os.makedirs(cell_save_dir, exist_ok=True)

            x, y, w, h = point['x'], point['y'], point['w'], point['h']
            image_name = "1-p{:.10f}_{}_x{}_y{}_w{}_h{}_{}X.jpg".format(
                1 - point['accuracy'], basename, x, y, w, h, N)
            cell_save_path = os.path.join(cell_save_dir, image_name)

            # Scale the box by N while keeping its centre.
            location = (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2))
            region_size = (int(N * w), int(N * h))
            slide.read_region(location, 0,
                              region_size).convert("RGB").save(cell_save_path)
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()
def patch_worker(input_image_path, start_x, start_y, height, patch_save_path,
                 in_queue):
    """
    Cut one column of a tif image with the configured patch size and step.

    Patches are saved as ``<start_x>_<start_y>.jpg``; one item is put on
    ``in_queue`` per patch so the caller can track progress.

    :param input_image_path: path of the tif file
    :param start_x: x coordinate of the cutting start point
    :param start_y: y coordinate of the cutting start point
    :param height: exclusive upper bound of the cutting area (y direction)
    :param patch_save_path: directory the patches are saved in
    :param in_queue: queue used to count the processed patches
    :return: no return value
    """
    # Open the tif file: openslide first, TSlide as fallback.
    try:
        img_data = openslide.OpenSlide(input_image_path)
    except Exception:
        img_data = TSlide(input_image_path)

    try:
        while start_y < height:
            # Read the patch.
            patch = img_data.read_region((start_x, start_y), 0,
                                         (PATCH_SIZE, PATCH_SIZE))
            # Convert the image format (RGBA -> BGR) for OpenCV.
            patch = cv2.cvtColor(np.asarray(patch), cv2.COLOR_RGBA2BGR)

            in_queue.put(1)

            # Build the save path from the patch coordinates.
            save_path = os.path.join(patch_save_path,
                                     "{}_{}.jpg".format(start_x, start_y))
            # Write the file.
            cv2.imwrite(save_path, patch)
            start_y += DELTA
    finally:
        # Close the handle even when reading or writing fails.
        img_data.close()
def cut_same_size(tif_dir, positions, save_path):
    """
    Cut every annotated box at exactly its own size.

    :param tif_dir: folder containing the ``<tif>.tif`` / ``<tif>.kfb`` files
    :param positions: ``{tif: [(class_i, x, y, w, h), ...]}``
    :param save_path: target folder; images are written to
        ``save_path/<class_i>/`` and named ``tif_x_y_w_h.jpg``
    """
    for tif, boxes in positions.items():
        # Try the .tif with openslide, fall back to the .kfb with TSlide.
        try:
            slide = openslide.OpenSlide(os.path.join(tif_dir, tif + ".tif"))
        except Exception:
            slide = TSlide(os.path.join(tif_dir, tif + ".kfb"))

        try:
            for class_i, x, y, w, h in (box[:5] for box in boxes):
                save_path_i = os.path.join(save_path, class_i)
                os.makedirs(save_path_i, exist_ok=True)

                cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                cell.save(
                    os.path.join(
                        save_path_i,
                        "{}_x{}_y{}_w{}_h{}.jpg".format(tif, x, y, w, h)))
        finally:
            # Release the slide handle even when a read or save fails.
            slide.close()

        print("processed: {}".format(tif))
def read_meta(self):
    """
    Read the meta info of the wsi file into ``self.meta``.

    Keys written: ``m``/``n`` (dimensions of level 0), ``mtop``/``ntop``
    (dimensions of the last level), ``level_downsamples`` (downsample factor
    of the last level), ``thumbnail`` (image of the last level's size) and
    ``thumbnail_blur`` (the thumbnail after a Gaussian blur of radius 16).
    """
    # TSlide first here, openslide as the fallback.
    try:
        slide = TSlide(self.wsi_file)
    except Exception:
        slide = openslide.OpenSlide(self.wsi_file)

    try:
        top_level = slide.level_count - 1

        self.meta['m'], self.meta['n'] = slide.level_dimensions[0]
        self.meta['mtop'], self.meta['ntop'] = slide.level_dimensions[
            top_level]
        self.meta['level_downsamples'] = slide.level_downsamples[top_level]

        top_size = (self.meta['mtop'], self.meta['ntop'])
        try:
            self.meta['thumbnail'] = slide.get_thumbnail(top_size)
        except Exception:
            # Fall back to reading the whole last level directly.
            self.meta['thumbnail'] = slide.read_region((0, 0), top_level,
                                                       top_size)

        self.meta['thumbnail_blur'] = self.meta['thumbnail'].filter(
            ImageFilter.GaussianBlur(radius=16))
    finally:
        # Release the slide handle even when reading fails.
        slide.close()
def cell_sampling(xml_file, tiff_path, save_path, size, scale):
    """
    Cut ``size`` x ``size`` windows around the labels of one slide, rescale
    them by ``scale`` and generate the matching xml files.

    :param xml_file: path of the annotation xml file
    :param tiff_path: path of the slide file
    :param save_path: directory the jpg and xml files are written to
    :param size: edge length of the windows read from the slide
    :param scale: resize factor applied before saving (e.g. 0.5 turns a
        1216 window into a 608 image)
    """
    labels = get_labels(xml_file)

    print("PROCESSING %s ..." % tiff_path)

    # openslide first, TSlide as fallback; give up if both fail.
    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        try:
            slide = TSlide(tiff_path)
        except Exception:
            print("ERROR #", "can not open pic ", tiff_path)
            # NOTE(review): terminates the whole process, as the original did.
            exit()

    # Bound before the loop so it is defined even when there are no windows.
    filename, _ = os.path.splitext(os.path.basename(tiff_path))
    # Need to scale the picture, e.g. from 1216 to 608.
    scaled_size = int(size * scale + 0.5)

    try:
        size_x, size_y = slide.dimensions

        # Windows are generated per cell instead of listing the whole picture.
        points_xy = get_windows_new(labels, size_x, size_y, size, tiff_path)
        points_num = len(points_xy)

        # Generate jpg files.
        for i, (x, y) in enumerate(points_xy):
            if (i % 100) == 0:
                print("# process # ", i, '/', points_num - 1, x, y, size,
                      size)

            cell = slide.read_region((x, y), 0, (size, size)).convert("RGB")
            image_file_name = save_path + "/" + filename + "_" + str(
                x) + "_" + str(y) + ".jpg"
            cell = cell.resize((scaled_size, scaled_size))
            cell.save(image_file_name)
    finally:
        # Release the slide handle even when a read or save fails.
        slide.close()

    # Generate xml files.
    new_xmls = Xml(filename, save_path, points_xy, labels, size, scale)
    new_xmls.gen_xml()

    # Report the number of windows (the loop index was one less than the
    # count and undefined when there were no windows).
    print("INFO# ", "small pics num is ", points_num)
    print("INFO# ", "processed ", xml_file)
def main(input_file_path, task_id, output_file_path="output"):
    """
    Cut the central area of a slide into patches, run cell segmentation on
    them in batches of 200 and remove the patch folder afterwards.

    :param input_file_path: path of the input slide file
    :param task_id: task identifier; not used by the current code
    :param output_file_path: root output directory
    :return: ``(get_result_tag(results), results)`` where ``results`` merges
        the dicts returned by ``cell_segmentation`` for every batch
    """
    # Function-scope import: only this function needs it.
    import shutil

    # Create the output directory.
    # NOTE(review): this is created under CURRENT_WORKING_PATH while the
    # patches below use output_file_path as given - confirm both agree.
    os.makedirs(os.path.join(CURRENT_WORKING_PATH, output_file_path),
                exist_ok=True)

    t0 = datetime.datetime.now()

    # Try openslide first and fall back to TSlide for files it cannot open.
    try:
        slide = openslide.OpenSlide(input_file_path)
    except Exception:
        slide = TSlide(input_file_path)

    if not slide:
        return None

    img_name = os.path.basename(input_file_path).split(".")[0]
    print("Process %s ..." % img_name)

    # Only the dimensions are needed here: every worker reopens the file
    # itself, so the handle is released right away.
    try:
        width, height = slide.dimensions
    finally:
        slide.close()

    # Read column by column, only the middle (configured ratio) of the image.
    x, y, width, height = int(width * AVAILABLE_PATCH_START_RATIO), \
                          int(height * AVAILABLE_PATCH_START_RATIO), \
                          int(width * AVAILABLE_PATCH_END_RATIO), \
                          int(height * AVAILABLE_PATCH_END_RATIO)

    patch_num = get_patch_num(width - x, height - y)

    output_path = os.path.join(output_file_path, img_name)
    os.makedirs(output_path, exist_ok=True)

    # Create the process pool; the with-block shuts it down when done.
    with ProcessPoolExecutor(max_workers=MATTING_PROCESS_NUM) as executor:
        t00 = datetime.datetime.now()
        print("Adding Job to Pool...")

        # Submit one cutting job per column.
        tasks = []
        while x < width:
            tasks.append(
                executor.submit(worker, input_file_path, x, y, height,
                                output_path))
            x += DELTA

        t01 = datetime.datetime.now()
        print("Done, cost: %s" % (t01 - t00))
        print("Total Job Count: %s, Worker Count: %s" %
              (len(tasks), MATTING_PROCESS_NUM))

        job_count = len(tasks)
        patch_count = 0
        for future in as_completed(tasks):
            count = future.result()
            patch_count += count
            job_count -= 1
            print("One Job Done, Got %s patches, rest Job Count: %s" %
                  (count, job_count))

    t1 = datetime.datetime.now()
    print(
        "File - %s, Size: (%s, %s), Calculate Patch Num %s, Got Patch Num %s, Total cost time: %s"
        % (img_name, width, height, patch_num, patch_count, t1 - t0))

    print("Algorithm Analysing Engine Start...")

    patch_lst = FilesScanner(output_path).get_files()

    # Feed the patches to the segmentation engine in fixed-size batches.
    delta = 200
    batches = [
        patch_lst[i:i + delta] for i in range(0, len(patch_lst), delta)
    ]

    results = {}
    t = len(batches)
    for i, batch in enumerate(batches):
        print("init_cell_seg...")
        init_cell_seg(batch)
        print("cell_segmentation...")
        result = cell_segmentation(det_thresh=0.1)

        # Merge the per-batch result lists key by key.
        for key, value in result.items():
            if key in results:
                results[key].extend(value)
            else:
                results[key] = value

        progress = "%.2f" % ((i + 1) / t)
        print(progress)

    # Remove the temporary patch folder without going through a shell
    # (replaces "rm -rf" with hand-escaped spaces).
    shutil.rmtree(output_path, ignore_errors=True)

    return get_result_tag(results), results
def generate_image_from_xml(xml_path, cell_save_path, tiff_dict):
    """
    Parse the annotated cell boxes of a slide from an xml file and generate
    one cell image per annotation.

    Images are written to ``cell_save_path/<Type>/`` as bmp files. For a
    slide detected as 40x the patch is downsampled once with
    ``cv2.pyrDown`` before it is saved.

    :param xml_path: path of the xml file
    :param cell_save_path: directory the cell images are generated in
    :param tiff_dict: mapping from xml base name to slide file path
    :return: None on success, the slide path when the slide cannot be opened
    """
    collection = xml.dom.minidom.parse(xml_path).documentElement

    xml_name, _ = os.path.splitext(os.path.basename(xml_path))
    if xml_name not in tiff_dict:
        print(xml_name, 'NOT FOUND!')
        # NOTE(review): terminates the whole process, as the original did.
        exit()

    # Path of the original slide.
    tiff_file_path = tiff_dict[xml_name]

    annotations = collection.getElementsByTagName("Annotation")

    # Open the slide: openslide first, TSlide as fallback.
    try:
        slide = openslide.OpenSlide(tiff_file_path)
    except Exception:
        try:
            slide = TSlide(tiff_file_path)
        except Exception:
            print("TIFF OPEN FAILED => %s" % tiff_file_path)
            return tiff_file_path

    try:
        # NOTE(review): this tests the xml path, as the original did; it
        # looks like the slide path was meant - confirm before changing.
        if xml_path.endswith(".kfb"):
            mpp = float(slide.properties['openslide.mpp-x'])
            # Pick the closer of 0.25 (40x) and 0.5 (20x); a tie counts as
            # 20x.
            magnification = 40 if abs(mpp - 0.25) < abs(mpp - 0.5) else 20
        else:
            magnification = 20

        for annotation in annotations:
            cell = annotation.getElementsByTagName("Cell")[0]

            x_ = int(cell.getAttribute("X"))
            y_ = int(cell.getAttribute("Y"))
            w_ = int(cell.getAttribute("W"))
            h_ = int(cell.getAttribute("H"))

            class_type = cell.getAttribute("Type")
            save_path = os.path.join(cell_save_path, class_type)
            os.makedirs(save_path, exist_ok=True)

            image_name = "%s_x%s_y%s_w%s_h%s.bmp" % (xml_name, x_, y_, w_, h_)

            try:
                patch = slide.read_region((x_, y_), 0,
                                          (w_, h_)).convert("RGB")
                if magnification == 40:
                    # Downsample the patch that was just read (the original
                    # referenced an undefined name and saved it unscaled).
                    patch = Image.fromarray(cv2.pyrDown(np.asarray(patch)))
                patch.save(os.path.join(save_path, image_name))
            except Exception as e:
                # Best effort: report the failing box and go on.
                print(e)
                print(x_, y_, w_, h_)
                print(slide.dimensions)
                continue
    finally:
        # Release the slide handle on every exit path.
        slide.close()

    return None
def generate_image_from_xml(xml_path, cell_save_path, tiff_dict, size=608):
    """
    Parse the annotated cell boxes of a slide from an xml file and generate
    one fixed-size image centred on each annotated cell.

    Images are written to ``cell_save_path/<Type>/`` as bmp files.

    :param xml_path: path of the xml file
    :param cell_save_path: directory the cell images are generated in
    :param tiff_dict: mapping from xml base name to slide file path
    :param size: edge length of the square window cut around each cell
    :return: None on success, the slide path when the slide cannot be opened
    """
    collection = xml.dom.minidom.parse(xml_path).documentElement

    xml_name, _ = os.path.splitext(os.path.basename(xml_path))
    if xml_name not in tiff_dict:
        print(xml_name, 'NOT FOUND!')
        # NOTE(review): terminates the whole process, as the original did.
        exit()

    # Path of the original slide.
    tiff_file_path = tiff_dict[xml_name]

    annotations = collection.getElementsByTagName("Annotation")

    # Open the slide: openslide first, TSlide as fallback.
    try:
        slide = openslide.OpenSlide(tiff_file_path)
    except Exception:
        try:
            slide = TSlide(tiff_file_path)
        except Exception:
            print("TIFF OPEN FAILED => %s" % tiff_file_path)
            return tiff_file_path

    try:
        for annotation in annotations:
            cell = annotation.getElementsByTagName("Cell")[0]

            x = int(cell.getAttribute("X"))
            y = int(cell.getAttribute("Y"))
            w = int(cell.getAttribute("W"))
            h = int(cell.getAttribute("H"))

            # Square window of the requested size, centred on the cell box.
            x_ = int(x + w / 2 - size / 2)
            y_ = int(y + h / 2 - size / 2)
            w_, h_ = int(size), int(size)

            class_type = cell.getAttribute("Type")
            save_path = os.path.join(cell_save_path, class_type)
            os.makedirs(save_path, exist_ok=True)

            image_name = "%s_x%s_y%s_w%s_h%s.bmp" % (xml_name, x_, y_, w_, h_)

            try:
                patch = slide.read_region((x_, y_), 0, (w_, h_))
                patch = patch.convert("RGB")
                patch.save(os.path.join(save_path, image_name))
            except Exception as e:
                # Best effort: report the failing box and go on.
                print(e)
                print(x_, y_, w_, h_)
                print(slide.dimensions)
                continue
    finally:
        # Release the slide handle on every exit path.
        slide.close()

    return None
image_data = point['cell_image'] basename, _ = os.path.splitext(os.path.basename(slide_path)) save_path = os.path.join(output, basename, xcp_label) os.makedirs(save_path, exist_ok=True) image_name = "%.4f_%s_%.4f_%s_%s_%s_%s_.jpg" % (xcp_accu, yolo_label, yolo_accu, x, y, w, h) # get image data program image_data.save(os.path.join(save_path, image_name)) # get image from slide image_name = "%.4f_%s_%.4f_%s_%s_%s_%s.jpg" % (xcp_accu, yolo_label, yolo_accu, x, y, w, h) slide.read_region((point['x'], point['y']), 0, (point['w'], point['h'])).convert("RGB").save( os.path.join(save_path, image_name)) if __name__ == '__main__': # points_lst = [] # slide_path = "" # output = "" # matting_job(points_lst, slide_path, output) path = '' try: slide = openslide.OpenSlide(path) except: slide = TSlide(path)
def remove_repeat_cells(key, csv_file_path):
    """
    Cut every cell listed in a csv file and drop near-duplicate cells.

    Each row (after the header) holds ``name, label01, accu01, label02,
    accu02, xmin, ymin, xmax, ymax`` with the box relative to the patch
    whose offset is parsed from ``name`` with the module-level ``pattern``.
    Every cell is saved under ``TEST_IMAGE_SAVE_PATH/<key>/origin/<label02>``.
    A cell is a duplicate when an already kept cell with the same label has
    an IOU above 0.7; the others are also saved under ``.../removal/<label02>``.

    :param key: key of the slide in ``tiff_dict``
    :param csv_file_path: path of the csv file
    :return: list of the kept cells as ``(label02, x, y, w, h)``
    :raises Exception: when the key is unknown or the slide cannot be opened
    """
    if key not in tiff_dict:
        raise Exception("XCEPTION PREPROCESS %s NOT FOUND" % key)

    tiff_path = tiff_dict[key]

    # openslide first, TSlide as fallback; raise if both fail.
    try:
        slide = openslide.OpenSlide(tiff_path)
    except Exception:
        try:
            slide = TSlide(tiff_path)
        except Exception as err:
            raise Exception('TIFF FILE OPEN FAILED => %s' % tiff_path) from err

    try:
        save_path = os.path.join(TEST_IMAGE_SAVE_PATH, key)

        with open(csv_file_path) as f:
            lines = csv.reader(f)

            unique_cells_collection = []

            # Skip the header row.
            next(lines, None)

            count = 0
            for line in lines:
                (name, label01, accu01, label02, accu02, xmin, ymin, xmax,
                 ymax) = line
                xmin, ymin = float(xmin), float(ymin)
                xmax, ymax = float(xmax), float(ymax)
                # Width and height rounded to the nearest integer.
                w, h = int(xmax - xmin + 0.5), int(ymax - ymin + 0.5)

                # Turn the patch-relative corner into slide coordinates.
                _, start_x, start_y = re.findall(pattern, name)[0]
                x = int(int(start_x) + xmin)
                y = int(int(start_y) + ymin)

                origin_save_path = os.path.join(save_path, "origin", label02)
                removal_save_path = os.path.join(save_path, "removal",
                                                 label02)
                os.makedirs(origin_save_path, exist_ok=True)
                os.makedirs(removal_save_path, exist_ok=True)

                patch = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                image_name = "%s_x%s_y%s_w%s_h%s.jpg" % (key, x, y, w, h)
                patch.save(os.path.join(origin_save_path, image_name))

                # Duplicate: same label and a large overlap with a kept cell.
                is_duplicate = any(
                    cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.7
                    and label == label02
                    for label, x_, y_, w_, h_ in unique_cells_collection)
                if not is_duplicate:
                    unique_cells_collection.append((label02, x, y, w, h))
                    patch.save(os.path.join(removal_save_path, image_name))

                count += 1

            print("ORIGIN POINTS COLLECTION LENGTH: %s" % count)
            print("AFTER DUPLICATE REMOVAL COLLECTION LENGTH: %s" %
                  len(unique_cells_collection))

            return unique_cells_collection
    finally:
        # Release the slide handle on every exit path.
        slide.close()
def control_center(tifs, output_file_path):
    """
    Multi-process cutting controller.

    For every tif file one ``patch_worker`` job is started per column of the
    central area (bounded by ``AVAILABLE_PATCH_START_RATIO`` and
    ``AVAILABLE_PATCH_END_RATIO``), and progress is printed until almost all
    expected patches are reported. Errors of one file are printed and the
    next file is processed.

    :param tifs: paths of the tif files to cut
    :param output_file_path: root output directory for the patch files
    :return: None
    """
    t0 = datetime.datetime.now()

    for tif in tifs:
        # Reset per file so the cleanup below never touches a handle from a
        # previous iteration or an unbound name.
        slide = None
        try:
            # Read the image: openslide first, TSlide as fallback.
            try:
                slide = openslide.OpenSlide(tif)
            except Exception:
                slide = TSlide(tif)

            if slide:
                t1 = datetime.datetime.now()

                img_name = os.path.basename(tif).split(".")[0]
                print("Process %s ..." % img_name)

                # Use multiple processes; Pool() takes its default size.
                pool = Pool()
                try:
                    # Queue used to count the cut patches.
                    in_queue = Manager().Queue()

                    width, height = slide.dimensions

                    # Read column by column, only the middle (configured
                    # ratio) of the image.
                    x, y, width, height = int(width * AVAILABLE_PATCH_START_RATIO), \
                                          int(height * AVAILABLE_PATCH_START_RATIO), \
                                          int(width * AVAILABLE_PATCH_END_RATIO), \
                                          int(height * AVAILABLE_PATCH_END_RATIO)

                    patch_num = get_patch_num(width - x, height - y)

                    output_path = os.path.join(output_file_path, img_name)
                    os.makedirs(output_path, exist_ok=True)

                    # Start one cutting job per column.
                    while x < width:
                        pool.apply_async(
                            patch_worker,
                            (tif, x, y, height, output_path, in_queue))
                        x += DELTA

                    # NOTE(review): worker errors are not collected, so this
                    # loop keeps waiting if too many patches never arrive.
                    while in_queue.qsize() + 10 < patch_num:
                        sleep(3)
                        print("%s / %s" % (in_queue.qsize(), patch_num))
                finally:
                    # Always shut the pool down, also when an error occurs.
                    pool.close()
                    pool.join()

                print("Calculate Patch Num %s, InCome Patch Num %s" %
                      (patch_num, in_queue.qsize()))

                t2 = datetime.datetime.now()
                print("File - %s, Size: (%s, %s), Total cost time: %s" %
                      (img_name, width, height, t2 - t1))
        except Exception as e:
            print(str(e))
        finally:
            # Close the handle only when the file was actually opened.
            if slide is not None:
                slide.close()

    t3 = datetime.datetime.now()
    print("TIF FILES NUM %s, TOTAL TIME COST %s" % (len(tifs), (t3 - t0)))