def cut_cells(filename, labels_csv, save_path, factor, N):
    # predicted boxes come from the csv; manually marked boxes come from the paired xml
    labels = read_labels_csv(labels_csv)
    marked_boxes = read_labels_xml(os.path.splitext(filename)[0] + ".xml")
    try:
        slide = openslide.OpenSlide(filename)
    except:
        slide = TSlide(filename)
    basename = os.path.splitext(os.path.basename(filename))[0]
    parent_d = os.path.basename(os.path.dirname(filename))
    save_path = os.path.join(save_path, parent_d, basename)
    for box in labels:
        x, y, w, h, p, label = box
        # if the predicted box overlaps an xml annotation, record the marked class in the filename
        marked_class_i = is_overlapped(marked_boxes, box, factor)
        if marked_class_i:
            image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                p, marked_class_i, basename, x, y, w, h, N)
        else:
            image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                p, basename, x, y, w, h, N)
        save_path_i = os.path.join(save_path, label)
        os.makedirs(save_path_i, exist_ok=True)
        image_fullname = os.path.join(save_path_i, image_name)
        # crop an N-times-enlarged region centred on the labelled box
        x_N, y_N = int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)
        w_N, h_N = int(N * w), int(N * h)
        slide.read_region((x_N, y_N), 0, (w_N, h_N)).convert("RGB").save(image_fullname)
    slide.close()
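# Example call for cut_cells above (a minimal sketch; the paths, factor and N values
# are illustrative assumptions, not values shipped with this repo). The saved crop is
# N times the labelled box and centred on it: the top-left corner shifts to
# (x + (1 - N) * w / 2, y + (1 - N) * h / 2) and the region size is (N * w, N * h).
#
#   cut_cells(filename="/data/slides/sample.tif",      # hypothetical slide path
#             labels_csv="/data/slides/sample.csv",    # hypothetical csv of predicted boxes
#             save_path="/data/cells",                 # hypothetical output root
#             factor=0.3,
#             N=4)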
def cut_cells(self, tifname, new_dict, save_path):
    """
    :param tifname: full path name of .tif/.kfb file
    :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
    :param save_path: image saving path (note: image is saved under class_i)
    :output format: save_path/diagnosis/tifbasename/class_i/tifname_x_y_w_h.jpg
                    (note: x, y here is relative to wsi)
    """
    try:
        slide = openslide.OpenSlide(tifname)
    except:
        slide = TSlide(tifname)
    basename = os.path.splitext(os.path.basename(tifname))[0]
    for x_y, boxes in new_dict.items():
        for box in boxes:
            # image naming: tifname_x_y_w_h.jpg
            x = int(x_y.split('_')[0]) + int(box[4][0])
            y = int(x_y.split('_')[1]) + int(box[4][1])
            w = int(box[4][2])
            h = int(box[4][3])
            image_name = "{}_x{}_y{}_w{}_h{}.jpg".format(
                basename, x, y, w, h)
            save_path_i = os.path.join(save_path, str(box[2]))
            os.makedirs(save_path_i, exist_ok=True)
            image_fullname = os.path.join(save_path_i, image_name)
            slide.read_region((x, y), 0, (w, h)).convert("RGB").save(image_fullname)
    slide.close()
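# Hypothetical new_dict entry for the cut_cells method above (values invented for
# illustration; only the structure matters). The "x_y" key encodes a patch's top-left
# corner in WSI coordinates, and box[4] = (x, y, w, h) is the cell box relative to
# that patch, so the crop origin is key offset + box offset.
#
#   new_dict = {
#       "20480_10240": [
#           ["ASCUS", 0.91, "HSIL", 0.73, (120, 85, 60, 64)],
#       ],
#   }
#
# With this entry the crop origin is (20480 + 120, 10240 + 85) = (20600, 10325), and
# the image is written to <save_path>/HSIL/<basename>_x20600_y10325_w60_h64.jpg.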
def gen_np_array_mem(self, results, classes=cfg.darknet.classes,
                     det=cfg.xception.det1, size=cfg.xception.size):
    """
    :param classes: [class_i,]
    :param det: detection-score threshold from the darknet prediction; boxes at or
                below it are skipped
    :param size: image size to cut, default 299, as used by Xception/Inception
    :param results: dict generated by the darknet prediction step:
                    {x_y: [(class_i, det, (x,y,w,h)),]}
    :return: cell_list: list of per-cell numpy arrays, in order
             cell_index: {index: [x_y, [class_i, det, (x,y,w,h)]]}, where index is the
                         cell's position in cell_list, x_y is the source patch/jpg name,
                         and [class_i, det, (x,y,w,h)] is the cell info from darknet
    """

    def resize_img(img, size):
        # zero-pad the short side so the crop is square
        img_cropped = img.crop(
            (-((size - img.size[0]) / 2), -((size - img.size[1]) / 2),
             img.size[0] + (size - img.size[0]) / 2,
             img.size[1] + (size - img.size[1]) / 2))
        # the crop is square now, so only a resize is needed
        img_resized = img_cropped.resize((size, size))
        return img_resized

    try:
        slide = openslide.OpenSlide(self.input_file)
    except:
        slide = TSlide(self.input_file)
    cell_list = []
    cell_index = {}
    index = 0
    for x_y, boxes in results.items():
        for box in boxes:
            if box[0] in classes and box[1] > det:
                x = int(x_y.split('_')[0]) + int(box[2][0])
                y = int(x_y.split('_')[1]) + int(box[2][1])
                w = int(box[2][2])
                h = int(box[2][3])
                cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                cell_list.append(np.array(resize_img(cell, size)))
                cell_index[index] = [x_y, list(box)]
                index += 1
    slide.close()
    # return np.asarray(cell_list), cell_index
    return cell_list, cell_index
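# Sketch of consuming gen_np_array_mem's output (an assumed downstream workflow, not
# part of this module): stack the per-cell arrays into one batch and map predictions
# back to their source boxes through cell_index.
#
#   cell_list, cell_index = self.gen_np_array_mem(results)
#   batch = np.asarray(cell_list)            # shape: (num_cells, size, size, 3)
#   # preds = xception_model.predict(batch)  # hypothetical classifier call
#   x_y, box = cell_index[0]                 # patch key and darknet box for cell 0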
def worker(tiff_path, keys, points_dict, save_path, N):
    # cut cells for one chunk of patch keys; returns the number of crops written
    basename = os.path.splitext(os.path.basename(tiff_path))[0].replace(" ", "-")
    try:
        slide = openslide.OpenSlide(tiff_path)
    except:
        slide = TSlide(tiff_path)
    cell_count = 0
    for x_y in keys:
        boxes = points_dict[x_y]
        for box in boxes:
            x0, y0 = x_y.split('_')
            x = int(x0) + int(box[4][0])
            y = int(y0) + int(box[4][1])
            w = int(box[4][2])
            h = int(box[4][3])
            # make save dir
            cell_save_dir = os.path.join(save_path, box[2])
            os.makedirs(cell_save_dir, exist_ok=True)
            image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                1 - box[3], basename, x, y, w, h, N)
            cell_save_path = os.path.join(cell_save_dir, image_name)
            # crop an N-times-enlarged region centred on the detected box
            slide.read_region(
                (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                (int(N * w), int(N * h))).convert("RGB").save(cell_save_path)
            cell_count += 1
    slide.close()
    return cell_count
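# Possible driver for worker above (an assumption: the signature suggests fanning key
# chunks out over a process pool; chunk count, paths and N are illustrative only).
#
#   from multiprocessing import Pool
#
#   keys = list(points_dict.keys())
#   chunks = [keys[i::4] for i in range(4)]   # 4 roughly equal key slices
#   with Pool(4) as pool:
#       counts = pool.starmap(
#           worker,
#           [(tiff_path, chunk, points_dict, save_path, 4) for chunk in chunks])
#   total_cells = sum(counts)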
def cut_cells_p_marked_(self, tifname, new_dict, save_path, factor=0.3, N=4):
    def get_labels(xmlname):
        if not os.path.isfile(xmlname):
            return []
        classes = {
            "#aa0000": "HSIL",
            "#aa007f": "ASCH",
            "#005500": "LSIL",
            "#00557f": "ASCUS",
            "#0055ff": "SCC",
            "#aa55ff": "EC",
            "#ff5500": "AGC",
            "#00aa00": "FUNGI",
            "#00aa7f": "TRI",
            "#00aaff": "CC",
            "#55aa00": "ACTINO",
            "#55aa7f": "VIRUS",
            "#ffffff": "NORMAL",
            "#000000": "MC",
            "#aa00ff": "SC",
            "#ff0000": "RC",
            "#aa5500": "GEC"
        }
        DOMTree = xml.dom.minidom.parse(xmlname)
        collection = DOMTree.documentElement
        annotations = collection.getElementsByTagName("Annotation")
        marked_boxes = []
        for annotation in annotations:
            colorCode = annotation.getAttribute("Color")
            if not colorCode in classes:
                continue
            marked_box = [classes[colorCode], []]
            coordinates = annotation.getElementsByTagName("Coordinate")
            marked_box[1] = [(float(coordinate.getAttribute('X')),
                              float(coordinate.getAttribute('Y')))
                             for coordinate in coordinates]
            marked_boxes.append(marked_box)
        return marked_boxes

    def is_overlapped(marked_boxes, predicted_box, factor):
        for marked_box in marked_boxes:
            marked_box_obj = geometry.Polygon(marked_box[1])
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            if marked_box_obj.intersection(predicted_box_obj).area / (
                    marked_box_obj.area + predicted_box_obj.area -
                    marked_box_obj.intersection(predicted_box_obj).area
            ) >= factor:
                return marked_box[0]
        return ""

    tiff_dict = get_tiff_dict()
    if tifname not in tiff_dict:
        raise Exception("XCEPTION POSTPROCESS %s NOT FOUND" % tifname)
    try:
        slide = openslide.OpenSlide(tiff_dict[tifname])
    except:
        slide = TSlide(tiff_dict[tifname])
    basename = os.path.splitext(os.path.basename(tifname))[0]
    parent_d = os.path.basename(os.path.dirname(tifname))
    save_path = os.path.join(save_path, parent_d, basename)
    marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")
    for x_y, boxes in new_dict.items():
        for box in boxes:
            # image naming: tifname_x_y_w_h_p.jpg
            _, x, y = re.findall(pattern, x_y)[0]
            x = int(x) + int(box[4][0])
            y = int(y) + int(box[4][1])
            w = int(box[4][2])
            h = int(box[4][3])
            marked_class_i = is_overlapped(marked_boxes, (x, y, w, h), factor)
            if marked_class_i:
                image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], marked_class_i, basename, x, y, w, h, N)
                save_path_i = os.path.join(save_path, box[2], "marked")
            else:
                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], basename, x, y, w, h, N)
                save_path_i = os.path.join(save_path, box[2])
            os.makedirs(save_path_i, exist_ok=True)
            image_fullname = os.path.join(save_path_i, image_name)
            slide.read_region(
                (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                (int(N * w), int(N * h))).convert("RGB").save(image_fullname)
    slide.close()
def cut_cells_p_marked(self, tifname, new_dict, save_path, factor=0.3, N=4):
    """
    :param tifname: full path name of .tif/.kfb file
    :param new_dict: {x_y: [[class_i, det, class_i, det, (x,y,w,h)],]}
    :param save_path: image saving path (note: image is saved under class_i)
    :param factor: overlapping (IoU) threshold; marked info is added to the image
                   filename when a predicted box overlaps a labelled box at least
                   this much
    :output format: save_path/diagnosis/tifbasename/class_i/tifname_x_y_w_h.jpg
                    (note: x, y here are relative to the wsi; p is the value of the
                    second det; the saved image is N times the annotation box; if the
                    cell overlaps an xml annotation, "markedAs_<class>" is added to the
                    filename and the crop is saved under a "marked" subdirectory)
    """

    # https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely
    def get_labels(xmlname):
        """collect labeled boxes from an asap xml
        :param xmlname: full path name of the .xml file, derived from the .tif/.kfb file name
        :output format: [[class_i, [(xi,yi),]],]
        """
        if not os.path.isfile(xmlname):
            return []
        classes = {
            "#aa0000": "HSIL",
            "#aa007f": "ASCH",
            "#005500": "LSIL",
            "#00557f": "ASCUS",
            "#0055ff": "SCC",
            "#aa557f": "ADC",
            "#aa55ff": "EC",
            "#ff5500": "AGC1",
            "#ff557f": "AGC2",
            "#ff55ff": "AGC3",
            "#00aa00": "FUNGI",
            "#00aa7f": "TRI",
            "#00aaff": "CC",
            "#55aa00": "ACTINO",
            "#55aa7f": "VIRUS",
            "#ffffff": "NORMAL",
            "#000000": "MC",
            "#aa00ff": "SC",
            "#ff0000": "RC",
            "#aa5500": "GEC"
        }
        DOMTree = xml.dom.minidom.parse(xmlname)
        collection = DOMTree.documentElement
        annotations = collection.getElementsByTagName("Annotation")
        marked_boxes = []
        for annotation in annotations:
            colorCode = annotation.getAttribute("Color")
            if colorCode not in classes:
                continue
            marked_box = [classes[colorCode], []]
            coordinates = annotation.getElementsByTagName("Coordinate")
            marked_box[1] = [(float(coordinate.getAttribute('X')),
                              float(coordinate.getAttribute('Y')))
                             for coordinate in coordinates]
            marked_boxes.append(marked_box)
        return marked_boxes

    def is_overlapped(marked_boxes, predicted_box, factor):
        """check whether a predicted box is already marked in the xml
        :param marked_boxes: [[class_i, [(xi,yi),]],]
        :param predicted_box: (x, y, w, h)
        :param factor: IoU threshold; return the marked class when the overlap reaches it
        """
        for marked_box in marked_boxes:
            marked_box_obj = geometry.Polygon(marked_box[1])
            predicted_box_obj = geometry.box(
                predicted_box[0], predicted_box[1],
                predicted_box[0] + predicted_box[2],
                predicted_box[1] + predicted_box[3])
            # IoU = intersection area / union area
            if marked_box_obj.intersection(predicted_box_obj).area / (
                    marked_box_obj.area + predicted_box_obj.area -
                    marked_box_obj.intersection(predicted_box_obj).area
            ) >= factor:
                return marked_box[0]
        return ""

    try:
        slide = openslide.OpenSlide(tifname)
    except:
        slide = TSlide(tifname)
    basename = os.path.splitext(os.path.basename(tifname))[0]
    parent_d = os.path.basename(os.path.dirname(tifname))
    save_path = os.path.join(save_path, parent_d, basename)
    marked_boxes = get_labels(os.path.splitext(tifname)[0] + ".xml")
    for x_y, boxes in new_dict.items():
        for box in boxes:
            # image naming: tifname_x_y_w_h_p.jpg
            x = int(x_y.split('_')[0]) + int(box[4][0])
            y = int(x_y.split('_')[1]) + int(box[4][1])
            w = int(box[4][2])
            h = int(box[4][3])
            marked_class_i = is_overlapped(marked_boxes, (x, y, w, h), factor)
            if marked_class_i:
                image_name = "1-p{:.4f}_markedAs_{}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], marked_class_i, basename, x, y, w, h, N)
                save_path_i = os.path.join(save_path, box[2], "marked")
            else:
                image_name = "1-p{:.4f}_{}_x{}_y{}_w{}_h{}_{}x.jpg".format(
                    1 - box[3], basename, x, y, w, h, N)
                save_path_i = os.path.join(save_path, box[2])
            os.makedirs(save_path_i, exist_ok=True)
            image_fullname = os.path.join(save_path_i, image_name)
            slide.read_region(
                (int(x + (1 - N) * w / 2), int(y + (1 - N) * h / 2)), 0,
                (int(N * w), int(N * h))).convert("RGB").save(image_fullname)
    slide.close()
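# Shape of the ASAP annotation XML that get_labels parses, inferred from the parsing
# code above (the element and attribute names are the ones the code reads; the
# concrete values are invented for illustration):
#
#   <ASAP_Annotations>
#     <Annotations>
#       <Annotation Color="#aa0000">
#         <Coordinates>
#           <Coordinate Order="0" X="10240.5" Y="5120.0" />
#           <Coordinate Order="1" X="10300.5" Y="5120.0" />
#           <Coordinate Order="2" X="10300.5" Y="5180.0" />
#         </Coordinates>
#       </Annotation>
#     </Annotations>
#   </ASAP_Annotations>
#
# Each Annotation whose Color maps to a class contributes one entry
# [class_i, [(x0, y0), (x1, y1), ...]] to marked_boxes; is_overlapped then compares
# that polygon against the predicted box using an IoU threshold of `factor`.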
def gen_np_array_mem_(self, results, classes=cfg.darknet.classes,
                      det=cfg.xception.det1, size=cfg.xception.size):
    def resize_img(img, size):
        img_croped = img.crop(
            (-((size - img.size[0]) / 2), -((size - img.size[1]) / 2),
             img.size[0] + (size - img.size[0]) / 2,
             img.size[1] + (size - img.size[1]) / 2))
        img_resized = img_croped.resize((size, size))
        return img_resized

    tiff_dict = get_tiff_dict()
    if self.input_file not in tiff_dict:
        raise Exception("XCEPTION PREPROCESS %s NOT FOUND" % self.input_file)
    try:
        try:
            slide = openslide.OpenSlide(tiff_dict[self.input_file])
        except:
            slide = TSlide(tiff_dict[self.input_file])
    except:
        raise Exception('TIFF FILE OPEN FAILED => %s' % self.input_file)
    cell_list = []
    cell_index = {}
    index = 0
    for x_y, boxes in results.items():
        for box in boxes:
            if box[0] in classes and box[1] > det:
                # print(x_y)
                _, x, y = re.findall(self.pattern, x_y)[0]
                # print("1=> %s, %s" % (x, y))
                x = int(x) + int(box[2][0])
                y = int(y) + int(box[2][1])
                w = int(box[2][2])
                h = int(box[2][3])
                # print("2=> %s, %s" % (x, y))
                # center_x = x + int(w / 2 + 0.5)
                # center_y = y + int(h / 2 + 0.5)
                # w_ = max(w, h)
                # h_ = w_
                # x_ = center_x - int(w_ / 2 + 0.5)
                # y_ = center_y - int(h_ / 2 + 0.5)
                # x_ = 0 if x_ < 0 else x_
                # y_ = 0 if y_ < 0 else y_
                cell = slide.read_region((x, y), 0, (w, h)).convert("RGB")
                # cell = slide.read_region((x_, y_), 0, (w_, h_)).convert("RGB")
                # image_name = "%s_%s_%s_%s.jpg" % (x, y, w, h)
                # cell.save(os.path.join('/home/tsimage/Development/DATA/middle_cells', image_name))
                cell_list.append(np.array(resize_img(cell, size)))
                cell_index[index] = [x_y, list(box)]
                index += 1
    slide.close()
    # return np.asarray(cell_list), cell_index
    return cell_list, cell_index
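# gen_np_array_mem_ differs from gen_np_array_mem mainly in how it recovers the patch
# origin: instead of splitting the key on '_', it unpacks three groups from
# re.findall(self.pattern, x_y)[0]. A hypothetical pattern compatible with that
# unpacking (the real self.pattern is defined elsewhere and may differ):
#
#   pattern = re.compile(r"(.+)_(\d+)_(\d+)")
#   _, x, y = re.findall(pattern, "sample-slide_20480_10240")[0]  # x="20480", y="10240"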