def do_similar_remove(path):
    """Move near-duplicate cell images (IOU > 0.6) into REPEAT_FILE_SAVE_PATH.

    Files are grouped by the big-image name encoded in their filename; within
    a group, a file overlapping an already-kept box is moved away as a repeat.

    :param path: directory scanned by FilesScanner for cell image files
    :return: dict mapping big-image name -> list of kept (x, y, w, h, file) tuples
    """
    files = FilesScanner(path).get_files()
    # filename layout: <big_name>_x<x>_y<y>_w<w>_h<h>_s<s>.jpg
    pattern = re.compile(r'(.*?)_x(\d+)_y(\d+)_w(\d+)_h(\d+)_s(\d+).jpg')
    dict_ = {}
    total = len(files)
    for index, file in enumerate(files):
        print("%s / %s ..." % (index + 1, total))
        basename = os.path.basename(file)
        cell_type = os.path.basename(os.path.dirname(file))
        big_name, x, y, w, h, _ = re.findall(pattern, basename)[0]
        x, y, w, h = int(x), int(y), int(w), int(h)
        if big_name not in dict_:
            dict_[big_name] = [(x, y, w, h, file)]
            continue
        lst = dict_[big_name]
        for item in lst:
            # FIX: original unpacked into `h`, clobbering the current box's
            # height before the IOU comparison
            x_, y_, w_, h_ = item[:-1]
            if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.6:
                save_path = os.path.join(REPEAT_FILE_SAVE_PATH, cell_type)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                shutil.move(file, save_path)
                # FIX: stop after the first match — the file has already been
                # moved and must not be moved again on a later match
                break
        else:
            # no kept box overlaps this one: keep it
            lst.append((x, y, w, h, file))
    return dict_
def rm_duplicates(boxes):
    """Drop overlapping detection boxes, keeping the higher-accuracy one.

    :param boxes: iterable of (label, accuracy, (x_center, y_center, w, h))
    :return: list of boxes with overlaps (IOU > cfg.darknet.min_overlap_ratio)
             collapsed to a single representative each
    """
    boxes_new = []
    # parallel bookkeeping list of (label, accuracy, (x_topleft, y_topleft, w, h))
    unique_point_collection = []
    for box in boxes:
        label, accuracy, (x_center, y_center, w, h) = box
        # convert center coords to the top-left form cal_IOU expects
        x = int(x_center - w / 2)
        y = int(y_center - h / 2)
        for index, item in enumerate(unique_point_collection):
            ratio = cal_IOU(item[2], (x, y, w, h))
            if ratio > cfg.darknet.min_overlap_ratio:
                if accuracy >= item[1]:
                    # FIX: store top-left coords (x, y), not the raw center
                    # coords, so later IOU comparisons against this entry
                    # stay consistent with the entries added below
                    unique_point_collection[index] = (label, accuracy, (x, y, w, h))
                    boxes_new[index] = box
                break
        else:
            unique_point_collection.append((label, accuracy, (x, y, w, h)))
            boxes_new.append(box)
    return boxes_new
# Copy the manually-reviewed cell image into
# <merge_dir>/<parent_type>/<key>/<cell_type>.
path = item['path']
cell_type = item['type']
cell_save_path = os.path.join(merge_dir_path, parent_type, key, cell_type)
if not os.path.exists(cell_save_path):
    os.makedirs(cell_save_path)
shutil.copy(path, cell_save_path)

# Use the coordinates of cells found by the detection algorithm to check
# each auto-detected cell for duplication against the reviewed set.
manual_point_coordinate_lst = get_coordinate(manual_point_lst)
for point in auto_point_lst:
    basename = os.path.basename(point['path'])
    # coordinates are encoded in the image filename
    _, x, y, w, h, _ = get_location_from_filename(basename)
    # Does this detection duplicate a reviewed image (IOU > 0.8)?
    for item in manual_point_coordinate_lst:
        if cal_IOU((x, y, w, h), item) > 0.8:
            break
    else:
        path = point['path']
        cell_type = point['type']
        cell_save_path = os.path.join(merge_dir_path, parent_type, key, cell_type + '_NEW')
        if not os.path.exists(cell_save_path):
            os.makedirs(cell_save_path)
        # No matching reviewed image — copy straight into the "_NEW" folder.
        shutil.copy(path, cell_save_path)
def remove_repeat_cells(key, csv_file_path):
    """Crop model-detected cells from a slide and de-duplicate them.

    Boxes of the same label overlapping with IOU > 0.7 are collapsed. All
    crops go to TEST_IMAGE_SAVE_PATH/<key>/origin/<label>; only unique ones
    are also saved under .../removal/<label>.

    :param key: slide identifier; must exist in tiff_dict
    :param csv_file_path: csv with header and rows
        (name, label01, accu01, label02, accu02, xmin, ymin, xmax, ymax)
    :return: list of unique (label, x, y, w, h) tuples in slide coordinates
    :raises Exception: when key is unknown or the tiff cannot be opened
    """
    if key not in tiff_dict:
        raise Exception("XCEPTION PREPROCESS %s NOT FOUND" % key)
    tiff_path = tiff_dict[key]
    try:
        try:
            slide = openslide.OpenSlide(tiff_path)
        except Exception:
            # FIX: narrowed bare except; fall back to the in-house reader
            # for formats openslide rejects
            slide = TSlide(tiff_path)
    except Exception:
        raise Exception('TIFF FILE OPEN FAILED => %s' % tiff_path)
    save_path = os.path.join(TEST_IMAGE_SAVE_PATH, key)
    with open(csv_file_path) as f:
        lines = csv.reader(f)
        unique_cells_collection = []
        next(lines, None)  # skip header row
        count = 0
        for line in lines:
            name, label01, accu01, label02, accu02, xmin, ymin, xmax, ymax = line
            xmin, ymin, xmax, ymax = float(xmin), float(ymin), float(
                xmax), float(ymax)
            # width/height rounded to nearest int
            x, y, w, h = xmin, ymin, int(xmax - xmin + 0.5), int(ymax - ymin + 0.5)
            # the patch name encodes its offset inside the full slide
            tiff_name, start_x, start_y = re.findall(pattern, name)[0]
            start_x, start_y = int(start_x), int(start_y)
            # translate patch-local coords to absolute slide coords
            x, y = int(start_x + x), int(start_y + y)
            origin_save_path = os.path.join(save_path, "origin", label02)
            removal_save_path = os.path.join(save_path, "removal", label02)
            if not os.path.exists(origin_save_path):
                os.makedirs(origin_save_path)
            if not os.path.exists(removal_save_path):
                os.makedirs(removal_save_path)
            patch = slide.read_region((x, y), 0, (w, h)).convert("RGB")
            image_name = "%s_x%s_y%s_w%s_h%s.jpg" % (key, x, y, w, h)
            patch.save(os.path.join(origin_save_path, image_name))
            # keep this box only if no kept box of the same label overlaps it
            for item in unique_cells_collection:
                label, x_, y_, w_, h_ = item
                ratio = cal_IOU((x, y, w, h), (x_, y_, w_, h_))
                if ratio > 0.7 and label == label02:
                    break
            else:
                unique_cells_collection.append((label02, x, y, w, h))
                patch.save(os.path.join(removal_save_path, image_name))
            count += 1
    print("ORIGIN POINTS COLLECTION LENGTH: %s" % count)
    print("AFTER DUPLICATE REMOVAL COLLECTION LENGTH: %s" % len(unique_cells_collection))
    return unique_cells_collection
def cell_classification(xml_path_lst, csv_path_lst):
    """Compare model detections (csv) against manual annotations (xml) and
    classify each cell as same / new / modified / missed, writing the four
    groups to per-slide txt files on disk.

    :param xml_path_lst: xml file path list (manual annotations)
    :param csv_path_lst: csv file path list (model output)
    :return: None — results are written under data_after_removal/<key>/
    """
    print("GET XML AND CSV <NAME: PATH> DICT...")
    xml_dict = generate_xml_path_dict(xml_path_lst)
    csv_dict = generate_csv_path_dict(csv_path_lst)

    # GET NEW MODEL OUTPUT POINTS COLLECTION
    csv_points_dict = {}
    removal_xml_save_path = '/home/tsimage/Development/DATA/removal_xmls'
    keys = list(csv_dict.keys())
    total = len(keys)
    print("GET CSV LABELLED POINTS COLLECTION ...")
    # read csv — duplicate removal is cached per key in a txt file so the
    # expensive remove_repeat_cells pass runs only once per slide
    for index, key in enumerate(keys):
        print("GET CSV DATA %s / %s %s..." % (index + 1, total, key))
        removal_xml_data_path = os.path.join(removal_xml_save_path, key + ".txt")
        lst = []
        if os.path.exists(removal_xml_data_path):
            # cached result: one "label,x,y,w,h" line per cell
            with open(removal_xml_data_path) as f:
                lines = f.readlines()
                for line in lines:
                    label, x, y, w, h = line.replace("\n", "").split(',')
                    lst.append((label, int(x), int(y), int(w), int(h)))
        else:
            lst = remove_repeat_cells(key, csv_dict[key])
            write_to_txt(removal_xml_data_path, lst)
        csv_points_dict[key] = lst

    xml_points_dict = {}
    keys = list(xml_dict.keys())
    total = len(keys)
    print("GET XML LABELLED POINTS COLLECTION ...")
    # read xml
    for index, key in enumerate(keys):
        print("GET XML DATA %s / %s %s..." % (index + 1, total, key))
        lst = read_data_from_xml(xml_dict[key])
        xml_points_dict[key] = lst

    print('CELL COMPARE AND CLASSIFICATION ...')
    # compare and classification
    keys = list(csv_points_dict.keys())
    total = len(keys)
    # cells where the model label disagrees with the manual label
    dict_modify = {}
    # cells detected by the model but absent from the manual annotation
    dict_new = {}
    # cells where model and manual annotation agree (label and position)
    dict_same = {}
    for index, key in enumerate(keys):
        print("CLASSIFICATION %s / %s %s..."
              % (index + 1, total, key))
        same_lst = []
        new_lst = []
        modify_lst = []
        csv_lst = csv_points_dict[key]
        xml_lst = xml_points_dict[key]
        for csv_item in csv_lst:
            label01, x01, y01, w01, h01 = csv_item
            # match this detection against manual annotations by IOU
            for xml_item in xml_lst:
                label02, x02, y02, w02, h02 = xml_item
                ratio = cal_IOU((x02, y02, w02, h02), (x01, y01, w01, h01))
                if ratio > 0.8:
                    if label01 == label02:
                        same_lst.append(csv_item)
                    else:
                        # record both labels: (manual, model, x, y, w, h)
                        modify_lst.append(
                            (label02, label01, x01, y01, w01, h01))
                    break
            else:
                # no manual annotation overlaps this detection
                new_lst.append(csv_item)
        dict_same[key] = same_lst
        dict_new[key] = new_lst
        dict_modify[key] = modify_lst

    # cells present in the manual annotation but missed by the model
    dict_miss = {}
    keys = list(xml_points_dict.keys())
    total = len(keys)
    for index, key in enumerate(keys):
        if key not in csv_dict:
            continue
        miss_lst = []
        csv_lst = csv_points_dict[key]
        xml_lst = xml_points_dict[key]
        for xml_item in xml_lst:
            label01, x01, y01, w01, h01 = xml_item
            for csv_item in csv_lst:
                label02, x02, y02, w02, h02 = csv_item
                if cal_IOU((x02, y02, w02, h02), (x01, y01, w01, h01)) > 0.8:
                    break
            else:
                miss_lst.append(xml_item)
        dict_miss[key] = miss_lst

    data_after_classification_save_path = '/home/tsimage/Development/DATA/data_after_removal'
    # NOTE(review): `keys` here is still the xml key list from the miss pass,
    # so csv-only keys never get their same/new/modify files written — confirm
    # this is intended
    for index, key in enumerate(keys):
        save_path = os.path.join(data_after_classification_save_path, key)
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        if key in dict_same:
            write_to_txt(os.path.join(save_path, 'same.txt'), dict_same[key])
        if key in dict_new:
            write_to_txt(os.path.join(save_path, 'new.txt'), dict_new[key])
        if key in dict_modify:
            write_to_txt(os.path.join(save_path, 'modify.txt'), dict_modify[key])
        if key in dict_miss:
            write_to_txt(os.path.join(save_path, 'miss.txt'), dict_miss[key])
# 当两种标注数据同时存在时,进行去重处理 # 以手工标注信息为准,对自动标注信息在手工标注信息中进行遍历 # 若存在重合度 ratio > 0.5 的标注信息,直接丢弃;否则加入细胞信息列表 if n == 2: # 以手工标注信息为基准 # TO-DO 过滤 重叠率 > 0.5 图像 available_points = deepcopy(manual) # 对自动标注信息进行遍历,存在手工标注信息中存在 重合度 > 0.5 的细胞信息,直接丢弃 for item_ in auto: x_, y_, w_, h_ = item_['x'], item_['y'], item_['w'], item_['h'] for item in manual: x, y, w, h = item['x'], item['y'], item['w'], item['h'] ratio = cal_IOU((x, y, w, h), (x_, y_, w_, h_)) if ratio > ACCEPTED_OVERLAPPED_RATIO: similar_count += 1 break else: available_points.append(item_) points_collection[name] = available_points elif n == 1: if auto: points_collection[name] = auto if manual: points_collection[name] = manual
def generate_labelme_format_xml(csv_files_path, patch_dict, xml_save_path):
    """Write the boxes of every *_clas.csv file out as labelme-format xml.

    Boxes are grouped by patch key, de-duplicated (IOU > 0.8), then written
    next to a copy of the patch image under <xml_save_path>/<label>/.

    :param csv_files_path: directory scanned for csv files
    :param patch_dict: patch key -> {'label': ..., 'path': ...} metadata
    :param xml_save_path: output root for the generated xml files
    :raises Exception: when a csv key has no entry in patch_dict
    """
    files = FilesScanner(csv_files_path, postfix=['.csv']).get_files()
    clas_files = [item for item in files if item.endswith('_clas.csv')]
    # total number of csv files to process
    total = len(clas_files)
    for index, file in enumerate(clas_files):
        print("Processing %s / %s %s" % (index + 1, total, os.path.basename(file)))
        with open(file) as f:
            lines = csv.reader(f)
            dict_ = {}
            next(lines, None)  # skip header row
            for line in lines:
                key = line[0]
                # clamp negative coordinates to 0 and round to nearest int
                box = {
                    'name': line[3],
                    'xmin': 0 if float(line[5]) < 0 else int(float(line[5]) + 0.5),
                    'ymin': 0 if float(line[6]) < 0 else int(float(line[6]) + 0.5),
                    'xmax': 0 if float(line[7]) < 0 else int(float(line[7]) + 0.5),
                    'ymax': 0 if float(line[8]) < 0 else int(float(line[8]) + 0.5),
                }
                if key not in dict_:
                    dict_[key] = [box]
                else:
                    dict_[key].append(box)
            for key, lst in dict_.items():
                if key not in patch_dict:
                    # FIX: the original message reported only the csv path —
                    # the missing *key* is what the operator needs to see
                    raise Exception("%s NOT FOUND IN DICT (csv: %s)" % (key, file))
                patch = patch_dict[key]
                label = patch['label']
                image_path = patch['path']
                save_path = os.path.join(xml_save_path, label)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                # remove duplicated cells (IOU > 0.8 against an earlier box)
                lst_ = []
                for item in lst:
                    x, y, w, h = item['xmin'], item['ymin'], item['xmax'] - item['xmin'], item['ymax'] - item['ymin']
                    for item_ in lst_:
                        x_, y_, w_, h_ = item_['xmin'], item_['ymin'], item_['xmax'] - item_['xmin'], item_['ymax'] - item_['ymin']
                        if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.8:
                            break
                    else:
                        lst_.append(item)
                write_to_labelme_xml(lst_, os.path.join(save_path, key + '.xml'))
                shutil.copy(image_path, save_path)
print("%s / %s %s ... " % (index + 1, total, key))
lst01 = cell_dict[key]
if key in train_dict:
    lst02 = train_dict[key]
    for ctype01, path01 in lst01:
        # coordinates are encoded in the image filename
        basename = os.path.basename(path01)
        x01, y01, w01, h01, _ = re.findall(pattern01, basename)[0]
        for ctype02, path02 in lst02:
            basename = os.path.basename(path02)
            x02, y02, w02, h02, _ = re.findall(pattern02, basename)[0]
            ratio = cal_IOU((int(x01), int(y01), int(w01), int(h01)), (int(x02), int(y02), int(w02), int(h02)))
            if ratio > 0.5:
                # overlaps a training cell — file under the training type
                cell_save_path = os.path.join(merge_dir_path, key, ctype02)
                if not os.path.exists(cell_save_path):
                    os.makedirs(cell_save_path)
                shutil.copy(path01, cell_save_path)
                break
        else:
            # no overlap with any training cell — treat as newly found
            cell_save_path = os.path.join(merge_dir_path, key, ctype01 + '_NEW')
            if not os.path.exists(cell_save_path):
                os.makedirs(cell_save_path)
            shutil.copy(path01, cell_save_path)