def detect(score_map, geo_map, score_map_thresh, box_thresh, nms_thres):
    """Restore text boxes from a score map and a geometry map.

    :param score_map: per-pixel text score; a 3-D input is reduced to its
        first channel (``score_map[:, :, 0]``).
    :param geo_map: per-pixel geometry, indexed as ``geo_map[row, col, :]``.
    :param score_map_thresh: pixels scoring strictly above this are candidates.
    :param box_thresh: merged boxes whose mean score is not above this are
        dropped.
    :param nms_thres: threshold forwarded to ``lanms.merge_quadrangle_n9``.
    :return: array whose rows hold eight quadrangle coordinates followed by
        the mean score under the box, or ``None`` when there is no candidate
        pixel or NMS yields no box.
    """
    if len(score_map.shape) == 3:
        score_map = score_map[:, :, 0]

    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    if len(xy_text) < 1:
        # Nothing above threshold: stop here instead of relying on the
        # restore/NMS helpers tolerating empty arrays.
        return None

    # sort the text boxes via the y axis (row index)
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    rows, cols = xy_text[:, 0], xy_text[:, 1]

    # Points are passed as (x, y) scaled by 4; the quadrangles are divided by
    # 4 again below before being drawn onto the score map grid.
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[rows, cols, :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]

    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is
    # different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask,
                     box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     color=np.array((255, 0, 0)))
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    return boxes[boxes[:, 8] > box_thresh]
def detect(score_map, geo_map, score_map_thresh=0.1, box_thresh=0.005, nms_thres=0.25):
    """Decode text boxes from a score map and a geometry map.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; a 4-D score map also selects ``geo_map[0]``.
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``lanms.merge_quadrangle_n9``
    :return: array of boxes (eight coordinates + mean score per row), or
        ``None`` when no pixel passes the threshold or NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    candidates = np.argwhere(score_map > score_map_thresh)
    if len(candidates) < 1:
        return None

    # Order candidates by row index before restoring their quadrangles.
    candidates = candidates[np.argsort(candidates[:, 0])]
    rows, cols = candidates[:, 0], candidates[:, 1]
    quads = restore_rectangle(candidates[:, ::-1]*4, geo_map[rows, cols, :])  # N*4*2

    scored = np.zeros((quads.shape[0], 9), dtype=np.float32)
    scored[:, :8] = quads.reshape((-1, 8))
    scored[:, 8] = score_map[rows, cols]

    boxes = lanms.merge_quadrangle_n9(scored.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None

    # Replace each box score with the mean of the score map under the box.
    for idx in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        polygon = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, polygon, color=np.array((255, 0, 0)))
        boxes[idx, 8] = cv2.mean(score_map, mask)[0]

    keep = boxes[:, 8] > box_thresh
    return boxes[keep]
def detect(score_map, geo_map, timer, score_map_thresh=0.1, box_thresh=0.1, nms_thres=0.5):
    """Decode text boxes from score/geometry maps and record stage timings.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations
        in seconds.
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``lanms.merge_quadrangle_n9``
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Candidate pixels above the threshold, ordered by row index.
    pixels = np.argwhere(score_map > score_map_thresh)
    pixels = pixels[np.argsort(pixels[:, 0])]
    rows, cols = pixels[:, 0], pixels[:, 1]

    # Stage 1: restore one quadrangle per candidate pixel.
    tic = time.time()
    quads = restore_rectangle(pixels[:, ::-1]*4, geo_map[rows, cols, :])  # N*4*2
    print('{} text boxes before nms'.format(quads.shape[0]))
    boxes = np.zeros((quads.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # Stage 2: merge the quadrangles.
    tic = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Re-score every box with the mean of the score map under it, then drop
    # the low scoring ones (this differs from the original paper).
    for idx in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        polygon = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, polygon, 1)
        boxes[idx, 8] = cv2.mean(score_map, mask)[0]

    keep = boxes[:, 8] > box_thresh
    return boxes[keep], timer
def cal_IoU_gt_py(pred_geo, pred_cls, gt, threshold=0.8):
    """Compute, per confident pixel, the best IoU of its predicted box with the ground truth.

    For every pixel whose score exceeds ``threshold`` the predicted rectangle
    is restored from ``pred_geo`` and compared against the ground-truth boxes
    of the same batch element; the maximum IoU is stored at that pixel.

    :param pred_geo: N, W, H, 5
    :param pred_cls: N, W, H, 1
    :param gt: N, M, 4, 2; iteration over a sample's boxes stops at the first
        box whose coordinates sum to -8
    :param threshold: score above which a pixel is evaluated (default 0.8)
    :return: float32 array of shape (N, W, H, 1); pixels that are not
        evaluated stay 0
    :raises AssertionError: if a per-sample score map is not 2-D
    """
    def compute_IoU(polygon1, polygon2):
        """Return the IoU (0~1) of two quadrangles given as 4x2 point arrays."""
        polygon1 = Polygon(polygon1)
        if not polygon1.is_valid:
            polygon1 = polygon1.buffer(0)
        polygon2 = Polygon(polygon2)
        if not polygon2.is_valid:
            polygon2 = polygon2.buffer(0)
        intersection_polygon = polygon1.intersection(polygon2)
        if not intersection_polygon.is_valid:
            return 0.0
        intersection_area = intersection_polygon.area
        union_area = polygon1.area + polygon2.area - intersection_area
        if union_area <= 0:
            # Both polygons are degenerate (zero area): no overlap to report,
            # and dividing would raise ZeroDivisionError.
            return 0.0
        return (1.0 * intersection_area) / (1.0 * union_area)

    # Remove singleton axes but never the batch axis: a plain np.squeeze
    # would also drop N when the batch holds a single sample.
    pred_cls = np.asarray(pred_cls)
    singleton_axes = tuple(axis for axis in range(1, pred_cls.ndim)
                           if pred_cls.shape[axis] == 1)
    if singleton_axes:
        pred_cls = np.squeeze(pred_cls, axis=singleton_axes)

    shape = np.shape(pred_geo)
    IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)
    for batch_id in range(shape[0]):
        score_map = pred_cls[batch_id]
        geo_map = pred_geo[batch_id]
        cur_gt = gt[batch_id]
        if len(np.shape(score_map)) != 2:
            logging.log(logging.ERROR, 'score map shape isn\'t correct!')
            # Explicit raise so the check survives `python -O`.
            raise AssertionError('score map shape isn\'t correct!')

        xy_text = np.argwhere(score_map > threshold)
        if xy_text.shape[0] == 0:
            # No confident pixel in this sample; its slice stays all zeros.
            continue
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        text_box_restored = restore_rectangle(
            xy_text[:, ::-1],
            geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2

        # NOTE(review): pairing by position assumes restore_rectangle returns
        # boxes in the same order as the points it was given - confirm.
        for (x, y), box in zip(xy_text, text_box_restored):
            cur_IoU_value = 0.0
            for gt_box in cur_gt:
                if np.sum(gt_box) == -8:
                    break
                cur_IoU_value = max(
                    cur_IoU_value,
                    compute_IoU(np.asarray(box), np.asarray(gt_box)))
            IoU_gt[batch_id, x, y, 0] = cur_IoU_value
    return IoU_gt
def detect(self, score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.2, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``lanms.merge_quadrangle_n9``
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Pixels above the threshold as (row, col), ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    ys, xs = hits[:, 0], hits[:, 1]

    # Restore: columns are swapped to (x, y) and scaled by 4.
    t0 = time.time()
    restored = restore_rectangle(hits[:, ::-1] * 4, geo_map[ys, xs, :])
    boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)  # (N, 9)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[ys, xs]
    timer['restore'] = time.time() - t0

    # NMS
    t0 = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - t0
    if boxes.shape[0] == 0:
        return None, timer

    # Filter low score boxes by the average of the score map under each box
    # (this differs from the original paper).
    for k in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        outline = boxes[k, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, outline, 1)
        boxes[k, 8] = cv2.mean(score_map, mask)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_threshold=0.1, merge_iou_threshold=0.1, nms_iou_threshold=0.3):
    """
    Restore text boxes from score map and geo map.

    :param score_map: ndarray of shape (1, m, n, 1). For each position of the
        (m, n) map, the probability/score that it is text.
    :param geo_map: ndarray of shape (1, m, n, 5). For each position of the
        (m, n) map, the text-box rectangle assuming the position is text.
        Five values: the first four are the distances from the anchor point
        to the top, right, bottom and left edges of the box (true distances
        in the original image); the last one is the counter-clockwise
        rotation angle of the text.
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations.
    :param score_map_thresh: threshold on the text score/probability.
    :param box_threshold: threshold on the mean score of a text box.
    :param merge_iou_threshold: IoU threshold used when merging rectangles.
    :param nms_iou_threshold: IoU threshold of the non-maximum suppression.
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    pixels = np.argwhere(score_map > score_map_thresh)
    pixels = pixels[np.argsort(pixels[:, 0])]
    rows, cols = pixels[:, 0], pixels[:, 1]

    tic = time.time()
    restored = restore_rectangle(pixels[:, ::-1] * 4, geo_map[rows, cols, :])
    print('{} text boxes before nms'.format(restored.shape[0]))
    score_column = score_map[rows, cols][:, np.newaxis]
    boxes = np.concatenate((restored, score_column), axis=1)
    timer['restore'] = time.time() - tic

    tic = time.time()
    boxes = locality_aware_nms.locality_non_max_suppression(
        boxes=boxes.astype(np.float64),
        merge_iou_threshold=merge_iou_threshold,
        nms_iou_threshold=nms_iou_threshold)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Compute the mean score_map value of the points covered by each box and
    # drop the boxes whose mean is not above box_threshold.
    for idx in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        polygon = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, polygon, 1)
        boxes[idx, 8] = cv2.mean(score_map, mask)[0]

    keep = boxes[:, 8] > box_threshold
    return boxes[keep], timer
def detect_single_scale(score_map, geo_map, score_map_thresh, nms_thres, box_thresh, timer):
    """Decode text boxes from the score/geometry maps of a single scale.

    Unlike the ``* 4`` / ``// 4`` variants, pixel coordinates are used
    unscaled both when restoring rectangles and when drawing the box masks.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param score_map_thresh: pixels scoring strictly above this are candidates.
    :param nms_thres: threshold passed to ``lanms.merge_quadrangle_n9``.
    :param box_thresh: merged boxes whose mean score is not above this are
        dropped.
    :param timer: dict updated in place with the ``'restore'`` and ``'nms'``
        durations in seconds.
    :return: array of boxes (eight coordinates + mean score per row), or
        ``None`` when nothing is detected. The empty path used to return
        ``(None, timer)`` while the success path returned a bare array; both
        paths now return a single value so ``result is None`` works.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    if xy_text.shape[0] == 0:
        # No candidate pixel: keep the log line and timer keys callers may
        # read, and skip the restore/NMS helpers entirely.
        print('{} text boxes before nms'.format(0))
        timer['restore'] = 0.0
        timer['nms'] = 0.0
        return None

    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    rows, cols = xy_text[:, 0], xy_text[:, 1]

    # restore
    start = time.time()
    # xy_text[:, ::-1] holds the (x, y) coordinates of the qualifying pixels;
    # geo_map[rows, cols, :] holds the matching geometry of each pixel.
    text_box_restored = restore_rectangle(xy_text[:, ::-1],
                                          geo_map[rows, cols, :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - start

    # nms part
    start = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is
    # different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32), 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    return boxes[boxes[:, 8] > box_thresh]
def nms_boxBuild(self, score_map, geo_map, timer, ratio, score_map_thresh=0.5, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param timer: returned unchanged alongside the boxes
    :param ratio: not used by this method
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``nms_locality.nms_locality``
    :return: ``(boxes, timer)``
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, :]

    # Pixels above the threshold, ordered by row index.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    ys, xs = hits[:, 0], hits[:, 1]

    # One restored quadrangle per pixel, plus that pixel's score.
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[ys, xs, :])  # N*4*2
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[ys, xs]

    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)

    # Filter low score boxes by the average of the score map under each box
    # (this differs from the original paper).
    for k in range(len(boxes)):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        outline = boxes[k, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, outline, 1)
        boxes[k, 8] = cv2.mean(score_map, mask)[0]

    if len(boxes) > 0:
        boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer
def detect(score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from channel-first score and geometry maps.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, 0, :, :]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map and then indexed as ``geo_map[:, row, col]``.
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``lanms.merge_quadrangle_n9``
    :return: array of boxes, or ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, 0, :, :], geo_map[0, :, :, :]

    print(score_map.max())

    # Pixels above the threshold, ordered by row index.
    pixels = np.argwhere(score_map > score_map_thresh)
    pixels = pixels[np.argsort(pixels[:, 0])]
    rows, cols = pixels[:, 0], pixels[:, 1]

    # Restore one quadrangle per candidate pixel.
    quads = restore_rectangle(pixels[:, ::-1] * 4, geo_map[:, rows, cols])  # N*4*2
    print('{} text boxes before nms'.format(quads.shape[0]))
    scored = np.zeros((quads.shape[0], 9), dtype=np.float32)
    scored[:, :8] = quads.reshape((-1, 8))
    scored[:, 8] = score_map[rows, cols]

    # NMS
    boxes = lanms.merge_quadrangle_n9(scored.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    print('{} text boxes after nms'.format(boxes.shape[0]))

    # Filter low score boxes by the average of the score map under each box
    # (this differs from the original paper).
    for idx in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        polygon = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, polygon, 1)
        boxes[idx, 8] = cv2.mean(score_map, mask)[0]

    keep = boxes[:, 8] > box_thresh
    return boxes[keep]
def detect(score_map, rbox_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Decode rotated text boxes from a score map and an RBOX map.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param rbox_map: RBOX geometry map; reduced to ``rbox_map[0]`` together
        with a 4-D score map.
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``nms_locality.nms_locality``
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        # afterwards score_map is [h, w] and the geometry map is [h, w, 5]
        score_map, rbox_map = score_map[0, :, :, 0], rbox_map[0, :, :, ]

    # Coordinates of all pixels that pass the threshold: pixels scoring above
    # it are treated as text, the rest as non-text.
    text_pixels = np.argwhere(score_map > score_map_thresh)
    text_pixels = text_pixels[np.argsort(text_pixels[:, 0])]
    ys, xs = text_pixels[:, 0], text_pixels[:, 1]

    # Turn the RBOX output (distances from a point to the four box edges plus
    # one angle) into the corresponding rotated rectangles.
    t0 = time.time()
    restored = restore_rectangle(text_pixels[:, ::-1] * 4, rbox_map[ys, xs, :])
    print('{} text boxes before nms'.format(restored.shape[0]))
    boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[ys, xs]
    timer['restore'] = time.time() - t0

    # Filter all restored rectangles with locality-aware NMS.
    t0 = time.time()
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - t0

    if boxes.shape[0] == 0:
        return None, timer

    # Compute the mean of the score map over each box's region and filter
    # further on that mean.
    for k in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        outline = boxes[k, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, outline, 1)
        boxes[k, 8] = cv2.mean(score_map, mask)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``lanms.merge_quadrangle_n9``
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Threshold the score map and order the surviving pixels by row.
    coords = np.argwhere(score_map > score_map_thresh)
    row_order = np.argsort(coords[:, 0])
    coords = coords[row_order]
    rows, cols = coords[:, 0], coords[:, 1]

    # Restore phase.
    began = time.time()
    quads = restore_rectangle(coords[:, ::-1]*4, geo_map[rows, cols, :])  # N*4*2
    print('{} text boxes before nms'.format(quads.shape[0]))
    boxes = np.zeros((quads.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - began

    # NMS phase.
    began = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - began

    if boxes.shape[0] == 0:
        return None, timer

    # Filter low score boxes by the average of the score map under each box
    # (this differs from the original paper).
    for n in range(boxes.shape[0]):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = boxes[n, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[n, 8] = cv2.mean(score_map, region)[0]

    selected = boxes[:, 8] > box_thresh
    return boxes[selected], timer
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Decode text boxes with locality-aware NMS and record stage timings.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold passed to ``nms_locality.nms_locality``
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Pixels above the threshold, ordered by row index.
    locations = np.argwhere(score_map > score_map_thresh)
    locations = locations[np.argsort(locations[:, 0])]
    ys, xs = locations[:, 0], locations[:, 1]

    # Restore one quadrangle per pixel and attach the pixel score.
    t_restore = time.time()
    restored = restore_rectangle(locations[:, ::-1] * 4, geo_map[ys, xs, :])  # N*4*2
    print('{} text boxes before nms'.format(restored.shape[0]))
    boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[ys, xs]
    timer['restore'] = time.time() - t_restore

    t_nms = time.time()
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - t_nms

    if boxes.shape[0] == 0:
        return None, timer

    # Re-score each box with the mean of the score map under it.
    for j in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        poly = boxes[j, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, poly, 1)
        boxes[j, 8] = cv2.mean(score_map, mask)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def postprocess_image(score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thresh=0.2):
    """Turn a score map and a geometry map into filtered text boxes.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map.
    :param score_map_thresh: threshold for the score map
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thresh: threshold passed to ``nms_locality.nms_locality``
    :return: array of boxes, or ``None`` when NMS keeps nothing.
    """
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Filter by the score map, then order the pixels by row index.
    pixels = np.argwhere(score_map > score_map_thresh)
    pixels = pixels[np.argsort(pixels[:, 0])]
    rows, cols = pixels[:, 0], pixels[:, 1]

    # Filter by NMS.
    restored = restore_rectangle(pixels[:, ::-1] * 4, geo_map[rows, cols, :])
    scored = np.zeros((restored.shape[0], 9), dtype=np.float32)
    scored[:, :8] = restored.reshape((-1, 8))
    scored[:, 8] = score_map[rows, cols]
    boxes = nms_locality.nms_locality(scored.astype(np.float64), nms_thresh)

    print("num_boxes before nms = {}".format(restored.shape[0]))
    print("num_boxes after nms = {}".format(boxes.shape[0]))
    if boxes.shape[0] == 0:
        return None

    # Filter low score boxes by the average of the score map under each box
    # (different from the original paper).
    for idx in range(boxes.shape[0]):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        polygon = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(mask, polygon, 1)
        boxes[idx, 8] = cv2.mean(score_map, mask)[0]

    keep = boxes[:, 8] > box_thresh
    return boxes[keep]
def detect_dbscan(score_map,
                  geo_map,
                  timer,
                  score_map_thresh=FLAGS.mask_thresh,
                  box_thresh=0.1,
                  nms_thres=0.2,
                  min_area=FLAGS.min_area,
                  gpu_iou_id=GPU_IOU_ID):
    """Group text pixels into instances by DBSCAN over pairwise box IoU.

    Every pixel above ``score_map_thresh`` gets a restored box; the boxes are
    clustered with DBSCAN using ``1 - IoU`` as distance, and each sufficiently
    large cluster is turned into a dilated binary mask whose first contour is
    approximated by a polygon.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map. Channel 4 is read as the angle.
    :param timer: dict that receives the ``'restore'`` duration.
    :param score_map_thresh: threshold for the score map.
    :param box_thresh: not used by this function.
    :param nms_thres: not used by this function.
    :param min_area: minimum number of pixels for a cluster / mask to be kept.
    :param gpu_iou_id: negative selects the CPU IoU path, otherwise the value
        is forwarded to ``gpu_iou_matrix``.
    :return: ``(mask_colors, timer, mask_contours, [])``, or
        ``(None, timer, [], [])`` when at most one candidate box exists.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    h, w = np.shape(score_map)
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    # Reorder the points: non-negative angles first, then negative angles.
    # NOTE(review): presumably this mirrors the order in which
    # restore_rectangle returns its boxes, so that row i of `boxes` and
    # xy_text[i] describe the same pixel - confirm against restore_rectangle.
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    if boxes.shape[0] <= 1:
        return None, timer, [], []
    # Map every (row, col) point to its index in xy_text / boxes.
    points = list(xy_text)
    for i in range(len(points)):
        points[i] = tuple(points[i])
    points_dict = {}
    for i in range(len(points)):
        points_dict[points[i]] = i
    print("gpu_iou_id", gpu_iou_id)
    if gpu_iou_id < 0:
        print("using cpu IoU")
        time_iou = time.time()
        # Fill the strict upper triangle with IoU values; multiplying by the
        # transpose (whose upper triangle is still 1) makes the matrix
        # symmetric, and the diagonal stays 1 so self-distance is 0.
        iou_dict = np.ones((boxes.shape[0], boxes.shape[0]), dtype=np.float32)
        areas_ = np.zeros((boxes.shape[0]), dtype=np.float32)
        for i in range(areas_.shape[0]):
            areas_[i] = cv2.contourArea((boxes[i, 0:8].reshape(
                (4, 2))).astype(np.float32))
        for i in range(iou_dict.shape[0]):
            for j in range(i + 1, iou_dict.shape[1]):
                iou_dict[i, j] = calc_iou_area(boxes[i], boxes[j], areas_[i],
                                               areas_[j])
        iou_dict = 1.0 - iou_dict * iou_dict.T
        print("time_cpu_iou:", time.time() - time_iou)
    else:
        print("using gpu IoU")
        # Convex hull of every box, flattened to 8 values and truncated to
        # integers before being handed to the GPU IoU routine.
        boxes_iou = []
        for b in boxes:
            boxes_iou.append(
                cv2.convexHull(b[:8].reshape(4, 2),
                               clockwise=False,
                               returnPoints=True).reshape(8))
        boxes_iou = np.array(boxes_iou).astype(np.int32).astype(np.float32)
        print(boxes_iou.shape)
        time_iou = time.time()
        iou_dict = 1.0 - gpu_iou_matrix(boxes_iou, boxes_iou, gpu_iou_id)
        print("time_gpu_iou:", time.time() - time_iou)
    # DBSCAN is fed the box indices as 1-D samples; the metric looks the
    # distance up in the precomputed matrix.
    in_index = np.arange((boxes.shape[0]))
    in_index = in_index[:, np.newaxis].astype(np.int32)

    def distance(a, b):
        # a and b are single-element samples holding box indices.
        return iou_dict[int(a[0]), int(b[0])]

    time_DBSCAN = time.time()
    y_pred = DBSCAN(eps=0.2,
                    min_samples=10,
                    metric=lambda a, b: distance(a, b)).fit_predict(in_index)
    print("time_DBSCAN:", time.time() - time_DBSCAN)
    print("y_pred.shape", y_pred.shape, y_pred)
    print("xy_text.shape", xy_text.shape)
    print(np.unique(y_pred))
    box_cnt = np.unique(y_pred)
    # Collect the (x, y) points of every cluster label 0..max; negative
    # labels are never visited by this range.
    boxes_points = []
    for b_idx_ in range(box_cnt.max() + 1):
        p_idxs = np.argwhere(y_pred == b_idx_)[:, 0]
        if p_idxs.shape[0] < min_area:
            continue
        b_ps = []
        for p_idx_ in p_idxs:
            b_ps.append([xy_text[p_idx_, 1], xy_text[p_idx_, 0]])
        boxes_points.append(b_ps)
    mask_contours = []
    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)
    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)
    for b in boxes_points:
        # Rasterise the cluster: turn the (x, y) list into a (rows, cols)
        # index tuple and paint those pixels.
        mask_bin *= 0
        b = np.array(b)
        b = b[:, ::-1]
        b = b.transpose(1, 0)
        b = (b[0], b[1])
        mask_bin[b] = 255
        area_ = np.sum(mask_bin / 255)
        # Skip masks that are too small or cover (almost) the whole map.
        if area_ < min_area or area_ >= h * w * 0.99:
            continue
        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            if tuple(p_in) in points_dict:
                box_ = boxes[points_dict[tuple(p_in)]]
                # NOTE(review): box_ is a length-9 row, so box_[0], box_[3]
                # etc. are scalars and these "norms" are absolute differences
                # of single coordinates, not side lengths - confirm whether
                # box_[:8].reshape(4, 2) was intended.
                poly_h = min(np.linalg.norm(box_[0] - box_[3]),
                             np.linalg.norm(box_[1] - box_[2]))
                poly_w = min(np.linalg.norm(box_[0] - box_[1]),
                             np.linalg.norm(box_[2] - box_[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            # Cap the kernel by the short side of the mask's min-area rect
            # (scaled by dilate_ratio) and never go below 3.
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
        mask_bin = cv2.dilate(mask_bin, kernel)
        # Only the first contour found is approximated and kept.
        contours, hierarchy = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_NONE)
        epsilon = 0.01 * cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], epsilon, True)
        mask_contours.append(approx)
        if not FLAGS.no_write_images:
            # Paint the (undilated) cluster pixels with a random colour.
            mask_colors[:, :, :][b] = np.random.randint(100, 255, size=3)
    timer['restore'] = time.time() - start
    return mask_colors, timer, mask_contours, []
def detect_pixellink(score_map,
                     geo_map,
                     timer,
                     mask_thresh=FLAGS.mask_thresh,
                     box_thresh=0.1,
                     nms_thres=0.2,
                     min_area=FLAGS.min_area):
    """Group text pixels into instances with ``RegLink_func`` and extract contours.

    Every pixel above ``mask_thresh`` gets a restored box; ``RegLink_func``
    labels the pixels, and each label is turned into a dilated binary mask
    whose first contour is approximated by a polygon.

    :param score_map: text score map; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``.
    :param geo_map: geometry map; reduced to ``geo_map[0]`` together with a
        4-D score map. Channel 4 is read as the angle.
    :param timer: dict that receives the ``'restore'`` duration.
    :param mask_thresh: threshold for the score map.
    :param box_thresh: not used by this function.
    :param nms_thres: not used by this function.
    :param min_area: minimum number of pixels for a mask to be kept.
    :return: ``(mask_colors, timer, mask_contours, [])``
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > mask_thresh)
    h, w = np.shape(score_map)
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    # Reorder the points: non-negative angles first, then negative angles.
    # NOTE(review): presumably this mirrors the order in which
    # restore_rectangle returns its boxes, so that row i of `boxes` and
    # xy_text[i] describe the same pixel - confirm against restore_rectangle.
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    # Map every (row, col) point to its index in xy_text / boxes.
    points = list(xy_text)
    for i in range(len(points)):
        points[i] = tuple(points[i])
    points_dict = {}
    for i in range(len(points)):
        points_dict[points[i]] = i
    # Every point starts with group id -1.
    group_mask = dict.fromkeys(points, -1)
    mask_contours = []
    # NOTE(review): `mask` is used below as an (h, w) integer label image in
    # which labels 1..max are instances - confirm against RegLink_func.
    mask = RegLink_func(points,
                        points_dict,
                        group_mask,
                        h,
                        w,
                        boxes,
                        rl_iou_th=FLAGS.rl_iou_th)
    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)
    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)
    for i in range(1, mask.max() + 1):
        # Rasterise label i into a fresh binary mask.
        mask_bin *= 0
        mask_bin[mask == i] = 255
        area_ = np.sum(mask_bin / 255)
        # Skip masks that are too small or cover (almost) the whole map.
        if area_ < min_area or area_ >= h * w * 0.99:
            continue
        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            if tuple(p_in) in points_dict:
                box_ = boxes[points_dict[tuple(p_in)]]
                # NOTE(review): box_ is a length-9 row, so box_[0], box_[3]
                # etc. are scalars and these "norms" are absolute differences
                # of single coordinates, not side lengths - confirm whether
                # box_[:8].reshape(4, 2) was intended.
                poly_h = min(np.linalg.norm(box_[0] - box_[3]),
                             np.linalg.norm(box_[1] - box_[2]))
                poly_w = min(np.linalg.norm(box_[0] - box_[1]),
                             np.linalg.norm(box_[2] - box_[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            # Cap the kernel by the short side of the mask's min-area rect
            # (scaled by dilate_ratio) and never go below 3.
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
        mask_bin = cv2.dilate(mask_bin, kernel)
        # Only the first contour found is approximated and kept.
        contours, hierarchy = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_NONE)
        epsilon = 0.01 * cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], epsilon, True)
        mask_contours.append(approx)
        if not FLAGS.no_write_images:
            # Paint the (undilated) label pixels with a random colour.
            mask_colors[mask == i, :] = np.random.randint(100, 255, size=3)
    timer['restore'] = time.time() - start
    return mask_colors, timer, mask_contours, []
def detect_mask(score_map,
                score_map_full,
                geo_map,
                timer,
                score_map_thresh=FLAGS.score_map_thresh,
                mask_thresh=FLAGS.mask_thresh,
                box_thresh=0.1,
                nms_thres=0.2,
                min_area=FLAGS.min_area):
    '''
    restore text boxes from score map and geo map, then turn every box into
    a mask contour taken from the full score map

    :param score_map: score map used to pick candidate pixels and to score
        the merged boxes; a 4-D input is reduced to ``score_map[0, :, :, 0]``
    :param score_map_full: score map thresholded with ``mask_thresh`` inside
        every box; reduced like ``score_map``
    :param geo_map: geometry map; channel 4 is read as the angle
    :param timer: dict that receives the ``'restore'`` and ``'nms'`` durations
    :param score_map_thresh: threshold for candidate pixels in ``score_map``
    :param mask_thresh: threshold applied to ``score_map_full`` inside a box
    :param box_thresh: threshold for the mean score of a merged box
    :param nms_thres: threshold for nms
    :param min_area: minimum number of pixels for a mask to be kept
    :return: ``(mask_colors, timer, mask_contours, boxes)``;
        ``(None, timer, [], [])`` when NMS keeps nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        score_map_full = score_map_full[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    # Reorder the points: non-negative angles first, then negative angles.
    # NOTE(review): presumably this mirrors the order in which
    # restore_rectangle returns its boxes, so that row i of `boxes_all` and
    # xy_text[i] describe the same pixel - confirm against restore_rectangle.
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])
    # Map every (row, col) point to its index in xy_text / boxes_all.
    points = [tuple(p) for p in xy_text]
    points_dict = {}
    for i, point in enumerate(points):
        points_dict[point] = i
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes_all = boxes + 0  # copy of the per-pixel boxes, kept for dilation
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None, timer, [], []
    # here we filter some low score boxes by the average score map, this is
    # different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask,
                     box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)
    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)
    # The original test was `boxes == [] or boxes is None`. Comparing an
    # ndarray with a list of a different shape raises on recent NumPy and
    # never matched on older releases, so use an explicit emptiness check
    # and return what the fall-through path returned for an empty result.
    if boxes is None or len(boxes) == 0:
        return mask_colors, timer, [], boxes
    # Work on a copy of the boxes scaled down to score-map coordinates.
    boxes_in = boxes + 0
    boxes_in[:, :8] = (boxes_in[:, :8] / 4).astype(np.int32)
    h, w = score_map.shape
    boxes_points = []
    for box in boxes_in:
        b = box[:8].reshape((4, 2))
        # Skip boxes with a side of at most one pixel.
        if np.linalg.norm(b[0] - b[1]) <= 1 or np.linalg.norm(b[3] -
                                                              b[0]) <= 1:
            continue
        # Axis-aligned bounds of the box, clipped to the map.
        xmin = int(max(np.min(b[:, 0]), 0))
        xmax = int(min(np.max(b[:, 0]), w - 1))
        ymin = int(max(np.min(b[:, 1]), 0))
        ymax = int(min(np.max(b[:, 1]), h - 1))
        local_ = score_map_full[ymin:ymax + 1, xmin:xmax + 1]
        local_mask = np.zeros_like(local_)
        # Shift the box into the crop's frame (b is a view into boxes_in)
        # and keep only the scores inside the quadrangle.
        b[:, 0] -= xmin
        b[:, 1] -= ymin
        cv2.fillPoly(local_mask, b.astype(np.int32)[np.newaxis, :, :], 1)
        local_ = local_ * local_mask
        ps_idx = np.argwhere(local_ > mask_thresh)
        # Back to full-map (x, y) coordinates.
        b_ps = [[idx[1] + xmin, idx[0] + ymin] for idx in ps_idx]
        if not b_ps:
            continue
        boxes_points.append(b_ps)
    mask_contours = []
    for b in boxes_points:
        # Rasterise the box's pixels: turn the (x, y) list into a
        # (rows, cols) index tuple and paint those pixels.
        mask_bin *= 0
        b = np.array(b)
        b = b[:, ::-1]
        b = b.transpose(1, 0)
        b = (b[0], b[1])
        mask_bin[:, :][b] = 255
        mask_colors[:, :, :][b] = 255
        area_ = np.sum(mask_bin / 255)
        # Skip masks that are too small or cover (almost) the whole map.
        if area_ < min_area or area_ >= h * w * 0.99:
            continue
        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            if tuple(p_in) in points_dict:
                box_ = boxes_all[points_dict[tuple(p_in)]]
                # NOTE(review): box_ is a length-9 row, so box_[0], box_[3]
                # etc. are scalars and these "norms" are absolute differences
                # of single coordinates, not side lengths - confirm whether
                # box_[:8].reshape(4, 2) was intended. Left unchanged.
                poly_h = min(np.linalg.norm(box_[0] - box_[3]),
                             np.linalg.norm(box_[1] - box_[2]))
                poly_w = min(np.linalg.norm(box_[0] - box_[1]),
                             np.linalg.norm(box_[2] - box_[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            # Cap the kernel by the short side of the mask's min-area rect
            # (scaled by dilate_ratio) and never go below 3.
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
        mask_bin = cv2.dilate(mask_bin, kernel)
        contours, _ = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_NONE)
        # Keep the contour with the largest area (first one on ties).
        max_contour = max(contours, key=cv2.contourArea)
        epsilon = 0.01 * cv2.arcLength(max_contour, True)
        approx = cv2.approxPolyDP(max_contour, epsilon, True)
        mask_contours.append(approx)
        cv2.drawContours(mask_colors, max_contour, -1, (0, 255, 0), 3)
        cv2.drawContours(mask_colors, approx, -1, (0, 0, 255), 3)
    return mask_colors, timer, mask_contours, boxes
def detect(image, score_map, geo_map, timer, im_fn, score_map_thresh=0.8, box_thresh=0.3, nms_thres=0.1):
    '''
    Restore text boxes from two score/geometry map pairs and merge them with NMS.

    :param image: not used by this function; kept for call compatibility
    :param score_map: mapping holding the score maps under 'F_score1' and 'F_score2'
    :param geo_map: mapping holding the geometry maps under 'F_geometry1' and 'F_geometry2'
    :param timer: dict updated in place with the 'restore' and 'nms' durations
    :param im_fn: not used by this function; kept for call compatibility
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes; currently unused because the
        average-score filter is disabled in this variant
    :param nms_thres: threshold for nms
    :return: (boxes, timer); boxes is None when NMS keeps nothing, otherwise
        the first nine columns of the merged boxes (8 coordinates + score)
    '''
    if len(score_map['F_score1'].shape) == 4:
        # Drop the batch axis (and the channel axis of the score maps).
        score_map1 = score_map['F_score1'][0, :, :, 0]
        score_map2 = score_map['F_score2'][0, :, :, 0]
        geo_map1 = geo_map['F_geometry1'][0, :, :, ]
        geo_map2 = geo_map['F_geometry2'][0, :, :, ]
    else:
        # Already un-batched maps used to fall through with these names
        # undefined (NameError); use them as they are.
        score_map1 = score_map['F_score1']
        score_map2 = score_map['F_score2']
        geo_map1 = geo_map['F_geometry1']
        geo_map2 = geo_map['F_geometry2']

    # filter the score maps and sort the text pixels via the y axis
    xy_text1 = np.argwhere(score_map1 > score_map_thresh)
    xy_text1 = xy_text1[np.argsort(xy_text1[:, 0])]
    xy_text2 = np.argwhere(score_map2 > score_map_thresh)
    xy_text2 = xy_text2[np.argsort(xy_text2[:, 0])]

    # restore
    start = time.time()
    # Pixel coordinates are scaled by 4 for the first map and by 8 for the
    # second one (presumably the strides of the two outputs - TODO confirm).
    text_box_restored1 = restore_rectangle(xy_text1[:, ::-1] * 4,
                                           geo_map1[xy_text1[:, 0], xy_text1[:, 1], :])  # N*4*2
    text_box_restored2 = restore_rectangle(xy_text2[:, ::-1] * 8,
                                           geo_map2[xy_text2[:, 0], xy_text2[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored1.shape[0] + text_box_restored2.shape[0]))

    boxes1 = np.zeros((text_box_restored1.shape[0], 9), dtype=np.float32)
    boxes2 = np.zeros((text_box_restored2.shape[0], 9), dtype=np.float32)
    boxes1[:, :8] = text_box_restored1.reshape((-1, 8))
    boxes1[:, 8] = score_map1[xy_text1[:, 0], xy_text1[:, 1]]
    boxes2[:, :8] = text_box_restored2.reshape((-1, 8))
    boxes2[:, 8] = score_map2[xy_text2[:, 0], xy_text2[:, 1]]
    boxes = np.concatenate((boxes1, boxes2), axis=0)
    timer['restore'] = time.time() - start

    # nms part
    start = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        # Bail out before anything indexes `boxes` as a 2-D array: the old
        # code only checked for an empty result after boxes[:, :8] and the
        # polygon pass below had already run.
        timer['nms'] = time.time() - start
        return None, timer
    print(boxes)

    boxes_final = joint(boxes[:, :8])
    print("Fix Bug ", boxes_final)
    # Diagnostic pass: every joined polygon is parsed, rounded and sorted by
    # polar angle around its centroid. The result is only printed; nothing
    # computed here reaches the return value.
    for poly_str in boxes_final:
        pts = np.array(poly_str.strip().split(',')).astype(np.float32).reshape(-1, 2)
        # `np.int` was an alias of the builtin int and no longer exists in
        # current NumPy releases; the builtin is the drop-in replacement.
        pts = np.rint(pts).astype(int)
        print(pts)
        pts1 = list(pts)
        print(pts1)
        # compute centroid
        cent = (sum(p[0] for p in pts1) / len(pts1),
                sum(p[1] for p in pts1) / len(pts1))
        print(cent)
        # sort by polar angle
        pts1.sort(key=lambda p: math.atan2(p[1] - cent[1], p[0] - cent[0]))
        pts1 = [list(p) for p in pts1]
        print(pts1)
    timer['nms'] = time.time() - start

    return boxes[:, :9], timer
def IRB(box, score_map, geo_map, score_map_thresh, show_log=True, iter_max=10, iter_stop_iou=0.99, merge_iou=0.2):
    '''
    Iteratively re-fit a detected box from the extreme text pixels inside it.

    Each iteration:
      1. crops the score map to the box and keeps the pixels above
         ``score_map_thresh`` inside a shrunk copy of the box (falling back to
         the full box when the shrunk one contains none);
      2. picks the top-most, bottom-most, left-most and right-most of those
         pixels and restores one box from each via ``restore_rectangle``;
      3. merges the restored boxes whose IoU with the current box exceeds
         ``merge_iou`` together with the current box into their minimum-area
         rectangle.
    Iteration stops when the IoU between consecutive boxes reaches
    ``iter_stop_iou``, stops changing, or ``iter_max`` iterations were run.

    :param box: array with 8 corner coordinates followed by a score; the
        coordinates are divided by 4 to index ``score_map``
    :param score_map: 2-D score map
    :param geo_map: geometry map indexed as geo_map[row, col, :]
    :param score_map_thresh: threshold for score map
    :param show_log: print per-iteration IoU and the total time when True
    :param iter_max: maximum number of iterations
    :param iter_stop_iou: IoU between consecutive boxes at which to stop
    :param merge_iou: minimum IoU with the current box for a restored box to
        take part in the merge
    :return: (box, elapsed_seconds, iteration_count); the current box is
        returned unchanged as soon as an iteration finds nothing to merge
    '''
    start_time = time.time()
    pre_box = box
    # Defined up front so that a configuration running zero iterations
    # (e.g. iter_max=0) returns the input box instead of raising
    # UnboundLocalError at the final return.
    current_box = box
    iou = 0
    pre_iou = -1
    iter_cnt = 0
    while iou < iter_stop_iou and pre_iou != iou and iter_cnt < iter_max:
        pre_iou = iou
        iter_cnt += 1

        # ---- 1. text pixels inside the box (score-map coordinates) ----
        b = pre_box // 4
        x_min = max(min(b[0], b[2], b[4], b[6]), 0)
        x_max = min(max(b[0], b[2], b[4], b[6]), score_map.shape[1])
        y_min = max(min(b[1], b[3], b[5], b[7]), 0)
        y_max = min(max(b[1], b[3], b[5], b[7]), score_map.shape[0])
        local_score = score_map[int(y_min):int(y_max + 1), int(x_min):int(x_max + 1)]
        if local_score.size == 0:
            # The box lies completely outside the score map.
            return pre_box, time.time() - start_time, iter_cnt

        # Box corners relative to the crop origin.
        poly = (b[:8].reshape((4, 2)) - (x_min, y_min)).astype(np.int32)
        # Per corner: length of the shorter of its two adjacent edges.
        r = [min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                 np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
             for i in range(4)]
        shrinked_poly = shrink_poly_hjb_v0(poly.copy(), r, shrink_ratio=0.3)

        mask = np.zeros_like(local_score, dtype=np.uint8)
        cv2.fillPoly(mask, shrinked_poly.astype(np.int32)[np.newaxis, :, :], 1)
        xy_text = np.argwhere(local_score * mask > score_map_thresh)
        if len(xy_text) == 0:
            # Nothing inside the shrunk polygon: retry with the full box,
            # drawn on top of the same mask.
            cv2.fillPoly(mask, poly[np.newaxis, :, :], 1)
            xy_text = np.argwhere(local_score * mask > score_map_thresh)
            if len(xy_text) == 0:
                return pre_box, time.time() - start_time, iter_cnt

        # ---- 2. restore one box from each extreme pixel ----
        extremes = (np.argmin(xy_text[:, 0]), np.argmax(xy_text[:, 0]),
                    np.argmin(xy_text[:, 1]), np.argmax(xy_text[:, 1]))
        # Marking the pixels in a mask removes duplicates when one pixel is
        # extreme in more than one direction.
        picked = np.zeros_like(local_score, dtype=np.uint8)
        for idx in extremes:
            picked[xy_text[idx, 0], xy_text[idx, 1]] = 1
        xy_text = np.argwhere(picked == 1)
        # Back to full score-map coordinates.
        xy_text[:, 0] += int(y_min)
        xy_text[:, 1] += int(x_min)
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                              geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
        b_s = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        b_s[:, :8] = text_box_restored.reshape((-1, 8))
        b_s[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

        # ---- 3. minimum-area rectangle over the overlapping boxes ----
        score = b_s[:, 8].sum() / len(b_s)
        # A separate name keeps the loop-control `iou` untouched here.
        merged = [pb for pb in b_s if calc_iou(pre_box, pb) > merge_iou]
        if len(merged) == 0:
            return pre_box, time.time() - start_time, iter_cnt
        merged.append(pre_box)
        # cv2.minAreaRect expects float32 (or int32) points; a float64 input
        # box would otherwise promote the whole stack to float64.
        corner_points = np.array(merged)[:, :8].reshape((-1, 2)).astype(np.float32)
        rect = cv2.minAreaRect(corner_points)
        points = cv2.boxPoints(rect).reshape((-1))
        current_box = np.insert(points, len(points), score)

        # ---- stop criterion ----
        iou = calc_iou(pre_box, current_box)
        pre_box = current_box
        if show_log:
            print("iter_cnt", iter_cnt, iou)

    IRB2box_time = time.time() - start_time
    if show_log:
        print("IRB2box time:", IRB2box_time)
    return current_box, IRB2box_time, iter_cnt
sharedconv = model_sharedconv(im.copy()) # x_batch['images'].shape f_score_, geo_score_ = model_detection(sharedconv) # -------- # score_map_thresh = 0.20 f_score = f_score_[0, :, :, 0].numpy() geo_score = geo_score_[0, :, :, ].numpy() # filter out by score map xy_text = np.argwhere(f_score > score_map_thresh) xy_text = xy_text[np.argsort(xy_text[:, 0])] if len(xy_text) > 0: # restore to coordinates text_box_restored = restore_rectangle(origin=xy_text[:, ::-1] * 4, geometry=geo_score[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2 # filter out by average score # box_thresh = 0.95 # ids = [] # for i, box in enumerate(text_box_restored): # mask = np.zeros_like(f_score_[0, :, :, :], dtype=np.uint8) # mask = cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1) # id = cv2.mean(f_score_[0, :, :, :].numpy(), mask)[0] # ids.append(id) # text_box_restored = text_box_restored[np.array(ids) > box_thresh] # nms selected_indices = tf.image.non_max_suppression(boxes=text_box_restored[:, ::2, :].reshape((-1, 4)).astype(np.float32), scores=f_score[xy_text[:, 0], xy_text[:, 1]], max_output_size=50,
def detect(image, score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    Restore text boxes from a score map and a geometry map.

    :param image: not read by this function
    :param score_map: score map, either 2-D or a 4-D batch whose first
        element / first channel is used
    :param geo_map: geometry map matching ``score_map``
    :param timer: dict updated in place with the 'restore' and 'nms' durations
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes (on the mean score inside a box)
    :param nms_thres: threshold for nms
    :return: (boxes, timer); boxes is None when NMS keeps nothing
    '''
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Text pixels above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore one quadrangle per text pixel.
    restore_started = time.time()
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])  # N*4*2
    quad_count = quads.shape[0]
    print('{} text boxes before nms'.format(quad_count))
    boxes = np.zeros((quad_count, 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - restore_started

    # Locality-aware NMS.
    nms_started = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - nms_started

    if boxes.shape[0] == 0:
        return None, timer

    # Re-score every merged box with the mean of the score map inside it and
    # drop the weak ones (this differs from the original paper).
    for row_idx, merged in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        quad = merged[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, quad, 1)
        boxes[row_idx, 8] = cv2.mean(score_map, region)[0]

    confident = boxes[:, 8] > box_thresh
    boxes = boxes[confident]
    return boxes[:, :9], timer
def _polygon_iou(polygon1, polygon2):
    '''
    Compute the IoU of two quadrilaterals.

    :param polygon1: 4, 2
    :param polygon2: 4, 2
    :return: 0~1 value
    '''
    polygon1 = Polygon(polygon1)
    if not polygon1.is_valid:
        # buffer(0) is the usual Shapely idiom for repairing an invalid ring.
        polygon1 = polygon1.buffer(0)
    polygon2 = Polygon(polygon2)
    if not polygon2.is_valid:
        polygon2 = polygon2.buffer(0)
    intersection_polygon = polygon1.intersection(polygon2)
    if not intersection_polygon.is_valid:
        return 0.0
    intersection_area = intersection_polygon.area
    union_area = polygon1.area + polygon2.area - intersection_area
    if union_area <= 0:
        # Two degenerate (zero-area) polygons: avoid dividing by zero.
        return 0.0
    return (1.0 * intersection_area) / (1.0 * union_area)


def _max_iou_per_box(boxs, cur_gt):
    '''Return, for every box, its largest IoU with any ground-truth box.'''
    IoU_values = []
    for box in boxs:
        cur_IoU_value = 0.0
        for gt_box in cur_gt:
            # A ground-truth entry whose 8 coordinates sum to -8 ends the
            # list (presumably padding filled with -1 - TODO confirm).
            if np.sum(gt_box) == -8:
                break
            cur_IoU_value = max(cur_IoU_value,
                                _polygon_iou(np.asarray(box), np.asarray(gt_box)))
        IoU_values.append(cur_IoU_value)
    return IoU_values


def cal_IoU_gt_py_multiprocess(pred_geo, pred_cls, gt, threshold=0.8):
    '''
    From the predicted pred_geo and pred_cls, compute for every pixel whose
    score exceeds ``threshold`` the best IoU between its restored box and the
    ground truth. All other pixels stay 0.

    The work of one batch element is split into at most 8 chunks that are
    evaluated by a multiprocessing pool. The workers are module-level
    functions because functions defined inside another function cannot be
    pickled and sent to the pool.

    :param pred_geo: N, W, H, 5
    :param pred_cls: N, W, H, 1
    :param gt: N, M, 4, 2
    :param threshold: 0.8
    :return: float32 array of shape N, W, H, 1
    '''
    # Drop only the trailing channel axis. A blanket np.squeeze would also
    # remove the batch axis when N == 1.
    pred_cls = np.asarray(pred_cls)
    if pred_cls.ndim == 4:
        pred_cls = pred_cls[:, :, :, 0]
    shape = np.shape(pred_geo)
    IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)

    process_num = 8
    pool = None  # created lazily, shared by all batch elements
    try:
        for batch_id in range(shape[0]):
            score_map = pred_cls[batch_id]
            geo_map = pred_geo[batch_id]
            cur_gt = gt[batch_id]
            if len(np.shape(score_map)) != 2:
                logging.log(logging.ERROR, 'score map shape isn\'t correct!')
                raise AssertionError('score map shape isn\'t correct!')

            xy_text = np.argwhere(score_map > threshold)
            if len(xy_text) == 0:
                # Nothing above the threshold: leave this batch element at 0.
                continue
            # sort the text boxes via the y axis
            xy_text = xy_text[np.argsort(xy_text[:, 0])]
            # NOTE(review): the i-th restored box is assumed to belong to the
            # i-th pixel of xy_text - confirm that restore_rectangle keeps
            # the input order.
            text_box_restored = restore_rectangle(xy_text[:, ::-1],
                                                  geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2

            if pool is None:
                pool = Pool(processes=process_num)
            # Integer division so the chunk size stays an int on Python 3.
            chunk_size = len(xy_text) // process_num + 1
            pending = []
            for begin in range(0, len(xy_text), chunk_size):
                coords = xy_text[begin:begin + chunk_size]
                chunk_boxes = text_box_restored[begin:begin + chunk_size]
                pending.append((coords,
                                pool.apply_async(func=_max_iou_per_box,
                                                 args=(chunk_boxes, cur_gt))))
            for coords, res in pending:
                IoU_gt[batch_id, coords[:, 0], coords[:, 1], 0] = res.get()
    finally:
        if pool is not None:
            pool.close()
            pool.join()
    return IoU_gt
def detect(score_map, geo_map, timer, score_map_thresh=FLAGS.threshold, box_thresh=FLAGS.box_thresh, nms_thres=0.2):
    '''
    Restore text boxes from a score map and a geometry map, optionally
    refining each long box with IRB.

    :param score_map: score map, either 2-D or a 4-D batch whose first
        element / first channel is used
    :param geo_map: geometry map matching ``score_map``
    :param timer: dict updated in place with the 'restore' and 'nms' durations
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return: (boxes, timer) when FLAGS.IRB is False, otherwise
        (boxes, timer, summed IRB time, summed IRB iteration count);
        boxes is None when nothing is left
    '''
    if len(score_map.shape) == 4:
        score_map, geo_map = score_map[0, :, :, 0], geo_map[0, :, :, ]

    # Text pixels above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore one quadrangle per text pixel.
    tic = time.time()
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])  # N*4*2
    quad_count = quads.shape[0]
    print('{} text boxes before nms'.format(quad_count))
    boxes = np.zeros((quad_count, 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # Locality-aware NMS.
    tic = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return (None, timer) if FLAGS.IRB == False else (None, timer, 0, 0)

    # Re-score every merged box with the mean of the score map inside it and
    # drop the weak ones (this differs from the original paper).
    for row_idx, merged in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        quad = merged[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, quad, 1)
        boxes[row_idx, 8] = cv2.mean(score_map, region)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    if FLAGS.IRB == False:
        return boxes, timer

    # ---- IRB: refine every box whose longest side is long enough ----
    edge_pairs = ((0, 3), (1, 2), (0, 1), (2, 3))
    refined = []
    IRB2box_times = 0
    iter_cnts = 0
    for b in boxes:
        corners = b[0:8].reshape((4, 2)).astype(np.int32)
        longest_side = max(np.linalg.norm(corners[p] - corners[q]) for p, q in edge_pairs)
        if longest_side > FLAGS.start_IRB_max_len:
            current_box, spent, iterations = IRB(
                b, score_map, geo_map, score_map_thresh,
                show_log=True, merge_iou=nms_thres)
        else:
            current_box, spent, iterations = b, 0, 0
        refined.append(current_box)
        IRB2box_times += spent
        iter_cnts += iterations

    point_boxes = np.array(refined)
    if point_boxes.shape[0] == 0:
        return None, timer, IRB2box_times, iter_cnts

    point_boxes = point_boxes[point_boxes[:, 8] > box_thresh]
    keep = py_cpu_nms(point_boxes.astype('float32'), nms_thres)
    point_boxes = point_boxes[keep]
    return point_boxes, timer, IRB2box_times, iter_cnts