def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    """Extract text polygons from the model's score and geometry maps.

    Args:
        score: score map from the model; documented by the original author
            as a numpy.ndarray of shape (1, row, col). Only plane 0 is read.
        geo: geometry map from the model; documented by the original author
            as a numpy.ndarray of shape (5, row, col).
        score_thresh: a pixel is kept when its score is strictly greater
            than this value.
        nms_thresh: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        Whatever ``nms_locality.nms_locality`` returns for the (n, 9)
        candidate rows (eight polygon coordinates followed by the score),
        or ``None`` when no pixel passes the threshold or when
        ``restore_polys`` yields no polygon.
    """
    score_plane = score[0, :, :]

    # argwhere yields (row, col) pairs for every pixel above the threshold.
    hits = np.argwhere(score_plane > score_thresh)
    if hits.size == 0:
        return None

    # Order the hits by row before restoring polygons.
    hits = hits[np.argsort(hits[:, 0])]
    points_xy = hits[:, ::-1].copy()  # flipped to (x, y)
    geo_at_hits = geo[:, hits[:, 0], hits[:, 1]]  # one column per hit

    polys_restored, index = restore_polys(points_xy, geo_at_hits, score_plane.shape)
    if polys_restored.size == 0:
        return None

    # Row layout: 8 polygon coordinates + the score of the originating pixel.
    # ``index`` selects which hits produced a restored polygon.
    candidates = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = polys_restored
    candidates[:, 8] = score_plane[hits[index, 0], hits[index, 1]]

    return nms_locality.nms_locality(candidates.astype(np.float64), nms_thresh)
def get_text_boxes(image, W, H):
    """Run the EAST text detector on ``image`` and return the NMS result.

    The network is loaded from ``frozen_east_text_detection.pb`` on every
    call, the image is converted to a (W, H) blob, and the two output layers
    are decoded with ``decode_predictions`` before being passed to
    ``nms_locality``.

    Args:
        image: input image handed to ``cv2.dnn.blobFromImage``.
        W: blob width passed to ``cv2.dnn.blobFromImage``.
        H: blob height passed to ``cv2.dnn.blobFromImage``.

    Returns:
        The first element of the pair returned by ``nms_locality``.
    """
    # The two EAST outputs of interest: text probabilities first, then the
    # geometry used to derive box coordinates.
    output_layers = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3",
    ]

    print("[INFO] loading EAST text detector...")
    east_net = cv2.dnn.readNet('frozen_east_text_detection.pb')

    blob = cv2.dnn.blobFromImage(
        image,
        1.0,
        (W, H),
        (123.68, 116.78, 103.94),
        swapRB=True,
        crop=False,
    )

    # Timed section: forward pass, decoding and non-maxima suppression.
    started_at = time.time()
    east_net.setInput(blob)
    scores, geometry = east_net.forward(output_layers)
    rects, confidences = decode_predictions(scores, geometry)
    boxes, _ = nms_locality(np.array(rects, dtype=np.float32), confidences)
    finished_at = time.time()

    print("[INFO] text detection took {:.6f} seconds".format(finished_at - started_at))
    return boxes
def detect(score_map, geo_map, timer, scale=4, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        timer: dict updated in place with ``'restore'`` and ``'nms'``
            elapsed times (seconds).
        scale: ratio between the original image and the score map; e.g. 4
            means score-map coordinates are multiplied by 4 to reach image
            coordinates.
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        ``(boxes, timer)``. ``boxes`` is an empty ``np.array([])`` when NMS
        leaves nothing; otherwise the rows that survive the mean-score
        filter (8 coordinates + mean score).
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :]

    # (row, col) of every pixel above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore one quadrangle per hit; points go in as (x, y) at image scale.
    tic = time.time()
    quads = restore_rectangle(hits[:, ::-1] * scale, geo_map[rows, cols, :])
    print('{} text boxes before nms'.format(quads.shape[0]))
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # Locality-aware NMS.
    tic = time.time()
    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return np.array([]), timer

    # Replace each box score by the mean of the score map inside the box
    # (the original notes this differs from the paper), then filter on it.
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // scale
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.14):
    """Restore text boxes from a score map and a geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        timer: dict updated in place with ``'restore'`` and ``'nms'``
            elapsed times (seconds).
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: threshold on the mean score of a box.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        ``(boxes, timer)``. ``boxes`` is ``None`` when NMS leaves nothing.
        If the mean-score filter would remove every box, the unfiltered
        boxes are returned instead.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :]

    # (row, col) of every pixel above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore one quadrangle per hit; points are (x, y) scaled by 4
    # (presumably the score-map-to-image ratio).
    tic = time.time()
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])
    print('[boxes] {} text boxes before nms'.format(quads.shape[0]))
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # Locality-aware NMS.
    tic = time.time()
    boxes = nms_locality.nms_locality(candidates.astype(np.float32), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Re-score each box with the mean of the score map inside it (the
    # original notes this differs from the paper).
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    # Only apply the filter when it leaves at least one box.
    confident = boxes[boxes[:, 8] > box_thresh]
    if len(confident) != 0:
        boxes = confident

    print('[boxes] {} text boxes after nms'.format(len(boxes)))
    return boxes, timer
def nms_boxBuild(self, score_map, geo_map, timer, ratio, score_map_thresh=0.5, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        timer: returned unchanged; this function does not write to it.
        ratio: not used by this function.
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        ``(boxes, timer)`` where ``boxes`` is the NMS output after the
        mean-score filter (the filter is skipped when NMS returned nothing).
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, :]

    # (row, col) of every pixel above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore one quadrangle per hit; points are (x, y) scaled by 4
    # (presumably the score-map-to-image ratio).
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]

    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)

    # Re-score each box with the mean of the score map inside it (the
    # original notes this differs from the paper).
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    if len(boxes) > 0:
        boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer
def detect(score_map, rbox_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and an RBOX geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        rbox_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used (the original notes it is then [h, w, 5]).
        timer: dict updated in place with ``'restore'`` and ``'nms'``
            elapsed times (seconds).
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        ``(boxes, timer)``; ``boxes`` is ``None`` when NMS leaves nothing.
    """
    if len(score_map.shape) == 4:
        # After slicing: score_map is [h, w], rbox_map is [h, w, 5].
        score_map = score_map[0, :, :, 0]
        rbox_map = rbox_map[0, :, :]

    # Coordinates of all pixels above the threshold: pixels above it are
    # treated as text, the rest as non-text. Ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Per the original author's note, the detector emits RBOX geometry
    # (distances from a point to the four box edges plus one angle) and
    # restore_rectangle turns it into the rotated rectangle for the text.
    tic = time.time()
    quads = restore_rectangle(hits[:, ::-1] * 4, rbox_map[rows, cols, :])
    print('{} text boxes before nms'.format(quads.shape[0]))
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # Filter all restored rectangles with locality-aware NMS.
    tic = time.time()
    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Compute the mean of the score map over each box's region and use that
    # mean for a further round of filtering.
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    Restore text boxes from a score map and a geometry map.

    :param score_map: 2-D score map, or a 4-D array from which
        ``[0, :, :, 0]`` is taken
    :param geo_map: geometry map; when ``score_map`` is 4-D its first element
        along axis 0 is used
    :param timer: dict updated in place with ``'restore'`` and ``'nms'``
        elapsed times (seconds)
    :param score_map_thresh: threshold for the score map (strictly greater)
    :param box_thresh: threshold on the mean score of a box
    :param nms_thres: threshold forwarded to ``nms_locality.nms_locality``
    :return: ``(boxes, timer)``; ``boxes`` is ``None`` when no pixel passes
        the score threshold or when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    if xy_text.shape[0] == 0:
        # Nothing to restore: return the documented "no boxes" result
        # directly instead of relying on how the helpers treat empty input.
        # The timer keys are still populated for callers that read them.
        print('{} text boxes before nms'.format(0))
        timer['restore'] = 0.0
        timer['nms'] = 0.0
        return None, timer

    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]

    # restore: points go in as (x, y) scaled by 4 (presumably the
    # score-map-to-image ratio)
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    # print() call form is valid on both Python 2 and Python 3.
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start

    # nms part
    start = time.time()
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    # here we filter some low score boxes by the average score map, this is
    # different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        # Floor division keeps the coordinates integral on Python 3 too;
        # true division would hand cv2.fillPoly a float array.
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes, timer
def detect(self, score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        The boxes surviving NMS and the mean-score filter, or ``None`` when
        NMS leaves nothing.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :]

    # (row, col) of every pixel above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore one quadrangle per hit; points are (x, y) scaled by 4
    # (presumably the score-map-to-image ratio).
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])
    print('{} text boxes before nms'.format(quads.shape[0]))
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]

    # Locality-aware NMS.
    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)
    if boxes.shape[0] == 0:
        return None

    # Re-score each box with the mean of the score map inside it (the
    # original notes this differs from the paper), then filter on it.
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    return boxes[boxes[:, 8] > box_thresh]
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        timer: dict updated in place with ``'restore'`` and ``'nms'``
            elapsed times (seconds).
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        ``(boxes, timer)``; ``boxes`` is ``None`` when NMS leaves nothing.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :]

    # (row, col) of every pixel above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Restore phase: one N*4*2 quadrangle set, flattened to 8 coords + score.
    tic = time.time()
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])
    print('{} text boxes before nms'.format(quads.shape[0]))
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # NMS phase.
    tic = time.time()
    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Replace each score with the mean of the score map inside the box and
    # keep only boxes whose mean exceeds box_thresh.
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def postprocess_image(score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thresh=0.2):
    """Turn a score map and a geometry map into filtered text boxes.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thresh: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        The boxes surviving NMS and the mean-score filter, or ``None`` when
        NMS leaves nothing.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :]

    # Stage 1: keep pixels above the score threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    # Stage 2: restore a quadrangle per pixel and run locality-aware NMS.
    quads = restore_rectangle(hits[:, ::-1] * 4, geo_map[rows, cols, :])
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]
    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thresh)

    print("num_boxes before nms = {}".format(quads.shape[0]))
    print("num_boxes after nms = {}".format(boxes.shape[0]))

    if boxes.shape[0] == 0:
        return None

    # Stage 3: score each box by the mean of the score map inside it (the
    # original notes this differs from the paper) and drop the weak ones.
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    return boxes[boxes[:, 8] > box_thresh]
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text boxes from a score map and a geometry map.

    Args:
        score_map: 2-D score map, or a 4-D array from which ``[0, :, :, 0]``
            is taken.
        geo_map: geometry map; when ``score_map`` is 4-D its first element
            along axis 0 is used.
        timer: dict updated in place with ``'restore'`` and ``'nms'``
            elapsed times (seconds).
        score_map_thresh: pixels with a score strictly above this are kept.
        box_thresh: boxes whose mean score is not above this are dropped.
        nms_thres: threshold forwarded to ``nms_locality.nms_locality``.

    Returns:
        ``(boxes, timer)``; ``boxes`` is ``None`` when NMS leaves nothing.
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :]

    # (row, col) of every pixel above the threshold, ordered by row.
    hits = np.argwhere(score_map > score_map_thresh)
    hits = hits[np.argsort(hits[:, 0])]
    rows, cols = hits[:, 0], hits[:, 1]

    tic = time.time()
    # Flip (y, x) to (x, y) and scale up by 4 (the original notes the
    # network output is downsized by 4).
    origins_xy = hits[:, ::-1] * 4
    quads = restore_rectangle_rbox(origins_xy, geo_map[rows, cols, :], score_map.shape)
    print('{} text boxes before nms'.format(quads.shape[0]))

    # Row layout: 8 quadrangle coordinates + the score of the source pixel.
    candidates = np.zeros((quads.shape[0], 9), dtype=np.float32)
    candidates[:, :8] = quads.reshape((-1, 8))
    candidates[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # Locality-aware NMS.
    tic = time.time()
    boxes = nms_locality.nms_locality(candidates.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Re-score each box with the mean of the score map inside it (the
    # original notes this differs from the paper), then filter on it.
    for idx, quad in enumerate(boxes):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = quad[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    return boxes[boxes[:, 8] > box_thresh], timer
def detect_contours(score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.1):
    '''
    Obtain interpretable text boxes from the network predictions by grouping
    the thresholded score map into contours.

    :param score_map: 2-D score map, or a 4-D array from which
        ``[0, :, :, 0]`` is taken
    :param geo_map: geometry map; when ``score_map`` is 4-D its first element
        along axis 0 is used
    :param score_map_thresh: binarisation threshold for the score map
        (applied on the 0-255 scaled, dilated map)
    :param box_thresh: boxes whose mean score is not above this are dropped
    :param nms_thres: threshold forwarded to ``nms_locality.nms_locality``
    :return: array of boxes (8 coordinates + mean score per row), or ``None``
        when no contour is found or NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    h, w = score_map.shape[:2]

    # Scale to 0-255 and dilate with a 1x3 kernel.
    score_img = score_map * 255
    kernel = np.uint8(np.ones((1, 3)))
    score_img = cv2.dilate(score_img, kernel)
    im = np.array(score_img, np.uint8)

    # Binarise: values above the threshold become 255, the rest 0.
    _, im = cv2.threshold(im, score_map_thresh * 255, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; contours is always second to last.
    contours0 = cv2.findContours(im.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    res_boxes = []
    for cnt in contours0:
        vis = np.zeros((h, w), np.uint8)
        # Approximate the contour with a polygon and fill it.
        contours = cv2.approxPolyDP(cnt, 3, True)
        cv2.fillConvexPoly(vis, np.int32(contours), (255, 255, 255))

        # Coordinates of the filled pixels. They are in score-map space,
        # which the original notes is the image shrunk by 4, hence the * 4.
        xy_text = np.argwhere(vis == 255)
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # This variant of restore_rectangle returns a pair; the second
        # element is not used here.
        text_box_restored, _ = restore_rectangle(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])

        # Every row of this contour carries the contour's summed score; it is
        # replaced by the mean score after NMS.
        score_sum = np.sum(score_map[xy_text[:, 0], xy_text[:, 1]])
        rec_box = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        rec_box[:, :8] = text_box_restored.reshape((-1, 8))
        rec_box[:, 8] = score_sum
        res_boxes.append(rec_box)

    if not res_boxes:
        # No contour above the threshold: nothing to detect.
        return None

    # Stack the per-contour rows. Squeezing axis 0 of the stacked list only
    # works for exactly one contour; concatenation gives the same result in
    # that case and also handles several contours of different sizes.
    boxes = np.concatenate(res_boxes, axis=0)
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)

    # Order the boxes by their second value, then their first
    # (presumably the y then x of the first corner).
    boxes = np.array(sorted(boxes.tolist(), key=lambda k: [k[1], k[0]]))

    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is
    # different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes