def text_detect(img,
                MAX_HORIZONTAL_GAP=30,
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.7,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.3):
    """Detect text in ``img`` and return one 8-value quadrilateral per line.

    The image is passed through ``detect.text_detect`` to obtain raw
    proposals and scores, which are then handed to a ``TextDetector`` and
    finally converted with ``get_boxes``.

    Args:
        img: Image object exposing ``.shape`` and accepted by ``np.array``.
        MAX_HORIZONTAL_GAP: Forwarded to the ``TextDetector`` constructor.
        MIN_V_OVERLAPS: Forwarded to the ``TextDetector`` constructor.
        MIN_SIZE_SIM: Forwarded to the ``TextDetector`` constructor.
        TEXT_PROPOSALS_MIN_SCORE: Forwarded to ``TextDetector.detect``.
        TEXT_PROPOSALS_NMS_THRESH: Forwarded to ``TextDetector.detect``.
        TEXT_LINE_NMS_THRESH: Forwarded to ``TextDetector.detect``.

    Returns:
        A list with one 8-element list per row of ``get_boxes`` output,
        holding that row's columns in the order 0, 1, 2, 3, 6, 7, 4, 5
        (i.e. the third and fourth coordinate pairs are swapped).
    """
    proposals, proposal_scores = detect.text_detect(np.array(img))
    proposals = np.array(proposals, dtype=np.float32)
    proposal_scores = np.array(proposal_scores, dtype=np.float32)

    line_builder = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS,
                                MIN_SIZE_SIM)
    image_hw = img.shape[:2]
    # Scores are passed as a column: one extra trailing axis is added.
    line_boxes = line_builder.detect(
        proposals,
        proposal_scores[:, np.newaxis],
        image_hw,
        TEXT_PROPOSALS_MIN_SCORE,
        TEXT_PROPOSALS_NMS_THRESH,
        TEXT_LINE_NMS_THRESH,
    )
    quads = get_boxes(line_boxes)

    # Unit scale factors; the multiplication is kept so the numeric type of
    # the emitted coordinates is exactly what it has always been.
    scale_x = 1
    scale_y = 1
    # Even source columns are x coordinates, odd ones are y coordinates.
    column_order = (0, 1, 2, 3, 6, 7, 4, 5)
    return [
        [
            quad[col] * (scale_x if col % 2 == 0 else scale_y)
            for col in column_order
        ]
        for quad in quads
    ]
def text_detect(img,
                MAX_HORIZONTAL_GAP=30,
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.7,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.3,
                bili=1.2):
    """Detect text lines in ``img`` and also return the intermediate results.

    Stage one obtains raw boxes and scores from ``detect.text_detect``
    (the original author's notes say this is a YOLO model producing
    single-character boxes). Stage two hands them to a ``TextDetector``,
    which - per the original notes - joins the character boxes into lines.

    Side effects: saves ``img`` to ``look.png`` in the current working
    directory and prints the shape of the ``get_boxes`` result.

    Args:
        img: Image array exposing ``.shape``; accepted by
            ``Image.fromarray`` and ``np.array``.
        MAX_HORIZONTAL_GAP: Forwarded to the ``TextDetector`` constructor.
        MIN_V_OVERLAPS: Forwarded to the ``TextDetector`` constructor.
        MIN_SIZE_SIM: Forwarded to the ``TextDetector`` constructor.
        TEXT_PROPOSALS_MIN_SCORE: Forwarded to ``TextDetector.detect``.
        TEXT_PROPOSALS_NMS_THRESH: Forwarded to ``TextDetector.detect``.
        TEXT_LINE_NMS_THRESH: Forwarded to ``TextDetector.detect``.
        bili: Forwarded as the last argument of ``TextDetector.detect``.

    Returns:
        An 8-tuple ``(newBox, scores, boxesForSingle, scoresForSingle,
        keepIndForSingle, tp_groups, Allboxes, AllScores)``:

        * ``newBox`` - list with one 8-element list per ``get_boxes`` row,
          columns taken in the order 0, 1, 2, 3, 6, 7, 4, 5.
        * ``scores`` - second value returned by ``TextDetector.detect``
          (original note: the final per-line scores).
        * ``boxesForSingle``, ``scoresForSingle``, ``keepIndForSingle`` -
          passed through unchanged from ``TextDetector.detect``.
        * ``tp_groups`` - passed through unchanged (original note: for each
          line, the indices of its characters within ``boxesForSingle``).
        * ``Allboxes``, ``AllScores`` - the raw detector output converted
          to ``float32`` arrays.
    """
    # NOTE(review): debug dump written on every call - confirm it is still
    # wanted before shipping.
    Image.fromarray(img).save("look.png")

    # Stage 1: raw boxes and scores from the detector.
    raw_boxes, raw_scores = detect.text_detect(np.array(img))
    Allboxes = np.array(raw_boxes, dtype=np.float32)
    AllScores = np.array(raw_scores, dtype=np.float32)

    # Stage 2: hand the raw results to the line builder.
    line_builder = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS,
                                MIN_SIZE_SIM)
    image_hw = img.shape[:2]
    (line_boxes, line_scores, keepIndForSingle, tp_groups, boxesForSingle,
     scoresForSingle) = line_builder.detect(
         Allboxes,
         AllScores[:, np.newaxis],
         image_hw,
         TEXT_PROPOSALS_MIN_SCORE,
         TEXT_PROPOSALS_NMS_THRESH,
         TEXT_LINE_NMS_THRESH,
         bili,
     )

    quads = get_boxes(line_boxes)
    print(quads.shape, "text_recs.shape")

    # Unit scale factors; the multiplication is kept so the numeric type of
    # the emitted coordinates is exactly what it has always been.
    scale_x = 1
    scale_y = 1
    # Even source columns are x coordinates, odd ones are y coordinates.
    column_order = (0, 1, 2, 3, 6, 7, 4, 5)
    newBox = [
        [
            quad[col] * (scale_x if col % 2 == 0 else scale_y)
            for col in column_order
        ]
        for quad in quads
    ]
    return (newBox, line_scores, boxesForSingle, scoresForSingle,
            keepIndForSingle, tp_groups, Allboxes, AllScores)
def detect(
        self,
        text_proposals,
        scores,
        size,
        TEXT_PROPOSALS_MIN_SCORE=0.7,
        TEXT_PROPOSALS_NMS_THRESH=0.3,
        TEXT_LINE_NMS_THRESH=0.3,
        LINE_MIN_SCORE=0.8,
):
    """Turn scored text proposals into text-line boxes.

    Args:
        text_proposals: Array of proposal boxes, indexable along axis 0.
        scores: Array of proposal scores aligned with ``text_proposals``
            along axis 0 (callers presumably pass an ``(N, 1)`` column -
            confirm against the call site).
        size: Forwarded unchanged to the connector's ``get_text_lines``.
        TEXT_PROPOSALS_MIN_SCORE: Proposals scoring at or below this value
            are dropped; the value is also passed on to ``nms``.
        TEXT_PROPOSALS_NMS_THRESH: Threshold passed to ``nms`` to remove
            duplicate character boxes.
        TEXT_LINE_NMS_THRESH: Threshold passed to ``rotate_nms`` to remove
            duplicate text lines.
        LINE_MIN_SCORE: Text-line confidence passed to ``rotate_nms``.

    Returns:
        ``(text_lines, scores)`` as produced by ``rotate_nms``, or a bare
        empty list when no proposal survives the score filter.
    """
    # Row indices of every proposal scoring strictly above the threshold.
    confident_rows = np.where(scores > TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[confident_rows]
    scores = scores[confident_rows]

    # Reorder the survivors from highest to lowest score.
    descending = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[descending]
    scores = scores[descending]

    if len(text_proposals) == 0:
        # NOTE(review): this path yields a plain list, not a 2-tuple, so a
        # caller that unpacks two values will fail here.
        return []

    # Non-maximum suppression over the individual proposals.
    text_proposals, scores = nms(text_proposals, scores,
                                 TEXT_PROPOSALS_MIN_SCORE,
                                 TEXT_PROPOSALS_NMS_THRESH)
    scores = normalize(scores)

    # Merge the proposals into text lines.
    text_lines, scores = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)
    text_lines = get_boxes(text_lines)

    # Final suppression pass over the merged lines.
    text_lines, scores = rotate_nms(text_lines, scores, LINE_MIN_SCORE,
                                    TEXT_LINE_NMS_THRESH)
    return text_lines, scores
def detect(self,
           text_proposals,
           scores,
           size,
           TEXT_PROPOSALS_MIN_SCORE=0.1,
           TEXT_PROPOSALS_NMS_THRESH=0.3,
           TEXT_LINE_NMS_THRESH=0.99,
           LINE_MIN_SCORE=0.1):
    """Turn scored text proposals into text-line boxes.

    Args:
        text_proposals: Array of proposal boxes, indexable along axis 0.
        scores: Array of proposal scores aligned with ``text_proposals``
            along axis 0 (callers presumably pass an ``(N, 1)`` column -
            confirm against the call site).
        size: Forwarded unchanged to the connector's ``get_text_lines``.
        TEXT_PROPOSALS_MIN_SCORE: Proposals scoring at or below this value
            are dropped; the value is also passed on to ``nms``.
        TEXT_PROPOSALS_NMS_THRESH: Threshold passed to ``nms`` to remove
            duplicate character boxes.
        TEXT_LINE_NMS_THRESH: Threshold passed to ``rotate_nms`` to remove
            duplicate text lines.
        LINE_MIN_SCORE: Text-line confidence passed to ``rotate_nms``.

    Returns:
        ``(text_lines, scores)`` as produced by ``rotate_nms``, or a bare
        empty list when no proposal survives the score filter.
    """
    # Row indices of every proposal scoring strictly above the threshold.
    passing_rows = np.where(scores > TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[passing_rows]
    scores = scores[passing_rows]

    # Indices that order the survivors from highest to lowest score.
    by_score_desc = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[by_score_desc]
    scores = scores[by_score_desc]

    if len(text_proposals) == 0:
        # NOTE(review): this path yields a plain list, not a 2-tuple, so a
        # caller that unpacks two values will fail here.
        return []

    # Non-maximum suppression over the individual proposals.
    text_proposals, scores = nms(text_proposals, scores,
                                 TEXT_PROPOSALS_MIN_SCORE,
                                 TEXT_PROPOSALS_NMS_THRESH)
    scores = normalize(scores)

    # Merge the proposals into text lines.
    text_lines, scores = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)
    text_lines = get_boxes(text_lines)

    # Final suppression pass over the merged lines.
    text_lines, scores = rotate_nms(text_lines, scores, LINE_MIN_SCORE,
                                    TEXT_LINE_NMS_THRESH)
    return text_lines, scores