def validate_one_epoch(self, epoch):
    box_predictor = models.BoxesPredictor(0.9)
    recall_cum = 0
    prec_cum = 0
    iou_cum = 0
    self.model = self.model.cuda()
    i = 0
    for _, batch in enumerate(self.test_dataloader):
        images = batch['image'].permute(0, 3, 1, 2).cuda()
        bbox_maps = batch['bbox_map'].unsqueeze(1).float()
        quad = batch['quad_formatting'].permute(0, 3, 1, 2)
        ignore_map = batch['ignore_map'].unsqueeze(1)
        prediction = self.model(images)
        for key in prediction:
            prediction[key] = prediction[key].cpu()
        boxes = box_predictor(prediction)[0]
        boxes_p = np.hstack(
            [boxes.detach().numpy(), np.ones([boxes.shape[0], 1])])
        boxes_pr = lanms.merge_quadrangle_n9(boxes_p, 0.1)
        valid_boxes = boxes_pr[:, :8]
        ground_boxes = box_predictor({
            "score_map": bbox_maps,
            "geometry": quad.float()
        })[0].numpy()
        if ground_boxes.shape[0] != 0:
            ground_boxes = np.hstack(
                [ground_boxes, np.ones([ground_boxes.shape[0], 1])])
            ground_boxes = lanms.merge_quadrangle_n9(ground_boxes, 0.1)
            ground_boxes = ground_boxes[:, :8]
        recall, prec, iou = metrics.matchBoxes(ground_boxes, valid_boxes, thr=0.5)
        recall_cum += recall
        prec_cum += prec
        iou_cum += iou
        self.print_valid_results(epoch, i, recall_cum / (i + 1),
                                 prec_cum / (i + 1), iou_cum / (i + 1))
        i += 1
    print()
    self.model = self.model.cpu()
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    score = score[0, :, :]
    # get the coordinates whose score exceeds the threshold
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    # lanms here is the original author's C++ Locality-Aware NMS, which should be faster;
    # a pure-Python implementation: https://github.com/argman/EAST/blob/master/locality_aware_nms.py
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    return boxes
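# Hedged usage sketch for get_boxes above. Assumptions (not part of the original
# snippet): `net` is an EAST-style PyTorch model whose forward pass yields a
# (1,row,col) score map and a (5,row,col) geo map, and numpy/restore_polys/lanms
# are available exactly as in the surrounding code; all names here are illustrative.
def run_get_boxes_example(net, img_tensor):
    import torch  # assumed dependency of the surrounding project
    net.eval()
    with torch.no_grad():
        score, geo = net(img_tensor.unsqueeze(0))
    boxes = get_boxes(score.squeeze(0).cpu().numpy(),
                      geo.squeeze(0).cpu().numpy(),
                      score_thresh=0.9, nms_thresh=0.2)
    if boxes is None:
        return None
    quads = boxes[:, :8].reshape(-1, 4, 2)  # (n,4,2) corner points
    scores = boxes[:, 8]                    # per-box confidences kept by lanms
    return quads, scores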
def detect(score_map, geo_map, score_map_thresh, box_thresh, nms_thres):
    ''' '''
    if len(score_map.shape) == 3:
        score_map = score_map[:, :, 0]
        # geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     color=np.array((255, 0, 0)))
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
def nms(self, dets):
    if self.is_python35:
        import lanms
        dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh)
    else:
        dets = nms_locality(dets, self.nms_thresh)
    return dets
def detect(score_map, geo_map, timer, score_map_thresh=0.1, box_thresh=0.1, nms_thres=0.5):
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None, timer
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer
def detect(score_map, geo_map, score_map_thresh=0.1, box_thresh=0.005, nms_thres=0.25):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    xy_text = np.argwhere(score_map > score_map_thresh)
    if len(xy_text) < 1:
        return None
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     color=np.array((255, 0, 0)))
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    score = score[0, :, :]
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    return boxes
def detect(im_fn, ratio_h, ratio_w, score_map, geo_map, timer,
           score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    restore hand boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    # global heatmaps
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]  # updated
    # resize_score_map = cv2.resize(score_map, (score_map.shape[1]*4, score_map.shape[0]*4))
    # cv2.imwrite(FLAGS.heatmap_output_dir+os.path.basename(im_fn)[:-4]+'_heatmap.png', resize_score_map*255)
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the hand boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    print('{} hand boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None, timer
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer
def detect(self, score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.2, nms_thres=0.2):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)  # (560, 2)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]  # (560, 2)
    # print('{} text boxes after thresh'.format(xy_text.shape[0]))
    # restore
    start = time.time()
    # (1035, 4, 2)
    # (n,2)*4 (n,5)
    text_box_restored = restore_rectangle(
        xy_text[:, ::-1] * 4,  # swap x and y
        geo_map[xy_text[:, 0], xy_text[:, 1], :])
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)  # (N,9)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)  # (2, 9)
    # print('{} text boxes after nms'.format(boxes.shape[0]))
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None, timer
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape(
            (-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer
def inference(image_path):
    image = cv2.imread(image_path)
    image_fname = osp.split(image_path)[-1]
    image_fname_noext = osp.splitext(image_fname)[0]
    label_fname = 'res_' + image_fname_noext + '.txt'
    src_image = image.copy()
    image, scale, pad, window = resize_and_pad_image(image, 512)
    input = mold_image(image)
    input = np.expand_dims(input, axis=0)
    score_map, geo_map = model.predict(
        [input, np.zeros((1, 128, 128, 1)), np.zeros((1, 128, 128, 1))])
    # filter the score map
    score_map = score_map[0, :, :, 0]
    geo_map = geo_map[0]
    # argwhere returns a 2-D array; each element is the index of a value that satisfies the condition
    yx_text = np.argwhere(score_map > 0.8)
    # sort the text boxes via the y axis
    yx_text = yx_text[np.argsort(yx_text[:, 0])]
    # restore
    # *4 restores coordinates to the original image scale; ::-1 swaps (y, x) so x comes first and y second
    text_box_restored = restore_rectangle_rbox(
        yx_text[:, ::-1] * 4, geo_map[yx_text[:, 0], yx_text[:, 1], :])
    # cv2.drawContours(image, text_box_restored.round().astype(np.int32), -1, (0, 255, 0))
    # show_image(image, 'restored')
    # cv2.waitKey(0)
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[yx_text[:, 0], yx_text[:, 1]]
    # nms
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), 0.2)
    # here we filter some low score boxes by the average score map, this is different from the original paper
    if boxes.shape[0] != 0:
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            # use the mean of the score_map region under the mask as this box's score
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > 0.1]
    boxes = np.reshape(boxes[:, :8], (-1, 4, 2))
    boxes[:, :, 0] = boxes[:, :, 0] - pad[1][0]
    boxes[:, :, 1] = boxes[:, :, 1] - pad[0][0]
    boxes /= scale
    boxes = boxes.round().astype(np.int32)
    label_path = osp.join('data/pred', label_fname)
    with open(label_path, 'w') as f:
        for box in boxes:
            box = sort_poly(box.astype(np.int32))
            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                continue
            f.write('{},{},{},{},{},{},{},{}\n'.format(
                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
def test(model, im, filename, input_size=512, use_cuda=False,
         score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    im = im[:, :, ::-1]
    im_resized, (ratio_h, ratio_w) = resize_image(im)
    tmp = im_resized
    im_resized = im_resized.astype(np.float32)
    im_resized = im_resized.transpose(2, 0, 1)
    im_resized = torch.from_numpy(im_resized)
    im_resized = im_resized.unsqueeze(0)
    im = im_resized
    if use_cuda:
        im = im.cuda()
    model = model.eval()
    F_score, F_geometry = model(im)
    F_score = F_score.permute(0, 2, 3, 1)
    F_geometry = F_geometry.permute(0, 2, 3, 1)
    F_score = F_score.data.cpu().numpy()
    F_geometry = F_geometry.data.cpu().numpy()
    if len(F_score.shape) == 4:
        F_score = F_score[0, :, :, 0]
        F_geometry = F_geometry[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(F_score > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          F_geometry[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = F_score[xy_text[:, 0], xy_text[:, 1]]
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return [None] * 4
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(F_score, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(F_score, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    if boxes.shape[0] > 0:
        mx = -float('inf')
        index = 0
        for i, box in enumerate(boxes):
            if box[8] > mx:
                mx = box[8]
                index = i
        box = sort_poly(boxes[index, :8].reshape((4, 2)).astype(np.int32))
        cv2.polylines(tmp, [box.astype(np.int32).reshape((-1, 1, 2))], True,
                      color=(255, 255, 0), thickness=1)
    return boxes, ratio_h, ratio_w, tmp[:, :, ::-1]
def detect(self, image: np.ndarray,
           params=TextDetectorParams()) -> Union[None, np.ndarray]:
    """
    Detector method.
    :param image:
    :param params:
    :return: An (N x 4 x 2) array, where N is the number of boxes found and the
        (4 x 2) subarray holds the corner points.
    """
    if self.__closed:
        raise RuntimeError("TextDetector: Session has already been closed.")
    image_resized, (ratio_h, ratio_w) = _resize_image(image)
    score_map, geo_map = self.__session.run(
        [self.__f_score, self.__f_geometry],
        feed_dict={self.__input_image: [image_resized]})
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    xy_text = np.argwhere(
        score_map > params.threshold_score_map)  # filter the score map
    xy_text = xy_text[np.argsort(
        xy_text[:, 0])]  # sort the text boxes via the y axis
    text_box_restored = _restore_rectangle(
        xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype(np.float32), params.threshold_nms)
    if boxes.shape[0] == 0:
        return None
    box_thresh = 0.1
    boxes = boxes[boxes[:, 8] > box_thresh]
    if boxes.shape[0] == 0:
        return None
    boxes = boxes[:, :8].reshape((-1, 4, 2))
    boxes[:, :, 0] /= ratio_w
    boxes[:, :, 1] /= ratio_h
    return boxes
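# Hedged usage sketch for TextDetector.detect above. Assumption: `detector` is an
# already-constructed TextDetector instance (its constructor is not shown in this
# file), and cv2/np/TextDetectorParams are available as in the surrounding code.
def draw_detections(detector, image_bgr):
    boxes = detector.detect(image_bgr, TextDetectorParams())
    if boxes is None:
        return image_bgr
    for quad in boxes:  # each quad is a (4, 2) array in original-image coordinates
        cv2.polylines(image_bgr, [quad.astype(np.int32)], isClosed=True,
                      color=(0, 255, 0), thickness=2)
    return image_bgr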
def detect(self, score_map, geo_map, score_map_thresh=1e-5, box_thresh=1e-8, nms_thres=0.1):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    logging.debug('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    logging.debug('{} boxes before merging'.format(boxes.shape[0]))
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    logging.debug('{} boxes before checking scores'.format(boxes.shape[0]))
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape(
            (-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
def detect_single_scale(score_map, geo_map, score_map_thresh, nms_thres, box_thresh, timer):
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    # xy_text[:, ::-1]: coordinates of the pixels that pass the threshold
    # geo_map[xy_text[:, 0], xy_text[:, 1], :]: distances from each pixel to the bounding box edges
    text_box_restored = restore_rectangle(xy_text[:, ::-1],
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # Modify Start
    # use the mean score inside the bounding box as the NMS criterion instead of a single point's value
    # new_boxes = np.copy(boxes)
    # for i, box in enumerate(new_boxes):
    #     mask = np.zeros_like(score_map, dtype=np.uint8)
    #     cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32), 1)
    #     new_boxes[i, 8] = cv2.mean(score_map, mask)[0]
    # end
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    # boxes = lanms.merge_quadrangle_n9(new_boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32), 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2, if_eval=True):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    score = score[0, :, :]  # drop the channel dimension
    print('2-D score.shape:', score.shape)
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    # print('xy_text.shape:', xy_text.shape)
    # coordinates whose score exceeds the confidence threshold, (n, 2) for n pixels
    if xy_text.size == 0:
        return None
    # sort the thresholded coordinates by row index, ascending
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]; convert (y, x) to (x, y)
    print('valid_pos.shape:', valid_pos.shape)  # valid coordinate points
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n: geo values at the valid pixels
    print('valid_geo.shape:', valid_geo.shape)
    polys_restored, index = restore_polys(
        valid_pos, valid_geo, score.shape)  # final restored polys and their indices into valid_pos
    if polys_restored.size == 0:
        return None
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored  # the 4 corner points of every predicted box
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]  # confidence of each predicted box
    if if_eval:
        return boxes
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)  # final boxes after NMS
    print('boxes.shape after NMS:', boxes.shape)
    return boxes
def detect(self, score_map, geo_map, score_thresh=0.8, cover_thresh=0.1, nms_thresh=0.2):
    """
    restore text boxes from score map and geo map
    """
    score_map = score_map[0]
    geo_map = np.swapaxes(geo_map, 1, 0)
    geo_map = np.swapaxes(geo_map, 1, 2)
    # filter the score map
    xy_text = np.argwhere(score_map > score_thresh)
    if len(xy_text) == 0:
        return []
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore quad proposals
    text_box_restored = self.restore_rectangle_quad(
        xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    try:
        import lanms
        boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
    except:
        print('you should install lanms by pip3 install lanms-nova to speed up nms_locality')
        boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
    if boxes.shape[0] == 0:
        return []
    # Here we filter some low score boxes by the average score map,
    # this is different from the original paper.
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape(
            (-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > cover_thresh]
    return boxes
def detect(score_maps, geo_maps, timer, score_map_thresh=0.8, box_thresh=0.1,
           nms_thres=0.2, ratio_ws=None, ratio_hs=None):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map: [W,H,5]
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_maps) != len(geo_maps) or len(geo_maps) != len(ratio_hs) or len(ratio_hs) != len(ratio_ws):
        print('the number of different scales is not equal')
        assert False
    boxes = []
    for scale_idx in range(len(score_maps)):
        cur_score_map = score_maps[scale_idx]
        cur_geo_map = geo_maps[scale_idx]
        cur_ratio_w = ratio_ws[scale_idx]
        cur_ratio_h = ratio_hs[scale_idx]
        cur_boxes = detect_single_scale(cur_score_map, cur_geo_map, score_map_thresh,
                                        nms_thres, box_thresh, timer)
        cur_boxes_points = cur_boxes[:, :8].reshape((-1, 4, 2))
        cur_boxes_points[:, :, 0] /= cur_ratio_w
        cur_boxes_points[:, :, 1] /= cur_ratio_h
        cur_boxes = np.concatenate([
            np.reshape(cur_boxes_points, (-1, 8)),
            np.expand_dims(cur_boxes[:, 8], axis=1)
        ], axis=1)
        boxes.extend(cur_boxes)
    boxes = np.array(boxes)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    print('{} text boxes after final nms'.format(boxes.shape[0]))
    boxes = boxes[:, :8].reshape((-1, 4, 2))
    return boxes, timer
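# Hedged sketch of how the multi-scale detect above might be fed. Assumptions:
# `resize_to_short_side` and `run_east` are hypothetical helpers standing in for
# whatever this project uses to resize an image and to run the EAST model on it;
# they are not defined anywhere in this file.
def detect_multi_scale(image, short_sides=(512, 768, 1024)):
    score_maps, geo_maps, ratio_ws, ratio_hs = [], [], [], []
    timer = {'restore': 0, 'nms': 0}
    for side in short_sides:
        resized, ratio_w, ratio_h = resize_to_short_side(image, side)  # hypothetical helper
        score_map, geo_map = run_east(resized)                         # hypothetical helper
        score_maps.append(score_map)
        geo_maps.append(geo_map)
        ratio_ws.append(ratio_w)
        ratio_hs.append(ratio_h)
    # returns (boxes, timer) as defined by the multi-scale detect above
    return detect(score_maps, geo_maps, timer, ratio_ws=ratio_ws, ratio_hs=ratio_hs)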
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None, timer
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer
def merge_boxes(boxes, box_thresh=0.1, nms_thres=0.2):
    '''
    merge candidate text boxes with locality-aware NMS
    :param boxes: boxes
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    boxes = np.asarray(boxes, dtype=np.float32)
    boxes = np.reshape(boxes, (-1, 9))
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
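# Hedged usage sketch for merge_boxes above: candidate quads collected from several
# crops of the same page can be pooled and merged in one pass. The 9-float layout
# (x0, y0, ..., x3, y3, score) matches what lanms.merge_quadrangle_n9 expects; the
# crop-offset handling below is an illustrative assumption, not part of the original.
def merge_crops_example(boxes_per_crop, crop_offsets):
    pooled = []
    for crop_boxes, (dx, dy) in zip(boxes_per_crop, crop_offsets):
        for b in crop_boxes:
            quad = np.asarray(b[:8], dtype=np.float32).reshape(4, 2)
            quad[:, 0] += dx  # shift x back to full-image coordinates
            quad[:, 1] += dy  # shift y back to full-image coordinates
            pooled.append(quad.reshape(-1).tolist() + [float(b[8])])
    return merge_boxes(pooled, box_thresh=0.1, nms_thres=0.2)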
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    score = score[0, :, :]
    xy_text = np.argwhere(score > score_thresh)
    if xy_text.size == 0:
        return None
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    return boxes
def text_detection(score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    restore text boxes from score map and geo map
    :param score_map: Map of scores from EAST model
    :param geo_map: Map of geometries from EAST model
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return: List of rectangles
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
def get_bounding_boxes_from_output(score_map, geo_map):
    """
    Recreate the boxes from score map and geometry map.
    :param score_map: Score map.
    :param geo_map: Geometry map.
    :return: Restored boxes
    """
    index_text = np.argwhere(score_map > 0.9)
    restored_bounding_boxes = np.zeros((index_text.shape[0], 8))
    for i in range(index_text.shape[0]):
        indices = index_text[i]  # [y, x]
        restored_bounding_boxes[i] = restore_bounding_box([indices[1], indices[0]],
                                                          geo_map[indices[0], indices[1], :])
    boxes = np.zeros((restored_bounding_boxes.shape[0], 9))
    boxes[:, :8] = restored_bounding_boxes
    boxes[:, 8] = score_map[index_text[:, 0], index_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), 0.2)
    return boxes
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    time_6 = time.time()
    score = score[0, :, :]
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None, 0
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None, 0
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        # print('111111111111-------', np.sum(mask))
        boxes[i, 8] = cv2.mean(score, mask)[0]
    boxes = boxes[boxes[:, 8] > 0.1]
    time_4 = time.time()
    time_nms = time_4 - time_6
    # print('nms---------', time_nms)
    return boxes, time_nms
def restore_bboxes(cls, rho, theta, cls_thresh=None, nms=True):
    # AX+BY+C=0 <---> ρ=x*cosθ+y*sinθ
    height = cls.shape[1]
    if cls_thresh is None:
        cls_thresh = cfg.test.cls_thresh
    region = (cls[0, :, :] > cls_thresh)
    ret, markers = cv2.connectedComponents(np.uint8(region))
    for i in range(1, ret):
        y, x = np.where(markers == i)
        if len(x) > (cfg.test.rm_small_cc_area * cfg.test.short_side)**2:
            continue
        for j in range(len(x)):
            region[y[j], x[j]] = False
    r, c = np.where(region)
    A, B, C = np.cos(theta), np.sin(theta), -rho  # the 'C' is actually "C + A*dx + B*dy"
    A0, B0, C0 = A[0, r, c], B[0, r, c], C[0, r, c]
    C0 -= A0 * c + B0 * (height - r)  # recover C in global coordinate
    A1, B1, C1 = A[1, r, c], B[1, r, c], C[1, r, c]
    C1 -= A1 * c + B1 * (height - r)  # recover C in global coordinate
    A2, B2, C2 = A[2, r, c], B[2, r, c], C[2, r, c]
    C2 -= A2 * c + B2 * (height - r)  # recover C in global coordinate
    A3, B3, C3 = A[3, r, c], B[3, r, c], C[3, r, c]
    C3 -= A3 * c + B3 * (height - r)  # recover C in global coordinate
    # lines -> bboxes
    x1, y1 = line_cross_point(A0, B0, C0, A1, B1, C1)
    x2, y2 = line_cross_point(A1, B1, C1, A2, B2, C2)
    x3, y3 = line_cross_point(A2, B2, C2, A3, B3, C3)
    x0, y0 = line_cross_point(A3, B3, C3, A0, B0, C0)
    bboxes = np.vstack((x0, height - y0, x1, height - y1, x2, height - y2,
                        x3, height - y3, cls[0, r, c])).T
    if nms:
        bboxes = lanms.merge_quadrangle_n9(bboxes.astype('float32'), cfg.test.nms_thresh)
    return bboxes
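# Hedged sketch (an assumption, not the project's actual helper): restore_bboxes above
# relies on line_cross_point(A0, B0, C0, A1, B1, C1) intersecting two lines given in
# A*x + B*y + C = 0 form. By Cramer's rule the intersection is
#   x = (B0*C1 - B1*C0) / (A0*B1 - A1*B0),  y = (A1*C0 - A0*C1) / (A0*B1 - A1*B0).
def _line_cross_point_sketch(A0, B0, C0, A1, B1, C1):
    # vectorized over numpy arrays; near-parallel lines make the denominator approach 0
    d = A0 * B1 - A1 * B0
    x = (B0 * C1 - B1 * C0) / d
    y = (A1 * C0 - A0 * C1) / d
    return x, y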
def detect_txt(score_path, txt_path, prob_thresh, short_side):
    npy_list = []
    for score in os.listdir(score_path):
        npy_list.append(score.split('_', 1)[1].split('.')[0])
    npy_list = list(set(npy_list))
    for npy_file in sorted(npy_list):
        print(npy_file)
        save_file = os.path.join(txt_path, 'res_' + npy_file + '.txt')
        vertices = []
        for short_side in short_sides:
            res = np.load(os.path.join(score_path, '{}_'.format(short_side) + npy_file + '.npy'),
                          allow_pickle=True).item()
            bboxes = restore_bboxes(res['cls'], res['rho'], res['theta'], prob_thresh, nms=False)
            for bbox in bboxes:
                pts = bbox[:8] * 4 * (res['origin_w'] / res['resize_w'] +
                                      res['origin_h'] / res['resize_h']) / 2
                vertices.append(pts.tolist() + [bbox[8]])
        bboxes = np.array(vertices)
        bboxes = lanms.merge_quadrangle_n9(bboxes.astype('float32'), cfg.test.nms_thresh)
        with open(save_file, 'w') as f:
            for vertice in bboxes:
                pts = np.int32(np.around(vertice[:8].flatten()))
                if not validate_clockwise_points(pts):
                    continue
                pts = pts.astype(str).tolist()
                pts = ','.join(pts) + ',' + '0.9' + '\n'  # str(conf[j])
                f.write(pts)
        sys.stdout.flush()
def detect(score_map, geo_map, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.1):
    '''
    score_map: 128*128
    geo_map: 128*128*5
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    text_box_restored = Toolbox.restore_rectangle_rbox(
        xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    # nms part
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return np.array([])
    # here we filter some low score boxes by the average score map, this is different from the original paper
    # for i, box in enumerate(boxes):
    #     mask = np.zeros_like(score_map, dtype=np.uint8)
    #     cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
    #     boxes[i, 8] = cv2.mean(score_map, mask)[0]
    # boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
def detect(F_score, F_geo, F_score_thresh=0.8, F_geo_thresh=0.1, nms_thres=0.2):
    if len(F_score.shape) == 4:
        F_score = F_score[0, :, :, 0]
        F_geo = F_geo[0, :, :, ]
    xy_text = np.argwhere(F_score > F_score_thresh)  # filter the score map
    xy_text = xy_text[np.argsort(xy_text[:, 0])]  # sort the text boxes via the y axis
    # restoration
    text_box_restored = restore_rectangle_from_rbox(
        xy_text[:, ::-1] * 4, F_geo[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = F_score[xy_text[:, 0], xy_text[:, 1]]
    # locality-aware nms
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(F_score, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(F_score, mask)[0]
    boxes = boxes[boxes[:, 8] > F_geo_thresh]
    return boxes
def detect_mask(score_map, score_map_full, geo_map, timer,
                score_map_thresh=FLAGS.score_map_thresh, mask_thresh=FLAGS.mask_thresh,
                box_thresh=0.1, nms_thres=0.2, min_area=FLAGS.min_area):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param mask_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        score_map_full = score_map_full[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])
    points = list(xy_text)
    for i in range(len(points)):
        points[i] = tuple(points[i])
    # points = list(zip(*np.where(score_map > mask_thresh)))
    points_dict = {}
    for i in range(len(points)):
        points_dict[points[i]] = i
    group_mask = dict.fromkeys(points, -1)
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes_all = boxes + 0
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start
    if boxes.shape[0] == 0:
        return None, timer, [], []
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    # dict_box = {}
    # if len(score_map.shape) == 4:
    #     score_map = score_map[0, :, :, 0]
    #     geo_map = geo_map[0, :, :, ]
    # xy_text = np.argwhere(score_map > mask_thresh)
    # xy_text = xy_text[np.argsort(xy_text[:, 0])]
    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]], dtype=np.uint8)
    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3], dtype=np.uint8)
    if boxes is None or len(boxes) == 0:
        # if boxes.shape[0] < 1:
        return mask_colors, timer, [], []
    boxes_in = boxes + 0
    boxes_in[:, :8] = (boxes_in[:, :8] / 4).astype(np.int32)
    h, w = score_map.shape
    boxes_points = []
    cnt = 0
    for box in boxes_in:
        b_ps = []
        b = box[:8].reshape((4, 2))
        if np.linalg.norm(b[0] - b[1]) <= 1 or np.linalg.norm(b[3] - b[0]) <= 1:
            continue
        xmin = int(max(np.min(b[:, 0]), 0))
        xmax = int(min(np.max(b[:, 0]), w - 1))
        ymin = int(max(np.min(b[:, 1]), 0))
        ymax = int(min(np.max(b[:, 1]), h - 1))
        # print(ymin, ymax, xmin, xmax)
        local_ = score_map_full[ymin:ymax + 1, xmin:xmax + 1]
        # print("score_map_full", score_map_full.max(), local_.max)
        local_mask = np.zeros_like(local_)
        b[:, 0] -= xmin
        b[:, 1] -= ymin
        cv2.fillPoly(local_mask, b.astype(np.int32)[np.newaxis, :, :], 1)
        local_ = local_ * local_mask
        # local_th = local_ + 0
        # print("mask_thresh", mask_thresh)
        # local_th[local_th <= mask_thresh] = 1
        # cv2.imwrite("local_" + str(cnt) + ".jpg", local_ * 255)
        # cv2.imwrite("local_" + str(cnt) + "_th.jpg", local_th * 255)
        # cnt += 1
        ps_idx = np.argwhere(local_ > mask_thresh)
        # ps_idx = np.where((xy_text[:,1]>=xmin) & (xy_text[:,1]<=xmax) & (xy_text[:,0]>=ymin) & (xy_text[:,0]<=ymax))[0]
        # for idx in ps_idx:
        #     b_ps.append([xy_text[idx,1], xy_text[idx,0]])
        for idx in ps_idx:
            b_ps.append([idx[1] + xmin, idx[0] + ymin])
        if b_ps == []:
            continue
        boxes_points.append(b_ps)
    # print("boxes_points", boxes_points)
    mask_contours = []
    for b in boxes_points:
        mask_bin *= 0
        b = np.array(b)
        b = b[:, ::-1]
        b = b.transpose(1, 0)
        b = (b[0], b[1])
        mask_bin[:, :][b] = 255
        mask_colors[:, :, :][b] = 255
        area_ = np.sum(mask_bin / 255)
        if area_ < min_area or area_ >= h * w * 0.99:
            continue
        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            # print("p_in", p_in)
            if tuple(p_in) in points_dict:
                box_ = boxes_all[points_dict[tuple(p_in)]]
                poly_h = min(np.linalg.norm(box_[0] - box_[3]),
                             np.linalg.norm(box_[1] - box_[2]))
                poly_w = min(np.linalg.norm(box_[0] - box_[1]),
                             np.linalg.norm(box_[2] - box_[3]))
                dilate_kernel_size = int(min(poly_h, poly_w) * FLAGS.dilate_ratio)
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0], poly_rect[1][1]) * FLAGS.dilate_ratio
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)), 3)
            # dilate_kernel_size = 3
            # print("dilate_kernel_size", dilate_kernel_size)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                           (dilate_kernel_size, dilate_kernel_size))
        mask_bin = cv2.dilate(mask_bin, kernel)
        contours, hierarchy = cv2.findContours(mask_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        max_contour = contours[0]
        max_area = cv2.contourArea(max_contour)
        for i in range(1, len(contours)):
            if cv2.contourArea(contours[i]) > max_area:
                max_contour = contours[i]
                max_area = cv2.contourArea(max_contour)
        epsilon = 0.01 * cv2.arcLength(max_contour, True)
        approx = cv2.approxPolyDP(max_contour, epsilon, True)
        mask_contours.append(approx)
        cv2.drawContours(mask_colors, max_contour, -1, (0, 255, 0), 3)
        cv2.drawContours(mask_colors, approx, -1, (0, 0, 255), 3)
        # cv2.imshow("mask_colors", mask_colors)
        # cv2.waitKey(0)
    return mask_colors, timer, mask_contours, boxes
from lanms import merge_quadrangle_n9
import numpy as np

if __name__ == '__main__':
    # unit square with confidence 1
    q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')
    print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2])))
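# Hedged reading of the smoke test above (assumption: the default overlap threshold
# of merge_quadrangle_n9 treats q and q + 0.1 as the same region): the two nearly
# identical unit squares should collapse into a single row, while q + 2 does not
# overlap them and survives on its own, so the result is an (n, 9) array whose last
# column holds the kept confidence.
def smoke_test_merge():
    q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')
    merged = merge_quadrangle_n9(np.array([q, q + 0.1, q + 2]))
    assert merged.shape[1] == 9  # 8 quad coordinates + 1 score per row
    return merged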
for i in range(num_detections[0]):
    if scores[i] < score_thresh:
        continue
    p = rbox_2_polygon(xc[i], yc[i], w[i], h[i], boxes[i, 4])
    if p is not None:
        for j in range(8):
            # scale x (even indices) and y (odd indices) back to the original image
            ratio = ratio_x if j % 2 == 0 else ratio_y
            p[j] *= ratio
        polys.append(p + [scores[i]])
print 'convert time:', time.time() - t0
###
polys = np.array(polys, dtype=np.float32)
# lanms
nms_thresh = 0.2
t0 = time.time()
polys = lanms.merge_quadrangle_n9(polys, nms_thresh)
nms_keep_thresh = 1.0
if polys.shape[0] > 0:
    remain_index = np.where(polys[:, 8] > nms_keep_thresh)[0]
    polys = polys[remain_index]
print 'nms_time:', time.time() - t0, 'boxes:', polys.shape[0]
###
# save results
res_fn = 'res_' + os.path.splitext(fn)[0] + '.txt'
with open(os.path.join(output_dir, res_fn), 'w') as f_res:
    for p in polys:
        f_res.write("{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d}\r\n".format(
            int(p[0]), int(p[1]), int(p[2]), int(p[3]),
            int(p[4]), int(p[5]), int(p[6]), int(p[7])))
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)
    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 39], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels')
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                               decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    input_labels_split = tf.split(input_labels, len(gpus))
    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                il = input_labels_split[i]
                total_loss, model_loss, f_score, f_geometry, _ = tower_loss(iis, isms, igms, itms, il, reuse_variables)
                # f_score, f_geometry = i_am_testing(iis)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # print "below..."
                # batch_norm_updates_op = tf.group(*[op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) if 'resnet_v1_50/block4' in op.name or 'resnet_v1_50/block3' in op.name or 'feature_fusion' in op.name])
                # print "above..."
                reuse_variables = True
                # print "below.."
                train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block1' in var.name]
                # train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block4' in var.name]
                # train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_7' in var.name]
                # train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_8' in var.name]
                # train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_9' in var.name]
                # print train_var
                # print "above..."
                train_var += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion')
                grads = opt.compute_gradients(total_loss, var_list=train_var)
                tower_grads.append(grads)
    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    # train_var = [var for var in tf.trainable_variables() if ('resnet_v1_50/block3' in var.name or 'resnet_v1_50/block4' in var.name or 'feature_fusion' in var.name)]
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')
    #####################################################################################################################
    # BLOCK MODIFIED BY ME
    # variables = slim.get_variables_to_restore()
    # var_list = []
    # for v in variables:
    #     if len(v.name.split('/')) == 1:
    #         var_list.append(v)
    #     elif v.name.split('/')[1] != "myconv1" or not v.name.find('custom_filter'):
    #         var_list.append(v)
    #     else:
    #         pass
    # saver = tf.train.Saver(var_list)
    saver = tf.train.Saver(tf.global_variables())
    saver_restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # removing the first conv layer
    # del saver_restore_vars[1]
    # saver_to_restore = tf.train.Saver(saver_restore_vars)
    #####################################################################################################################
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())
    init = tf.global_variables_initializer()
    # print '>> trainable variables: ', slim.get_trainable_variables()
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    # my_char_l = "5"
    # my_char_U = ""
    data_size = 0
    train_data_indices = []
    list_of_img_pos = []
    with open('./cropped_annotations_5.txt', 'r') as f:
        annotation_file = f.readlines()
    # with open('Data/cropped_annotations_new/cropped_annotations' + my_char_U + '.txt', 'r') as f:
    #     annotation_file += f.readlines()
    idx = 0
    for line in annotation_file:
        if len(line) > 1 and line[:13] == './cropped_img':  # and str(line[14:27]) in training_list:
            data_size += 1
            train_data_indices.append(idx)
            list_of_img_pos.append(line[14:].split(".")[0] + ".tiff")
        idx += 1
    list_of_img_all = os.listdir('./cropped_img')
    list_of_img_neg = np.array(list(set(list_of_img_all) - set(list_of_img_pos)))
    # print "Char model: " + my_char_U + my_char_l
    # print "Data size: " + str(data_size)
    epoch_size = data_size / (16 * 2)
    # print epoch_size
    print "This many steps per epoch: " + str(epoch_size)
    # list_of_img_neg_char = os.listdir('Data/j')
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            print '>> Checkpoint path: ', FLAGS.checkpoint_path
            print '>> second stuff: ', os.path.basename(ckpt_state.model_checkpoint_path)
            # all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)[1]
            var1 = saver_restore_vars[1]
            del saver_restore_vars[1]
            var2 = saver_restore_vars[422]
            del saver_restore_vars[422]
            # names = [var.name for var in saver_restore_vars]
            saver_to_restore = tf.train.Saver(saver_restore_vars)
            # print '>> global vars: ', names.index('resnet_v1_50/conv1/weights/ExponentialMovingAverage:0')  # [var.name for var in tf.global_variables()]
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            # originally saver.restore(sess, model_path)
            saver_to_restore.restore(sess, model_path)
            init_new_vars_op = tf.initialize_variables([var1, var2])
            sess.run(init_new_vars_op)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
        # print "below:"
        # tvars = tf.trainable_variables()
        # g_vars = [var for var in tvars if 'resnet_v1_50/block4' in var.name]
        # print g_vars
        # print tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50')
        # return
        print FLAGS.learning_rate
        print reg_constant
        for step in range(24 * epoch_size):
            ### Generate Data ###
            data = [], [], [], [], []
            np.random.shuffle(train_data_indices)
            num_im = 0
            actual_num_im = 0
            list_of_chars = list(string.ascii_lowercase) + [str(x) for x in range(10)]
            while len(data[0]) < 32:
                prob = 1  # np.random.random(1)[0]
                if prob > 0.49:
                    i = train_data_indices[num_im]
                    im_fn = "./cropped_img/" + annotation_file[i][14:].split(".tiff", 1)[0] + ".tiff"
                    # print im_fn
                    im = cv2.imread(im_fn)
                    ################################################################################
                    # adding rest of the channels
                    for ids_c in range(len(list_of_chars)):
                        crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_cropped/' + list_of_chars[ids_c] + '/'
                        filename = crop_dir + annotation_file[i][14:].split(".tiff", 1)[0] + ".tiff"
                        pad = cv2.imread(filename)
                        pad = pad[:, :, 0]
                        pad = np.expand_dims(pad, axis=2)
                        im = np.append(im, pad, axis=2)
                    ################################################################################
                    ################################################################################
                    if im is not None:
                        r, c, _ = im.shape
                        text_polys = []
                        text_tags = []
                        if int(annotation_file[i+1]) > 0:
                            for idx in range(i+2, i+2+int(annotation_file[i+1])):
                                annotation_data = annotation_file[idx]
                                annotation_data = annotation_data.split(" ")
                                x, y = float(annotation_data[0]), float(annotation_data[1])
                                w, h = float(annotation_data[2]), float(annotation_data[3])
                                text_polys.append([list([int(x), int(y-h)]), list([int(x+w), int(y-h)]),
                                                   list([int(x+w), int(y)]), list([int(x), int(y)])])
                                text_tags.append(False)
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)),
                                                                                np.array(text_polys),
                                                                                np.array(text_tags))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
                        actual_num_im += 1
                    num_im += 1
                else:
                    im_fn = np.random.choice(list_of_img_neg)
                    ################################################################################
                    # adding rest of the channels
                    # for i in range(len(list_of_chars)):
                    crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_single_cropped/'
                    filename = crop_dir + annotation_file[i][14:].split(".tiff", 1)[0] + ".tiff"
                    pad = cv2.imread(filename)
                    pad = pad[:, :, 0]
                    pad = np.expand_dims(pad, axis=2)
                    im = np.append(im, pad, axis=2)
                    ################################################################################
                    # im_fn = np.random.choice(list_of_img_neg_char)
                    # im_mini = cv2.imread("Data/j/" + im_fn)
                    # r0, c0, _ = im_mini.shape
                    # im = np.zeros((512, 512, 3), dtype=np.uint8)
                    # ra, rb, ca, cb = 256-r0/2, 256+(r0+1)/2, 256-c0/2, 256+(c0+1)/2
                    # im[ra:rb, ca:cb, :] = im_mini.copy()
                    if im is not None:
                        r, c, _ = im.shape
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)),
                                                                                np.array([]), np.array([]))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
            ### Run model ###
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_score_maps: data[2],
                                            input_geo_maps: data[3],
                                            input_training_masks: data[4]})
            epoch = step / epoch_size
            batch_num = step % epoch_size
            if step % (epoch_size/3) == 0:
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " loss: " + str(ml)
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " loss: " + str(tl)
            if step % (epoch_size/2) == 0:
                # print "Epoche: " + str(step / (epoch_size/2))
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op],
                                              feed_dict={input_images: data[0],
                                                         input_score_maps: data[2],
                                                         input_geo_maps: data[3],
                                                         input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
        if False:
            count_right = 0
            count_wrong = 0
            count_posNotDetected = 0
            im0 = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
            w, h, _ = im0.shape
            slide_window = 300
            crop_size = 512
            crop_center = (256, 256)
            num_rows, num_cols = int(np.ceil(w/slide_window)), int(np.ceil(h/slide_window))
            print num_cols
            for rot in [-90.0, -60.0, -30.0, 0.0, 30.0, 60.0, 90.0]:
                im = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                boxes_one_rot = []
                count = 0
                while count < num_rows * num_cols:
                    images, data2, data3, data4 = [], [], [], []
                    for k in range(16):
                        i = (count + k) / num_rows
                        j = (count + k) % num_cols
                        temp = im[slide_window*i:slide_window*i+crop_size,
                                  slide_window*j:slide_window*j+crop_size, ::-1]
                        w2, h2, _ = temp.shape
                        if w2 < crop_size or h2 < crop_size:
                            result = np.zeros((crop_size, crop_size, 3))
                            result[:w2, :h2] = temp
                            temp = result
                        M = cv2.getRotationMatrix2D(crop_center, rot, 1.0)
                        temp = cv2.warpAffine(temp, M, (crop_size, crop_size))
                        images.append(temp)
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(crop_size), int(crop_size)),
                                                                                np.array([]), np.array([]))
                        data2.append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data3.append(geo_map[::4, ::4, :].astype(np.float32))
                        data4.append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
                    score, geometry = sess.run([f_score, f_geometry],
                                               feed_dict={input_images: images,
                                                          input_score_maps: data2,
                                                          input_geo_maps: data3,
                                                          input_training_masks: data4})
                    for k in range(16):
                        i = (count + k) / num_rows
                        j = (count + k) % num_cols
                        boxes = detect(score_map=score[j], geo_map=geometry[j],
                                       score_map_thresh=0.01, box_thresh=0.01, nms_thres=0.01)
                        if boxes is not None:
                            boxes = boxes[:, :8].reshape((-1, 4, 2))
                            for box in boxes:
                                M_inv = cv2.getRotationMatrix2D(crop_center, -1*rot, 1)
                                box[0] = M_inv.dot(np.array((box[0, 0], box[0, 1]) + (1,)))
                                box[1] = M_inv.dot(np.array((box[1, 0], box[1, 1]) + (1,)))
                                box[2] = M_inv.dot(np.array((box[2, 0], box[2, 1]) + (1,)))
                                box[3] = M_inv.dot(np.array((box[3, 0], box[3, 1]) + (1,)))
                                box = sort_poly(box.astype(np.int32))
                                box[0, 0] = box[0, 0] + j * slide_window
                                box[0, 1] = box[0, 1] + i * slide_window
                                box[1, 0] = box[1, 0] + j * slide_window
                                box[1, 1] = box[1, 1] + i * slide_window
                                box[2, 0] = box[2, 0] + j * slide_window
                                box[2, 1] = box[2, 1] + i * slide_window
                                box[3, 0] = box[3, 0] + j * slide_window
                                box[3, 1] = box[3, 1] + i * slide_window
                                boxes_one_rot.append(box)
                boxes_single_rot = np.zeros((len(boxes_one_rot), 9))
                boxes_single_rot[:, :8] = np.array(boxes_one_rot).reshape((-1, 8))
                boxes_single_rot[:, 8] = 1
                labels += boxes_single_rot.tolist()
            boxes = lanms.merge_quadrangle_n9(np.array(labels), nms_thres)
            annotation = np.load("/mnt/nfs/work1/elm/ray/new_char_anots_ncs/" + "j" + "/" + "D0117-5755036" + ".npy").item()
            ### Compute the TP, FP, FN info for each image
            count_right_cache = 0
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            num_true_pos = len(annotation)
            for box in boxes:
                box = sort_poly(box.astype(np.int32))
                if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                    continue
                k = 0
                idx = 0
                count_wrong += 1
                while (idx < num_true_pos):
                    if k in annotation:
                        proposed_label = annotation[k]['vertices']
                        if len(proposed_label) == 4:
                            x3, y3, x2, y2, x1, y1, x0, y0 = proposed_label[0][0], proposed_label[0][1], \
                                proposed_label[1][0], proposed_label[1][1], \
                                proposed_label[2][0], proposed_label[2][1], \
                                proposed_label[3][0], proposed_label[3][1]
                            if (checkIOU(box, [[x0, y0], [x1, y1], [x2, y2], [x3, y3]]) == True):
                                count_right_cache += 1
                                count_wrong -= 1
                                break
                        idx += 1
                    k += 1
            count_posNotDetected += num_true_pos - count_right_cache
            count_right += count_right_cache
            precision = (float)(count_right) / (float)(count_right + count_wrong)  # TP / TP + FP
            recall = (float)(count_right) / (float)(count_right + count_posNotDetected)  # TP / TP + FN
            fscore = 2 * (precision * recall) / (precision + recall)
            print "Precision, recall, fscore: " + str(precision) + ", " + str(recall) + ", " + str(fscore)