Example #1
    def validate_one_epoch(self, epoch):

        box_predictor = models.BoxesPredictor(0.9)
        recall_cum = 0
        prec_cum = 0
        iou_cum = 0

        self.model = self.model.cuda()

        i = 0
        for _, batch in enumerate(self.test_dataloader):
            images = batch['image'].permute(0, 3, 1, 2).cuda()
            bbox_maps = batch['bbox_map'].unsqueeze(1).float()
            quad = batch['quad_formatting'].permute(0, 3, 1, 2)
            ignore_map = batch['ignore_map'].unsqueeze(1)

            prediction = self.model(images)
            for key in prediction:
                prediction[key] = prediction[key].cpu()
            boxes = box_predictor(prediction)[0]
            boxes_p = np.hstack(
                [boxes.detach().numpy(),
                 np.ones([boxes.shape[0], 1])])
            boxes_pr = lanms.merge_quadrangle_n9(boxes_p, 0.1)
            valid_boxes = boxes_pr[:, :8]

            ground_boxes = box_predictor({
                "score_map": bbox_maps,
                "geometry": quad.float()
            })[0].numpy()
            if ground_boxes.shape[0] != 0:
                ground_boxes = np.hstack(
                    [ground_boxes,
                     np.ones([ground_boxes.shape[0], 1])])
                ground_boxes = lanms.merge_quadrangle_n9(ground_boxes, 0.1)
                ground_boxes = ground_boxes[:, :8]

                recall, prec, iou = metrics.matchBoxes(ground_boxes,
                                                       valid_boxes,
                                                       thr=0.5)
                recall_cum += recall
                prec_cum += prec
                iou_cum += iou

                self.print_valid_results(epoch, i, recall_cum / (i + 1),
                                         prec_cum / (i + 1), iou_cum / (i + 1))
                i += 1

        print()
        self.model = self.model.cpu()
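
Throughout these examples, lanms.merge_quadrangle_n9 expects an (N, 9) float32 array of eight quad corner coordinates (x1, y1, ..., x4, y4) followed by a confidence score, plus an IoU threshold. A minimal, self-contained sketch with made-up quads (the 0.1 threshold simply mirrors the value used above):

import numpy as np
import lanms

# two overlapping quads and one isolated quad, each with a trailing score
quads = np.array([
    [0, 0, 10, 0, 10, 10, 0, 10, 0.90],
    [1, 1, 11, 1, 11, 11, 1, 11, 0.80],      # heavily overlaps the first quad
    [50, 50, 60, 50, 60, 60, 50, 60, 0.70],  # isolated quad
], dtype=np.float32)

merged = lanms.merge_quadrangle_n9(quads, 0.1)
print(merged.shape)  # the overlapping pair is merged, the isolated quad is kept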
Example #2
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
	'''get boxes from feature map
	Input:
		score       : score map from model <numpy.ndarray, (1,row,col)>
		geo         : geo map from model <numpy.ndarray, (5,row,col)>
		score_thresh: threshold to segment score map
		nms_thresh  : threshold in nms
	Output:
		boxes       : final polys <numpy.ndarray, (n,9)>
	'''
	score = score[0,:,:]
	# coordinates of the scores above the threshold
	xy_text = np.argwhere(score > score_thresh) # n x 2, format is [r, c]
	if xy_text.size == 0:
		return None

	xy_text = xy_text[np.argsort(xy_text[:, 0])]
	valid_pos = xy_text[:, ::-1].copy() # n x 2, [x, y]
	valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]] # 5 x n
	polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape) 
	if polys_restored.size == 0:
		return None

	boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
	boxes[:, :8] = polys_restored
	boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]

	# lanms here is the original author's C++ Locality-Aware NMS, which should be faster
	# a pure-Python implementation exists at https://github.com/argman/EAST/blob/master/locality_aware_nms.py
	boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
	return boxes
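
A hedged usage sketch for get_boxes above, assuming it and its restore_polys helper can be imported from the surrounding module; with all-zero maps no pixel clears score_thresh, so the early None return fires:

import numpy as np

score = np.zeros((1, 128, 128), dtype=np.float32)  # (1, row, col) as the docstring expects
geo = np.zeros((5, 128, 128), dtype=np.float32)    # (5, row, col)

boxes = get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2)
print(boxes)  # None: nothing exceeded score_thresh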
Example #3
def detect(score_map, geo_map, score_map_thresh, box_thresh, nms_thres):
    '''
    restore text boxes from score map and geo map
    '''
    if len(score_map.shape) == 3:
        score_map = score_map[:, :, 0]
        #geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)

    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, color=np.array((255,0,0)))
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes	
Example #4
 def nms(self, dets):
     if self.is_python35:
         import lanms
         dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh)
     else:
         dets = nms_locality(dets, self.nms_thresh)
     return dets
Example #5
def detect(score_map, geo_map, timer, score_map_thresh=0.1, box_thresh=0.1, nms_thres=0.5):
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes, timer
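
A hedged call sketch for this detect variant: score_map and geo_map are assumed to be the (1, H, W, 1) and (1, H, W, 5) outputs of an EAST-style TensorFlow model, and timer is a plain dict whose 'restore' and 'nms' entries get filled in:

timer = {'net': 0.0, 'restore': 0.0, 'nms': 0.0}
boxes, timer = detect(score_map, geo_map, timer)  # score_map, geo_map taken from the model (assumed)
if boxes is not None:
    print('{} boxes, restore {:.3f}s, nms {:.3f}s'.format(
        boxes.shape[0], timer['restore'], timer['nms']))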
Example #6
def detect(score_map, geo_map, score_map_thresh=0.1, box_thresh=0.005, nms_thres=0.25):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    xy_text = np.argwhere(score_map > score_map_thresh)
    if len(xy_text) < 1:
        return None
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, color=np.array((255,0,0)))
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
Example #7
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
	'''get boxes from feature map
	Input:
		score       : score map from model <numpy.ndarray, (1,row,col)>
		geo         : geo map from model <numpy.ndarray, (5,row,col)>
		score_thresh: threshold to segment score map
		nms_thresh  : threshold in nms
	Output:
		boxes       : final polys <numpy.ndarray, (n,9)>
	'''
	score = score[0,:,:]
	xy_text = np.argwhere(score > score_thresh) # n x 2, format is [r, c]
	if xy_text.size == 0:
		return None

	xy_text = xy_text[np.argsort(xy_text[:, 0])]
	valid_pos = xy_text[:, ::-1].copy() # n x 2, [x, y]
	valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]] # 5 x n
	polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape) 
	if polys_restored.size == 0:
		return None

	boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
	boxes[:, :8] = polys_restored
	boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
	boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
	return boxes
Example #8
def detect(im_fn,
           ratio_h,
           ratio_w,
           score_map,
           geo_map,
           timer,
           score_map_thresh=0.8,
           box_thresh=0.1,
           nms_thres=0.2):
    '''
    restore hand boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    # global heatmaps
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    #updated
    # resize_score_map = cv2.resize(score_map, (score_map.shape[1]*4, score_map.shape[0]*4))
    # cv2.imwrite(FLAGS.heatmap_output_dir+os.path.basename(im_fn)[:-4]+'_heatmap.png', resize_score_map*255)

    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the hand boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    print('{} hand boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes, timer
Example #9
    def detect(self,
               score_map,
               geo_map,
               timer,
               score_map_thresh=0.8,
               box_thresh=0.2,
               nms_thres=0.2):
        '''
        restore text boxes from score map and geo map
        :param score_map:
        :param geo_map:
        :param timer:
        :param score_map_thresh: threshold for score map
        :param box_thresh: threshold for boxes
        :param nms_thres: threshold for nms
        :return:
        '''
        if len(score_map.shape) == 4:
            score_map = score_map[0, :, :, 0]
            geo_map = geo_map[0, :, :, ]

        # filter the score map
        xy_text = np.argwhere(score_map > score_map_thresh)  # (560, 2)
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]  # (560, 2)
        # print('{} text boxes after thresh'.format(xy_text.shape[0]))
        # restore
        start = time.time()
        # (1035, 4, 2)                       # (n,2)*4   (n,5)
        text_box_restored = restore_rectangle(
            xy_text[:, ::-1] * 4,  # swap to (x, y) order
            geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9),
                         dtype=np.float32)  # (N,9)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        timer['restore'] = time.time() - start
        # nms part
        start = time.time()
        # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
        boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'),
                                          nms_thres)  #(2, 9)
        # print('{} text boxes after nms'.format(boxes.shape[0]))
        timer['nms'] = time.time() - start

        if boxes.shape[0] == 0:
            return None, timer

        # here we filter some low score boxes by the average score map, this is different from the original paper
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > box_thresh]

        return boxes, timer
Example #10
def inference(image_path):
    image = cv2.imread(image_path)
    image_fname = osp.split(image_path)[-1]
    image_fname_noext = osp.splitext(image_fname)[0]
    label_fname = 'res_' + image_fname_noext + '.txt'
    src_image = image.copy()
    image, scale, pad, window = resize_and_pad_image(image, 512)
    input = mold_image(image)
    input = np.expand_dims(input, axis=0)
    score_map, geo_map = model.predict(
        [input, np.zeros((1, 128, 128, 1)),
         np.zeros((1, 128, 128, 1))])
    # filter the score map
    score_map = score_map[0, :, :, 0]
    geo_map = geo_map[0]
    # argwhere returns a 2-D array; each row is the index of a value that satisfies the condition
    yx_text = np.argwhere(score_map > 0.8)
    # sort the text boxes via the y axis
    yx_text = yx_text[np.argsort(yx_text[:, 0])]
    # restore
    # *4 maps back to the original image size; ::-1 swaps y and x so x comes first
    text_box_restored = restore_rectangle_rbox(
        yx_text[:, ::-1] * 4, geo_map[yx_text[:, 0], yx_text[:, 1], :])
    # cv2.drawContours(image, text_box_restored.round().astype(np.int32), -1, (0, 255, 0))
    # show_image(image, 'restored')
    # cv2.waitKey(0)
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[yx_text[:, 0], yx_text[:, 1]]
    # nms
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), 0.2)
    # here we filter some low score boxes by the average score map, this is different from the original paper
    if boxes.shape[0] != 0:
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            # the mean of score_map under the mask becomes this box's score
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > 0.1]
        boxes = np.reshape(boxes[:, :8], (-1, 4, 2))
        boxes[:, :, 0] = boxes[:, :, 0] - pad[1][0]
        boxes[:, :, 1] = boxes[:, :, 1] - pad[0][0]
        boxes /= scale
        boxes = boxes.round().astype(np.int32)
        label_path = osp.join('data/pred', label_fname)
        with open(label_path, 'w') as f:
            for box in boxes:
                box = sort_poly(box.astype(np.int32))
                if np.linalg.norm(box[0] -
                                  box[1]) < 5 or np.linalg.norm(box[3] -
                                                                box[0]) < 5:
                    continue
                f.write('{},{},{},{},{},{},{},{}\n'.format(
                    box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0],
                    box[2, 1], box[3, 0], box[3, 1]))
Example #11
def test(model, im, filename, input_size = 512, use_cuda = False, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    im = im[:, :, ::-1]
    im_resized, (ratio_h, ratio_w) = resize_image(im)
    tmp = im_resized
    im_resized = im_resized.astype(np.float32)
    im_resized = im_resized.transpose(2, 0, 1)

    im_resized = torch.from_numpy(im_resized)
    im_resized = im_resized.unsqueeze(0)
    im = im_resized
    if use_cuda:
        im = im.cuda()
    model = model.eval()
    F_score, F_geometry = model(im)
    F_score = F_score.permute(0, 2, 3, 1)
    F_geometry = F_geometry.permute(0, 2, 3, 1)
    F_score = F_score.data.cpu().numpy()
    F_geometry = F_geometry.data.cpu().numpy()
    if len(F_score.shape) == 4:
        F_score = F_score[0, :, :, 0]
        F_geometry = F_geometry[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(F_score > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, F_geometry[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = F_score[xy_text[:, 0], xy_text[:, 1]]
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)

    if boxes.shape[0] == 0:
        return [None] * 4

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(F_score, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(F_score, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    if boxes.shape[0] > 0:
        mx = -float('inf')
        index = 0
        for i,box in enumerate(boxes):
            if box[8] > mx:
                mx = box[8]
                index = i
        box = sort_poly(boxes[index,:8].reshape((4,2)).astype(np.int32))
        cv2.polylines(tmp, [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 255, 0),
                          thickness=1)
    
    return boxes,ratio_h, ratio_w,tmp[:,:,::-1]
Example #12
    def detect(
        self, image: np.ndarray,
        params=TextDetectorParams()) -> Union[None, np.ndarray]:
        """
        Detector method.
        :param image:
        :param params:
        :return: An (N x 4 x 2) array, where N is the number of boxes found and the (4 x 2) subarray holds the corner
                 points.
        """
        if self.__closed:
            raise RuntimeError(
                "TextDetector: Session has already been closed.")
        image_resized, (ratio_h, ratio_w) = _resize_image(image)

        score_map, geo_map = self.__session.run(
            [self.__f_score, self.__f_geometry],
            feed_dict={self.__input_image: [image_resized]})
        if len(score_map.shape) == 4:
            score_map = score_map[0, :, :, 0]
            geo_map = geo_map[0, :, :, ]

        xy_text = np.argwhere(
            score_map > params.threshold_score_map)  # filter the score map
        xy_text = xy_text[np.argsort(
            xy_text[:, 0])]  # sort the text boxes via the y axis
        text_box_restored = _restore_rectangle(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])

        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

        boxes = lanms.merge_quadrangle_n9(boxes.astype(np.float32),
                                          params.threshold_nms)

        if boxes.shape[0] == 0:
            return None
        box_thresh = 0.1
        boxes = boxes[boxes[:, 8] > box_thresh]

        if boxes.shape[0] == 0:
            return None

        boxes = boxes[:, :8].reshape((-1, 4, 2))
        boxes[:, :, 0] /= ratio_w
        boxes[:, :, 1] /= ratio_h

        return boxes
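
A hedged sketch of consuming the (N, 4, 2) result of TextDetector.detect above; detector and image are assumed to exist, and cv2.polylines needs int32 points:

import cv2
import numpy as np

boxes = detector.detect(image)  # assumed TextDetector instance and BGR image
if boxes is not None:
    for quad in boxes.astype(np.int32):
        cv2.polylines(image, [quad.reshape(-1, 1, 2)], True, (0, 255, 0), 2)
    cv2.imwrite('detections.png', image)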
Example #13
 def detect(self,
            score_map,
            geo_map,
            score_map_thresh=1e-5,
            box_thresh=1e-8,
            nms_thres=0.1):
     '''
     restore text boxes from score map and geo map
     :param score_map:
     :param geo_map:
     :param score_map_thresh: threshold for score map
     :param box_thresh: threshold for boxes
     :param nms_thres: threshold for nms
     :return:
     '''
     if len(score_map.shape) == 4:
         score_map = score_map[0, :, :, 0]
         geo_map = geo_map[0, :, :, ]
     # filter the score map
     xy_text = np.argwhere(score_map > score_map_thresh)
     # sort the text boxes via the y axis
     xy_text = xy_text[np.argsort(xy_text[:, 0])]
     # restore
     start = time.time()
     text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                           geo_map[xy_text[:, 0],
                                                   xy_text[:,
                                                           1], :])  # N*4*2
     logging.debug('{} text boxes before nms'.format(
         text_box_restored.shape[0]))
     boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
     boxes[:, :8] = text_box_restored.reshape((-1, 8))
     boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
     # nms part
     start = time.time()
     # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
     logging.debug('{} boxes before merging'.format(boxes.shape[0]))
     boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
     if boxes.shape[0] == 0:
         return None
     logging.debug('{} boxes before checking scores'.format(boxes.shape[0]))
     # here we filter some low score boxes by the average score map, this is different from the original paper
     for i, box in enumerate(boxes):
         mask = np.zeros_like(score_map, dtype=np.uint8)
         cv2.fillPoly(mask, box[:8].reshape(
             (-1, 4, 2)).astype(np.int32) // 4, 1)
         boxes[i, 8] = cv2.mean(score_map, mask)[0]
     boxes = boxes[boxes[:, 8] > box_thresh]
     return boxes
Example #14
def detect_single_scale(score_map, geo_map, score_map_thresh, nms_thres,
                        box_thresh, timer):
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    # xy_text[:, ::-1]: coordinates of the pixels that passed the threshold
    # geo_map[xy_text[:, 0], xy_text[:, 1], :]: distances from each point to its bounding box
    text_box_restored = restore_rectangle(xy_text[:, ::-1],
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start

    # Modify Start
    # use the mean score inside the bounding box as the NMS criterion instead of a single pixel value
    # new_boxes = np.copy(boxes)
    # for i, box in enumerate(new_boxes):
    #     mask = np.zeros_like(score_map, dtype=np.uint8)
    #     cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32), 1)
    #     new_boxes[i, 8] = cv2.mean(score_map, mask)[0]
    # end

    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    # boxes = lanms.merge_quadrangle_n9(new_boxes.astype('float32'), nms_thres)

    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32), 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
Example #15
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2, if_eval=True):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    score = score[0, :, :]  # drop the channel dimension
    print('--------------------------2-D score.shape-------------------:',
          score.shape)
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c] #
    print('-------------------------xy_text.shape:', xy_text.shape)
    # coordinates whose score exceeds the confidence threshold: (n, 2) for n pixels
    if xy_text.size == 0:
        return None
    xy_text = xy_text[np.argsort(xy_text[:, 0])]  # sort the above-threshold
    # coordinates by their row index, in ascending order
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]: convert (y, x) to (x, y)
    print('------------------valid_pos.shape()--------------:',
          valid_pos.shape)

    # valid coordinate points
    valid_geo = geo[:, xy_text[:, 0],
                    xy_text[:, 1]]  # 5 x n: geo values at the surviving pixels
    print('-----------------------------valid_geo.shape---------------------:',
          valid_geo.shape)

    polys_restored, index = restore_polys(
        valid_pos, valid_geo, score.shape)  # final polys and their indices into valid_pos

    if polys_restored.size == 0:
        return None
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored  # the four corner points of every predicted box
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]  # confidence of each box
    if if_eval:
        return boxes
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'),
                                      nms_thresh)  # final predicted boxes after NMS

    print('-----------------boxes.shape after NMS---------------:',
          boxes.shape)
    return boxes
Example #16
    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        restore text boxes from score map and geo map
        """

        score_map = score_map[0]
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # filter the score map
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        #restore quad proposals
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

        try:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        except:
            print(
                'you should install lanms by pip3 install lanms-nova to speed up nms_locality'
            )
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map, 
        #   this is different from the original paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > cover_thresh]
        return boxes
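
The try/except above prefers the compiled lanms extension and falls back to a pure-Python locality-aware NMS when it is not installed. A minimal sketch of the same pattern in isolation, where nms_locality stands for the Python fallback (assumed importable, e.g. the implementation linked in Example #2):

import numpy as np

def merge_quads(boxes, nms_thresh=0.2):
    """Merge (N, 9) quads with the C++ lanms extension if available, else the Python fallback."""
    try:
        import lanms
        return lanms.merge_quadrangle_n9(boxes.astype(np.float32), nms_thresh)
    except ImportError:
        return nms_locality(boxes.astype(np.float64), nms_thresh)  # assumed pure-Python fallback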
Example #17
def detect(score_maps,
           geo_maps,
           timer,
           score_map_thresh=0.8,
           box_thresh=0.1,
           nms_thres=0.2,
           ratio_ws=None,
           ratio_hs=None):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:[W,H,5]
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_maps) != len(geo_maps) or len(geo_maps) != len(
            ratio_hs) or len(ratio_hs) != len(ratio_ws):
        print('the numbers of score maps, geo maps and ratios are not equal')
        assert False
    boxes = []
    for scale_idx in range(len(score_maps)):
        cur_score_map = score_maps[scale_idx]
        cur_geo_map = geo_maps[scale_idx]
        cur_ratio_w = ratio_ws[scale_idx]
        cur_ratio_h = ratio_hs[scale_idx]
        cur_boxes = detect_single_scale(cur_score_map, cur_geo_map,
                                        score_map_thresh, nms_thres,
                                        box_thresh, timer)
        if cur_boxes is None:
            continue
        cur_boxes_points = cur_boxes[:, :8].reshape((-1, 4, 2))
        cur_boxes_points[:, :, 0] /= cur_ratio_w
        cur_boxes_points[:, :, 1] /= cur_ratio_h
        cur_boxes = np.concatenate([
            np.reshape(cur_boxes_points, (-1, 8)),
            np.expand_dims(cur_boxes[:, 8], axis=1)
        ],
                                   axis=1)
        boxes.extend(cur_boxes)
    boxes = np.array(boxes)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    print('{} text boxes after final nms'.format(boxes.shape[0]))
    boxes = boxes[:, :8].reshape((-1, 4, 2))
    return boxes, timer
Example #18
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes, timer
Example #19
def merge_boxes(boxes, box_thresh=0.1, nms_thres=0.2):
    '''
    merge candidate quad boxes with locality-aware NMS
    :param boxes: boxes
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    boxes = np.asarray(boxes, dtype=np.float32)
    boxes = np.reshape(boxes, (-1, 9))

    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)

    if boxes.shape[0] == 0:
        return None

    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes
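
A hedged usage sketch for merge_boxes: any flat sequence of 9-value quads (eight coordinates plus a score) is accepted, and only the quads that survive NMS and the score filter come back:

raw = [
    [0, 0, 10, 0, 10, 10, 0, 10, 0.9],
    [1, 1, 11, 1, 11, 11, 1, 11, 0.8],  # heavily overlaps the first quad
]
kept = merge_boxes(raw)
print(None if kept is None else kept.shape)  # roughly (1, 9) after merging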
Example #20
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
	score = score[0, :, :]
	xy_text = np.argwhere(score > score_thresh)
	if xy_text.size == 0:
		return None

	xy_text = xy_text[np.argsort(xy_text[:, 0])]
	valid_pos = xy_text[:, ::-1].copy()
	valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]
	polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape) 
	if polys_restored.size == 0:
		return None

	boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
	boxes[:, :8] = polys_restored
	boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
	boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)

	return boxes
Example #21
def text_detection(score_map,
                   geo_map,
                   score_map_thresh=0.8,
                   box_thresh=0.1,
                   nms_thres=0.2):
    '''
        restore text boxes from score map and geo map

        :param score_map: Map of scores from EAST model
        :param geo_map: Map of geometries from EAST model
        :param score_map_thresh: threshold for score map
        :param box_thresh: threshold for boxes
        :param nms_thres: threshold for nms
        :return: List of rectangles
        '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)

    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes
Example #22
def get_bounding_boxes_from_output(score_map, geo_map):
    """ Recreate the boxes from score map and geometry map.

    :param score_map: Score map.
    :param geo_map: Geometry map.
    :return: Restored boxes
    """
    index_text = np.argwhere(score_map > 0.9)

    restored_bounding_boxes = np.zeros((index_text.shape[0], 8))
    for i in range(index_text.shape[0]):
        indices = index_text[i]   # [y, x]
        restored_bounding_boxes[i] = restore_bounding_box([indices[1], indices[0]], geo_map[indices[0], indices[1], :])

    boxes = np.zeros((restored_bounding_boxes.shape[0], 9))
    boxes[:, :8] = restored_bounding_boxes
    boxes[:, 8] = score_map[index_text[:, 0], index_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), 0.2)

    return boxes
Example #23
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    '''get boxes from feature map
	Input:
		score       : score map from model <numpy.ndarray, (1,row,col)>
		geo         : geo map from model <numpy.ndarray, (5,row,col)>
		score_thresh: threshold to segment score map
		nms_thresh  : threshold in nms
	Output:
		boxes       : final polys <numpy.ndarray, (n,9)>
	'''
    time_6 = time.time()
    score = score[0, :, :]
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None, 0
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None, 0
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]

    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)

    for i, box in enumerate(boxes):
        mask = np.zeros_like(score, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        # print('111111111111-------',np.sum(mask))
        boxes[i, 8] = cv2.mean(score, mask)[0]
    boxes = boxes[boxes[:, 8] > 0.1]

    time_4 = time.time()
    time_nms = time_4 - time_6
    # print('nms---------',time_nms)
    return boxes, time_nms
Example #24
def restore_bboxes(cls, rho, theta, cls_thresh=None, nms=True):
    # AX+BY+C=0  <---> ρ=x*cosθ+y*sinθ
    height = cls.shape[1]
    if cls_thresh is None:
        cls_thresh = cfg.test.cls_thresh

    region = (cls[0, :, :] > cls_thresh)
    ret, markers = cv2.connectedComponents(np.uint8(region))
    for i in range(1, ret):
        y, x = np.where(markers == i)
        if len(x) > (cfg.test.rm_small_cc_area * cfg.test.short_side)**2:
            continue
        for j in range(len(x)):
            region[y[j], x[j]] = False

    r, c = np.where(region)
    A, B, C = np.cos(theta), np.sin(
        theta), -rho  # the 'C' is actually "C + A*dx + B*dy"
    A0, B0, C0 = A[0, r, c], B[0, r, c], C[0, r, c]
    C0 -= A0 * c + B0 * (height - r)  # recover C in global coordinate
    A1, B1, C1 = A[1, r, c], B[1, r, c], C[1, r, c]
    C1 -= A1 * c + B1 * (height - r)  # recover C in global coordinate
    A2, B2, C2 = A[2, r, c], B[2, r, c], C[2, r, c]
    C2 -= A2 * c + B2 * (height - r)  # recover C in global coordinate
    A3, B3, C3 = A[3, r, c], B[3, r, c], C[3, r, c]
    C3 -= A3 * c + B3 * (height - r)  # recover C in global coordinate

    # lines -> bboxes
    x1, y1 = line_cross_point(A0, B0, C0, A1, B1, C1)
    x2, y2 = line_cross_point(A1, B1, C1, A2, B2, C2)
    x3, y3 = line_cross_point(A2, B2, C2, A3, B3, C3)
    x0, y0 = line_cross_point(A3, B3, C3, A0, B0, C0)
    bboxes = np.vstack((x0, height - y0, x1, height - y1, x2, height - y2, x3,
                        height - y3, cls[0, r, c])).T
    if nms:
        bboxes = lanms.merge_quadrangle_n9(bboxes.astype('float32'),
                                           cfg.test.nms_thresh)
    return bboxes
Example #25
def detect_txt(score_path, txt_path, prob_thresh, short_side):
    npy_list = []
    for score in os.listdir(score_path):
        npy_list.append(score.split('_', 1)[1].split('.')[0])
    npy_list = list(set(npy_list))

    for npy_file in sorted(npy_list):
        print(npy_file)
        save_file = os.path.join(txt_path, 'res_' + npy_file + '.txt')
        vertices = []
        for short_side in short_sides:
            res = np.load(os.path.join(
                score_path, '{}_'.format(short_side) + npy_file + '.npy'),
                          allow_pickle=True).item()
            bboxes = restore_bboxes(res['cls'],
                                    res['rho'],
                                    res['theta'],
                                    prob_thresh,
                                    nms=False)

            for bbox in bboxes:
                pts = bbox[:8] * 4 * (res['origin_w'] / res['resize_w'] +
                                      res['origin_h'] / res['resize_h']) / 2
                vertices.append(pts.tolist() + [bbox[8]])

        bboxes = np.array(vertices)
        bboxes = lanms.merge_quadrangle_n9(bboxes.astype('float32'),
                                           cfg.test.nms_thresh)

        with open(save_file, 'w') as f:
            for vertice in bboxes:
                pts = np.int32(np.around(vertice[:8].flatten()))
                if not validate_clockwise_points(pts):
                    continue
                pts = pts.astype(str).tolist()
                pts = ','.join(pts) + ',' + '0.9' + '\n'  # str(conf[j])
                f.write(pts)
        sys.stdout.flush()
Example #26
    def detect(score_map,
               geo_map,
               score_map_thresh=0.8,
               box_thresh=0.1,
               nms_thres=0.1):
        '''
        score_map: 128*128
        geo_map: 128*128*5
        '''
        if len(score_map.shape) == 4:
            score_map = score_map[0, :, :, 0]
            geo_map = geo_map[0, :, :, ]

        # filter the score map
        xy_text = np.argwhere(score_map > score_map_thresh)
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore
        text_box_restored = Toolbox.restore_rectangle_rbox(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0],
                                          xy_text[:, 1], :])  # N*4*2
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

        # nms part
        boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
        if boxes.shape[0] == 0:
            return np.array([])

        # here we filter some low score boxes by the average score map, this is different from the original paper
        # for i, box in enumerate(boxes):
        #     mask = np.zeros_like(score_map, dtype = np.uint8)
        #     cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        #     boxes[i, 8] = cv2.mean(score_map, mask)[0]
        # boxes = boxes[boxes[:, 8] > box_thresh]
        return boxes
Example #27
def detect(F_score,
           F_geo,
           F_score_thresh=0.8,
           F_geo_thresh=0.1,
           nms_thres=0.2):
    if len(F_score.shape) == 4:
        F_score = F_score[0, :, :, 0]
        F_geo = F_geo[0, :, :, ]

    xy_text = np.argwhere(F_score > F_score_thresh)  # filter the score map
    xy_text = xy_text[np.argsort(
        xy_text[:, 0])]  # sort the text boxes via the y axis

    # restoration
    text_box_restored = restore_rectangle_from_rbox(
        xy_text[:, ::-1] * 4, F_geo[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = F_score[xy_text[:, 0], xy_text[:, 1]]

    # locality-awareness nms
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)

    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(F_score, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        boxes[i, 8] = cv2.mean(F_score, mask)[0]
    boxes = boxes[boxes[:, 8] > F_geo_thresh]

    return boxes
Example #28
def detect_mask(score_map,
                score_map_full,
                geo_map,
                timer,
                score_map_thresh=FLAGS.score_map_thresh,
                mask_thresh=FLAGS.mask_thresh,
                box_thresh=0.1,
                nms_thres=0.2,
                min_area=FLAGS.min_area):
    '''
    restore text boxes from score map and geo map
    :param score_map:
    :param geo_map:
    :param timer:
    :param mask_thresh: threshold for score map
    :param box_thresh: threshold for boxes
    :param nms_thres: threshold for nms
    :return:
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        score_map_full = score_map_full[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])

    points = list(xy_text)
    for i in range(len(points)):
        points[i] = tuple(points[i])

    #points = list(zip(*np.where(score_map > mask_thresh)))
    points_dict = {}
    for i in range(len(points)):
        points_dict[points[i]] = i
    group_mask = dict.fromkeys(points, -1)

    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes_all = boxes + 0
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer, [], []

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    #dict_box = {}
    #if len(score_map.shape) == 4:
    #score_map = score_map[0, :, :, 0]
    #geo_map = geo_map[0, :, :, ]
    #xy_text = np.argwhere(score_map > mask_thresh)
    #xy_text = xy_text[np.argsort(xy_text[:, 0])]

    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)
    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)

    if boxes is None or boxes.shape[0] == 0:
        #if boxes.shape[0] < 1:
        return mask_colors, timer, [], []
    boxes_in = boxes + 0
    boxes_in[:, :8] = (boxes_in[:, :8] / 4).astype(np.int32)
    h, w = score_map.shape
    boxes_points = []
    cnt = 0
    for box in boxes_in:
        b_ps = []
        b = box[:8].reshape((4, 2))
        if np.linalg.norm(b[0] - b[1]) <= 1 or np.linalg.norm(b[3] -
                                                              b[0]) <= 1:
            continue
        xmin = int(max(np.min(b[:, 0]), 0))
        xmax = int(min(np.max(b[:, 0]), w - 1))
        ymin = int(max(np.min(b[:, 1]), 0))
        ymax = int(min(np.max(b[:, 1]), h - 1))
        #print(ymin,ymax,xmin,xmax)

        local_ = score_map_full[ymin:ymax + 1, xmin:xmax + 1]
        #print("score_map_full",score_map_full.max(),local_.max)
        local_mask = np.zeros_like(local_)
        b[:, 0] -= xmin
        b[:, 1] -= ymin
        cv2.fillPoly(local_mask, b.astype(np.int32)[np.newaxis, :, :], 1)
        local_ = local_ * local_mask
        #local_th = local_ + 0
        #print("mask_thresh",mask_thresh)
        #local_th[local_th<=mask_thresh] = 1
        #cv2.imwrite("local_"+str(cnt)+".jpg",local_*255)
        #cv2.imwrite("local_"+str(cnt)+"_th.jpg",local_th*255)
        #cnt += 1

        ps_idx = np.argwhere(local_ > mask_thresh)

        #ps_idx = np.where((xy_text[:,1]>=xmin) & (xy_text[:,1]<=xmax) & (xy_text[:,0]>=ymin) & (xy_text[:,0]<=ymax))[0]
        #for idx in ps_idx:
        #b_ps.append([xy_text[idx,1], xy_text[idx,0]])
        for idx in ps_idx:
            b_ps.append([idx[1] + xmin, idx[0] + ymin])

        if b_ps == []:
            continue
        boxes_points.append(b_ps)

    #print("boxes_points",boxes_points)

    mask_contours = []

    for b in boxes_points:
        mask_bin *= 0
        b = np.array(b)
        b = b[:, ::-1]
        b = b.transpose(1, 0)
        b = (b[0], b[1])
        mask_bin[:, :][b] = 255
        mask_colors[:, :, :][b] = 255

        area_ = np.sum(mask_bin / 255)
        if area_ < min_area or area_ >= h * w * 0.99:
            continue

        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            #print("p_in",p_in)
            if tuple(p_in) in points_dict:
                box_ = boxes_all[points_dict[tuple(p_in)]]
                poly_h = min(np.linalg.norm(box_[0] - box_[3]),
                             np.linalg.norm(box_[1] - box_[2]))
                poly_w = min(np.linalg.norm(box_[0] - box_[1]),
                             np.linalg.norm(box_[2] - box_[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
            #dilate_kernel_size = 3
            #print("dilate_kernel_size",dilate_kernel_size)
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
            mask_bin = cv2.dilate(mask_bin, kernel)

        contours, hierarchy = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_NONE)
        max_contour = contours[0]
        max_area = cv2.contourArea(max_contour)
        for i in range(1, len(contours)):
            if cv2.contourArea(contours[i]) > max_area:
                max_contour = contours[i]
                max_area = cv2.contourArea(max_contour)

        epsilon = 0.01 * cv2.arcLength(max_contour, True)
        approx = cv2.approxPolyDP(max_contour, epsilon, True)
        mask_contours.append(approx)

        cv2.drawContours(mask_colors, max_contour, -1, (0, 255, 0), 3)
        cv2.drawContours(mask_colors, approx, -1, (0, 0, 255), 3)
        #cv2.imshow("mask_colors", mask_colors)
        #cv2.waitKey(0)
    return mask_colors, timer, mask_contours, boxes
Example #29
from lanms import merge_quadrangle_n9
import numpy as np

if __name__ == '__main__':
    # unit square with confidence 1
    q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')

    print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2])))
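
The call above relies on the library's default IoU threshold; the other examples on this page pass it explicitly as the second argument. Continuing the snippet, with 0.3 chosen only for illustration:

    print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2]), 0.3))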
Example #30
                for i in range(num_detections[0]):
                    if scores[i] < score_thresh:
                        continue
                    p = rbox_2_polygon(xc[i], yc[i], w[i], h[i], boxes[i,4])
                    if p is not None:
                        for j in range(8):
                            ratio = ratio_x if j % 2 == 0 else ratio_y
                            p[j] *= ratio
                        polys.append(p + [scores[i]])
                print('convert time:', time.time() - t0)  ###
            polys = np.array(polys, dtype=np.float32)

            # lanms
            nms_thresh = 0.2
            t0 = time.time()
            polys = lanms.merge_quadrangle_n9(polys, nms_thresh)
            nms_keep_thresh = 1.0
            if polys.shape[0] > 0:
                remain_index = np.where(polys[:,8] > nms_keep_thresh)[0]
                polys = polys[remain_index]
            print('nms_time:', time.time() - t0, 'boxes:', polys.shape[0])  ###

            # save results
            res_fn = 'res_' + os.path.splitext(fn)[0] + '.txt'
            with open(os.path.join(output_dir, res_fn), 'w') as f_res:
                for p in polys:
                    f_res.write("{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d}\r\n".format(
                        int(p[0]),
                        int(p[1]),
                        int(p[2]),
                        int(p[3]),
Example #31
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 39], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')
    input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    input_labels_split = tf.split(input_labels, len(gpus))
    
    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                il = input_labels_split[i]
                total_loss, model_loss, f_score, f_geometry, _ = tower_loss(iis, isms, igms, itms, il, reuse_variables)
                #f_score, f_geometry = i_am_testing(iis)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                #print "below..."
                #batch_norm_updates_op = tf.group(*[op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) if 'resnet_v1_50/block4' in op.name or 'resnet_v1_50/block3' in op.name or 'feature_fusion' in op.name])
                #print "above..."
                reuse_variables = True
                #print "below.."
                train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block1' in var.name]
                #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block4' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_7' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_8' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_9' in var.name]
                #print train_var
                #print "above..."
                train_var += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion')
                grads = opt.compute_gradients(total_loss, var_list=train_var)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
    #train_var = [var for var in tf.trainable_variables() if ('resnet_v1_50/block3' in var.name or 'resnet_v1_50/block4' in var.name or 'feature_fusion' in var.name)]
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')
    
    #####################################################################################################################
    # BLOCK MODIFIED BY ME
    #variables = slim.get_variables_to_restore()
    #var_list = []
    #for v in variables:
    #    if len(v.name.split('/')) == 1:
    #            var_list.append(v)
    #    elif v.name.split('/')[1] != "myconv1" or not v.name.find('custom_filter'):
    #            var_list.append(v)
    #    else:
    #            pass
    #saver = tf.train.Saver(var_list)
    saver = tf.train.Saver(tf.global_variables())
    saver_restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # removing the first conv layer
    #del saver_restore_vars[1]
    #saver_to_restore = tf.train.Saver(saver_restore_vars)
    #####################################################################################################################
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())
    
    init = tf.global_variables_initializer()
    #print '>> trainable variables: ',slim.get_trainable_variables()
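    # If a pretrained backbone checkpoint is supplied, prepare an op that copies its
    # weights into matching trainable variables (missing variables are ignored).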
    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    #my_char_l = "5"
    #my_char_U = ""
    data_size = 0
    train_data_indices = []
    list_of_img_pos = []
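    # Parse the cropped-image annotation file. Format (inferred from the parser here and
    # the batch loader below): a './cropped_img...' image line, then a line with the
    # number of boxes in that crop, then one "x y w h" line per box.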
    with open('./cropped_annotations_5.txt', 'r') as f:
        annotation_file = f.readlines()
    #with open('Data/cropped_annotations_new/cropped_annotations' + my_char_U + '.txt', 'r') as f:
    #    annotation_file += f.readlines()
    idx = 0
    for line in annotation_file:
        if len(line) > 1 and line[:13] == './cropped_img':  # and str(line[14:27]) in training_list:
            data_size +=1
            train_data_indices.append(idx)
            list_of_img_pos.append(line[14:].split(".")[0]+".tiff")
        idx += 1
    list_of_img_all = os.listdir('./cropped_img')
    list_of_img_neg = np.array(list(set(list_of_img_all) - set(list_of_img_pos)))
    #print "Char model: " + my_char_U + my_char_l
    #print "Data size: " + str(data_size)
    epoch_size = data_size / (16 * 2)  # integer division; effective batch size is 32 (see the batch-assembly loop below)
    #print epoch_size
    print "This many steps per epoch: " + str(epoch_size)

    #list_of_img_neg_char = os.listdir('Data/j')

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
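        # Either resume from a checkpoint in checkpoint_path or initialize from scratch
        # (optionally pulling backbone weights from the pretrained model).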
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            print '>> Checkpoint path: ', FLAGS.checkpoint_path
            print '>> Checkpoint file: ', os.path.basename(ckpt_state.model_checkpoint_path)
            #all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)[1]
            # Exclude two variables (by index) from the restore list -- the first conv layer
            # weights and, per the commented name lookup below, their ExponentialMovingAverage
            # shadow -- so the rest of the checkpoint can still be loaded.
            var1 = saver_restore_vars[1]
            del saver_restore_vars[1]
            var2 = saver_restore_vars[422]
            del saver_restore_vars[422]
            #names = [var.name for var in saver_restore_vars]
            saver_to_restore = tf.train.Saver(saver_restore_vars)
            #print '>> global vars: ', names.index('resnet_v1_50/conv1/weights/ExponentialMovingAverage:0')#[var.name for var in tf.global_variables()]
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            # originally saver.restore(sess, model_path)
            saver_to_restore.restore(sess, model_path)
            # The two excluded variables are not restored, so initialize them fresh.
            init_new_vars_op = tf.initialize_variables([var1, var2])
            sess.run(init_new_vars_op)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
        #print "below:"
        #tvars = tf.trainable_variables()
        #g_vars = [var for var in tvars if 'resnet_v1_50/block4' in var.name]
        #print g_vars
        #print tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50')
        #return
        print FLAGS.learning_rate
        print reg_constant
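        # Main training loop: each step assembles a batch of 32 crops (with their
        # score/geo/mask targets), runs one optimizer update, and periodically logs
        # losses, writes summaries, and saves a checkpoint.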
        for step in range(24*epoch_size):
            ### Generate data ###
            # data slots: [0] images, [1] filenames, [2] score maps, [3] geo maps, [4] training masks
            data = [], [], [], [], []
            np.random.shuffle(train_data_indices)
            num_im = 0
            actual_num_im = 0
            list_of_chars = list(string.ascii_lowercase) + [str(x) for x in range(10)]
            while len(data[0]) < 32:
                prob = 1  # np.random.random(1)[0] -- hard-coded to 1, so the negative-sample branch below never runs
                if prob > 0.49:
                    i = train_data_indices[num_im]
                    im_fn = "./cropped_img/"+annotation_file[i][14:].split(".tiff",1)[0]+".tiff"
                    #print im_fn
                    im = cv2.imread(im_fn)
                    ################################################################################
                    # append one extra channel per character (a-z, 0-9) from the per-character crop maps
                    for ids_c in range(len(list_of_chars)):
                        crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_cropped/'+list_of_chars[ids_c]+'/'
                        filename = crop_dir+annotation_file[i][14:].split(".tiff",1)[0]+".tiff"
                        pad = cv2.imread(filename)
                        pad = pad[:,:,0]
                        pad = np.expand_dims(pad, axis=2)
                        im = np.append(im, pad, axis = 2)
                    ################################################################################
                    if im is not None:
                        r, c, _ = im.shape
                        text_polys = []
                        text_tags = []
                        if int(annotation_file[i+1]) > 0:
                            for idx in range(i+2,i+2+int(annotation_file[i+1])):
                                annotation_data = annotation_file[idx]
                                annotation_data = annotation_data.split(" ")
                                x, y = float(annotation_data[0]), float(annotation_data[1])
                                w, h = float(annotation_data[2]), float(annotation_data[3])
                                text_polys.append([[int(x), int(y - h)],
                                                   [int(x + w), int(y - h)],
                                                   [int(x + w), int(y)],
                                                   [int(x), int(y)]])
                                text_tags.append(False)
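                        # Rasterize the polygons into EAST score/geometry/training-mask maps at full
                        # resolution; they are subsampled by 4 below to match the output stride.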
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)), np.array(text_polys), np.array(text_tags))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
                        actual_num_im += 1  
                    num_im += 1
           
                else:
                    # Negative sample (no text). Note: unreachable while prob is hard-coded to 1 above.
                    im_fn = np.random.choice(list_of_img_neg)
                    im = cv2.imread("./cropped_img/" + im_fn)  # assumed fix: the original never re-read `im` for the negative crop
                    ################################################################################
                    # adding the extra character channel
                    #for i in range(len(list_of_chars)):
                    crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_single_cropped/'
                    filename = crop_dir + im_fn.split(".tiff", 1)[0] + ".tiff"  # assumed fix: key off the sampled image, not the stale positive-branch index
                    pad = cv2.imread(filename)
                    pad = pad[:, :, 0]
                    pad = np.expand_dims(pad, axis=2)
                    im = np.append(im, pad, axis=2)
                    ################################################################################
                    #    im_fn = np.random.choice(list_of_img_neg_char)
                    #    im_mini = cv2.imread("Data/j/" + im_fn)
                    #    r0, c0, _ = im_mini.shape
                    #    im = np.zeros((512, 512, 3), dtype=np.uint8)
                    #    ra, rb, ca, cb = 256-r0/2, 256+(r0+1)/2, 256-c0/2, 256+(c0+1)/2
                    #    im[ra:rb, ca:cb, :] = im_mini.copy()
                    if im is not None:
                        r, c, _ = im.shape
                        score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)), np.array([]), np.array([]))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
       
            ### Run model ###
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict={input_images: data[0],
                                            input_score_maps: data[2],
                                            input_geo_maps: data[3],
                                            input_training_masks: data[4]})
            epoch = step / epoch_size
            batch_num = step % epoch_size   
            if step % (epoch_size/3) == 0:
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " model loss: " + str(ml)
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " total loss: " + str(tl)
            if step % (epoch_size/2) == 0:
                #print "Epoch: " + str(step / (epoch_size/2))
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op],
                                              feed_dict={input_images: data[0],
                                                         input_score_maps: data[2],
                                                         input_geo_maps: data[3],
                                                         input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
            if False:
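                # Disabled evaluation pass (guarded by `if False`): slide a rotated window over a
                # full map sheet, detect boxes at several rotations, merge them with locality-aware
                # NMS, and score precision/recall/F-score against the character annotations.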
                count_right = 0
                count_wrong = 0
                count_posNotDetected = 0
                im0 = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                w, h, _ = im0.shape
                slide_window = 300
                crop_size = 512
                crop_center = (256, 256)
                num_rows, num_cols = int(np.ceil(w / float(slide_window))), int(np.ceil(h / float(slide_window)))  # float division so the partial window at the border is counted
                print num_cols
                labels = []  # assumed fix: accumulator for boxes from all rotations; it was never initialized
                for rot in [-90.0, -60.0, -30.0, 0.0, 30.0, 60.0, 90.0]:
                    im = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                    boxes_one_rot = []
                    count = 0
                    while count < num_rows * num_cols:
                        images, data2, data3, data4 = [], [], [], []
                        for k in range(16):
                            i = (count + k) / num_cols  # assumed fix: row-major window indexing (original divided by num_rows)
                            j = (count + k) % num_cols
                    
                            temp = im[slide_window*i:slide_window*i+crop_size, \
                                      slide_window*j:slide_window*j+crop_size, ::-1]
                            w2, h2, _ = temp.shape
                            if w2 < crop_size or h2 < crop_size:
                                result = np.zeros((crop_size,crop_size,3))
                                result[:w2,:h2] = temp
                                temp = result
                            M = cv2.getRotationMatrix2D(crop_center,rot,1.0)
                            temp = cv2.warpAffine(temp, M, (crop_size, crop_size))
                            images.append(temp)
                            score_map, geo_map, training_mask = icdar.generate_rbox((int(crop_size), int(crop_size)), np.array([]), np.array([]))
                            data2.append(score_map[::4, ::4, np.newaxis].astype(np.float32))
                            data3.append(geo_map[::4, ::4, :].astype(np.float32))
                            data4.append(training_mask[::4, ::4, np.newaxis].astype(np.float32))
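                        # Run the detector on this batch of 16 rotated crops; the score/geo/mask
                        # feeds are dummies built from empty polygon lists.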
                        score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: images, input_score_maps:data2,
                                                                                                 input_geo_maps: data3,
                                                                                                 input_training_masks: data4})
                        for k in range(16):
                            i = (count + k) / num_cols  # assumed fix: keep consistent with the window layout above
                            j = (count + k) % num_cols
                            boxes = detect(score_map=score[k], geo_map=geometry[k], score_map_thresh=0.01, box_thresh=0.01, nms_thres=0.01)  # assumed fix: index the batch by k, not by the column index j
                            if boxes is not None:
                                boxes = boxes[:, :8].reshape((-1, 4, 2))
                                for box in boxes:
                                    M_inv = cv2.getRotationMatrix2D(crop_center,-1*rot,1)
                                    box[0] = M_inv.dot(np.array((box[0,0], box[0,1]) + (1,)))
                                    box[1] = M_inv.dot(np.array((box[1,0], box[1,1]) + (1,)))
                                    box[2] = M_inv.dot(np.array((box[2,0], box[2,1]) + (1,)))
                                    box[3] = M_inv.dot(np.array((box[3,0], box[3,1]) + (1,)))
                                    box = sort_poly(box.astype(np.int32))
                                    box[0,0] = box[0,0] + j * slide_window
                                    box[0,1] = box[0,1] + i * slide_window
                                    box[1,0] = box[1,0] + j * slide_window
                                    box[1,1] = box[1,1] + i * slide_window
                                    box[2,0] = box[2,0] + j * slide_window
                                    box[2,1] = box[2,1] + i * slide_window
                                    box[3,0] = box[3,0] + j * slide_window
                                    box[3,1] = box[3,1] + i * slide_window
                                    boxes_one_rot.append(box)  # assumed fix: collect every box (the original appended only once, after the loop)
                        count += 16  # assumed fix: advance the sliding-window index; otherwise the while loop never terminates
                    boxes_single_rot = np.zeros((len(boxes_one_rot), 9))
                    boxes_single_rot[:, :8] = np.array(boxes_one_rot).reshape((-1, 8))
                    boxes_single_rot[:, 8] = 1
                    labels += boxes_single_rot.tolist()                                               
                boxes = lanms.merge_quadrangle_n9(np.array(labels).astype('float32'), 0.01)  # assumed: reuse the 0.01 NMS threshold from detect() above; `nms_thres` was undefined here
                annotation = np.load("/mnt/nfs/work1/elm/ray/new_char_anots_ncs/" + "j" + "/" + "D0117-5755036" + ".npy").item()
                ### Compute the TP, FP, FN info for each image
                count_right_cache = 0
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                num_true_pos = len(annotation)
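                # Greedy matching: each predicted box that passes the minimum-size check counts as a
                # false positive unless it overlaps some annotated quadrilateral (checkIOU).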
                for box in boxes:
                    box = sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
                        continue
                    k = 0
                    idx = 0
                    count_wrong += 1
                    while (idx < num_true_pos):
                        if k in annotation: 
                            proposed_label = annotation[k]['vertices']
                            if len(proposed_label) == 4:
                                x3, y3, x2, y2, x1, y1, x0, y0 = proposed_label[0][0], proposed_label[0][1], proposed_label[1][0], proposed_label[1][1], \
                                                     proposed_label[2][0], proposed_label[2][1], proposed_label[3][0], proposed_label[3][1]
                                if (checkIOU(box, [[x0,y0],[x1,y1],[x2,y2],[x3,y3]]) == True):
                                    count_right_cache += 1
                                    count_wrong -= 1
                                    break 
                            idx += 1
                        k += 1
                count_posNotDetected += num_true_pos - count_right_cache
                count_right += count_right_cache
                precision = float(count_right) / float(count_right + count_wrong)  # TP / (TP + FP)
                recall = float(count_right) / float(count_right + count_posNotDetected)  # TP / (TP + FN)
                fscore = 2 * (precision * recall) / (precision + recall)
                print "Precision, recall, fscore: " + str(precision) + ", " + str(recall) + ", " + str(fscore)