Beispiel #1
0
def detect(score_map, geo_map, score_map_thresh, box_thresh, nms_thres):
    '''
    Restore scored text boxes from a score map and a geometry map.

    :param score_map: per-pixel text score; a 3-D input is reduced to
        ``score_map[:, :, 0]``
    :param geo_map: per-pixel geometry, indexed as ``geo_map[row, col, :]``
        (used as given, no slicing is applied)
    :param score_map_thresh: pixels scoring above this become box candidates
    :param box_thresh: boxes whose mean score inside the box is not above
        this value are dropped
    :param nms_thres: threshold handed to ``lanms.merge_quadrangle_n9``
    :return: array whose rows hold 8 corner coordinates followed by the mean
        score in column 8, or None when nothing is detected
    '''
    if len(score_map.shape) == 3:
        score_map = score_map[:, :, 0]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    if xy_text.shape[0] == 0:
        # No pixel passed the threshold: there is nothing to restore or
        # merge, so do not rely on the helpers accepting empty input.
        return None
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # (col, row) coordinates are scaled by 4 before restoring; the boxes are
    # divided by 4 again below when drawn on the score map.
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)

    if boxes.shape[0] == 0:
        return None

    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, color=np.array((255,0,0)))
        # replace the per-pixel score with the mean score under the box
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
Beispiel #2
0
def detect(score_map, geo_map, score_map_thresh=0.1, box_thresh=0.005, nms_thres=0.25):
    '''
    Restore text boxes from score map and geo map.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for the mean score inside each box
    :param nms_thres: threshold for nms
    :return: array whose rows hold 8 corner coordinates followed by the mean
        score in column 8, or None when nothing is detected
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    xy_text = np.argwhere(score_map > score_map_thresh)
    # nothing above the threshold: skip restoring and NMS entirely
    if len(xy_text) < 1:
        return None
    # sort the candidate pixels by row
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # (col, row) coordinates are scaled by 4 before restoring; the boxes are
    # divided by 4 again below when drawn on the score map.
    text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    # here we filter some low score boxes by the average score map, this is different from the original paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, color=np.array((255,0,0)))
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes
Beispiel #3
0
def detect(score_map, geo_map, timer, score_map_thresh=0.1, box_thresh=0.1, nms_thres=0.5):
    '''
    Restore scored text boxes from a score map and a geometry map.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param timer: mapping that receives the elapsed seconds of the two stages
        under the keys 'restore' and 'nms'
    :param score_map_thresh: pixels scoring above this become box candidates
    :param box_thresh: minimum (exclusive) mean score inside a box
    :param nms_thres: threshold handed to ``lanms.merge_quadrangle_n9``
    :return: ``(boxes, timer)``; ``boxes`` is None when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # (row, col) of every pixel above the threshold, ordered by row
    text_px = np.argwhere(score_map > score_map_thresh)
    text_px = text_px[np.argsort(text_px[:, 0])]
    rows, cols = text_px[:, 0], text_px[:, 1]

    # stage 1: one candidate box per pixel, from (col, row) * 4 and its geometry
    tic = time.time()
    quads = restore_rectangle(text_px[:, ::-1]*4, geo_map[rows, cols, :])
    print('{} text boxes before nms'.format(quads.shape[0]))
    boxes = np.zeros((quads.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # stage 2: merge / suppress overlapping candidates
    tic = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # Re-score every survivor with the mean score-map value under the box
    # (coordinates divided by 4 to land on the score map), then threshold.
    for idx in range(boxes.shape[0]):
        region = np.zeros_like(score_map, dtype=np.uint8)
        corners = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, corners, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]

    keep = boxes[:, 8] > box_thresh
    return boxes[keep], timer
def cal_IoU_gt_py(pred_geo, pred_cls, gt, threshold=0.8):
    '''
    Compute, for every confident pixel, the best IoU between the box that
    pixel predicts and the ground-truth boxes of the same image.

    For each batch item, pixels whose score exceeds ``threshold`` are turned
    into boxes with ``restore_rectangle``; each box is compared with the
    ground-truth quadrangles of that item and the largest IoU is stored at
    the pixel's position.

    :param pred_geo: N, W, H, 5
    :param pred_cls: N, W, H, 1
    :param gt: N, M, 4, 2; iteration over an item's boxes stops at the first
        entry whose coordinates sum to -8 (presumably -1 padding; confirm
        against the data pipeline)
    :param threshold: score threshold, 0.8 by default
    :return: float32 array of shape (N, W, H, 1); pixels that are not above
        the threshold keep the value 0
    :raises AssertionError: when a per-item score map is not 2-D
    '''
    def compute_IoU(polygon1, polygon2):
        '''
        Compute the IoU of two quadrangles.
        :param polygon1: 4, 2
        :param polygon2: 4, 2
        :return: 0~1 value
        '''
        polygon1 = Polygon(polygon1)
        if not polygon1.is_valid:
            polygon1 = polygon1.buffer(0)
        polygon2 = Polygon(polygon2)
        if not polygon2.is_valid:
            polygon2 = polygon2.buffer(0)
        intersection_polygon = polygon1.intersection(polygon2)
        if not intersection_polygon.is_valid:
            return 0.0
        intersection_area = intersection_polygon.area
        union_area = polygon1.area + polygon2.area - intersection_area
        if union_area <= 0:
            # two degenerate (zero-area) polygons: avoid dividing by zero
            return 0.0
        return float(intersection_area) / float(union_area)

    pred_cls = np.asarray(pred_cls)
    # Drop only the trailing channel axis. A blanket np.squeeze() would also
    # remove a batch axis of size 1, and pred_cls[batch_id] would then be a
    # single row instead of a 2-D score map.
    if pred_cls.ndim == 4 and pred_cls.shape[-1] == 1:
        pred_cls = pred_cls[..., 0]
    elif pred_cls.ndim != 3:
        pred_cls = np.squeeze(pred_cls)
    shape = np.shape(pred_geo)
    IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)

    for batch_id in range(shape[0]):
        score_map = pred_cls[batch_id]
        geo_map = pred_geo[batch_id]
        cur_gt = gt[batch_id]

        if len(np.shape(score_map)) != 2:
            logging.log(logging.ERROR, 'score map shape isn\'t correct!')
            # explicit raise: a bare `assert False` disappears under -O
            raise AssertionError('score map shape isn\'t correct!')
        xy_text = np.argwhere(score_map > threshold)
        if xy_text.shape[0] == 0:
            # no confident pixel in this item; its IoU map stays all zero
            continue
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        text_box_restored = restore_rectangle(xy_text[:, ::-1], geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2

        # (x, y) are the two argwhere indices, i.e. axes 1 and 2 of IoU_gt
        for (x, y), box in zip(xy_text, text_box_restored):
            box = np.asarray(box)
            cur_IoU_value = 0.0
            for gt_box in cur_gt:
                if np.sum(gt_box) == -8:
                    break
                cur_IoU_value = max(cur_IoU_value, compute_IoU(box, np.asarray(gt_box)))
            IoU_gt[batch_id, x, y, 0] = cur_IoU_value
    return IoU_gt
    def detect(self,
               score_map,
               geo_map,
               timer,
               score_map_thresh=0.8,
               box_thresh=0.2,
               nms_thres=0.2):
        '''
        Restore scored text boxes from a score map and a geometry map.

        :param score_map: per-pixel text score; a 4-D input is reduced to
            ``score_map[0, :, :, 0]``
        :param geo_map: per-pixel geometry; a 4-D input is reduced to its
            first batch item
        :param timer: mapping that receives the elapsed seconds of the two
            stages under the keys 'restore' and 'nms'
        :param score_map_thresh: threshold for score map
        :param box_thresh: threshold for the mean score inside each box
        :param nms_thres: threshold for nms
        :return: ``(boxes, timer)``; ``boxes`` is None when NMS leaves nothing
        '''
        if len(score_map.shape) == 4:
            score_map = score_map[0, :, :, 0]
            geo_map = geo_map[0, :, :, ]

        def mean_score_under(quad_row):
            # rasterise the box (coordinates // 4) and average the score map
            # over the filled area
            region = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(region,
                         quad_row[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                         1)
            return cv2.mean(score_map, region)[0]

        # candidate pixels as (row, col), ordered by row
        hits = np.argwhere(score_map > score_map_thresh)
        hits = hits[np.argsort(hits[:, 0])]
        hit_rows = hits[:, 0]
        hit_cols = hits[:, 1]

        # restore: (col, row) * 4 together with the geometry at each pixel
        t0 = time.time()
        restored = restore_rectangle(hits[:, ::-1] * 4,
                                     geo_map[hit_rows, hit_cols, :])
        boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = restored.reshape((-1, 8))
        boxes[:, 8] = score_map[hit_rows, hit_cols]
        timer['restore'] = time.time() - t0

        # nms
        t0 = time.time()
        boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
        timer['nms'] = time.time() - t0

        if boxes.shape[0] == 0:
            return None, timer

        # filter low score boxes by the average score map (this differs
        # from the original paper)
        for k in range(boxes.shape[0]):
            boxes[k, 8] = mean_score_under(boxes[k])
        boxes = boxes[boxes[:, 8] > box_thresh]

        return boxes, timer
Beispiel #6
0
def detect(score_map,
           geo_map,
           timer,
           score_map_thresh=0.8,
           box_threshold=0.1,
           merge_iou_threshold=0.1,
           nms_iou_threshold=0.3):
    """
    Restore text boxes from score map and geo map.

    :param score_map: ndarray of shape (1, m, n, 1). For every position of
        the (m, n) map, the probability/score of that position being text.
    :param geo_map: ndarray of shape (1, m, n, 5). For every position of the
        (m, n) map, the text rectangle assuming text is present there.
        5 values: the first 4 are the distances from the anchor point to the
        top, right, bottom and left edges of the box (distances in the
        original image), the last is the counter-clockwise rotation angle.
    :param timer: mapping that receives the elapsed seconds of the two stages
        under the keys 'restore' and 'nms'
    :param score_map_thresh: threshold on the text score/probability.
    :param box_threshold: threshold on the mean score inside a box.
    :param merge_iou_threshold: IoU threshold used when merging rectangles.
    :param nms_iou_threshold: IoU threshold for non-maximum suppression.
    :return: ``(boxes, timer)``; ``boxes`` is None when NMS leaves nothing
    """
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    xy_text = np.argwhere(score_map > score_map_thresh)
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    start = time.time()
    text_box_restored = restore_rectangle(
        xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    score = np.expand_dims(score_map[xy_text[:, 0], xy_text[:, 1]], axis=1)
    # np.concatenate needs both operands to be 2-D. Flatten each box to its
    # 8 coordinates first so that a restore_rectangle returning (N, 4, 2)
    # works as well as one returning (N, 8).
    # NOTE(review): the helper's output shape is not visible here; verify.
    boxes = np.concatenate([text_box_restored.reshape((-1, 8)), score],
                           axis=1)
    timer['restore'] = time.time() - start
    start = time.time()
    boxes = locality_aware_nms.locality_non_max_suppression(
        boxes=boxes.astype(np.float64),
        merge_iou_threshold=merge_iou_threshold,
        nms_iou_threshold=nms_iou_threshold)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    # Compute the mean score_map value of the points covered by each box and
    # drop the boxes whose mean is not above box_threshold.
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_threshold]
    return boxes, timer
def detect_single_scale(score_map, geo_map, score_map_thresh, nms_thres,
                        box_thresh, timer):
    '''
    Restore scored text boxes from a score map and a geometry map, working
    directly in score-map coordinates (no x4 scaling is applied).

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param score_map_thresh: pixels scoring above this become box candidates
    :param nms_thres: threshold handed to ``lanms.merge_quadrangle_n9``
    :param box_thresh: minimum (exclusive) mean score inside a box
    :param timer: mapping that receives the elapsed seconds of the two stages
        under the keys 'restore' and 'nms'
    :return: ``(None, timer)`` when NMS leaves nothing, otherwise the boxes
        array alone.
        NOTE(review): the two return paths differ in arity; callers must
        handle both. Left as is to keep the interface unchanged.
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # coordinates of the pixels that pass the threshold, ordered by row
    px = np.argwhere(score_map > score_map_thresh)
    px = px[np.argsort(px[:, 0])]
    px_rows, px_cols = px[:, 0], px[:, 1]

    # restore: px[:, ::-1] are the (col, row) coordinates of the selected
    # pixels, geo_map[px_rows, px_cols, :] the geometry stored at those pixels
    # (per the original comment: the distances from the point to its
    # bounding box)
    began = time.time()
    restored = restore_rectangle(px[:, ::-1], geo_map[px_rows, px_cols, :])
    print('{} text boxes before nms'.format(restored.shape[0]))
    boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[px_rows, px_cols]
    timer['restore'] = time.time() - began

    # An earlier experiment, kept disabled in the original, fed the mean score
    # inside each box to NMS instead of the single-pixel score.

    # nms part
    began = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - began

    if boxes.shape[0] == 0:
        return None, timer

    # filter low score boxes by the average score map (this differs from the
    # original paper); boxes are already in score-map coordinates here
    for n in range(boxes.shape[0]):
        inside = np.zeros_like(score_map, dtype=np.uint8)
        outline = boxes[n, :8].reshape((-1, 4, 2)).astype(np.int32)
        cv2.fillPoly(inside, outline, 1)
        boxes[n, 8] = cv2.mean(score_map, inside)[0]

    passed = boxes[:, 8] > box_thresh
    return boxes[passed]
Beispiel #8
0
    def nms_boxBuild(self,
                     score_map,
                     geo_map,
                     timer,
                     ratio,
                     score_map_thresh=0.5,
                     box_thresh=0.1,
                     nms_thres=0.2):
        '''
        Restore text boxes from score map and geo map.

        :param score_map: per-pixel text score; a 4-D input is reduced to
            ``score_map[0, :, :, 0]``
        :param geo_map: per-pixel geometry; a 4-D input is reduced to its
            first batch item
        :param timer: returned unchanged; nothing is written to it here
        :param ratio: not used by this method
        :param score_map_thresh: threshold for score map
        :param box_thresh: threshold for the mean score inside each box
        :param nms_thres: threshold for nms
        :return: ``(boxes, timer)``; ``boxes`` is whatever
            ``nms_locality.nms_locality`` returned, re-scored and filtered
            when it is not empty
        '''
        if len(score_map.shape) == 4:
            score_map = score_map[0, :, :, 0]
            geo_map = geo_map[0, :, :, :]

        # candidate pixels as (row, col), ordered by row
        coords = np.argwhere(score_map > score_map_thresh)
        coords = coords[np.argsort(coords[:, 0])]
        ys = coords[:, 0]
        xs = coords[:, 1]

        # one box per candidate: (col, row) * 4 plus the geometry at that pixel
        restored = restore_rectangle(coords[:, ::-1] * 4, geo_map[ys, xs, :])
        boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = restored.reshape((-1, 8))
        boxes[:, 8] = score_map[ys, xs]
        boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)

        # nothing survived NMS: hand the empty result back untouched
        if len(boxes) == 0:
            return boxes, timer

        # filter low score boxes by the average score map (this differs
        # from the original paper)
        for j in range(len(boxes)):
            area = np.zeros_like(score_map, dtype=np.uint8)
            polygon = boxes[j][:8].reshape((-1, 4, 2)).astype(np.int32) // 4
            cv2.fillPoly(area, polygon, 1)
            boxes[j, 8] = cv2.mean(score_map, area)[0]

        return boxes[boxes[:, 8] > box_thresh], timer
Beispiel #9
0
def detect(score_map,
           geo_map,
           score_map_thresh=0.8,
           box_thresh=0.1,
           nms_thres=0.2):
    '''
    Restore text boxes from score map and geo map.

    Unlike the channel-last variants, a 4-D ``score_map`` is reduced with
    ``score_map[0, 0, :, :]`` and the geometry is gathered as
    ``geo_map[:, rows, cols]``, i.e. the geometry channel comes first.

    :param score_map: per-pixel text score
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for the mean score inside each box
    :param nms_thres: threshold for nms
    :return: the filtered boxes, or None when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, 0, :, :]
        geo_map = geo_map[0, :, :, :]

    # report the peak score, then keep the pixels above the threshold
    print(score_map.max())
    selected = np.argwhere(score_map > score_map_thresh)
    # order the candidates by row
    selected = selected[np.argsort(selected[:, 0])]
    sel_rows, sel_cols = selected[:, 0], selected[:, 1]

    # restore one box per candidate
    restored = restore_rectangle(selected[:, ::-1] * 4,
                                 geo_map[:, sel_rows, sel_cols])
    print('{} text boxes before nms'.format(restored.shape[0]))
    boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[sel_rows, sel_cols]

    # nms part
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    if boxes.shape[0] == 0:
        return None
    print('{} text boxes after nms'.format(boxes.shape[0]))

    # filter low score boxes by the average score map (this differs from the
    # original paper)
    for row in range(boxes.shape[0]):
        cover = np.zeros_like(score_map, dtype=np.uint8)
        pts = boxes[row, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(cover, pts, 1)
        boxes[row, 8] = cv2.mean(score_map, cover)[0]

    return boxes[boxes[:, 8] > box_thresh]
Beispiel #10
0
def detect(score_map,
           rbox_map,
           timer,
           score_map_thresh=0.8,
           box_thresh=0.1,
           nms_thres=0.2):
    '''
    Restore scored text boxes from a score map and an rbox geometry map.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param rbox_map: per-pixel rbox geometry; a 4-D input is reduced to its
        first batch item
    :param timer: mapping that receives the elapsed seconds of the two stages
        under the keys 'restore' and 'nms'
    :param score_map_thresh: pixels scoring above this are treated as text
    :param box_thresh: minimum (exclusive) mean score inside a box
    :param nms_thres: threshold handed to ``nms_locality.nms_locality``
    :return: ``(boxes, timer)``; ``boxes`` is None when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        # per the original comment: score_map becomes [h, w], geo map [h, w, 5]
        score_map = score_map[0, :, :, 0]
        rbox_map = rbox_map[0, :, :, ]

    # Coordinates of every pixel that passes the threshold: values above it
    # count as text region, values below as non-text. Ordered by row.
    text_px = np.argwhere(score_map > score_map_thresh)
    text_px = text_px[np.argsort(text_px[:, 0])]
    r_idx = text_px[:, 0]
    c_idx = text_px[:, 1]

    # Per the original comment, restore_rectangle turns the rbox output (the
    # distances from a point to the four box edges plus one angle) into the
    # corresponding rotated rectangle.
    clock = time.time()
    rects = restore_rectangle(text_px[:, ::-1] * 4, rbox_map[r_idx, c_idx, :])
    print('{} text boxes before nms'.format(rects.shape[0]))
    boxes = np.zeros((rects.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = rects.reshape((-1, 8))
    boxes[:, 8] = score_map[r_idx, c_idx]
    timer['restore'] = time.time() - clock

    # filter all restored rectangles with locality-aware NMS
    clock = time.time()
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - clock

    if boxes.shape[0] == 0:
        return None, timer

    # mean of the score-map values under each box, used for a second filter
    for b in range(boxes.shape[0]):
        footprint = np.zeros_like(score_map, dtype=np.uint8)
        verts = boxes[b, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(footprint, verts, 1)
        boxes[b, 8] = cv2.mean(score_map, footprint)[0]

    strong = boxes[:, 8] > box_thresh
    return boxes[strong], timer
Beispiel #11
0
def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    '''
    Restore text boxes from score map and geo map.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param timer: mapping that receives the elapsed seconds of the two stages
        under the keys 'restore' and 'nms'
    :param score_map_thresh: threshold for score map
    :param box_thresh: threshold for the mean score inside each box
    :param nms_thres: threshold for nms
    :return: ``(boxes, timer)``; ``boxes`` is None when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    def average_score(box_row):
        # fill the box (coordinates // 4) on a blank mask the size of the
        # score map and average the scores under it
        filled = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(filled, box_row[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        return cv2.mean(score_map, filled)[0]

    # threshold the score map and order the surviving pixels by row
    above = np.argwhere(score_map > score_map_thresh)
    above = above[np.argsort(above[:, 0])]

    # restore
    t_start = time.time()
    geometry = geo_map[above[:, 0], above[:, 1], :]
    restored = restore_rectangle(above[:, ::-1]*4, geometry)
    print('{} text boxes before nms'.format(restored.shape[0]))
    boxes = np.zeros((restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[above[:, 0], above[:, 1]]
    timer['restore'] = time.time() - t_start

    # nms part
    t_start = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - t_start

    if boxes.shape[0] == 0:
        return None, timer

    # filter low score boxes by the average score map (this differs from the
    # original paper)
    for pos in range(boxes.shape[0]):
        boxes[pos, 8] = average_score(boxes[pos])
    boxes = boxes[boxes[:, 8] > box_thresh]

    return boxes, timer
Beispiel #12
0
def detect(score_map,
           geo_map,
           timer,
           score_map_thresh=0.8,
           box_thresh=0.1,
           nms_thres=0.2):
    '''
    Restore scored text boxes from a score map and a geometry map.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param timer: mapping that receives the elapsed seconds of the two stages
        under the keys 'restore' and 'nms'
    :param score_map_thresh: pixels scoring above this become box candidates
    :param box_thresh: minimum (exclusive) mean score inside a box
    :param nms_thres: threshold handed to ``nms_locality.nms_locality``
    :return: ``(boxes, timer)``; ``boxes`` is None when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # pixels above the score threshold, ordered by row
    picked = np.argwhere(score_map > score_map_thresh)
    picked = picked[np.argsort(picked[:, 0])]
    picked_r = picked[:, 0]
    picked_c = picked[:, 1]

    # restore one box per pixel from (col, row) * 4 and the pixel's geometry
    stamp = time.time()
    quads = restore_rectangle(picked[:, ::-1] * 4,
                              geo_map[picked_r, picked_c, :])
    print('{} text boxes before nms'.format(quads.shape[0]))
    boxes = np.zeros((quads.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[picked_r, picked_c]
    timer['restore'] = time.time() - stamp

    # suppress overlapping candidates
    stamp = time.time()
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres)
    timer['nms'] = time.time() - stamp

    if boxes.shape[0] == 0:
        return None, timer

    # replace each score by the mean score-map value under the box, then
    # keep only the boxes above box_thresh
    for n in range(boxes.shape[0]):
        canvas = np.zeros_like(score_map, dtype=np.uint8)
        shape_pts = boxes[n, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(canvas, shape_pts, 1)
        boxes[n, 8] = cv2.mean(score_map, canvas)[0]

    survivors = boxes[:, 8] > box_thresh
    return boxes[survivors], timer
Beispiel #13
0
def postprocess_image(score_map,
                      geo_map,
                      score_map_thresh=0.8,
                      box_thresh=0.1,
                      nms_thresh=0.2):
    '''
    Turn a score map and a geometry map into filtered, scored text boxes.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item
    :param score_map_thresh: pixels scoring above this become box candidates
    :param box_thresh: minimum (exclusive) mean score inside a box
    :param nms_thresh: threshold handed to ``nms_locality.nms_locality``
    :return: the filtered boxes, or None when NMS leaves nothing
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # filter by the score map; candidates are ordered by row
    candidates = np.argwhere(score_map > score_map_thresh)
    candidates = candidates[np.argsort(candidates[:, 0])]
    cand_rows = candidates[:, 0]
    cand_cols = candidates[:, 1]

    # filter by the nms
    restored = restore_rectangle(candidates[:, ::-1] * 4,
                                 geo_map[cand_rows, cand_cols, :])
    num_before = restored.shape[0]
    boxes = np.zeros((num_before, 9), dtype=np.float32)
    boxes[:, :8] = restored.reshape((-1, 8))
    boxes[:, 8] = score_map[cand_rows, cand_cols]
    boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thresh)
    print("num_boxes before nms = {}".format(num_before))
    print("num_boxes after nms = {}".format(boxes.shape[0]))

    if boxes.shape[0] == 0:
        return None

    # filter low score boxes by the average score map (different from the
    # original paper)
    for k in range(boxes.shape[0]):
        stencil = np.zeros_like(score_map, dtype=np.uint8)
        poly = boxes[k, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(stencil, poly, 1)
        boxes[k, 8] = cv2.mean(score_map, stencil)[0]

    return boxes[boxes[:, 8] > box_thresh]
Beispiel #14
0
def detect_dbscan(score_map,
                  geo_map,
                  timer,
                  score_map_thresh=FLAGS.mask_thresh,
                  box_thresh=0.1,
                  nms_thres=0.2,
                  min_area=FLAGS.min_area,
                  gpu_iou_id=GPU_IOU_ID):
    '''
    Group text pixels into instances by clustering their restored boxes with
    DBSCAN, then return one approximated contour per instance.

    Every pixel above ``score_map_thresh`` yields one box. A pairwise
    distance ``1 - iou`` between the boxes is computed (on the CPU through
    ``calc_iou_area`` or through ``gpu_iou_matrix``), DBSCAN clusters the
    boxes with it, and each sufficiently large cluster is rasterised,
    optionally dilated, and converted into a polygon.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item. Channel 4 is read as the angle.
    :param timer: mapping; 'restore' receives the elapsed seconds of the
        whole procedure (not set on the early return)
    :param score_map_thresh: pixels scoring above this become candidates.
        NOTE(review): the FLAGS-based defaults are evaluated once, when the
        function is defined.
    :param box_thresh: not used by this function
    :param nms_thres: not used by this function
    :param min_area: minimum number of pixels for a cluster to be kept
    :param gpu_iou_id: negative selects the CPU IoU path, otherwise the value
        is passed on to ``gpu_iou_matrix``
    :return: ``(mask_colors, timer, mask_contours, [])``, or
        ``(None, timer, [], [])`` when there is at most one candidate box
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)

    h, w = np.shape(score_map)
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2

    # Reorder the pixels: non-negative angles first, negative angles last.
    # NOTE(review): presumably this mirrors the order in which
    # restore_rectangle emits its boxes, so that row i of `boxes` belongs to
    # xy_text[i]; verify against the helper.
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])

    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

    if boxes.shape[0] <= 1:
        return None, timer, [], []

    # (row, col) tuple -> index of that pixel's box
    points = [tuple(p) for p in xy_text]
    points_dict = {p: i for i, p in enumerate(points)}

    print("gpu_iou_id", gpu_iou_id)

    if gpu_iou_id < 0:
        print("using cpu IoU")
        time_iou = time.time()
        iou_dict = np.ones((boxes.shape[0], boxes.shape[0]), dtype=np.float32)
        areas_ = np.zeros((boxes.shape[0]), dtype=np.float32)
        for i in range(areas_.shape[0]):
            areas_[i] = cv2.contourArea((boxes[i, 0:8].reshape(
                (4, 2))).astype(np.float32))

        # only the upper triangle is computed ...
        for i in range(iou_dict.shape[0]):
            for j in range(i + 1, iou_dict.shape[1]):
                iou_dict[i, j] = calc_iou_area(boxes[i], boxes[j], areas_[i],
                                               areas_[j])

        # ... and mirrored by multiplying with the transpose (the untouched
        # lower triangle and the diagonal are still 1), then turned into a
        # distance.
        iou_dict = 1.0 - iou_dict * iou_dict.T
        print("time_cpu_iou:", time.time() - time_iou)
    else:
        print("using gpu IoU")
        boxes_iou = []
        for b in boxes:
            boxes_iou.append(
                cv2.convexHull(b[:8].reshape(4, 2),
                               clockwise=False,
                               returnPoints=True).reshape(8))
        boxes_iou = np.array(boxes_iou).astype(np.int32).astype(np.float32)

        print(boxes_iou.shape)
        time_iou = time.time()
        iou_dict = 1.0 - gpu_iou_matrix(boxes_iou, boxes_iou, gpu_iou_id)
        print("time_gpu_iou:", time.time() - time_iou)

    # DBSCAN is fed the box indices as 1-D samples; the metric looks the
    # distance of two indices up in the precomputed matrix.
    in_index = np.arange((boxes.shape[0]))
    in_index = in_index[:, np.newaxis].astype(np.int32)

    def distance(a, b):
        return iou_dict[int(a[0]), int(b[0])]

    time_DBSCAN = time.time()
    y_pred = DBSCAN(eps=0.2,
                    min_samples=10,
                    metric=distance).fit_predict(in_index)
    print("time_DBSCAN:", time.time() - time_DBSCAN)

    print("y_pred.shape", y_pred.shape, y_pred)
    print("xy_text.shape", xy_text.shape)
    print(np.unique(y_pred))
    box_cnt = np.unique(y_pred)
    boxes_points = []
    # labels 0..max are visited; a negative label never enters this range
    for b_idx_ in range(box_cnt.max() + 1):
        p_idxs = np.argwhere(y_pred == b_idx_)[:, 0]
        if p_idxs.shape[0] < min_area:
            continue
        b_ps = []
        for p_idx_ in p_idxs:
            b_ps.append([xy_text[p_idx_, 1], xy_text[p_idx_, 0]])
        boxes_points.append(b_ps)

    mask_contours = []

    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)
    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)

    for b in boxes_points:
        mask_bin *= 0
        # (col, row) pairs -> (rows, cols) index tuple
        b = np.array(b)
        b = b[:, ::-1]
        b = b.transpose(1, 0)
        b = (b[0], b[1])
        mask_bin[b] = 255
        area_ = np.sum(mask_bin / 255)
        if area_ < min_area or area_ >= h * w * 0.99:
            continue

        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            if tuple(p_in) in points_dict:
                box_ = boxes[points_dict[tuple(p_in)]]
                # A row of `boxes` is flat (8 coordinates + score). Reshape it
                # to 4 corner points so the side lengths are measured between
                # corners rather than between single coordinates.
                quad_ = box_[:8].reshape((4, 2))
                poly_h = min(np.linalg.norm(quad_[0] - quad_[3]),
                             np.linalg.norm(quad_[1] - quad_[2]))
                poly_w = min(np.linalg.norm(quad_[0] - quad_[1]),
                             np.linalg.norm(quad_[2] - quad_[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            # never dilate with a kernel smaller than 3
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
            mask_bin = cv2.dilate(mask_bin, kernel)

        # findContours returns (contours, hierarchy) or
        # (image, contours, hierarchy) depending on the OpenCV version;
        # index -2 is the contour list in both layouts.
        contours = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                    cv2.CHAIN_APPROX_NONE)[-2]
        if len(contours) == 0:
            continue
        epsilon = 0.01 * cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], epsilon, True)
        mask_contours.append(approx)

        if not FLAGS.no_write_images:
            # paint the (undilated) cluster pixels in a random colour
            mask_colors[:, :, :][b] = np.random.randint(100, 255, size=3)

    timer['restore'] = time.time() - start

    return mask_colors, timer, mask_contours, []
Beispiel #15
0
def detect_pixellink(score_map,
                     geo_map,
                     timer,
                     mask_thresh=FLAGS.mask_thresh,
                     box_thresh=0.1,
                     nms_thres=0.2,
                     min_area=FLAGS.min_area):
    '''
    Group text pixels into instances with ``RegLink_func`` and return one
    approximated contour per instance.

    Every pixel above ``mask_thresh`` yields one box. ``RegLink_func``
    receives the pixels, their boxes and the map size and returns a label
    mask; each label from 1 upwards that covers enough pixels is optionally
    dilated and converted into a polygon.

    :param score_map: per-pixel text score; a 4-D input is reduced to
        ``score_map[0, :, :, 0]``
    :param geo_map: per-pixel geometry; a 4-D input is reduced to its first
        batch item. Channel 4 is read as the angle.
    :param timer: mapping; 'restore' receives the elapsed seconds of the
        whole procedure
    :param mask_thresh: pixels scoring above this become candidates.
        NOTE(review): the FLAGS-based defaults are evaluated once, when the
        function is defined.
    :param box_thresh: not used by this function
    :param nms_thres: not used by this function
    :param min_area: minimum number of pixels for an instance to be kept
    :return: ``(mask_colors, timer, mask_contours, [])``
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > mask_thresh)

    h, w = np.shape(score_map)
    # restore
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_map[xy_text[:, 0],
                                                  xy_text[:, 1], :])  # N*4*2

    # Reorder the pixels: non-negative angles first, negative angles last.
    # NOTE(review): presumably this mirrors the order in which
    # restore_rectangle emits its boxes, so that row i of `boxes` belongs to
    # xy_text[i]; verify against the helper.
    geo_map_idx_ = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    angle = geo_map_idx_[:, 4]
    xy_text_0 = xy_text[angle >= 0]
    xy_text_1 = xy_text[angle < 0]
    xy_text = np.concatenate([xy_text_0, xy_text_1])

    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

    # (row, col) tuple -> index of that pixel's box; every pixel starts with
    # group id -1
    points = [tuple(p) for p in xy_text]
    points_dict = {p: i for i, p in enumerate(points)}
    group_mask = dict.fromkeys(points, -1)

    mask_contours = []

    mask = RegLink_func(points,
                        points_dict,
                        group_mask,
                        h,
                        w,
                        boxes,
                        rl_iou_th=FLAGS.rl_iou_th)

    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)
    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)

    # labels start at 1; smaller values are never visited
    for i in range(1, mask.max() + 1):
        mask_bin *= 0
        mask_bin[mask == i] = 255
        area_ = np.sum(mask_bin / 255)
        if area_ < min_area or area_ >= h * w * 0.99:
            continue

        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = points_in_[int(len(points_in_) / 2)]
            if tuple(p_in) in points_dict:
                box_ = boxes[points_dict[tuple(p_in)]]
                # A row of `boxes` is flat (8 coordinates + score). Reshape it
                # to 4 corner points so the side lengths are measured between
                # corners rather than between single coordinates.
                quad_ = box_[:8].reshape((4, 2))
                poly_h = min(np.linalg.norm(quad_[0] - quad_[3]),
                             np.linalg.norm(quad_[1] - quad_[2]))
                poly_w = min(np.linalg.norm(quad_[0] - quad_[1]),
                             np.linalg.norm(quad_[2] - quad_[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            # never dilate with a kernel smaller than 3
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
            mask_bin = cv2.dilate(mask_bin, kernel)

        # findContours returns (contours, hierarchy) or
        # (image, contours, hierarchy) depending on the OpenCV version;
        # index -2 is the contour list in both layouts.
        contours = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                    cv2.CHAIN_APPROX_NONE)[-2]
        if len(contours) == 0:
            continue
        epsilon = 0.01 * cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], epsilon, True)
        mask_contours.append(approx)
        if not FLAGS.no_write_images:
            # paint the (undilated) instance pixels in a random colour
            mask_colors[mask == i, :] = np.random.randint(100, 255, size=3)

    timer['restore'] = time.time() - start

    return mask_colors, timer, mask_contours, []
Beispiel #16
0
def detect_mask(score_map,
                score_map_full,
                geo_map,
                timer,
                score_map_thresh=FLAGS.score_map_thresh,
                mask_thresh=FLAGS.mask_thresh,
                box_thresh=0.1,
                nms_thres=0.2,
                min_area=FLAGS.min_area):
    '''
    restore text boxes from score map and geo map, then extract one mask
    contour for every box that survives NMS and score filtering
    :param score_map: score map used to seed boxes; when 4-D, batch element 0
        and channel 0 are used
    :param score_map_full: score map thresholded with mask_thresh inside each
        box to build the masks; sliced the same way as score_map when 4-D
    :param geo_map: geometry map; channel 4 is read as the angle
    :param timer: dict, receives the 'restore' and 'nms' timings
    :param score_map_thresh: threshhold for score map
    :param mask_thresh: threshhold applied to score_map_full inside a box
    :param box_thresh: threshhold for boxes (mean score inside the box)
    :param nms_thres: threshold for nms
    :param min_area: masks with fewer pixels than this are dropped
    :return: (mask_colors, timer, mask_contours, boxes);
        (None, timer, [], []) when NMS returns no box and
        (mask_colors, timer, [], []) when no box passes box_thresh
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        score_map_full = score_map_full[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]
    # filter the score map
    xy_text = np.argwhere(score_map > score_map_thresh)
    # sort the text boxes via the y axis
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore
    start = time.time()
    geo_at_text = geo_map[xy_text[:, 0], xy_text[:, 1], :]
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                          geo_at_text)  # N*4*2
    # Put the pixels with a non-negative angle first.
    # NOTE(review): presumably this mirrors the order in which
    # restore_rectangle emits its boxes, so that row i of the boxes belongs to
    # xy_text[i] -- confirm against restore_rectangle.
    angle = geo_at_text[:, 4]
    xy_text = np.concatenate([xy_text[angle >= 0], xy_text[angle < 0]])

    # (row, col) of a seed pixel -> row index of its box in boxes_all
    points_dict = {tuple(p): idx for idx, p in enumerate(xy_text)}

    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    boxes_all = boxes.copy()
    timer['restore'] = time.time() - start
    # nms part
    start = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer, [], []

    # here we filter some low score boxes by the average score map, this is different from the orginal paper
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4,
                     1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    mask_bin = np.zeros([score_map.shape[0], score_map.shape[1]],
                        dtype=np.uint8)
    mask_colors = np.zeros([score_map.shape[0], score_map.shape[1], 3],
                           dtype=np.uint8)

    # `boxes == []` on an ndarray is not an emptiness test (and errors on
    # current NumPy); test the row count instead.
    if boxes.shape[0] == 0:
        return mask_colors, timer, [], []

    # Work on a copy scaled down to score-map resolution.
    boxes_in = boxes.copy()
    boxes_in[:, :8] = (boxes_in[:, :8] / 4).astype(np.int32)
    h, w = score_map.shape
    boxes_points = []  # one (rows, cols) index pair per usable box
    for box in boxes_in:
        b = box[:8].reshape((4, 2))
        # skip degenerate boxes (a side of at most one pixel)
        if np.linalg.norm(b[0] - b[1]) <= 1 or np.linalg.norm(b[3] -
                                                              b[0]) <= 1:
            continue
        xmin = int(max(np.min(b[:, 0]), 0))
        xmax = int(min(np.max(b[:, 0]), w - 1))
        ymin = int(max(np.min(b[:, 1]), 0))
        ymax = int(min(np.max(b[:, 1]), h - 1))

        local_ = score_map_full[ymin:ymax + 1, xmin:xmax + 1]
        if local_.size == 0:
            # box lies completely outside the map
            continue
        local_mask = np.zeros_like(local_)
        # move the quad into the coordinates of the local crop
        b[:, 0] -= xmin
        b[:, 1] -= ymin
        cv2.fillPoly(local_mask, b.astype(np.int32)[np.newaxis, :, :], 1)
        local_ = local_ * local_mask

        ps_idx = np.argwhere(local_ > mask_thresh)
        if len(ps_idx) == 0:
            continue
        boxes_points.append((ps_idx[:, 0] + ymin, ps_idx[:, 1] + xmin))

    mask_contours = []

    for rows, cols in boxes_points:
        mask_bin[:] = 0
        mask_bin[rows, cols] = 255
        mask_colors[rows, cols] = 255

        area_ = np.count_nonzero(mask_bin)
        if area_ < min_area or area_ >= h * w * 0.99:
            continue

        dilate_kernel_size = 3
        if FLAGS.mask_dilate:
            points_in_ = np.argwhere(mask_bin == 255)
            p_in = tuple(points_in_[len(points_in_) // 2])
            if p_in in points_dict:
                # boxes_all rows are flat (x0, y0, ..., x3, y3, score); the
                # side lengths must be taken between corner points, not
                # between single scalar entries of the flat row.
                quad = boxes_all[points_dict[p_in]][:8].reshape((4, 2))
                poly_h = min(np.linalg.norm(quad[0] - quad[3]),
                             np.linalg.norm(quad[1] - quad[2]))
                poly_w = min(np.linalg.norm(quad[0] - quad[1]),
                             np.linalg.norm(quad[2] - quad[3]))
                dilate_kernel_size = int(
                    min(poly_h, poly_w) * FLAGS.dilate_ratio)
            poly_rect = cv2.minAreaRect(points_in_.astype(np.float32))
            rect_height = min(poly_rect[1][0],
                              poly_rect[1][1]) * FLAGS.dilate_ratio
            dilate_kernel_size = max(int(min(dilate_kernel_size, rect_height)),
                                     3)
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (dilate_kernel_size, dilate_kernel_size))
            mask_bin = cv2.dilate(mask_bin, kernel)

        # [-2] selects the contour list for both the 3-value (OpenCV 3.x) and
        # the 2-value (OpenCV 2.x / 4.x) return conventions.
        contours = cv2.findContours(mask_bin, cv2.RETR_TREE,
                                    cv2.CHAIN_APPROX_NONE)[-2]
        if len(contours) == 0:
            continue
        # keep the largest contour (the first one on ties)
        max_contour = max(contours, key=cv2.contourArea)

        epsilon = 0.01 * cv2.arcLength(max_contour, True)
        approx = cv2.approxPolyDP(max_contour, epsilon, True)
        mask_contours.append(approx)

        cv2.drawContours(mask_colors, max_contour, -1, (0, 255, 0), 3)
        cv2.drawContours(mask_colors, approx, -1, (0, 0, 255), 3)
    return mask_colors, timer, mask_contours, boxes
Beispiel #17
0
def detect(image,
           score_map,
           geo_map,
           timer,
           im_fn,
           score_map_thresh=0.8,
           box_thresh=0.3,
           nms_thres=0.1):
    '''
    restore text boxes from two score maps and two geo maps and merge them
    with a single NMS pass
    :param image: not used by this function
    :param score_map: mapping with the keys 'F_score1' and 'F_score2'
    :param geo_map: mapping with the keys 'F_geometry1' and 'F_geometry2'
    :param timer: dict, receives the 'restore' and 'nms' timings
    :param im_fn: not used by this function
    :param score_map_thresh: threshhold for score map
    :param box_thresh: threshhold for boxes (currently unused: the mean-score
        filtering step is disabled in this variant)
    :param nms_thres: threshold for nms
    :return: (boxes, timer) with boxes of shape N*9, or (None, timer) when
        NMS returns no box
    '''
    score_map1 = score_map['F_score1']
    score_map2 = score_map['F_score2']
    geo_map1 = geo_map['F_geometry1']
    geo_map2 = geo_map['F_geometry2']
    # Strip the batch dimension when present; otherwise the maps are used as
    # given (previously the local names were left unbound in that case).
    if len(score_map1.shape) == 4:
        score_map1 = score_map1[0, :, :, 0]
        score_map2 = score_map2[0, :, :, 0]
        geo_map1 = geo_map1[0, :, :, ]
        geo_map2 = geo_map2[0, :, :, ]

    # filter the score maps and sort the text boxes via the y axis
    xy_text1 = np.argwhere(score_map1 > score_map_thresh)
    xy_text1 = xy_text1[np.argsort(xy_text1[:, 0])]

    xy_text2 = np.argwhere(score_map2 > score_map_thresh)
    xy_text2 = xy_text2[np.argsort(xy_text2[:, 0])]

    # restore; the first map is scaled by 4 and the second one by 8
    start = time.time()
    text_box_restored1 = restore_rectangle(xy_text1[:, ::-1] * 4,
                                           geo_map1[xy_text1[:, 0],
                                                    xy_text1[:,
                                                             1], :])  # N*4*2
    text_box_restored2 = restore_rectangle(xy_text2[:, ::-1] * 8,
                                           geo_map2[xy_text2[:, 0],
                                                    xy_text2[:,
                                                             1], :])  # N*4*2
    print('{} text boxes before nms'.format(text_box_restored1.shape[0] +
                                            text_box_restored2.shape[0]))
    boxes1 = np.zeros((text_box_restored1.shape[0], 9), dtype=np.float32)
    boxes2 = np.zeros((text_box_restored2.shape[0], 9), dtype=np.float32)

    boxes1[:, :8] = text_box_restored1.reshape((-1, 8))
    boxes1[:, 8] = score_map1[xy_text1[:, 0], xy_text1[:, 1]]

    boxes2[:, :8] = text_box_restored2.reshape((-1, 8))
    boxes2[:, 8] = score_map2[xy_text2[:, 0], xy_text2[:, 1]]

    boxes = np.concatenate((boxes1, boxes2), axis=0)
    timer['restore'] = time.time() - start

    # nms part
    start = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    print(boxes)

    boxes_final = joint(boxes[:, :8])
    print("Fix Bug ", boxes_final)

    # Debug trace only: parse every joined box, order its points by polar
    # angle around the centroid and print the intermediate values. Nothing
    # computed here is part of the return value.
    for box_idx in range(len(boxes_final)):
        pts = np.array(boxes_final[box_idx].strip().split(',')).astype(
            np.float32).reshape(-1, 2)
        # `np.int` was removed in NumPy 1.24; the builtin int is what that
        # alias referred to.
        pts = np.rint(pts).astype(int)
        print(pts)
        pts1 = list(pts)
        print(pts1)
        # compute centroid
        cent = (sum([p[0] for p in pts1]) / len(pts1),
                sum([p[1] for p in pts1]) / len(pts1))
        print(cent)
        # sort by polar angle
        pts1.sort(key=lambda p: math.atan2(p[1] - cent[1], p[0] - cent[0]))
        pts1 = [list(p) for p in pts1]
        print(pts1)

    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    return boxes[:, :9], timer
Beispiel #18
0
def IRB(box,
        score_map,
        geo_map,
        score_map_thresh,
        show_log=True,
        iter_max=10,
        iter_stop_iou=0.99,
        merge_iou=0.2):
    '''
    iteratively refine one box, each iteration does:
      1. collect the score-map pixels above score_map_thresh inside the
         current box (a shrunk copy of the box is tried first, the full box
         is the fallback) and keep the extreme ones along both axes
      2. restore one candidate box from each of those pixels
      3. take the min-area rectangle around the current box and every
         candidate whose IoU with it exceeds merge_iou
    the loop stops when the IoU between two consecutive boxes reaches
    iter_stop_iou or stops changing, or after iter_max iterations
    :param box: 9 values, 8 coordinates (image scale) followed by a score
    :param score_map: 2-D score map, indexed with the box coordinates // 4
    :param geo_map: geometry map, indexed like score_map
    :param score_map_thresh: threshhold for score map
    :param show_log: print the IoU of every iteration and the total time
    :param iter_max: maximum number of iterations
    :param iter_stop_iou: stop once consecutive boxes reach this IoU
    :param merge_iou: minimum IoU for a candidate box to be merged
    :return: (refined box, elapsed seconds, number of iterations); the box is
        returned unchanged when no iteration completes
    '''
    start_time = time.time()
    pre_box = box
    iou = 0
    pre_iou = -1
    iter_cnt = 0
    while iou < iter_stop_iou and pre_iou != iou and iter_cnt < iter_max:
        pre_iou = iou
        iter_cnt += 1

        ##### 1 -> pixels inside the box, keep the 4 extreme ones
        b = pre_box // 4
        min1 = max(min(b[0], b[2], b[4], b[6]), 0)
        max1 = min(max(b[0], b[2], b[4], b[6]), score_map.shape[1])
        min2 = max(min(b[1], b[3], b[5], b[7]), 0)
        max2 = min(max(b[1], b[3], b[5], b[7]), score_map.shape[0])
        local_score = np.array(score_map[int(min2):int(max2 + 1),
                                         int(min1):int(max1 + 1)])
        # the box expressed in the coordinates of the local crop
        local_b = np.array([
            b[0] - min1, b[1] - min2, b[2] - min1, b[3] - min2, b[4] - min1,
            b[5] - min2, b[6] - min1, b[7] - min2
        ])
        mask = np.zeros_like(local_score, dtype=np.uint8)

        # first try a shrunk version of the box
        poly = local_b.reshape((4, 2)).astype(np.int32)
        r = [
            min(np.linalg.norm(poly[k] - poly[(k + 1) % 4]),
                np.linalg.norm(poly[k] - poly[(k - 1) % 4]))
            for k in range(4)
        ]
        shrinked_poly = shrink_poly_hjb_v0(poly.copy(), r, shrink_ratio=0.3)
        cv2.fillPoly(mask, shrinked_poly.astype(np.int32)[np.newaxis, :, :],
                     1)
        local_score_masked = local_score * mask
        xy_text = np.argwhere(local_score_masked > score_map_thresh)
        if len(xy_text) == 0:
            # nothing inside the shrunk box: fall back to the full box
            cv2.fillPoly(mask,
                         local_b.reshape((-1, 4, 2)).astype(np.int32), 1)
            local_score_masked = local_score * mask
            xy_text = np.argwhere(local_score_masked > score_map_thresh)
            if len(xy_text) == 0:
                return pre_box, time.time() - start_time, iter_cnt
        extreme = xy_text[[
            np.argmin(xy_text[:, 0]),
            np.argmax(xy_text[:, 0]),
            np.argmin(xy_text[:, 1]),
            np.argmax(xy_text[:, 1])
        ]]

        ##### 2 -> one restored box per extreme pixel
        # going through a mask removes duplicated extreme pixels
        mask = np.zeros_like(local_score_masked, dtype=np.uint8)
        mask[extreme[:, 0], extreme[:, 1]] = 1

        xy_text = np.argwhere(mask == 1)
        xy_text[:, 0] += int(min2)
        xy_text[:, 1] += int(min1)
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4,
                                              geo_map[xy_text[:, 0],
                                                      xy_text[:,
                                                              1], :])  # N*4*2
        b_s = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        b_s[:, :8] = text_box_restored.reshape((-1, 8))
        b_s[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]

        ##### 3 -> min area box around the current box and its candidates
        score = b_s[:, 8].sum() / len(b_s)
        b_iou = [pb for pb in b_s if calc_iou(pre_box, pb) > merge_iou]
        if len(b_iou) == 0:
            return pre_box, time.time() - start_time, iter_cnt

        b_iou.append(pre_box)
        # cv2.minAreaRect only accepts float32 / int32 point sets
        b_iou = np.array(b_iou)[:, :8].reshape((-1, 2)).astype(np.float32)
        rect = cv2.minAreaRect(b_iou)
        points = cv2.boxPoints(rect)
        points = points.reshape((-1))
        current_box = np.insert(points, len(points), score)

        ##### stop iou
        iou = calc_iou(pre_box, current_box)

        pre_box = current_box
        if show_log:
            print("iter_cnt", iter_cnt, iou)

    IRB2box_time = time.time() - start_time
    if show_log:
        print("IRB2box time:", IRB2box_time)
    # pre_box is the latest refined box after every completed iteration and
    # the untouched input when the loop body never ran (e.g. iter_max=0),
    # where `current_box` would be unbound.
    return pre_box, IRB2box_time, iter_cnt
Beispiel #19
0
    sharedconv = model_sharedconv(im.copy())  # x_batch['images'].shape
    f_score_, geo_score_ = model_detection(sharedconv)

    # -------- #
    score_map_thresh = 0.20
    f_score = f_score_[0, :, :, 0].numpy()
    geo_score = geo_score_[0, :, :, ].numpy()

    # filter out by score map
    xy_text = np.argwhere(f_score > score_map_thresh)
    xy_text = xy_text[np.argsort(xy_text[:, 0])]

    if len(xy_text) > 0:

        # restore to coordinates
        text_box_restored = restore_rectangle(origin=xy_text[:, ::-1] * 4,
                                              geometry=geo_score[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2

        # filter out by average score
        # box_thresh = 0.95
        # ids = []
        # for i, box in enumerate(text_box_restored):
        #     mask = np.zeros_like(f_score_[0, :, :, :], dtype=np.uint8)
        #     mask = cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        #     id = cv2.mean(f_score_[0, :, :, :].numpy(), mask)[0]
        #     ids.append(id)
        # text_box_restored = text_box_restored[np.array(ids) > box_thresh]

        # nms
        selected_indices = tf.image.non_max_suppression(boxes=text_box_restored[:, ::2, :].reshape((-1, 4)).astype(np.float32),
                                                        scores=f_score[xy_text[:, 0], xy_text[:, 1]],
                                                        max_output_size=50,
Beispiel #20
0
def detect(image,
           score_map,
           geo_map,
           timer,
           score_map_thresh=0.8,
           box_thresh=0.1,
           nms_thres=0.2):
    '''
    restore text boxes from score map and geo map, merge them with NMS and
    drop the boxes whose mean score is too low
    :param image: not used by this function
    :param score_map: score map; when 4-D, batch element 0 and channel 0
        are used
    :param geo_map: geometry map; when score_map is 4-D, batch element 0
        is used
    :param timer: dict, receives the 'restore' and 'nms' timings
    :param score_map_thresh: threshhold for score map
    :param box_thresh: threshhold for boxes
    :param nms_thres: threshold for nms
    :return: (boxes, timer) with boxes of shape N*9, or (None, timer) when
        NMS returns no box
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # candidate pixels as (row, col), ordered by row
    text_rc = np.argwhere(score_map > score_map_thresh)
    text_rc = text_rc[np.argsort(text_rc[:, 0])]
    rows = text_rc[:, 0]
    cols = text_rc[:, 1]

    # restore one quadrangle per candidate pixel
    tic = time.time()
    quads = restore_rectangle(text_rc[:, ::-1] * 4,
                              geo_map[rows, cols, :])  # N*4*2
    n_quads = quads.shape[0]
    print('{} text boxes before nms'.format(n_quads))
    boxes = np.zeros((n_quads, 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # locality-aware NMS
    tic = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        return None, timer

    # replace every box score by the mean of the score map inside the box
    # (this differs from the original paper) and drop the weak boxes
    for idx in range(boxes.shape[0]):
        region = np.zeros_like(score_map, dtype=np.uint8)
        quad = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, quad, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]
    keep = boxes[:, 8] > box_thresh
    boxes = boxes[keep]

    return boxes[:, :9], timer
def _polygon_iou(polygon1, polygon2):
    '''
    Compute the IoU of two quadrangles.
    :param polygon1: 4, 2
    :param polygon2: 4, 2
    :return: 0~1 value
    '''
    polygon1 = Polygon(polygon1)
    if not polygon1.is_valid:
        polygon1 = polygon1.buffer(0)
    polygon2 = Polygon(polygon2)
    if not polygon2.is_valid:
        polygon2 = polygon2.buffer(0)
    intersection_polygon = polygon1.intersection(polygon2)
    if not intersection_polygon.is_valid:
        return 0.0
    intersection_area = intersection_polygon.area
    union_area = polygon1.area + polygon2.area - intersection_area
    if union_area <= 0:
        # two degenerate (zero-area) polygons
        return 0.0
    return (1.0 * intersection_area) / (1.0 * union_area)


def _max_iou_per_box(boxs, cur_gt):
    '''
    For every box return its largest IoU with any ground-truth quadrangle.
    Lives at module level so that multiprocessing can pickle it.
    :param boxs: K, 4, 2
    :param cur_gt: M, 4, 2
    :return: list of K floats
    '''
    IoU_values = []
    for box in boxs:
        cur_IoU_value = 0.0
        for gt_id in range(len(cur_gt)):
            # a quadrangle whose 8 coordinates sum to -8 ends the list
            # (presumably padding rows filled with -1 -- confirm with caller)
            if np.sum(cur_gt[gt_id]) == -8:
                break
            cur_IoU_value = max(
                cur_IoU_value,
                _polygon_iou(np.asarray(box), np.asarray(cur_gt[gt_id])))
        IoU_values.append(cur_IoU_value)
    return IoU_values


def cal_IoU_gt_py_multiprocess(pred_geo, pred_cls, gt, threshold=0.8):
    '''
    From the predicted pred_geo and pred_cls, compute for every pixel whose
    score exceeds threshold the IoU between its restored box and the ground
    truth; all other pixels stay 0. The boxes of one image are split into at
    most 8 chunks that are evaluated by a process pool.
    :param pred_geo: N, W, H, 5
    :param pred_cls: N, W, H, 1
    :param gt: N, M, 4, 2
    :param threshold: 0.8
    :return: N, W, H, 1 float32 array of IoU values
    '''
    # Drop only the trailing channel axis; a plain np.squeeze would also
    # remove the batch axis when N == 1.
    pred_cls = np.asarray(pred_cls)
    if pred_cls.ndim == 4 and pred_cls.shape[-1] == 1:
        pred_cls = pred_cls[..., 0]
    shape = np.shape(pred_geo)
    IoU_gt = np.zeros([shape[0], shape[1], shape[2], 1], np.float32)

    process_num = 8
    pool = None  # created lazily, only when there is something to compute
    try:
        for batch_id in range(shape[0]):
            score_map = pred_cls[batch_id]
            geo_map = pred_geo[batch_id]
            cur_gt = gt[batch_id]

            if len(np.shape(score_map)) != 2:
                logging.log(logging.ERROR, 'score map shape isn\'t correct!')
                raise AssertionError('score map shape isn\'t correct!')
            xy_text = np.argwhere(score_map > threshold)
            if len(xy_text) == 0:
                continue
            # sort the text boxes via the y axis
            xy_text = xy_text[np.argsort(xy_text[:, 0])]
            text_box_restored = restore_rectangle(
                xy_text[:, ::-1],
                geo_map[xy_text[:, 0], xy_text[:, 1], :])  # N*4*2

            if pool is None:
                pool = Pool(processes=process_num)

            # contiguous chunks; the chunk size yields at most process_num
            # chunks
            total = min(len(xy_text), len(text_box_restored))
            chunk_size = total // process_num + 1
            pending = []
            for begin in range(0, total, chunk_size):
                end = min(begin + chunk_size, total)
                res = pool.apply_async(
                    func=_max_iou_per_box,
                    args=(text_box_restored[begin:end], cur_gt))
                pending.append((begin, end, res))

            for begin, end, res in pending:
                xs = xy_text[begin:end, 0]
                ys = xy_text[begin:end, 1]
                IoU_gt[batch_id, xs, ys, 0] = res.get()
    finally:
        if pool is not None:
            pool.close()
            pool.join()
    return IoU_gt
Beispiel #22
0
def detect(score_map,
           geo_map,
           timer,
           score_map_thresh=FLAGS.threshold,
           box_thresh=FLAGS.box_thresh,
           nms_thres=0.2):
    '''
    restore text boxes from score map and geo map; when FLAGS.IRB is enabled
    the long boxes are additionally refined with IRB and merged by a second
    NMS pass
    :param score_map: score map; when 4-D, batch element 0 and channel 0
        are used
    :param geo_map: geometry map; when score_map is 4-D, batch element 0
        is used
    :param timer: dict, receives the 'restore' and 'nms' timings
    :param score_map_thresh: threshhold for score map
    :param box_thresh: threshhold for boxes
    :param nms_thres: threshold for nms
    :return: (boxes, timer) when FLAGS.IRB is False, otherwise
        (boxes, timer, total IRB time, total IRB iterations); boxes is None
        when nothing is left
    '''
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, ]

    # candidate pixels as (row, col), ordered by row
    text_rc = np.argwhere(score_map > score_map_thresh)
    text_rc = text_rc[np.argsort(text_rc[:, 0])]
    rows = text_rc[:, 0]
    cols = text_rc[:, 1]

    # restore one quadrangle per candidate pixel
    tic = time.time()
    quads = restore_rectangle(text_rc[:, ::-1] * 4,
                              geo_map[rows, cols, :])  # N*4*2
    n_quads = quads.shape[0]
    print('{} text boxes before nms'.format(n_quads))
    boxes = np.zeros((n_quads, 9), dtype=np.float32)
    boxes[:, :8] = quads.reshape((-1, 8))
    boxes[:, 8] = score_map[rows, cols]
    timer['restore'] = time.time() - tic

    # locality-aware NMS
    tic = time.time()
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - tic

    if boxes.shape[0] == 0:
        if FLAGS.IRB == False:
            return None, timer
        return None, timer, 0, 0

    # replace every box score by the mean of the score map inside the box
    # (this differs from the original paper) and drop the weak boxes
    for idx in range(boxes.shape[0]):
        region = np.zeros_like(score_map, dtype=np.uint8)
        quad = boxes[idx, :8].reshape((-1, 4, 2)).astype(np.int32) // 4
        cv2.fillPoly(region, quad, 1)
        boxes[idx, 8] = cv2.mean(score_map, region)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]

    if FLAGS.IRB == False:
        return boxes, timer

    # IRB stage: only boxes whose longest side exceeds
    # FLAGS.start_IRB_max_len are refined, the others pass through unchanged
    refined = []
    IRB2box_times = 0
    iter_cnts = 0
    for b in boxes:
        poly = b[0:8].reshape((4, 2)).astype(np.int32)
        longest_side = max(np.linalg.norm(poly[0] - poly[3]),
                           np.linalg.norm(poly[1] - poly[2]),
                           np.linalg.norm(poly[0] - poly[1]),
                           np.linalg.norm(poly[2] - poly[3]))
        if longest_side > FLAGS.start_IRB_max_len:
            current_box, elapsed, iters = IRB(b,
                                              score_map,
                                              geo_map,
                                              score_map_thresh,
                                              show_log=True,
                                              merge_iou=nms_thres)
        else:
            current_box, elapsed, iters = b, 0, 0
        refined.append(current_box)
        IRB2box_times += elapsed
        iter_cnts += iters

    refined = np.array(refined)
    if refined.shape[0] == 0:
        return None, timer, IRB2box_times, iter_cnts

    refined = refined[refined[:, 8] > box_thresh]
    keep = py_cpu_nms(refined.astype('float32'), nms_thres)
    return refined[keep], timer, IRB2box_times, iter_cnts