Example 1
def textline_extract(image, prediction, threshold=0.3):
    h, w, _ = image.shape
    cls = np.array(prediction[0])
    regr = np.array(prediction[1])
    cls_prod = np.array(prediction[2])
    anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = utils.bbox_transfor_inv(anchor, regr)
    bbox = utils.clip_box(bbox, [h, w])
    # keep anchors whose foreground score exceeds the threshold

    fg = np.where(cls_prod[0, :, 1] > threshold)[0]
    select_anchor = bbox[fg, :]
    select_score = cls_prod[0, fg, 1]
    select_anchor = select_anchor.astype('int32')
    # filter out boxes that are too small
    keep_index = utils.filter_bbox(select_anchor, 16)
    # nms
    select_anchor = select_anchor[keep_index]
    select_score = select_score[keep_index]
    select_score = np.reshape(select_score, (select_score.shape[0], 1))
    nmsbox = np.hstack((select_anchor, select_score))
    keep = utils.nms(nmsbox, 0.3)
    select_anchor = select_anchor[keep]
    select_score = select_score[keep]
    #text line
    textConn = text_connect.TextProposalConnector()
    text = textConn.get_text_lines(select_anchor, select_score, [h, w])
    text = list(text.astype('int32'))
    return text
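A minimal usage sketch (hypothetical): the preprocessing and model names are borrowed from Example 10 further down; only the call to textline_extract itself is new here.

# Hypothetical usage; infer_model, img_path and config.IMAGE_MEAN follow Example 10.
image = cv2.imread(img_path)
inp = np.expand_dims(image - config.IMAGE_MEAN, axis=0)   # zero-center, add batch dim
prediction = infer_model.predict(inp)                     # [cls, regr, cls_prob]
lines = textline_extract(image, prediction, threshold=0.7)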
Example 2
    def __call__(self, img, bboxes):
        # Choose random translation factors
        img_shape = img.shape

        #translate the image

        #percentage of the dimension of the image to translate
        translate_factor_x = random.uniform(*self.translate)
        translate_factor_y = random.uniform(*self.translate)

        if not self.diff:
            translate_factor_y = translate_factor_x

        canvas = np.zeros(img_shape).astype(np.uint8)


        corner_x = int(translate_factor_x*img.shape[1])
        corner_y = int(translate_factor_y*img.shape[0])



        # change the origin to the top-left corner of the translated box
        orig_box_cords = [max(0, corner_y), max(corner_x, 0),
                          min(img_shape[0], corner_y + img.shape[0]),
                          min(img_shape[1], corner_x + img.shape[1])]

        mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]),
                   max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]), :]
        canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3], :] = mask
        img = canvas

        #bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]
        bboxes = bboxes.astype(np.float64)
        bboxes += np.array([corner_x, corner_y, corner_x, corner_y])

        bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)

        return img, bboxes
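A worked sketch with made-up numbers, to make the shift above concrete: the same (corner_x, corner_y) offset that repositions the pasted image is added to every box before clip_box trims or drops whatever falls outside the frame.

import numpy as np

corner_x, corner_y = int(0.1 * 300), int(0.1 * 200)      # img_shape (200, 300, 3), factors 0.1
boxes = np.array([[50., 60., 110., 160.]])               # x1, y1, x2, y2
print(boxes + np.array([corner_x, corner_y, corner_x, corner_y]))
# [[ 80.  80. 140. 180.]]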
Example 3
    def __call__(self, img, bboxes):

        angle = random.uniform(*self.angle)

        w, h = img.shape[1], img.shape[0]
        cx, cy = w // 2, h // 2

        img = rotate_im(img, angle)
        corners = get_corners(bboxes)
        corners = np.hstack((corners, bboxes[:, 4:]))
        corners[:, :8] = rotate_box(corners[:, :8], angle, cx, cy, h, w)
        new_bbox = get_enclosing_box(corners).astype(np.float64)

        scale_factor_x = img.shape[1] / w

        scale_factor_y = img.shape[0] / h

        img = cv2.resize(img, (w, h))
        new_bbox[:, :4] /= np.array(
            [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y])

        bboxes = new_bbox

        bboxes = clip_box(bboxes, [0, 0, w, h], 0.25)

        return img, bboxes
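get_corners, rotate_box and get_enclosing_box are helpers from the same augmentation module and are not shown on this page. A hedged sketch of the last step only, assuming each row of corners is (x1, y1, ..., x4, y4) followed by any extra columns; the project's actual implementation may differ.

import numpy as np

def get_enclosing_box(corners):
    # Sketch: axis-aligned box enclosing the four rotated corners of each bbox.
    x = corners[:, [0, 2, 4, 6]]
    y = corners[:, [1, 3, 5, 7]]
    return np.hstack((x.min(axis=1, keepdims=True), y.min(axis=1, keepdims=True),
                      x.max(axis=1, keepdims=True), y.max(axis=1, keepdims=True),
                      corners[:, 8:]))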
Example 4
def proposal_graph(proposal_count, nms_threshold, config, batch_size, cls_prob, deltas, anchors):
    # (num_batch, N, 4)
    deltas = deltas * tf.reshape(tf.constant(config.RPN_BBOX_STD_DEV, tf.float32), (1, 1, 4))
    scores = cls_prob[:, :, 1]
    indices = tf.nn.top_k(scores, k=tf.minimum(config.PRE_NMS_PROPOSALS_INFERENCE, tf.shape(anchors)[0]), sorted=True).indices
    # From here on, keep only the entries selected by indices.
    scores = utils.batch_slice((scores, indices), batch_size, tf.gather)
    deltas = utils.batch_slice((deltas, indices), batch_size, tf.gather)
    anchors = utils.batch_slice((indices,), batch_size, lambda x:tf.gather(anchors, x))
    #pre_nms_box = utils.apply_deltas(anchors, deltas)
    pre_nms_boxes = utils.batch_slice((anchors, deltas), batch_size, utils.apply_deltas)
    windows = np.array([0, 0, 1, 1], dtype=np.float32)
    pre_nms_boxes = utils.batch_slice((pre_nms_boxes,), batch_size, lambda x: utils.clip_box(x, windows))
    #nms_indices = tf.image.non_max_suppression(pre_nms_box, scores, max_k, iou_threshold=0.5)
    def nms(pre_nms_box, scores):
        #indices = utils.non_maximum_suppression(pre_nms_box, scores, proposal_count, iou_min=0.5, sorted=True)
        #indices = tf.image.non_max_suppression(pre_nms_box, scores, proposal_count, nms_threshold, name="rpn_non_max_suppression")
        indices = utils.non_max_suppression(pre_nms_box, scores, proposal_count, nms_threshold, name="rpn_non_max_suppression")
        proposals = tf.gather(pre_nms_box, indices)
        num_pad = tf.maximum(proposal_count - tf.shape(proposals)[0], 0)
        proposals = tf.pad(proposals, [(0, num_pad), (0, 0)])
        proposals = tf.gather(proposals, tf.range(proposal_count))
        return proposals
    proposals = utils.batch_slice((pre_nms_boxes, scores), batch_size, nms)
    return proposals, pre_nms_boxes
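proposal_graph calls utils.batch_slice repeatedly to run a per-image graph function over the whole batch. A minimal sketch of such a helper, matching the call pattern used above (tuple of inputs, Python-int batch size, function); the project's real helper may handle multi-output functions differently.

import tensorflow as tf

def batch_slice(inputs, batch_size, graph_fn):
    # Sketch: slice every input at batch index b, apply graph_fn, re-stack.
    outputs = [graph_fn(*[x[b] for x in inputs]) for b in range(batch_size)]
    return tf.stack(outputs, axis=0)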
Example 5
    def __call__(self, img, bboxes):
        # Choose a random factor to scale by
        img_shape = img.shape
        scale_x = random.uniform(*self.scale)
        if self.diff:
            scale_y = random.uniform(*self.scale)
        else:
            scale_y = scale_x
    
        resize_scale_x = 1 + scale_x
        resize_scale_y = 1 + scale_y
    
        img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)
    
        #bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
        bboxes = bboxes.astype(np.float64)
        bboxes *= np.array([resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y])

        canvas = np.zeros(img_shape, dtype = np.uint8)
    
        y_lim = int(min(resize_scale_y,1)*img_shape[0])
        x_lim = int(min(resize_scale_x,1)*img_shape[1])
    
        #print(y_lim, x_lim)
    
        canvas[:y_lim,:x_lim,:] =  img[:y_lim,:x_lim,:]
    
        img = canvas
        bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25)
  
        return img, bboxes
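A hedged usage sketch tying Examples 2, 3 and 5 together. Only the __call__ bodies appear on this page, so the class names and constructor arguments below are assumptions.

# Hypothetical class names / arguments; each object implements a __call__ shown above.
img, bboxes = RandomTranslate(translate=(-0.2, 0.2), diff=False)(img, bboxes)
img, bboxes = RandomRotate(angle=(-10, 10))(img, bboxes)
img, bboxes = RandomScale(scale=(-0.2, 0.2), diff=False)(img, bboxes)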
Example 6
def refine_detection_graph(rois, cls_prob, deltas, window, config):
    # For each ROI, take the class with the highest cls_prob (one per delta set).
    cls_ids = tf.cast(tf.argmax(cls_prob, axis=1), tf.int32)
    # ((0, i0), (1, i1), (2, i2), ...)
    indices = tf.stack([tf.range(tf.shape(cls_ids)[0]), cls_ids], axis=1)
    cls_scores = tf.gather_nd(cls_prob, indices)
    # Use the deltas of the highest-scoring class.
    cls_deltas = tf.gather_nd(deltas, indices)
    # rois are the proposals; boxes are the proposals after applying the deltas.
    boxes = utils.apply_deltas(rois, cls_deltas*config.BBOX_STD_DEV)
    boxes = utils.clip_box(boxes, window)
    # Drop background detections (class id 0).
    keep = tf.where(cls_ids > 0)[:, 0]
    if config.MIN_SCORE:
        keep_2 = tf.where(cls_scores > config.MIN_SCORE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(keep_2, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]
    pre_nms_cls_ids = tf.gather(cls_ids, keep)
    pre_nms_scores = tf.gather(cls_scores, keep)
    pre_nms_boxes = tf.gather(boxes, keep)
    unique_class_ids = tf.unique(pre_nms_cls_ids)[0]
    
    def nms_cls(cls_id):
        #ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))
        # positions within the pre-NMS set
        ixs = tf.where(tf.equal(pre_nms_cls_ids, cls_id))[:, 0]
        # non_max_suppression returns positions within ixs
        cls_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_boxes, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.MAX_NUM_ROIS,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        cls_keep = tf.gather(ixs, cls_keep)
        cls_keep = tf.gather(keep, cls_keep)
        gap = config.MAX_NUM_ROIS - tf.shape(cls_keep)[0]
        cls_keep = tf.pad(cls_keep, [(0, gap)], mode="CONSTANT", constant_values=-1)
        cls_keep.set_shape([config.MAX_NUM_ROIS])
        return cls_keep
    nms_keep = tf.map_fn(nms_cls, unique_class_ids, dtype=tf.int64)
    nms_keep = tf.reshape(nms_keep, (-1,))
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    keep = tf.sets.set_intersection(tf.expand_dims(nms_keep, 0), tf.expand_dims(keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]
    roi_count = config.MAX_NUM_ROIS
    class_score_keep = tf.gather(cls_scores, keep)
    num_keep = tf.minimum(roi_count, tf.shape(class_score_keep)[0])
    top_ids = tf.nn.top_k(class_score_keep, num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)
    detected_boxes = tf.gather(boxes, keep)
    detected_cls_ids = tf.gather(cls_ids, keep)
    detected_cls_scores = tf.gather(cls_scores, keep)
    detections = tf.concat([detected_boxes,
        tf.to_float(detected_cls_ids)[..., tf.newaxis],
        detected_cls_scores[..., tf.newaxis]], axis=1)

    gap = roi_count - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
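Both proposal_graph and refine_detection_graph rely on utils.apply_deltas. A hedged sketch of the standard Faster R-CNN-style decoding it presumably performs, assuming (y1, x1, y2, x2) boxes and (dy, dx, log dh, log dw) deltas; the project's actual box ordering may differ.

import tensorflow as tf

def apply_deltas(boxes, deltas):
    # Sketch: decode (dy, dx, log dh, log dw) against (y1, x1, y2, x2) boxes.
    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * heights + deltas[:, 0] * heights
    center_x = boxes[:, 1] + 0.5 * widths + deltas[:, 1] * widths
    heights = heights * tf.exp(deltas[:, 2])
    widths = widths * tf.exp(deltas[:, 3])
    y1 = center_y - 0.5 * heights
    x1 = center_x - 0.5 * widths
    return tf.stack([y1, x1, y1 + heights, x1 + widths], axis=1)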
Example 7
    def textline_extract(self, image):
        assert len(image.shape) == 3
        h,w,c= image.shape
        #zero-center by mean pixel
        m_img = image - utils.IMAGE_MEAN

#         m_img = np.expand_dims(m_img,axis=0)
        p_data0 = {'inputs_1':m_img}
        param = {"instances":[p_data0]}
        predict_request = json.dumps(param,cls=NumpyEncoder)
        start = time.time()

        response = requests.post(self.server_url, data=predict_request)
        response.raise_for_status()
        prediction = response.json()['predictions'][0]
#         result = self.basemodel.predict(m_img)
        cls  = np.array(prediction['output0'])
        regr = np.array(prediction['output1'])
        cls_prod = np.array(prediction['output2'])
        cls = np.expand_dims(cls,axis=0)
        regr = np.expand_dims(regr,axis=0)
        cls_prod = np.expand_dims(cls_prod,axis=0)
        anchor = utils.gen_anchor((int(h/16),int(w/16)),16)

        bbox = utils.bbox_transfor_inv(anchor,regr)
        bbox = utils.clip_box(bbox,[h,w])

        # keep anchors whose foreground score exceeds self.threshold
        fg = np.where(cls_prod[0,:,1]>self.threshold)[0]
        select_anchor = bbox[fg,:]
        select_score = cls_prod[0,fg,1]
        select_anchor = select_anchor.astype('int32')

        #filter size
        keep_index = utils.filter_bbox(select_anchor,16)

        # nms
        select_anchor = select_anchor[keep_index]
        select_score = select_score[keep_index]
        select_score = np.reshape(select_score,(select_score.shape[0],1))
        nmsbox = np.hstack((select_anchor,select_score))
        keep = utils.nms(nmsbox,0.3)
        select_anchor = select_anchor[keep]
        select_score = select_score[keep]

        #text line
        textConn = text_connect.TextProposalConnector()
        text = textConn.get_text_lines(select_anchor,select_score,[h,w])
        end = time.time()

        text = list(text.astype('int32'))

        return text
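The request above is serialized with a NumpyEncoder that is not shown on this page. A minimal sketch of such an encoder, assuming it only needs to turn numpy arrays and scalars into JSON-serializable Python types:

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    # Sketch: make numpy values digestible by json.dumps.
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            return obj.item()
        return super().default(obj)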
Example 8
    def textline_extract(self, image):
        assert len(image.shape) == 3
        h,w,c= image.shape
        #zero-center by mean pixel
        m_img = image - utils.IMAGE_MEAN

        m_img = np.expand_dims(m_img,axis=0)

        start = time.time()
        result = self.basemodel.predict(m_img)
        cls,regr,cls_prod  = result

        anchor = utils.gen_anchor((int(h/16),int(w/16)),16)

        bbox = utils.bbox_transfor_inv(anchor,regr)
        bbox = utils.clip_box(bbox,[h,w])

        # keep anchors whose foreground score exceeds self.threshold
        fg = np.where(cls_prod[0,:,1]>self.threshold)[0]
        select_anchor = bbox[fg,:]
        select_score = cls_prod[0,fg,1]
        select_anchor = select_anchor.astype('int32')

        #filter size
        keep_index = utils.filter_bbox(select_anchor,16)

        # nms
        select_anchor = select_anchor[keep_index]
        select_score = select_score[keep_index]
        select_score = np.reshape(select_score,(select_score.shape[0],1))
        nmsbox = np.hstack((select_anchor,select_score))
        keep = utils.nms(nmsbox,0.3)
        select_anchor = select_anchor[keep]
        select_score = select_score[keep]

        #text line
        textConn = text_connect.TextProposalConnector()
        text = textConn.get_text_lines(select_anchor,select_score,[h,w])
        end = time.time()

        text = list(text.astype('int32'))

        return text
Example 9
    def _detect(image):
        '''
        :param image: numpy array image of shape (h, w, c)
        :return: text line locations, an array of shape (#lines, 9):
                 eight corner coordinates (x, y pairs) followed by a probability per line
        '''
        h, w, c = image.shape
        image = image - config.IMAGE_MEAN
        image = np.expand_dims(image, axis=0)  # batch_sz, h, w, c

        _, regr, cls_prob = infer_model.predict(image)

        anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
        bbox = utils.bbox_transfor_inv(anchor, regr)
        bbox = utils.clip_box(bbox, [h, w])

        fg = np.where(cls_prob[0, :, 1] > 0.7)[0]
        select_anchor = bbox[fg, :]
        select_score = cls_prob[0, fg, 1]
        select_anchor = select_anchor.astype(np.int32)

        keep_index = utils.filter_bbox(select_anchor, 16)

        # nms
        select_anchor = select_anchor[keep_index]
        select_score = select_score[keep_index]
        select_score = np.reshape(select_score, (select_score.shape[0], 1))
        nmsbox = np.hstack((select_anchor, select_score))
        keep = utils.nms(nmsbox, 0.3)
        select_anchor = select_anchor[keep]
        select_score = select_score[keep]

        # text line
        textConn = TextProposalConnectorOriented()
        text = textConn.get_text_lines(select_anchor, select_score, [h, w])
        return text
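An illustrative follow-up, not part of the original example: each returned row holds eight corner coordinates plus a probability, so the axis-aligned extent of every detected line can be drawn like this.

import cv2

def draw_text_lines(image, text_lines, color=(0, 0, 255)):
    # text_lines: array of shape (#lines, 9) as documented above.
    for line in text_lines:
        xs, ys = line[0:8:2], line[1:8:2]
        cv2.rectangle(image, (int(min(xs)), int(min(ys))),
                      (int(max(xs)), int(max(ys))), color, 2)
    return image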
Example 10
#####################  detection ######################
image = cv2.imread(img_path)
image = utils.resize(image, width=width)
image_ori = image.copy()
h, w, c = image.shape
image = image - config.IMAGE_MEAN
image = np.expand_dims(image, axis=0)

infer_model = ctpn_model.create_ctpn_model()
infer_model.load_weights(ctpn_weight_path)

cls, regr, cls_prob = infer_model.predict(image)

anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
bbox = utils.bbox_transfor_inv(anchor, regr)
bbox = utils.clip_box(bbox, [h, w])

fg = np.where(cls_prob[0, :, 1] > 0.7)[0]
select_anchor = bbox[fg, :]
select_score = cls_prob[0, fg, 1]
select_anchor = select_anchor.astype(np.int32)

keep_index = utils.filter_bbox(select_anchor, 16)

# nms
select_anchor = select_anchor[keep_index]
select_score = select_score[keep_index]
select_score = np.reshape(select_score, (select_score.shape[0], 1))
nmsbox = np.hstack((select_anchor, select_score))
keep = utils.nms(nmsbox, 0.3)
select_anchor = select_anchor[keep]