def get_coords(image_name):
    """Run the detection pipeline on an image file and return its coordinates.

    The image is read with OpenCV, resized according to ``TextLineCfg``,
    evaluated with the module-level session (``sess``) and the proposals
    are post-processed by the module-level ``textdetector``.

    :param image_name: path handed to ``cv2.imread``; also forwarded to
        ``draw_boxes``
    :return: whatever ``draw_boxes`` returns for the detected boxes
    """
    image = cv2.imread(image_name)
    image, scale = resize_im(
        image, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)

    net_inputs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data_blob = net_inputs['data']
        # im_info row: two dimensions of the data blob plus the blob scale.
        net_inputs['im_info'] = np.array(
            [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: net_inputs['data']})
    proposals, _ = proposal_layer(
        cls_prob, box_pred, net_inputs['im_info'], 'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box; boxes are divided by
    # the blob scale before being passed to the detector.
    proposal_scores = proposals[:, 0]
    proposal_boxes = proposals[:, 1:5] / im_scales[0]
    text_lines = textdetector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], image.shape[:2])
    return draw_boxes(image, image_name, text_lines, scale)
    def run_image(self, img_arr):
        """Run the detection pipeline on an in-memory image.

        :param img_arr: image data; converted with ``np.array`` before use
        :return: the value produced by ``self.draw_boxes`` for the detections
        """
        image = np.array(img_arr)
        image, scale = self.resize_im(
            image, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)

        net_inputs, im_scales = _get_blobs(image, None)
        if cfg.TEST.HAS_RPN:
            data_blob = net_inputs['data']
            # im_info row: two dimensions of the data blob plus the scale.
            net_inputs['im_info'] = np.array(
                [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

        fetches = [self.output_cls_prob, self.output_box_pred]
        cls_prob, box_pred = self.sess.run(
            fetches, feed_dict={self.input_img: net_inputs['data']})
        proposals, _ = proposal_layer(
            cls_prob, box_pred, net_inputs['im_info'], 'TEST',
            anchor_scales=cfg.ANCHOR_SCALES)

        # Column 0 holds the score, columns 1-4 the box.
        proposal_scores = proposals[:, 0]
        proposal_boxes = proposals[:, 1:5] / im_scales[0]
        detector = TextDetector()
        text_lines = detector.detect(
            proposal_boxes, proposal_scores[:, np.newaxis], image.shape[:2])
        return self.draw_boxes(image, text_lines, scale)
    def main2(self, image_array, im_name):
        """Run the detection pipeline on one image and draw the result.

        :param image_array: image handed to ``self.resize_im``
        :param im_name: name forwarded to ``self.draw_boxes``
        :return: ``None``; the result is only passed to ``self.draw_boxes``
        """
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ I"m here')
        image, scale = self.resize_im(
            image_array,
            scale=TextLineCfg.SCALE,
            max_scale=TextLineCfg.MAX_SCALE)

        net_inputs, im_scales = _get_blobs(image, None)
        if cfg.TEST.HAS_RPN:
            data_blob = net_inputs['data']
            # im_info row: two dimensions of the data blob plus the scale.
            net_inputs['im_info'] = np.array(
                [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

        fetches = [self.output_cls_prob, self.output_box_pred]
        cls_prob, box_pred = self.sess.run(
            fetches, feed_dict={self.input_img: net_inputs['data']})
        proposals, _ = proposal_layer(
            cls_prob, box_pred, net_inputs['im_info'], 'TEST',
            anchor_scales=cfg.ANCHOR_SCALES)

        # Column 0 holds the score, columns 1-4 the box.
        proposal_scores = proposals[:, 0]
        proposal_boxes = proposals[:, 1:5] / im_scales[0]
        detector = TextDetector()
        text_lines = detector.detect(
            proposal_boxes, proposal_scores[:, np.newaxis], image.shape[:2])
        self.draw_boxes(image, im_name, text_lines, scale)
Exemple #4
0
def text_detection(img):
    """Detect text boxes in an already-loaded image.

    The network outputs come from ``obj.get_text_classification`` rather
    than from a session owned by this function.

    :param img: image handed to ``resize_im``
    :return: the value of ``return_blobs_tuple`` for the detected boxes and
        the resize scale
    """
    resized, scale = resize_im(
        img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)

    net_inputs, im_scales = _get_blobs(resized, None)
    if cfg.TEST.HAS_RPN:
        data_blob = net_inputs['data']
        # im_info row: two dimensions of the data blob plus the blob scale.
        net_inputs['im_info'] = np.array(
            [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = obj.get_text_classification(net_inputs)
    proposals, _ = proposal_layer(
        cls_prob, box_pred, net_inputs['im_info'], 'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box.
    proposal_scores = proposals[:, 0]
    proposal_boxes = proposals[:, 1:5] / im_scales[0]
    detector = TextDetector()
    text_lines = detector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], resized.shape[:2])
    return return_blobs_tuple(text_lines, scale)
def ctpn(input_path_img, output_path_label, output_path_img, img_section):
    """Run detection on the leading region of an image file.

    :param input_path_img: path of the image read with ``cv2.imread``
    :param output_path_label: forwarded unchanged to ``draw_boxes``
    :param output_path_img: forwarded unchanged to ``draw_boxes``
    :param img_section: indexable pair; rows up to ``img_section[0]`` and
        columns up to ``img_section[1]`` of the image are kept
    :return: ``None``
    """
    print('CTPN for {:s}'.format(input_path_img))
    full_image = cv2.imread(input_path_img)
    row_limit = img_section[0]
    col_limit = img_section[1]
    region = full_image[:row_limit, :col_limit]
    region, scale = resize_im(
        region, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)

    net_inputs, im_scales = _get_blobs(region, None)
    if cfg.TEST.HAS_RPN:
        data_blob = net_inputs['data']
        # im_info row: two dimensions of the data blob plus the blob scale.
        net_inputs['im_info'] = np.array(
            [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: net_inputs['data']})
    proposals, _ = proposal_layer(
        cls_prob, box_pred, net_inputs['im_info'], 'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box.
    proposal_scores = proposals[:, 0]
    proposal_boxes = proposals[:, 1:5] / im_scales[0]
    detector = TextDetector()
    text_lines = detector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], region.shape[:2])
    draw_boxes(region, text_lines, scale, output_path_label, output_path_img)
    print('*** OCR Complete ***')
def img_read(im_name):
    """Load an image file, run detection on it and draw the boxes.

    Prints a message and returns early when the image cannot be loaded.

    :param im_name: path handed to ``open_cv.imread``; also forwarded to
        ``draw_boxes``
    :return: ``None``
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for {:s}'.format(im_name))
    image = open_cv.imread(im_name)
    if image is None:
        print('Img not exist')
        return

    image, scale = resize_im(
        image, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    net_inputs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data_blob = net_inputs['data']
        # im_info row: two dimensions of the data blob plus the blob scale.
        net_inputs['im_info'] = np.array(
            [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: net_inputs['data']})
    proposals, _ = proposal_layer(
        cls_prob, box_pred, net_inputs['im_info'], 'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box.
    proposal_scores = proposals[:, 0]
    proposal_boxes = proposals[:, 1:5] / im_scales[0]
    detector = TextDetector()
    text_lines = detector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], image.shape[:2])
    draw_boxes(image, im_name, text_lines, scale)
Exemple #7
0
def test(im_name, sess, output_cls_prob, output_box_pred, input_img,
         keras_model):
    """Run detection on an image file using caller-supplied graph handles.

    :param im_name: path of the image read with ``cv2.imread``
    :param sess: session used to evaluate the two output tensors
    :param output_cls_prob: first tensor fetched from ``sess``
    :param output_box_pred: second tensor fetched from ``sess``
    :param input_img: placeholder that is fed the data blob
    :param keras_model: forwarded unchanged to ``draw_boxes``
    :return: the value returned by ``draw_boxes``
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for {:s}'.format(im_name))

    image = cv2.imread(im_name)
    image, scale = resize_im(
        image, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    net_inputs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data_blob = net_inputs['data']
        # im_info row: two dimensions of the data blob plus the blob scale.
        net_inputs['im_info'] = np.array(
            [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: net_inputs['data']})
    proposals, _ = proposal_layer(
        cls_prob, box_pred, net_inputs['im_info'], 'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box.
    proposal_scores = proposals[:, 0]
    proposal_boxes = proposals[:, 1:5] / im_scales[0]
    detector = TextDetector()
    text_lines = detector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], image.shape[:2])
    return draw_boxes(image, im_name, text_lines, scale, keras_model)
Exemple #8
0
def detect_text_ctpn(image_bytes: bytes,
                     sess: tf.Session) -> list:
    """
    Given an image and an active tensorflow session loaded with config/model,
    run the model to identify regions of interest (i.e. regions that are
    likely to contain text) and run ocr on each of them.

    :param image_bytes: encoded image to detect text/perform ocr on
    :param sess: active tensorflow session with graph and config loaded
    :return: list holding the result of ``ocr`` for every cropped region
        returned by ``draw_boxes``, in the same order
    :raises ValueError: if ``image_bytes`` cannot be decoded into an image
    """

    # TODO: Read more about this section, up to TextDetector()
    # Retrieve tensors from graph
    input_img = sess.graph.get_tensor_by_name('Placeholder:0')
    output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
    output_box_pred = sess.graph.get_tensor_by_name(
        'rpn_bbox_pred/Reshape_1:0')

    # Process image
    img_array = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_ANYCOLOR)
    if img is None:
        # imdecode signals failure by returning None; fail here with a clear
        # message instead of crashing later inside resize_im.
        raise ValueError('image_bytes could not be decoded into an image')
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    img = rotate(img, skew_angle(image=img))

    blobs, im_scales = _get_blobs(img, None)
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        # im_info row: two dimensions of the data blob plus the blob scale
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                  feed_dict={input_img: blobs['data']})

    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    # column 0 holds the score, columns 1-4 the box
    scores = rois[:, 0]
    boxes = rois[:, 1:5] / im_scales[0]

    # apply nms and retain only high scoring boxes/proposals
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])

    # crop regions of interest indicated by boxes
    cropped_images = draw_boxes(img, boxes)

    # for each region of interest, perform ocr and return the collected text
    return [ocr(cropped) for cropped in cropped_images]
Exemple #9
0
def ctpn_batch(imglist):
    """Run CTPN detection over a batch of in-memory images.

    The configuration (``./ctpn/text.yml``) and the frozen graph
    (``./ctpn/data/ctpn.pb``) are loaded on every call. The session is
    closed before returning so repeated calls do not leak its resources.

    :param imglist: iterable of images accepted by ``resize_im``
    :return: ``(stroutput, imgoutput)`` -- two lists in the order of
        ``imglist`` holding, per image, the first and the second value
        returned by ``process_boxes``
    """
    cfg_from_file('./ctpn/text.yml')

    config = tf.ConfigProto(allow_soft_placement=True)
    stroutput = []
    imgoutput = []

    # Using the session as a context manager guarantees it is closed, even
    # when loading the graph or processing an image raises.
    with tf.Session(config=config) as sess:
        with gfile.FastGFile('./ctpn/data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # The session is built on the current default graph, which is also
        # where import_graph_def adds the nodes.
        tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        for img in imglist:
            img, scale = resize_im(img,
                                   scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)

            blobs, im_scales = _get_blobs(img, None)
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                # im_info row: two dims of the data blob plus the blob scale
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)
            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob,
                                     box_pred,
                                     blobs['im_info'],
                                     'TEST',
                                     anchor_scales=cfg.ANCHOR_SCALES)

            # column 0 holds the score, columns 1-4 the box
            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]
            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            strlist, img = process_boxes(img, boxes, scale)
            stroutput.append(strlist)
            imgoutput.append(img)

    return stroutput, imgoutput
Exemple #10
0
def ocr():
    """Request handler: run CTPN detection on the image(s) named in the body.

    Reads ``path`` from the JSON request body, treats it as a glob pattern
    and runs the detection pipeline on every matching file. Only the
    ``draw_boxes`` result of the last matching file is returned.

    :return: JSON ``Response`` with the last ``draw_boxes`` result, or a
        404 JSON ``Response`` when the pattern matches no file
    """
    # get data
    payload = request.get_json()
    ori_file = payload['path']

    # NOTE(security): ``path`` comes straight from the request and is used
    # as a glob pattern on the server's filesystem; confirm that callers
    # are trusted or restrict it to an allowed directory.
    im_names = glob.glob(ori_file)
    if not im_names:
        # Without this guard the result variable is never bound and the
        # handler fails with a NameError.
        return Response(json.dumps({'error': 'no image matches the path'}),
                        status=404,
                        mimetype='application/json')

    # init session
    cfg_from_file('ctpn/text.yml')
    config = tf.ConfigProto(allow_soft_placement=True)

    # The context manager closes the session once the request is served;
    # leaving it open leaks its resources on every request.
    with tf.Session(config=config) as sess:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # The session is built on the current default graph, which is also
        # where import_graph_def adds the nodes.
        tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        for im_name in im_names:
            img = cv2.imread(im_name)
            img, scale = resize_im(img,
                                   scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            blobs, im_scales = _get_blobs(img, None)
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                # im_info row: two dims of the data blob plus the blob scale
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)
            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob,
                                     box_pred,
                                     blobs['im_info'],
                                     'TEST',
                                     anchor_scales=cfg.ANCHOR_SCALES)

            # column 0 holds the score, columns 1-4 the box
            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]
            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            im_dict = draw_boxes(img, im_name, boxes, scale)

    return Response(json.dumps(im_dict), mimetype='application/json')
Exemple #11
0
def detect(im_name):
    """Run detection on an image file with the module-level session.

    :param im_name: path of the image read with ``cv2.imread``
    :return: tuple ``(boxes, scale, img)`` -- the output of
        ``text_detector.detect``, the scale returned by ``resize_im`` and
        the resized image
    """
    print('detect for {:s}'.format(im_name))
    image = cv2.imread(im_name)
    image, scale = resize_im(
        image,
        scale=TextLineCfg.SCALE,
        max_scale=TextLineCfg.MAX_SCALE)

    net_inputs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data_blob = net_inputs['data']
        # im_info row: two dimensions of the data blob plus the blob scale.
        net_inputs['im_info'] = np.array(
            [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: net_inputs['data']})
    proposals, _ = proposal_layer(
        cls_prob,
        box_pred,
        net_inputs['im_info'],
        'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box.
    proposal_scores = proposals[:, 0]
    proposal_boxes = proposals[:, 1:5] / im_scales[0]
    text_lines = text_detector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], image.shape[:2])
    return text_lines, scale, image
def detection(input_image):
    """Load the frozen CTPN graph, run it on one image file and draw the boxes.

    The graph is read from ``data/ctpn.pb`` on every call and the session
    is closed before returning. Resetting ``data/results/`` and loading
    ``ctpn/text.yml`` are not performed here.

    :param input_image: path of the image read with ``cv2.imread``; also
        forwarded to ``draw_boxes``
    :return: ``None``
    """
    # init session
    config = tf.ConfigProto(allow_soft_placement=True)

    # The context manager closes the session when done; leaving it open
    # leaks its resources on every call.
    with tf.Session(config=config) as sess:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # The session is built on the current default graph, which is also
        # where import_graph_def adds the nodes.
        tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        img = cv2.imread(input_image)
        img, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            # im_info row: two dims of the data blob plus the blob scale
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                      feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        # column 0 holds the score, columns 1-4 the box
        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        draw_boxes(img, input_image, boxes, scale)
Exemple #13
0
def text_detection(img):
    """Load the frozen CTPN graph and detect text boxes in a loaded image.

    The graph is read from ``data/ctpn.pb`` on every call and the session
    is closed before returning.

    :param img: image handed to ``resize_im``
    :return: the value of ``return_blobs_tuple`` for the detected boxes and
        the resize scale
    """
    config = tf.ConfigProto(allow_soft_placement=True)

    # The context manager closes the session when done; leaving it open
    # leaks its resources on every call.
    with tf.Session(config=config) as sess:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # The session is built on the current default graph, which is also
        # where import_graph_def adds the nodes.
        tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        img, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            # im_info row: two dims of the data blob plus the blob scale
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                      feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        # column 0 holds the score, columns 1-4 the box
        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])

    return return_blobs_tuple(boxes, scale)
def ctpn_boxes(img):
    """Load the frozen CTPN graph and return the boxes found in an image.

    The configuration (``ctpn/text.yml``) and the graph (``data/ctpn.pb``)
    are loaded on every call and the session is closed before returning.

    :param img: image handed to ``resize_im``
    :return: the list produced by ``get_boxes``, sorted in place by element
        1 of each entry (the y_min of the box, per the original comment)
    """
    cfg_from_file('ctpn/text.yml')

    # init session
    config = tf.ConfigProto(allow_soft_placement=True)

    # The context manager closes the session when done; leaving it open
    # leaks its resources on every call.
    with tf.Session(config=config) as sess:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # The session is built on the current default graph, which is also
        # where import_graph_def adds the nodes.
        tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        img, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            # im_info row: two dims of the data blob plus the blob scale
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                      feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        # column 0 holds the score, columns 1-4 the box
        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])

    bboxes = get_boxes(img, boxes, scale)

    # Sort by y_min of every bounding box so they are processed heightwise
    bboxes.sort(key=lambda x: x[1])

    return bboxes
Exemple #15
0
        start = time.time()
        img = cv2.imread(im_name)
        img, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                      feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        draw_boxes(img, im_name, boxes, scale)
        # up box (-1,4)
        ret_boxes = produce_normal_boxes(boxes, scale)
        # (-1,4,2) list格式
        print(ret_boxes)
        print(time.time() - start)
def query_ctpn(sess, cv2img):
    """Run the CTPN graph held by ``sess`` on an image and return rectangles.

    Args:
        sess: tensorflow session whose graph exposes the tensors
            ``Placeholder:0``, ``Reshape_2:0`` and
            ``rpn_bbox_pred/Reshape_1:0``
        cv2img: image handed to ``resize_im`` (presumably a numpy array as
            loaded by OpenCV -- confirm against callers)

    Returns:
        A list with one entry per detected box, each entry having the
        coordinates ``[(xmin, ymin), (xmax, ymax)]``:
            (xmin, ymin) -------------
                 |                    |
             ---------------- (xmax, ymax)
        The list is empty when nothing is detected. ``None`` is returned
        when ``cfg.TEST.HAS_RPN`` is false, because the pipeline only runs
        in RPN mode.
    """
    # Specify input/output
    input_img = sess.graph.get_tensor_by_name('Placeholder:0')
    output_cls_box = sess.graph.get_tensor_by_name('Reshape_2:0')
    output_box_pred = sess.graph.get_tensor_by_name(
        'rpn_bbox_pred/Reshape_1:0')

    img, scale = resize_im(cv2img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    blobs, im_scales = _get_blobs(img, None)
    if not cfg.TEST.HAS_RPN:
        return None

    im_blob = blobs['data']
    # im_info row: two dimensions of the data blob plus the blob scale
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)
    cls_prob, box_pred = sess.run([output_cls_box, output_box_pred],
                                  feed_dict={input_img: blobs['data']})
    print('box_pred, ', box_pred)
    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)
    print('rois, ', rois)

    # column 0 holds the score, columns 1-4 the box
    scores = rois[:, 0]
    boxes = rois[:, 1:5] / im_scales[0]

    textdetector = TextDetector()
    print('textDetector, ', textdetector)
    boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                img.shape[:2])
    print('boxes=textdetector, ', boxes)

    # Convert boxes to bounding rectangles. Every box contributes one
    # rectangle; appending after the loop kept only the last box and
    # failed with a NameError when no box was detected.
    rects = _boxes_to_rects(boxes, scale)
    print('rects.append, ', rects)
    return rects


def _boxes_to_rects(boxes, scale):
    """Turn 8-value boxes into ``[(xmin, ymin), (xmax, ymax)]`` rectangles.

    Even indices of a box are read as x values and odd indices as y values;
    each value is divided by ``scale`` and truncated with ``int`` before the
    minimum and maximum are taken.
    """
    rects = []
    for box in boxes:
        xs = [int(box[i] / scale) for i in (0, 2, 4, 6)]
        ys = [int(box[i] / scale) for i in (1, 3, 5, 7)]
        rects.append([(min(xs), min(ys)), (max(xs), max(ys))])
    return rects
Exemple #17
0
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')
    sess.run(tf.global_variables_initializer())

    input_img = sess.graph.get_tensor_by_name('Placeholder:0')
    output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
    output_box_pred = sess.graph.get_tensor_by_name('rpn_bbox_pred/Reshape_1:0')

    im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \
               glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg'))

    for im_name in im_names:
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        print(('Demo for {:s}'.format(im_name)))
        img = cv2.imread(im_name)
        img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred], feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST', anchor_scales=cfg.ANCHOR_SCALES)

        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
        draw_boxes(img, im_name, boxes, scale)