Ejemplo n.º 1
0
def ctpn(sess, net, image_name):
    """Run CTPN text detection on one image file, sharpening blurry input.

    The image is read with OpenCV and resized with ``resize_im``. When the
    variance of its Laplacian is at most 5000, the image is sharpened with
    PIL (factor 3.0) before it is passed to the network. The detected boxes
    are handed to ``draw_boxes`` and the elapsed time is printed.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, scale = resize_im(cv2.imread(image_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    # Sharpness measure: variance of the Laplacian of the resized image.
    sharpness = cv2.Laplacian(resized, cv2.CV_64F).var()

    # OpenCV (BGR) image -> PIL (RGB) image.
    pil_image = Image.fromarray(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
    if sharpness <= 5000:
        pil_image = ImageEnhance.Sharpness(pil_image).enhance(3.0)
    # PIL (RGB) image -> OpenCV (BGR) image.
    net_input = cv2.cvtColor(np.asarray(pil_image), cv2.COLOR_RGB2BGR)

    scores, proposals = test_ctpn(sess, net, net_input)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       net_input.shape[:2])
    draw_boxes(net_input, image_name, text_lines, scale)

    stopwatch.toc()
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, text_lines.shape[0]))
Ejemplo n.º 2
0
def ctpn(sess, net, image_name, save_path1, save_path2):
    """Detect text in one image and render the result twice.

    The image is read, resized with ``resize_im`` and run through
    ``test_ctpn``. The post-processed boxes are passed first to
    ``draw_boxes2`` (with ``save_path2``) and then to ``draw_boxes``
    (with ``save_path1``). The elapsed time is printed at the end.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
        save_path1: output location forwarded to ``draw_boxes``.
        save_path2: output location forwarded to ``draw_boxes2``.
    """
    stopwatch = Timer()
    stopwatch.tic()

    # Read the image, then resize it.
    resized, scale = resize_im(cv2.imread(image_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    scores, proposals = test_ctpn(sess, net, resized)

    # Post-processing; per the original author's note, detect() both
    # filters and merges the proposals.
    detector = TextDetector()
    text_lines = detector.detect(proposals, scores[:, np.newaxis],
                                 resized.shape[:2])
    draw_boxes2(resized, text_lines, image_name, save_path2, scale)
    draw_boxes(resized, text_lines, image_name, save_path1, scale)

    stopwatch.toc()
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, text_lines.shape[0]))
Ejemplo n.º 3
0
    def run_image(self, img_arr):
        """Detect text in an in-memory image and return the drawn result.

        Args:
            img_arr: image data; it is converted with ``np.array`` first.

        Returns:
            Whatever ``self.draw_boxes`` returns for the resized image,
            the detected boxes and the resize scale.
        """
        resized, scale = self.resize_im(np.array(img_arr),
                                        scale=TextLineCfg.SCALE,
                                        max_scale=TextLineCfg.MAX_SCALE)

        blobs, im_scales = _get_blobs(resized, None)
        if cfg.TEST.HAS_RPN:
            data_shape = blobs['data'].shape
            info_row = [data_shape[1], data_shape[2], im_scales[0]]
            blobs['im_info'] = np.array([info_row], dtype=np.float32)

        fetches = [self.output_cls_prob, self.output_box_pred]
        cls_prob, box_pred = self.sess.run(
            fetches, feed_dict={self.input_img: blobs['data']})

        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        # Column 0 of each ROI is used as its score; columns 1..4 are the
        # box, divided by the blob scale.
        roi_scores = rois[:, 0]
        roi_boxes = rois[:, 1:5] / im_scales[0]

        text_lines = TextDetector().detect(roi_boxes,
                                           roi_scores[:, np.newaxis],
                                           resized.shape[:2])
        return self.draw_boxes(resized, text_lines, scale)
def img_read(im_name):
    """Run the CTPN demo on one image file and draw the detected boxes.

    Prints a banner, reads the image and returns early (``None``) with a
    message when it cannot be read. Otherwise the image is resized, run
    through the module-level session tensors and post-processed, and the
    boxes are passed to ``draw_boxes``.

    Args:
        im_name: path of the image to process.
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print(('Demo for {:s}'.format(im_name)))

    raw = open_cv.imread(im_name)
    if raw is None:
        print('Img not exist')
        return

    resized, scale = resize_im(raw,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(resized, None)
    if cfg.TEST.HAS_RPN:
        data_shape = blobs['data'].shape
        info_row = [data_shape[1], data_shape[2], im_scales[0]]
        blobs['im_info'] = np.array([info_row], dtype=np.float32)

    fetches = [output_cls_prob, output_box_pred]
    cls_prob, box_pred = sess.run(fetches,
                                  feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 of each ROI is used as its score; columns 1..4 are the box,
    # divided by the blob scale.
    roi_scores = rois[:, 0]
    roi_boxes = rois[:, 1:5] / im_scales[0]

    text_lines = TextDetector().detect(roi_boxes, roi_scores[:, np.newaxis],
                                       resized.shape[:2])
    draw_boxes(resized, im_name, text_lines, scale)
def ctpn_area(sess,
              net,
              image_name,
              dst,
              draw_img=False,
              show_area=False,
              area_min=-0.1,
              area_max=1.1):
    """Detect text in an image file and return ``compute_area``'s result.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
        dst: forwarded unchanged to ``compute_area``.
        draw_img: forwarded unchanged to ``compute_area``.
        show_area: forwarded unchanged to ``compute_area``.
        area_min: forwarded unchanged to ``compute_area``.
        area_max: forwarded unchanged to ``compute_area``.

    Returns:
        ``0.0`` when the image cannot be read, otherwise the value
        returned by ``compute_area``.
    """
    raw = cv2.imread(image_name)
    if raw is None:
        return 0.0

    resized, scale = resize_im(raw,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])

    area_options = dict(draw_img=draw_img,
                        show_area=show_area,
                        area_min=area_min,
                        area_max=area_max)
    return compute_area(resized, image_name, text_lines, scale, dst,
                        **area_options)
def ctpn(input_path_img, output_path_label, output_path_img, img_section):
    """Detect text in the top-left section of an image file.

    The image is cropped to its first ``img_section[0]`` rows and first
    ``img_section[1]`` columns, resized, run through the module-level
    session tensors and post-processed. The boxes are then passed to
    ``draw_boxes`` together with both output paths.

    Args:
        input_path_img: path of the image to read.
        output_path_label: forwarded to ``draw_boxes``.
        output_path_img: forwarded to ``draw_boxes``.
        img_section: indexable pair ``(rows, cols)`` bounding the crop.
    """
    print(('CTPN for {:s}'.format(input_path_img)))

    row_limit, col_limit = img_section[0], img_section[1]
    cropped = cv2.imread(input_path_img)[:row_limit, :col_limit]
    resized, scale = resize_im(cropped,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(resized, None)
    if cfg.TEST.HAS_RPN:
        data_shape = blobs['data'].shape
        info_row = [data_shape[1], data_shape[2], im_scales[0]]
        blobs['im_info'] = np.array([info_row], dtype=np.float32)

    fetches = [output_cls_prob, output_box_pred]
    cls_prob, box_pred = sess.run(fetches,
                                  feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    roi_scores = rois[:, 0]
    roi_boxes = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(roi_boxes, roi_scores[:, np.newaxis],
                                       resized.shape[:2])
    draw_boxes(resized, text_lines, scale, output_path_label, output_path_img)
    print('*** OCR Complete ***')
Ejemplo n.º 7
0
def ctpn(sess, net, image_name):
    """Detect text in the bottom third of an image file.

    Only rows from ``int(2 * height / 3.0)`` to the bottom of the image
    are kept before resizing and detection. The detected boxes are passed
    to ``draw_boxes`` and the elapsed time is printed.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    full_image = cv2.imread(image_name)
    height, _ = full_image.shape[:2]
    first_row = int(2 * height / 3.0)
    bottom_part = full_image[first_row:height, :]

    resized, scale = resize_im(bottom_part,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])
    draw_boxes(resized, image_name, text_lines, scale)

    stopwatch.toc()
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, text_lines.shape[0]))
def ctpn(sess, net, image_name):
    """Tally one image into the module-level text / non-text counters.

    The label is the name of the image's parent directory (second-to-last
    ``'/'``-separated path component). The number of detected boxes is
    printed and written to ``boxes.txt`` (overwritten on every call).
    Exactly one of the four global counters is incremented:

    * boxes found, label ``'non_text'``   -> ``false_non_text``
    * boxes found, any other label        -> ``true_text`` (image saved)
    * no boxes, label ``'text'``          -> ``false_text``
    * no boxes, any other label           -> ``true_non_text`` (image saved)

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: ``'/'``-separated path of the image to process.
    """
    global true_text, true_non_text, false_text, false_non_text

    path_parts = image_name.split('/')
    base_name = path_parts[-1]
    label_name = path_parts[-2]

    resized, scale = resize_im(cv2.imread(image_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])

    num_boxes = len(text_lines)
    print(num_boxes)
    with open('boxes.txt', 'w') as count_file:
        count_file.write(str(num_boxes))

    found_text = num_boxes > 0
    if found_text and label_name == 'non_text':
        false_non_text += 1
    elif found_text:
        true_text += 1
        cv2.imwrite(os.path.join('data/results/text', base_name), resized)
    elif label_name == 'text':
        false_text += 1
    else:
        true_non_text += 1
        cv2.imwrite(os.path.join('data/results/non_text', base_name), resized)
Ejemplo n.º 9
0
def test(im_name, sess, output_cls_prob, output_box_pred, input_img,
         keras_model):
    """Detect text in an image file and return ``draw_boxes``'s result.

    Args:
        im_name: path of the image to process.
        sess: session used to evaluate the two output tensors.
        output_cls_prob: first tensor fetched from the session.
        output_box_pred: second tensor fetched from the session.
        input_img: placeholder fed with the image blob.
        keras_model: forwarded unchanged to ``draw_boxes``.

    Returns:
        The value returned by ``draw_boxes``.
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print(('Demo for {:s}'.format(im_name)))

    resized, scale = resize_im(cv2.imread(im_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(resized, None)
    if cfg.TEST.HAS_RPN:
        data_shape = blobs['data'].shape
        info_row = [data_shape[1], data_shape[2], im_scales[0]]
        blobs['im_info'] = np.array([info_row], dtype=np.float32)

    fetches = [output_cls_prob, output_box_pred]
    cls_prob, box_pred = sess.run(fetches,
                                  feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    roi_scores = rois[:, 0]
    roi_boxes = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(roi_boxes, roi_scores[:, np.newaxis],
                                       resized.shape[:2])
    return draw_boxes(resized, im_name, text_lines, scale, keras_model)
Ejemplo n.º 10
0
def test_net(sess, net, imdb, weights_filename):
    """Test a Fast R-CNN network on an image database.

    Every image of ``imdb`` is read, resized, run through ``test_ctpn`` and
    post-processed. The boxes returned by ``check_unreasonable_box`` are
    appended to class slot 1 of that image's entry in ``all_boxes``. The
    collected detections are pickled to ``detections.pkl`` in the output
    directory and then passed to ``imdb.evaluate_detections``.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        imdb: image database; ``image_index``, ``num_classes``,
            ``image_path_at`` and ``evaluate_detections`` are used.
        weights_filename: forwarded to ``get_output_dir``.
    """
    timer = Timer()
    timer.tic()
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    output_dir = get_output_dir(imdb, weights_filename)
    # all_boxes[image][class] -> list of detections
    all_boxes = [[[] for _ in range(imdb.num_classes)]
                 for _ in range(num_images)]
    print(all_boxes)
    for i in range(num_images):
        image_path = imdb.image_path_at(i)
        print('***********', image_path)

        # Time each image on its own timer; the outer timer is only stopped
        # after the whole run, so its total_time is not a per-image figure.
        im_timer = Timer()
        im_timer.tic()
        img = cv2.imread(image_path)
        img, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        scores, boxes = test_ctpn(sess, net, img)
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        im_timer.toc()
        print(('Detection took {:.3f}s for '
               '{:d} object proposals').format(im_timer.total_time,
                                               boxes.shape[0]))

        boxes = check_unreasonable_box(boxes, scale)
        # NOTE(review): class index 1 is hard-coded - presumably the text
        # class; confirm against the imdb class list.
        all_boxes[i][1] += boxes

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes, output_dir)
    timer.toc()
Ejemplo n.º 11
0
Archivo: demo.py Proyecto: Skii3/temp
def ctpn(sess, net, image_name, boxlabel):
    """Detect text in an image and draw detections plus reference boxes.

    The detections are drawn first (``draw_boxes`` with a ``None`` last
    argument). The image it returns is then used to draw ``boxlabel``,
    expanded to a nine-column layout, with ``(0, 0, 0)`` as last argument.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
        boxlabel: 2-D array; columns 0..3 are read for each reference box.

    Returns:
        The detected boxes divided by the resize scale.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, scale = resize_im(cv2.imread(image_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])
    annotated = draw_boxes(resized, image_name, text_lines, scale, None)

    # Expand each reference box to eight coordinate columns, taken from
    # columns (0,1), (2,1), (0,3), (2,3) of boxlabel, plus a column of ones.
    label_columns = [
        boxlabel[:, 0], boxlabel[:, 1], boxlabel[:, 2], boxlabel[:, 1],
        boxlabel[:, 0], boxlabel[:, 3], boxlabel[:, 2], boxlabel[:, 3],
        np.ones(len(boxlabel)),
    ]
    label_boxes = np.array(label_columns).T
    draw_boxes(annotated, image_name, label_boxes, 1, (0, 0, 0))

    stopwatch.toc()
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, text_lines.shape[0]))

    # NOTE(review): this divides every column by scale; if the last column
    # is a score rather than a coordinate, it is rescaled too - confirm.
    return text_lines / scale
Ejemplo n.º 12
0
def text_detection(img):
    """Detect text in an in-memory image.

    The network outputs come from ``obj.get_text_classification`` rather
    than from a direct session call.

    Args:
        img: image array to process.

    Returns:
        The value of ``return_blobs_tuple`` for the detected boxes and the
        resize scale.
    """
    resized, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(resized, None)
    if cfg.TEST.HAS_RPN:
        data_shape = blobs['data'].shape
        info_row = [data_shape[1], data_shape[2], im_scales[0]]
        blobs['im_info'] = np.array([info_row], dtype=np.float32)

    cls_prob, box_pred = obj.get_text_classification(blobs)
    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    roi_scores = rois[:, 0]
    roi_boxes = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(roi_boxes, roi_scores[:, np.newaxis],
                                       resized.shape[:2])
    return return_blobs_tuple(text_lines, scale)
    def main2(self, image_array, im_name):
        """Detect text in an in-memory image and draw the boxes.

        Args:
            image_array: image array to process.
            im_name: image name forwarded to ``self.draw_boxes``.
        """
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ I"m here')

        resized, scale = self.resize_im(image_array,
                                        scale=TextLineCfg.SCALE,
                                        max_scale=TextLineCfg.MAX_SCALE)

        blobs, im_scales = _get_blobs(resized, None)
        if cfg.TEST.HAS_RPN:
            data_shape = blobs['data'].shape
            info_row = [data_shape[1], data_shape[2], im_scales[0]]
            blobs['im_info'] = np.array([info_row], dtype=np.float32)

        fetches = [self.output_cls_prob, self.output_box_pred]
        cls_prob, box_pred = self.sess.run(
            fetches, feed_dict={self.input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        roi_scores = rois[:, 0]
        roi_boxes = rois[:, 1:5] / im_scales[0]
        text_lines = TextDetector().detect(roi_boxes,
                                           roi_scores[:, np.newaxis],
                                           resized.shape[:2])
        self.draw_boxes(resized, im_name, text_lines, scale)
Ejemplo n.º 14
0
def ctpn(sess, net, image_path):
    """Detect handwritten and printed text in one image and save the result.

    ``test_ctpn`` returns a class id, a score and a box per proposal.
    Proposals of class 1 (handwritten) and class 2 (printed) are
    post-processed separately, drawn in different colours, and the image
    is scaled back by ``1 / scale`` and written to ``data/results``.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_path: ``'/'``-separated path of the image to process.
    """
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_path)
    img_name = image_path.split('/')[-1]
    # Resize the image and keep the scale factor that was applied.
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    # Run the network; the original author notes it yields 1000 scores and
    # 1000 boxes.
    cls, scores, boxes = test_ctpn(sess, net, img)

    print('cls, scores, boxes', cls.shape, scores.shape, boxes.shape)

    handwritten_filter = np.where(cls == 1)[0]
    handwritten_scores = scores[handwritten_filter]
    handwritten_boxes = boxes[handwritten_filter, :]

    print_filter = np.where(cls == 2)[0]
    print_scores = scores[print_filter]
    print_boxes = boxes[print_filter, :]

    # One detector per text class, so the two passes do not share an
    # instance.
    handwritten_detector = TextDetector()
    print_detector = TextDetector()

    print('print_filter', np.array(print_filter).shape)
    print('handwritten_boxes, handwritten_scores', handwritten_boxes.shape,
          handwritten_scores[:, np.newaxis].shape)

    filted_handwritten_boxes = handwritten_detector.detect(
        handwritten_boxes, handwritten_scores[:, np.newaxis], img.shape[:2])
    filted_print_boxes = print_detector.detect(
        print_boxes, print_scores[:, np.newaxis], img.shape[:2])

    draw_boxes(img, filted_handwritten_boxes, (255, 0, 0))
    draw_boxes(img, filted_print_boxes, (0, 255, 0))

    # Undo the earlier resize before saving.
    img = cv2.resize(img,
                     None,
                     None,
                     fx=1.0 / scale,
                     fy=1.0 / scale,
                     interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(os.path.join("data/results", img_name), img)

    timer.toc()
    # The count reported here is the number of raw proposals.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
Ejemplo n.º 15
0
def detect_text_ctpn(image_bytes: bytes,
                     sess: tf.Session) -> list:
    """
    Given an image and an active tensorflow session loaded with config/model,
    run the model to identify regions of interest (i.e. regions that are
    likely to contain text) and run OCR on each of them.

    :param image_bytes: encoded image to detect text/perform ocr on
    :param sess: active tensorflow session with graph and config loaded
    :return: list with one ``ocr`` result per detected region
    :raises ValueError: if ``image_bytes`` cannot be decoded as an image
    """

    # TODO: Read more about this section, up to TextDetector()
    # Retrieve tensors from graph
    input_img = sess.graph.get_tensor_by_name('Placeholder:0')
    output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
    output_box_pred = sess.graph.get_tensor_by_name(
        'rpn_bbox_pred/Reshape_1:0')

    # Process image
    img_array = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_ANYCOLOR)
    if img is None:
        # imdecode signals an undecodable buffer by returning None; fail
        # here with a clear message instead of deep inside resize_im.
        raise ValueError('image_bytes could not be decoded as an image')
    img, scale = resize_im(img,
                           scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    img = rotate(img, skew_angle(image=img))

    blobs, im_scales = _get_blobs(img, None)
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                  feed_dict={input_img: blobs['data']})

    rois, _ = proposal_layer(cls_prob,
                             box_pred,
                             blobs['im_info'],
                             'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    scores = rois[:, 0]
    boxes = rois[:, 1:5] / im_scales[0]

    # apply nms and retain only high scoring boxes/proposals
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])

    # crop regions of interest indicated by boxes
    cropped_images = draw_boxes(img, boxes)

    # for each region of interest, perform ocr and return the collected text
    return [ocr(cropped) for cropped in cropped_images]
Ejemplo n.º 16
0
def ctpn_batch(imglist):
    """Run CTPN text detection over a list of in-memory images.

    A fresh session is created from ``./ctpn/data/ctpn.pb`` for the call
    and closed again before returning.

    Args:
        imglist: iterable of image arrays.

    Returns:
        ``(stroutput, imgoutput)``: two lists holding, per input image, the
        first and second value returned by ``process_boxes``.
    """
    cfg_from_file('./ctpn/text.yml')

    # init session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    try:
        with gfile.FastGFile('./ctpn/data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        stroutput = []
        imgoutput = []
        for img in imglist:
            img, scale = resize_im(img,
                                   scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)

            blobs, im_scales = _get_blobs(img, None)
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)
            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob,
                                     box_pred,
                                     blobs['im_info'],
                                     'TEST',
                                     anchor_scales=cfg.ANCHOR_SCALES)

            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]
            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            strlist, img = process_boxes(img, boxes, scale)
            stroutput.append(strlist)
            imgoutput.append(img)
    finally:
        # Release the session's resources even when detection fails.
        sess.close()
    return stroutput, imgoutput
Ejemplo n.º 17
0
 def load(self):
     """Create the session, network and detector and restore the weights.

     Reads ``text.yml`` from this module's directory, opens a session
     limited to ``self.gpu_fraction`` of GPU memory, builds the
     ``VGGnet_test`` network and restores the checkpoint state found
     under ``self.model_path``.
     """
     logging.info('Creating networks and loading parameters')

     config_path = os.path.join(os.path.dirname(__file__), 'text.yml')
     cfg_from_file(config_path)

     session_config = tf.ConfigProto(
         allow_soft_placement=True,
         gpu_options=tf.GPUOptions(
             per_process_gpu_memory_fraction=self.gpu_fraction))
     self.session = tf.Session(config=session_config)

     self.net = get_network("VGGnet_test")
     self.textdetector = TextDetector()

     restorer = tf.train.Saver()
     checkpoint = tf.train.get_checkpoint_state(self.model_path)
     restorer.restore(self.session, checkpoint.model_checkpoint_path)
Ejemplo n.º 18
0
def ctpn(img):
    """Detect text in an in-memory image using the module-level session.

    Args:
        img: image array to process.

    Returns:
        ``(scores, boxes, img, scale)``: the raw proposal scores from
        ``test_ctpn``, the post-processed boxes, the resized image and the
        resize scale.
    """
    resized, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    scores, proposals = test_ctpn(sess, net, resized)
    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])

    return scores, text_lines, resized, scale
    def ctpn(self, image_name):
        """Detect text in an image file and return its box coordinates.

        Args:
            image_name: path of the image to process.

        Returns:
            The ``(min_y_sort_list, base_name)`` pair produced by
            ``self.get_coordinates``.
        """
        # 600 / 1000 follow the CTPN paper, per the original author's note.
        resized, scale = self.resize_im(cv2.imread(image_name),
                                        scale=600,
                                        max_scale=1000)
        scores, proposals = test_ctpn(self.sess, self.net, resized)

        # CTPN detector instance.
        detector = TextDetector()
        text_lines = detector.detect(proposals, scores[:, np.newaxis],
                                     resized.shape[:2])

        coordinates = self.get_coordinates(resized, image_name, text_lines,
                                           scale)
        min_y_sort_list, base_name = coordinates
        return min_y_sort_list, base_name
Ejemplo n.º 20
0
def ctpn(sess, net, image_name):
    """Detect text in one image file, draw the boxes and print the timing.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, scale = resize_im(cv2.imread(image_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])
    draw_boxes(resized, image_name, text_lines, scale)

    stopwatch.toc()
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, text_lines.shape[0]))
Ejemplo n.º 21
0
def ctpn(sess, net, image_name):
    """Run the CTPN pipeline on a single image file.

    Reads and resizes the image, runs ``test_ctpn``, post-processes the
    proposals with ``TextDetector``, calls ``draw_boxes`` and prints how
    long the call took together with the number of resulting boxes.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
    """
    clock = Timer()
    clock.tic()

    source_image = cv2.imread(image_name)
    resize_options = {'scale': TextLineCfg.SCALE,
                      'max_scale': TextLineCfg.MAX_SCALE}
    net_input, scale = resize_im(source_image, **resize_options)
    scores, raw_boxes = test_ctpn(sess, net, net_input)

    detector = TextDetector()
    column_scores = scores[:, np.newaxis]
    detections = detector.detect(raw_boxes, column_scores,
                                 net_input.shape[:2])
    draw_boxes(net_input, image_name, detections, scale)

    clock.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(clock.total_time,
                                           detections.shape[0]))
Ejemplo n.º 22
0
def ctpn(sess, net, image_name):
    """Detect text in one image file and draw the boxes.

    The timer is started and stopped around the work but its result is
    not reported.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, scale = resize_im(cv2.imread(image_name),
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    image_size = resized.shape[:2]
    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       image_size)
    draw_boxes(resized, image_name, text_lines, scale)

    stopwatch.toc()
Ejemplo n.º 23
0
def ctpn(sess, net, image_name, model):
    """Detect text in an image file and pass the boxes to get_coordinates.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        image_name: path of the image to process.
        model: forwarded unchanged to ``get_coordinates``.
    """
    # 600 / 1000 follow the CTPN paper, per the original author's note.
    resized, scale = resize_im(cv2.imread(image_name),
                               scale=600,
                               max_scale=1000)
    print('ctpn', resized.shape)

    scores, proposals = test_ctpn(sess, net, resized)

    # CTPN detector instance.
    detector = TextDetector()
    text_lines = detector.detect(proposals, scores[:, np.newaxis],
                                 resized.shape[:2])
    get_coordinates(resized, image_name, text_lines, scale, model)
Ejemplo n.º 24
0
 def predict(self, image_name):
     """Detect text in an image file.

     Args:
         image_name: path of the image to process.

     Returns:
         ``(img, boxes, scale)``: the resized image, the post-processed
         boxes and the resize scale.
     """
     resized, scale = self.resize_im(cv2.imread(image_name),
                                     scale=TextLineCfg.SCALE,
                                     max_scale=TextLineCfg.MAX_SCALE)
     scores, proposals = test_ctpn(self.sess, self.net, resized)

     detector = TextDetector()
     text_lines = detector.detect(proposals, scores[:, np.newaxis],
                                  resized.shape[:2])
     return resized, text_lines, scale
Ejemplo n.º 25
0
def ctpn(sess, net, img):
    """Detect text in an in-memory image and draw the boxes.

    The post-processed boxes are reordered by their last column, largest
    value first, before drawing.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        img: image array to process.

    Returns:
        The ``(im, bboxes)`` pair returned by ``draw_boxes``.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])
    descending = np.argsort(text_lines[:, -1])[::-1]
    ranked = text_lines[descending]

    im, bboxes = draw_boxes(resized, ranked, scale)

    stopwatch.toc()
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, ranked.shape[0]))
    return im, bboxes
Ejemplo n.º 26
0
def ocr():
    """Request handler: run text detection on the path given in the JSON body.

    The request JSON must contain a ``'path'`` entry, which is expanded
    with ``glob``. Every matching, readable image is processed. The
    response carries the ``draw_boxes`` result of the LAST processed image,
    or an empty JSON object when nothing could be processed.

    Returns:
        A ``Response`` with mimetype ``application/json``.
    """
    # get data
    json_data = request.get_json()
    # NOTE(review): this path comes straight from the request and is globbed
    # on the server's filesystem - confirm callers are trusted.
    ori_file = json_data['path']

    # init session
    cfg_from_file('ctpn/text.yml')
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    try:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        # Defined up front so an empty glob result still yields a response.
        im_dict = {}
        for im_name in glob.glob(os.path.join(ori_file)):
            img = cv2.imread(im_name)
            if img is None:
                # Not a readable image; skip it instead of crashing.
                continue
            img, scale = resize_im(img,
                                   scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            blobs, im_scales = _get_blobs(img, None)
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)
            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob,
                                     box_pred,
                                     blobs['im_info'],
                                     'TEST',
                                     anchor_scales=cfg.ANCHOR_SCALES)

            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]
            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            im_dict = draw_boxes(img, im_name, boxes, scale)
    finally:
        # A session is created per request; release it on every exit path.
        sess.close()

    return Response(json.dumps(im_dict), mimetype='application/json')
def ctpn(cv_image):
    """Detect text in an in-memory image using the module-level CTPN session.

    The working directory is switched to ``CTPN_DIR`` for the duration of
    the detection and always set to ``ROOT_DIR`` afterwards, including
    when detection raises.

    Args:
        cv_image: image array to process.

    Returns:
        The detected boxes with columns 0..7 divided by the resize scale.
    """
    os.chdir(CTPN_DIR)
    try:
        with ctpn_sess.as_default():
            img, scale = resize_im(cv_image,
                                   scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            scores, boxes = test_ctpn(ctpn_sess, ctpn_net, img)

            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            # Only the first eight columns are rescaled; any further
            # column is left as it is.
            boxes[:, 0:8] /= scale
    finally:
        # Never leave the process stranded in CTPN_DIR.
        os.chdir(ROOT_DIR)
    return boxes
Ejemplo n.º 28
0
def ctpn(sess, net, frame, draw):
    """Detect text in a frame and return the cropped text regions.

    Args:
        sess: session forwarded to ``test_ctpn``.
        net: network forwarded to ``test_ctpn``.
        frame: image array to process.
        draw: when equal to ``1`` the boxes are also drawn with
            ``draw_boxes``.

    Returns:
        The value returned by ``crop_image`` for a copy of the resized
        frame.
    """
    img, scale = resize_im(
        frame, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    # Crop from a copy so the image handed to draw_boxes below is a
    # separate array.
    buf = img.copy()
    crop = crop_image(buf, boxes, scale)

    # Compare by value: `is` tests object identity, which for int literals
    # only works by accident of interpreter caching.
    if draw == 1:
        draw_boxes(img, boxes, scale)
    return crop
def detection(input_image):
    """Run CTPN text detection on one image file and draw the boxes.

    A fresh session is created from ``data/ctpn.pb`` for the call and
    closed again before returning.

    Args:
        input_image: path of the image to process.
    """
    # Disabled setup kept from the original script:
    #     if os.path.exists("data/results/"):
    #         shutil.rmtree("data/results/")
    #     os.makedirs("data/results/")
    #
    #     cfg_from_file('ctpn/text.yml')

    # init session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    try:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        img = cv2.imread(input_image)
        img, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                      feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob,
                                 box_pred,
                                 blobs['im_info'],
                                 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)

        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        draw_boxes(img, input_image, boxes, scale)
    finally:
        # Release the session's resources even when detection fails.
        sess.close()
def ctpn(img):
    """Detect text in an in-memory image and report the timing.

    Uses the module-level ``sess`` and ``net``.

    Args:
        img: image array to process.

    Returns:
        ``(scores, boxes, img, scale, total_time, num_boxes)``: the raw
        proposal scores, the post-processed boxes, the resized image, the
        resize scale, the timer's ``total_time`` and the number of
        post-processed boxes.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, scale = resize_im(img,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    scores, proposals = test_ctpn(sess, net, resized)

    text_lines = TextDetector().detect(proposals, scores[:, np.newaxis],
                                       resized.shape[:2])
    stopwatch.toc()

    elapsed = stopwatch.total_time
    num_boxes = text_lines.shape[0]
    return scores, text_lines, resized, scale, elapsed, num_boxes
def decode_ctpn_output(ctpn_output, im_scales, bbox_scale, img_resized_shape):
    """Turn raw CTPN output into connected boxes.

    Args:
        ctpn_output: indexable whose first element is the ROI array;
            column 0 is used as the score and columns 1..4 as the box.
        im_scales: sequence of blob scales; must have exactly one entry
            when ``cfg.TEST.HAS_RPN`` is set.
        bbox_scale: forwarded to ``resize_bbox``.
        img_resized_shape: shape of the resized image; its first two
            entries are passed to the detector.

    Returns:
        The value returned by ``BboxConnector.start``.
    """
    rois = ctpn_output[0]
    roi_scores = rois[:, 0]

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]
    # NOTE(review): `boxes` is only assigned when cfg.TEST.HAS_RPN is true;
    # otherwise the detect() call below fails on an unbound name.

    # Boxes returned here are relative to the resized image.
    _, _, resized_boxes = TextDetector().detect(boxes,
                                                roi_scores[:, np.newaxis],
                                                img_resized_shape[:2])

    # Per the original author's note, resize_bbox yields absolute box
    # positions in the original image.
    original_bbox, _ = resize_bbox(resized_boxes, bbox_scale)
    return BboxConnector(original_bbox).start()
Ejemplo n.º 32
0
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')
    sess.run(tf.global_variables_initializer())

    input_img = sess.graph.get_tensor_by_name('Placeholder:0')
    output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
    output_box_pred = sess.graph.get_tensor_by_name('rpn_bbox_pred/Reshape_1:0')

    im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \
               glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg'))

    for im_name in im_names:
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        print(('Demo for {:s}'.format(im_name)))
        img = cv2.imread(im_name)
        img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred], feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST', anchor_scales=cfg.ANCHOR_SCALES)

        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
        draw_boxes(img, im_name, boxes, scale)