Python TextDetector.detectの例、utils.text_connector.detectors.TextDetector.detect Pythonの例

コード例 #1

0

ファイルを表示

ファイル: inference.py プロジェクト: zhengyi144/pytorch.ctpn

    def detect(self, img_file):
        """Detect text lines in the image stored at ``img_file``.

        The image is read with OpenCV, resized by ``resize_image``, run
        through ``self.model`` and the raw outputs are converted into text
        proposals and merged text boxes.

        Args:
            img_file: Path of the image to process.

        Returns:
            ``(boxes, text_proposals)`` as integer NumPy arrays. No rescaling
            back to the original image size is applied here, so coordinates
            refer to the resized image.

        Raises:
            ValueError: If OpenCV cannot read ``img_file``.
        """
        start_time = time.time()
        # Fixed: the original read the undefined name ``im_file`` here.
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise ValueError('Error reading image {}!'.format(img_file))
        img_ori, _ = resize_image(img)
        h, w, c = img_ori.shape
        im_info = np.array([h, w, c]).reshape([1, 3])
        img = toTensorImage(img_ori)

        pre_score, pre_reg = self.model(img)
        # (b, C, H, W) -> (b, 10, 2, H, W) -> (10, H, W, 2) -> (10*H*W, 2).
        # squeeze(0) must remove the batch axis for the 4-axis permute to
        # work, so a single-image batch is assumed.
        score = pre_score.reshape(
            (pre_score.shape[0], 10, 2, pre_score.shape[2], pre_score.shape[3])
        ).squeeze(0).permute(0, 2, 3, 1).reshape((-1, 2))
        # Softmax over each pair of scores.
        score = F.softmax(score, dim=1)
        score = score.reshape((10, pre_reg.shape[2], -1, 2))

        # (10, H, W, 2) -> (H, W, 10, 2) -> (1, H, W, 20) as a NumPy array.
        pre_score = score.permute(1, 2, 0, 3).reshape(
            pre_reg.shape[2], pre_reg.shape[3], -1
        ).unsqueeze(0).cpu().detach().numpy()
        # Channels-last layout for the regression output.
        pre_reg = pre_reg.permute(0, 2, 3, 1).cpu().detach().numpy()

        textsegs, _ = proposal_layer(pre_score, pre_reg, im_info)
        # Column 0 is the proposal score, columns 1..4 the proposal box.
        scores = textsegs[:, 0]
        textsegs = textsegs[:, 1:5]

        textdetector = TextDetector(DETECT_MODE='O')
        boxes, text_proposals = textdetector.detect(
            textsegs, scores[:, np.newaxis], img_ori.shape[:2])
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # type it used to alias.
        boxes = np.array(boxes, dtype=int)
        text_proposals = text_proposals.astype(int)
        print('cost_time:' + str(time.time() - start_time) + 's')
        return boxes, text_proposals

コード例 #2

0

ファイルを表示

ファイル: inference.py プロジェクト: zgd716/pytorch.ctpn

    def detect(self, img_file):
        """Detect text lines in ``img_file`` with the refinement-enabled model.

        Args:
            img_file: Path (or file object) accepted by ``Image.open``.

        Returns:
            ``(boxes, text_proposals, rh, rw)``: integer NumPy arrays for the
            merged boxes and the raw proposals, followed by the two resize
            ratios returned by ``resize_image``. The boxes are not rescaled
            here; callers get the ratios to do so themselves.
        """
        img = Image.open(img_file).convert('RGB')
        img = np.array(img)
        img_ori, (rh, rw) = resize_image(img)
        h, w, c = img_ori.shape
        im_info = np.array([h, w, c]).reshape([1, 3])
        img = toTensorImage(img_ori)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            pre_score, pre_reg, refine_ment = self.model(img)
        # (b, C, H, W) -> (b, 10, 2, H, W) -> (10, H, W, 2) -> (10*H*W, 2).
        # squeeze(0) must remove the batch axis for the 4-axis permute to
        # work, so a single-image batch is assumed.
        score = pre_score.reshape(
            (pre_score.shape[0], 10, 2, pre_score.shape[2],
             pre_score.shape[3])).squeeze(0).permute(0, 2, 3, 1).reshape(
                 (-1, 2))
        score = F.softmax(score, dim=1)
        score = score.reshape((10, pre_reg.shape[2], -1, 2))

        # (10, H, W, 2) -> (H, W, 10, 2) -> (1, H, W, 20) as a NumPy array.
        pre_score = score.permute(
            1, 2, 0, 3).reshape(pre_reg.shape[2], pre_reg.shape[3],
                                -1).unsqueeze(0).cpu().detach().numpy()
        # Channels-last layout for the regression and refinement outputs.
        pre_reg = pre_reg.permute(0, 2, 3, 1).cpu().detach().numpy()
        refine_ment = refine_ment.permute(0, 2, 3, 1).cpu().detach().numpy()

        textsegs, _ = proposal_layer(pre_score, pre_reg, refine_ment, im_info)
        # Column 0 is the proposal score, columns 1..4 the proposal box.
        scores = textsegs[:, 0]
        textsegs = textsegs[:, 1:5]

        textdetector = TextDetector(DETECT_MODE=self.detect_type)
        boxes, text_proposals = textdetector.detect(textsegs,
                                                    scores[:, np.newaxis],
                                                    img_ori.shape[:2])
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # type it used to alias.
        boxes = np.array(boxes, dtype=int)
        text_proposals = text_proposals.astype(int)
        return boxes, text_proposals, rh, rw

コード例 #3

0

ファイルを表示

ファイル: inference.py プロジェクト: zonghaofan/ctpn_torch

    def detect_single_img(self, img):
        """Detect text in an already-loaded image and map results back.

        Args:
            img: Image array with three axes; its first two are taken as the
                original height and width.

        Returns:
            ``(boxes, text_proposals)``. ``boxes`` is an integer array whose
            first eight columns were divided by the resize ratios, padded and
            clipped to the original image bounds. ``text_proposals`` is an
            integer array of the proposals divided by the resize ratios.
        """
        ori_h, ori_w, _ = img.shape
        img_res, (rh, rw) = self.resize_image(img)
        h, w, c = img_res.shape
        im_info = np.array([h, w, c]).reshape([1, 3])
        img = self.toTensorImage(img_res)
        # pre_score: (b, 20, h, w); pre_reg: (b, 40, h, w)
        pre_score, pre_reg = self.model(img)
        print('==pre_score.shape===', pre_score.shape)
        print('====pre_reg.shape:', pre_reg.shape)
        # (b, 10, 2, h, w) --> (10, h, w, 2) --> (10*h*w, 2)
        score = pre_score.reshape(
            (pre_score.shape[0], 10, 2, pre_score.shape[2], pre_score.shape[3])
        ).squeeze(0).permute(0, 2, 3, 1).reshape((-1, 2))
        score = F.softmax(score, dim=1)
        # (10, h, w, 2)
        score = score.reshape((10, pre_reg.shape[2], -1, 2))

        # (h, w, 10, 2) --> (b, h, w, 20)
        pre_score = score.permute(1, 2, 0, 3).reshape(
            pre_reg.shape[2], pre_reg.shape[3], -1
        ).unsqueeze(0).cpu().detach().numpy()
        # (b, h, w, 40)
        pre_reg = pre_reg.permute(0, 2, 3, 1).cpu().detach().numpy()

        textsegs, _ = proposal_layer(pre_score, pre_reg, im_info)
        # Column 0 is the proposal score, columns 1..4 the proposal box.
        scores = textsegs[:, 0]
        textsegs = textsegs[:, 1:5]

        textdetector = TextDetector(DETECT_MODE='O')
        boxes, text_proposals = textdetector.detect(
            textsegs, scores[:, np.newaxis], img_res.shape[:2])
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # type it used to alias.
        boxes = np.array(boxes, dtype=int)
        text_proposals = np.array(text_proposals, dtype=int)

        # Restore the boxes to the original image size. The original guard
        # ``boxes is not None`` was always true after np.array(); an empty
        # detection list becomes a 1-D array, which cannot take the 2-D
        # indexing below, so test the rank instead.
        if boxes.ndim == 2:
            boxes[:, [0, 2, 4, 6]] = boxes[:, [0, 2, 4, 6]] / rw
            boxes[:, [1, 3, 5, 7]] = boxes[:, [1, 3, 5, 7]] / rh
            # Clamp x1 after widening the box by 5 px on the left.
            boxes[:, [0, 6]] = np.clip(boxes[:, [0, 6]] - 5, 0, ori_w - 1)
            # Clamp x2; the larger right margin keeps the box from cutting
            # into the text at the right edge.
            boxes[:, [2, 4]] = np.clip(boxes[:, [2, 4]] + 20, 0, ori_w - 1)

            # Clamp y1 / y2 after padding by 5 px above and below.
            boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]] - 5, 0, ori_h - 1)
            boxes[:, [5, 7]] = np.clip(boxes[:, [5, 7]] + 5, 0, ori_h - 1)

        # Scale every proposal back as (x, y) pairs: x by rw, y by rh.
        news_text_proposals = []
        for item in text_proposals:
            item = item.reshape(-1, 2)
            item_temp = np.zeros(item.shape)
            item_temp[:, 0] = item[:, 0] / rw
            item_temp[:, 1] = item[:, 1] / rh
            news_text_proposals.append(item_temp.reshape(-1).astype(int))

        return boxes, np.array(news_text_proposals)

コード例 #4

0

ファイルを表示

def main(argv=None):
    """Run CTPN detection over every image from ``get_images()``.

    Recreates ``FLAGS.output_path``, restores the newest checkpoint found in
    ``FLAGS.checkpoint_path`` and prints the detected boxes for each image.

    Args:
        argv: Unused; accepted so the function can serve as a
            ``tf.app.run`` entry point.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore through the moving-average variable mapping.
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # Reverse the channel axis of the loaded image.
                    img = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # cv2.imread returns None for unreadable files, so the
                    # slice raises. ``Exception`` (not a bare except) keeps
                    # Ctrl-C and SystemExit working.
                    print("Error reading image {}!".format(im_fn))
                    continue

                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                # Column 0 is the proposal score, columns 1..4 the box.
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is
                # the type it used to alias.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                for i, box in enumerate(boxes):
                    line = ",".join(str(box[k]) for k in range(8))
                    # NOTE(review): ``scores`` holds per-proposal scores;
                    # whether index i matches merged box i is not visible
                    # here -- confirm against TextDetector.
                    line += "," + str(scores[i]) + "\n"
                    print(line)

コード例 #5

0

ファイルを表示

    def find(self):
        """Detect text boxes in ``self.img_path`` and scale them back.

        Builds a fresh graph, restores the newest checkpoint from
        ``FLAGS.checkpoint_path`` and runs detection on the resized image.

        Returns:
            Integer NumPy array of boxes with the last element of every
            detected box dropped and the first eight values divided by the
            resize ratios and rounded with ``self.round_half_up``. If the
            image cannot be read, an empty ``(0, 8)`` array is returned.
        """
        os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
        tf.reset_default_graph()
        with tf.get_default_graph().as_default():
            input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
            input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

            global_step = self.get_global_step()
            bbox_pred, cls_pred, cls_prob = model.model(input_image)
            # Restore through the moving-average variable mapping.
            variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())

            with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
                ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
                model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)

                print('===============')

                try:
                    # Reverse the channel axis of the loaded image.
                    im = cv2.imread(self.img_path)[:, :, ::-1]
                except Exception:
                    # cv2.imread returns None for unreadable files, so the
                    # slice raises. The original printed and then fell
                    # through to an unbound ``im``; stop with no boxes.
                    print("Error reading image {}!".format(self.img_path))
                    return np.empty((0, 8), dtype=int)

                img, (rh, rw) = self.resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                # Column 0 is the proposal score, columns 1..4 the box.
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])

                # Undo the resize in place: even slots are divided by rw,
                # odd slots by rh.
                for box in boxes:
                    for box_idx in range(8):
                        ratio = rw if box_idx % 2 == 0 else rh
                        box[box_idx] = self.round_half_up(box[box_idx] / ratio)

                # Drop the trailing element of each box. ``np.int`` was
                # removed in NumPy 1.24; builtin ``int`` is what it aliased.
                boxes = np.array([box[:-1] for box in boxes], dtype=int)
                return boxes

コード例 #6

0

ファイルを表示

ファイル: test_net_2015.py プロジェクト: fanjuncai/Net

def ctpn(sess, net, image_name):
    """Time one CTPN detection pass over ``image_name``.

    The image is read, median-blurred with a kernel of 3, resized with the
    ``TextLineCfg`` scale settings, passed through ``test_ctpn`` and the
    resulting proposals are merged by ``TextDetector``. The boxes are handed
    to ``draw_boxes`` and the elapsed time is printed.

    Args:
        sess: Session forwarded to ``test_ctpn``.
        net: Network forwarded to ``test_ctpn``.
        image_name: Path of the image to process.

    Returns:
        ``total_time`` of the timer wrapped around the whole pass.
    """
    stopwatch = Timer()
    stopwatch.tic()

    blurred = cv2.medianBlur(cv2.imread(image_name), 3)
    resized, ratio = resize_im(
        blurred, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)

    proposal_scores, proposals = test_ctpn(sess, net, resized)

    detector = TextDetector()
    # Scores are passed as a column vector alongside the (height, width).
    text_lines = detector.detect(
        proposals, proposal_scores[:, np.newaxis], resized.shape[:2])
    draw_boxes(resized, image_name, text_lines, ratio)
    stopwatch.toc()

    report = 'Detection took {:.3f}s for {:d} object proposals'
    print(report.format(stopwatch.total_time, text_lines.shape[0]))
    return stopwatch.total_time

コード例 #7

0

ファイルを表示

def ctpn(image_url):
    '''
    Download an image and analyse it through the CTPN serving endpoint.

    :param image_url: URL of the image to download.
    :return img: image matrix (the resized image sent to the model)
    :return boxes: box coordinates as an integer array
    :return scores: confidence values of the proposals
    '''
    start = time.time()
    images = requests.get(image_url).content
    buf = np.asarray(bytearray(images), dtype="uint8")
    im = cv2.imdecode(buf, cv2.IMREAD_COLOR)

    # The resize ratios are not needed: boxes stay in resized coordinates.
    img, _ = resize_image(im)
    h, w, c = img.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    # The model server takes JSON, so the array is sent as nested lists.
    input_image = [img.tolist()]
    url = 'http://127.0.0.1:7501/v1/models/ctpn:predict'
    data = json.dumps({
        "name": 'tfserving-ctpn',
        "signature_name": 'predict_images',
        "inputs": input_image
    })
    result = requests.post(url, data=data).json()
    cls_prob_val = np.asarray(result['outputs']['cls_prob_output'],
                              dtype=np.float32)
    bbox_pred_val = np.asarray(result['outputs']['bbox_pred_output'],
                               dtype=np.float32)
    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)

    # Column 0 is the proposal score, columns 1..4 the proposal box.
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]

    textdetector = TextDetector(DETECT_MODE='H')
    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
    # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the type it
    # used to alias.
    boxes = np.array(boxes, dtype=int)
    cost_time = (time.time() - start)
    print("cost time: {:.2f}s".format(cost_time))
    return img, boxes, scores

コード例 #8

0

ファイルを表示

ファイル: ctpn.py プロジェクト: Aoi-hosizora/ctpn-crnn-backend

def ctpnParse(im):
    '''
    Convert an image into its group of detected text regions.

    :param im: image array with three axes; its channel axis is reversed
        before detection.
    :return: dict with "size" (first two dimensions of the image), "cnt"
        (number of boxes) and "frames" (per box: four corner points scaled
        back to the input image, and a score).
    '''
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore through the moving-average variable mapping.
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            ####################################################################################
            # read img

            start = time.time()
            try:
                im = im[:, :, ::-1]
            except Exception:
                # Not a 3-axis array (for example None): give up, as before.
                # ``Exception`` instead of a bare except keeps Ctrl-C usable.
                exit(1)

            ####################################################################################
            # resize

            img, (rh, rw) = resize_image(im)
            print("Ritu ", rh, rw)
            print("Mae: ", im.shape[0], im.shape[1])
            print("Ushiro: ", img.shape[0], img.shape[1])

            # Sample output -- rh is the shape[0] ratio, rw the shape[1] one:
            # Ritu: 0.6375 0.6333333333333333
            # Mae:  1280 960
            # Ushiro:  816 608

            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                    feed_dict={input_image: [img],
                                                                input_im_info: im_info})

            ####################################################################################
            # parse

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            # Column 0 is the proposal score, columns 1..4 the box.
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            # textdetector = TextDetector(DETECT_MODE='H')
            textdetector = TextDetector(DETECT_MODE='O')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
            # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is the
            # type it used to alias.
            boxes = np.array(boxes, dtype=int)

            ####################################################################################
            # data

            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            # frames

            frames = []

            for i, box in enumerate(boxes):
                # Corner k sits at slots (2k, 2k+1). Fixed: x must be undone
                # with the width ratio rw and y with the height ratio rh;
                # the original had them swapped. The inner index is named k
                # so it no longer reuses the outer loop variable i.
                pnts = [
                    {
                        "x": int(box[k * 2] / rw),
                        "y": int(box[k * 2 + 1] / rh)
                    }
                    for k in range(4)
                ]
                frames.append({
                    "points": pnts,
                    # NOTE(review): ``scores`` holds per-proposal scores;
                    # whether index i matches merged box i is not visible
                    # here -- confirm against TextDetector.
                    "score": scores[i]
                })

            return {
                # NOTE(review): "x" carries shape[0] and "y" shape[1]; kept
                # as is because consumers may rely on it -- confirm.
                "size": {
                    "x": im.shape[0],
                    "y": im.shape[1]
                },
                "cnt": len(boxes),
                "frames": frames
            }

            '''

コード例 #9

0

ファイルを表示

ファイル: demo.py プロジェクト: zhang-linrui/bank-card-number-recognition-CRNN

def main(argv=None):
    """Detect text in the top third of every image and save each crop.

    Recreates ``FLAGS.output_path``, restores the newest checkpoint from
    ``FLAGS.checkpoint_path`` and, for every image from ``get_images()``,
    writes one cropped image per detected box plus a ``.txt`` file listing
    the box coordinates.

    Args:
        argv: Unused; accepted so the function can serve as a
            ``tf.app.run`` entry point.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore through the moving-average variable mapping.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                try:
                    im = cv2.imread(im_fn)
                    # Fixed: ``/`` yields a float slice bound on Python 3,
                    # which raised TypeError and made every image look
                    # unreadable. ``//`` keeps the Python 2 result.
                    im = im[:im.shape[0] // 3, :, ::-1]  # only for roi
                except Exception:
                    # cv2.imread returns None for unreadable files.
                    # ``Exception`` instead of a bare except keeps Ctrl-C
                    # usable.
                    print("Error reading image {}!".format(im_fn))
                    continue

                start0 = time.time()
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                print("resize_image cost time: {:.2f}s".format(time.time() -
                                                               start0))

                start = time.time()
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })
                print("sess.run cost time: {:.2f}s".format(time.time() -
                                                           start))

                start = time.time()
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                # Column 0 is the proposal score, columns 1..4 the box.
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]
                print("proposal_layer cost time: {:.2f}s".format(time.time() -
                                                                 start))

                start = time.time()
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is
                # the type it used to alias.
                boxes = np.array(boxes, dtype=int)
                print("textdetector cost time: {:.2f}s".format(time.time() -
                                                               start))

                print("total cost time: {:.2f}s".format(time.time() - start0))

                # Split once so only the final extension gets the index; the
                # original ``.replace('.', ...)`` rewrote every dot.
                stem, extension = os.path.splitext(os.path.basename(im_fn))

                for i, box in enumerate(boxes):
                    # Fixed: crop into a separate name. The original rebound
                    # ``img``, so each later box was cut from the previous
                    # crop instead of the full image.
                    crop = img[box[1]:box[5], box[0]:box[4]]
                    if crop.size == 0:
                        # Degenerate box; cv2.resize rejects empty input.
                        continue
                    # Fixed: fx is the horizontal factor (undo rw) and fy
                    # the vertical one (undo rh); they were swapped.
                    crop = cv2.resize(crop,
                                      None,
                                      None,
                                      fx=1.0 / rw,
                                      fy=1.0 / rh,
                                      interpolation=cv2.INTER_LINEAR)
                    # Reverse the channel axis again before writing.
                    cv2.imwrite(
                        os.path.join(
                            FLAGS.output_path,
                            '{}_{}{}'.format(stem, i, extension)),
                        crop[:, :, ::-1])
                # Alternative output (disabled): outline every box on the
                # full image with cv2.polylines, resize it back and write it
                # under the original file name.

                with open(
                        os.path.join(FLAGS.output_path, stem) + ".txt",
                        "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        # NOTE(review): ``scores`` holds per-proposal
                        # scores; whether index i matches merged box i is
                        # not visible here -- confirm.
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

コード例 #10

0

ファイルを表示

    def start(self):
        """Run the detection worker loop until a stop signal arrives.

        Restores the newest checkpoint from ``self.checkpoint_path``, then
        repeatedly takes an image path from ``self.workerQueue``, runs CTPN
        detection on it and passes the wrapped result to ``self.callback``
        when one is set. With ``self.debug`` enabled, a JSON dump, an
        annotated image and a ``.txt`` box listing are also written to
        ``self.outputPath``.
        """
        self.running = True

        tf.app.flags.DEFINE_string('gpu', '0', '')
        # Path the trained model is loaded from.
        tf.app.flags.DEFINE_string('checkpoint_path', self.checkpoint_path, '')

        # Graph
        with tf.compat.v1.get_default_graph().as_default():
            # Placeholder: input image.
            input_image = tf.compat.v1.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
            # Placeholder: input image info.
            input_im_info = tf.compat.v1.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

            # Create the global_step variable.
            global_step = tf.compat.v1.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

            # TensorFlow ops of the model.
            bbox_pred, cls_pred, cls_prob = model.model(input_image)

            variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
            saver = tf.compat.v1.train.Saver(variable_averages.variables_to_restore())

            # Session configuration.
            sessionConfig = tf.compat.v1.ConfigProto(allow_soft_placement=True)
            # Fixed GPU memory fraction (disabled):
            # sessionConfig.gpu_options.per_process_gpu_memory_fraction = 0.3
            # Allocate GPU memory on demand.
            sessionConfig.gpu_options.allow_growth = True

            with tf.compat.v1.Session(config=sessionConfig) as sess:

                # Locate the checkpoint (ckpt) to load parameters from.
                ckpt_state = tf.compat.v1.train.get_checkpoint_state(self.checkpoint_path)

                # Model path
                model_path = os.path.join(self.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))

                logger.info(u'Restore from {}'.format(model_path))

                # Restore the variables.
                saver.restore(sess, model_path)

                while self.running:

                    logger.info(u'等待接收图片')

                    imgFilePath = self.workerQueue.get()
                    if self.is_stop_signal(imgFilePath):
                        logger.info(u'接收到队列停止信号')
                        break

                    logger.info(u'开始处理图片： {}'.format(imgFilePath))

                    # Start timing.
                    start = time.time()
                    try:
                        im = cv2.imread(imgFilePath)[:, :, ::-1]
                    except Exception:
                        # cv2.imread returns None for unreadable files.
                        # ``Exception`` instead of a bare except keeps
                        # Ctrl-C and SystemExit able to stop the worker.
                        logger.exception(sys.exc_info())
                        continue

                    # Shrink the image (the original note says to at most
                    # 600 * 1200).
                    img, (rh, rw) = self.resize_image(im)
                    # Height, width, channel count.
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])

                    # Run the network.
                    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                        feed_dict={input_image: [img],
                                                                    input_im_info: im_info})

                    # Original note: the proposal layer refines the anchors
                    # with the RPN regression values and applies sorting,
                    # NMS and similar post-processing.
                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                    # Column 0 is the proposal score, columns 1..4 the box.
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])

                    # Stop timing.

                    logger.info(u'总计耗时： {}'.format(time.time() - start))

                    if self.debug:
                        with open(os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0]) + ".json",
                                "w") as f:
                            f.write(json.dumps(self.wrapResult(boxes, scores)))

                        # Integer copy for drawing and the text dump. Kept
                        # under its own name so the callback below gets the
                        # same ``boxes`` with or without debug; the original
                        # rebound ``boxes`` here. ``np.int`` was removed in
                        # NumPy 1.24; builtin ``int`` is what it aliased.
                        int_boxes = np.array(boxes, dtype=int)

                        for box in int_boxes:
                            cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                                        thickness=2)
                        # Fixed: fx is the horizontal factor (undo rw) and
                        # fy the vertical one (undo rh); they were swapped.
                        img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh, interpolation=cv2.INTER_LINEAR)

                        cv2.imwrite(os.path.join(self.outputPath, os.path.basename(imgFilePath)), img[:, :, ::-1])

                        with open(os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0]) + ".txt",
                                "w") as f:
                            for i, box in enumerate(int_boxes):
                                line = ",".join(str(box[k]) for k in range(8))
                                line += "," + str(scores[i]) + "\n"
                                f.write(line)

                    if self.callback:
                        self.callback(fileName=imgFilePath, ctpnRes=self.wrapResult(boxes, scores))

コード例 #11

0

ファイルを表示

def main(argv=None):
    """Detect text lines in every image, OCR each one and save the result.

    Recreates ``FLAGS.output_path``, restores the newest checkpoint from
    ``FLAGS.checkpoint_path`` and, for every image from ``get_images()``,
    writes an annotated image (box outlines plus recognised text) and a
    ``.txt`` file listing the box coordinates.

    Args:
        argv: Unused; accepted so the function can serve as a
            ``tf.app.run`` entry point.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore through the moving-average variable mapping.
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # Reverse the channel axis of the loaded image.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # cv2.imread returns None for unreadable files.
                    # ``Exception`` instead of a bare except keeps Ctrl-C
                    # usable.
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                # Column 0 is the proposal score, columns 1..4 the box.
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; builtin ``int`` is
                # the type it used to alias.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # OCR reads from an untouched copy so the outlines and
                # labels drawn on ``img`` do not end up in later crops.
                pristine = img.copy()

                for box in boxes:
                    quad = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [quad], True, color=(0, 255, 0),
                                  thickness=2)

                    # Points 0 and 2 of the quad bound the OCR crop.
                    x0, y0 = int(quad[0][0][0]), int(quad[0][0][1])
                    x1, y1 = int(quad[2][0][0]), int(quad[2][0][1])
                    roi = pristine[y0:y1, x0:x1]
                    if roi.size == 0:
                        # Nothing to recognise in a degenerate crop.
                        continue

                    text = pytesseract.image_to_string(roi, config=config)

                    # Transliterate to ASCII before drawing the label.
                    text = unidecode.unidecode(text)
                    cv2.putText(img, text, (x0, y0 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

                # Fixed: fx is the horizontal factor (undo rw) and fy the
                # vertical one (undo rh); they were swapped.
                img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh, interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        # NOTE(review): ``scores`` holds per-proposal
                        # scores; whether index i matches merged box i is
                        # not visible here -- confirm.
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

# if __name__ == '__main__':
    # tf.app.run()

# def delete_prev(path):
    
#     for the_file in os.listdir(path):
#         file_path = os.path.join(path, the_file)
#         try:
#             if os.path.isfile(file_path):
#                 os.unlink(file_path)
#             elif os.path.isdir(file_path): shutil.rmtree(file_path)
#         except Exception as e:
#             print(e)
#             continue

# app = Flask(__name__)
# app._static_folder = os.path.basename('static')

# UPLOAD_FOLDER = os.path.join('main', 'uploads')
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# @app.route('/')
# def hello_world():
#     return render_template('home_al.html')

# @app.route('/upload', methods=['POST', 'GET'])
# def upload_file():

#     if request.method == 'POST':
#         file = request.files['image']
#         filename = file.filename

#         # prepare directory for processing
#         delete_prev(app.config['UPLOAD_FOLDER'])
#         f = os.path.join(app.config['UPLOAD_FOLDER'], filename)

#         # add your custom code to check that the uploaded file is a valid image and not a malicious file (out-of-scope for this post)
#         file.save(f)

#         tf.app.run()

#         print('done')
#         processed_file = os.path.join('data/res', filename)

#         # return render_template('home_al.html', processed_file = processed_file)
#         return redirect(url_for('send_file', filename=filename))
#         print('redirected to', url_for('send_file', filename=filename))
#     else:

#         print('No request')
#         return render_template('home_al.html')

# # @app.route('/show/<filename>')
# # def uploaded_file(filename):
# #     filename = 'http://127.0.0.1:5000/upload/' + filename
# #     return render_template('home_al.html')

# @app.route('/uploaded/<filename>')
# def send_file(filename):
#     return send_from_directory('data/res', filename)

# app.run(debug=True)

コード例 #12

0

ファイルを表示

ファイル: demo.py プロジェクト: aswin-jacob-thomas/text-detection-ctpn

def main(im=None):
    """Run CTPN text detection on a single image and return rescaled boxes.

    The graph is built from ``model.model``, weights are restored from the
    latest checkpoint recorded under ``checkpoints_mlt/`` and one forward
    pass is run on ``im``.

    Args:
        im: Image handed to ``resize_image``; presumably an RGB array such as
            ``cv2.imread(path)[:, :, ::-1]`` -- TODO confirm.  The ``None``
            default is only a placeholder, a real image must be supplied.

    Returns:
        list: one ``[box[0], box[1], box[2], box[7]]`` entry per detected
        box, taken after those four values were divided by the resize
        ratios returned by ``resize_image``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    checkpoint_path = 'checkpoints_mlt/'
    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.compat.v1.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={
                                                       input_image: [img],
                                                       input_im_info: im_info
                                                   })

            # Column 0 of the proposals is the score, columns 1-4 the segment.
            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is
            # exactly what that alias used to resolve to.
            boxes = np.array(boxes, dtype=int)

            return_array = []
            for box in boxes:
                # Assigning into the integer array truncates the quotient.
                # NOTE(review): entries 0/2 are divided by rh and 1/7 by rw;
                # confirm this matches the order resize_image returns them in.
                box[0] = box[0] / rh
                box[2] = box[2] / rh
                box[1] = box[1] / rw
                box[7] = box[7] / rw
                return_array.append([box[0], box[1], box[2], box[7]])
            return return_array

コード例 #13

0

ファイルを表示

def main(argv=None):
    """Detect text in every input image and save the most elongated crop.

    ``FLAGS.output_path`` is wiped and recreated, the latest checkpoint in
    ``FLAGS.checkpoint_path`` is restored, and each file from
    ``get_images()`` is processed.  For every detected box a padded crop is
    taken; whenever a box sets a new maximum width/height ratio for the
    image and that ratio exceeds 3, the crop is written to
    ``FLAGS.output_path`` under the image's basename (later writes replace
    earlier ones).

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                try:
                    # imread yields None for unreadable files, so the
                    # subscript is what raises here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; ``int`` is equivalent.
                boxes = np.array(boxes, dtype=int)

                if len(boxes) != 1:
                    print(im_fn, len(boxes))

                # Largest width/height ratio seen so far for this image.
                flag = -1
                for box in boxes:
                    # First eight values are read as four (x, y) corners.
                    arr = box[:8].astype(np.int32).reshape((-1, 2))
                    x1 = min(arr[:, 0])
                    x2 = max(arr[:, 0])
                    y1 = min(arr[:, 1])
                    y2 = max(arr[:, 1])
                    pad_w = int((y2 - y1) * 0.5)
                    # Clamp the padded origin at 0: a negative slice start
                    # would wrap around to the far edge and give an empty or
                    # wrong crop for boxes near the top/left border.
                    img_cp = img[max(y1 - 10, 0):y2 + 10,
                                 max(x1 - pad_w, 0):x2 + pad_w, :]

                    ratio = (x2 - x1) / (y2 - y1)
                    if flag < ratio:
                        flag = ratio

                        if flag > 3:
                            cv2.imwrite(
                                os.path.join(FLAGS.output_path,
                                             os.path.basename(im_fn)),
                                img_cp[:, :, ::-1])

コード例 #14

0

ファイルを表示

def main(argv):
    """Detect text boxes in every input image and save one crop per box.

    ``FLAGS.output_path`` is wiped and recreated, the latest checkpoint in
    ``FLAGS.checkpoint_path`` is restored, and each file from
    ``get_images()`` is processed.  Crops are written as ``<i>.png`` where
    ``i`` is the box index within the current image, so crops of a later
    image replace those of an earlier one.  The written paths are collected
    in a local list that is not returned.

    Args:
        argv: Unused.
    """
    of_list = []
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            print(im_fn_list)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # imread yields None for unreadable files, so the
                    # subscript is what raises here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                print("printing im.shape")
                print(im.shape)
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; ``int`` is equivalent.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Separate resized copy for cropping: the polylines below are
                # drawn on ``img`` in place.  Computed once per image rather
                # than once per box.
                img1, _ = resize_image(im)

                for i, box in enumerate(boxes):
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # The first eight values are four (x, y) corner pairs,
                    # the same layout cv2.polylines consumes above.
                    box_arr = box[:8].astype(np.int32)
                    left = box_arr[0]
                    top = box_arr[1]
                    width = box_arr[2] - box_arr[0]
                    height = box_arr[5] - box_arr[3]

                    img2 = img1[top:top + height, left:left + width, :]
                    crop_path = os.path.join(FLAGS.output_path,
                                             str(i) + '.png')
                    # ``im`` was reversed to RGB after imread; imwrite expects
                    # BGR, so flip the channels back before saving.
                    cv2.imwrite(crop_path, img2[:, :, ::-1])
                    of_list.append(crop_path)

コード例 #15

0

ファイルを表示

ファイル: test.py プロジェクト: gengyi/RMBRecognization

def main(argv=None):
    """Crop the detected text region out of a slice of the input images.

    Reads the module-level names ``train_or_test_1800``, ``a`` and ``b``:
    the output directory is reset only for the two ``no_seperate_mianzhi_*``
    modes, and only ``im_fn_list[int(a):b]`` is processed.  Each image is
    cropped to its detected box, resized by the inverse resize ratios and
    written to ``FLAGS.output_path``.  When processing an image fails, a
    hard-coded fallback image is written under an ``xzywa``-prefixed name
    instead.

    Args:
        argv: Unused.
    """
    if train_or_test_1800 in ('no_seperate_mianzhi_train',
                              'no_seperate_mianzhi_test'):
        if os.path.exists(FLAGS.output_path):
            shutil.rmtree(FLAGS.output_path)
        os.makedirs(FLAGS.output_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            ii = a

            for im_fn in im_fn_list[int(a):b]:  # adjust the slice here
                ii += 1
                print(str(ii) + '===============' + str(ii))
                print(im_fn)
                start = time.time()
                try:
                    # imread yields None for unreadable files, so the
                    # subscript is what raises here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                try:
                    img, (rh, rw) = resize_image(im)
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })
                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    scores = textsegs[:, 0]
                    # N polys per image; textsegs holds the four coordinates
                    # of each of them.
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')

                    # xzy: detect() was modified internally to return a
                    # single box only.
                    boxes = textdetector.detect(
                        textsegs, scores[:, np.newaxis],
                        img.shape[:2])
                    # ``np.int`` was removed in NumPy 1.24; ``int`` is
                    # equivalent.
                    boxes = np.array(boxes, dtype=int)

                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for box in boxes:
                        # xzy: crop to the box
                        img = img[int(box[1]):int(box[5]),
                                  int(box[0]):int(box[2])]

                    img = cv2.resize(img,
                                     None,
                                     None,
                                     fx=1.0 / rh,
                                     fy=1.0 / rw,
                                     interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     os.path.basename(im_fn)), img[:, :, ::-1])
                except Exception as e:
                    # xzy: seen with "Corrupt JPEG data: premature end of
                    # data segment".  Report the cause instead of hiding it.
                    print(repr(e))
                    # xzy: WBNGQ9R7.jpg may be the one that fails
                    immmm = cv2.imread(
                        "../../../dataset_warm_up/train_data/13X6EGWI.jpg"
                    )
                    # The fallback itself may be unreadable (imread -> None);
                    # skip the write rather than abort the whole run.
                    if immmm is not None:
                        cv2.imwrite(
                            os.path.join(
                                FLAGS.output_path,
                                "xzywa" + str(os.path.basename(im_fn))),
                            immmm[:, :, ::-1])
                    print(str(im_fn) + " is broken!!!!!!!!")

コード例 #16

0

ファイルを表示

ファイル: demo.py プロジェクト: dangvansam98/text-detection-recognize-ctpn-tesseract

def main(argv=None):
    """Detect text in one hard-coded image and save the region covering it.

    The image ``hoadontiendien-3.png`` is loaded through ``rotate_img`` and
    written to ``rotated2.png``.  After detection with the checkpoint in
    ``checkpoints_mlt/``, the rectangle spanning all detected boxes is cut
    from the resized image and written to ``rotate_cuted2.png``.  Nothing is
    cropped when no box is detected.

    Args:
        argv: Unused.
    """
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        print("init sess")
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state('checkpoints_mlt/')
            model_path = os.path.join(
                'checkpoints_mlt/',
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            print('===============')
            im = rotate_img('hoadontiendien-3.png')
            print(im.shape)

            cv2.imwrite('rotated2.png', im[:, :, :])
            print("write rotate img")
            start = time.time()

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={
                                                       input_image: [img],
                                                       input_im_info: im_info
                                                   })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # ``np.int`` was removed in NumPy 1.24; ``int`` is equivalent.
            boxes = np.array(boxes, dtype=int)

            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            # min()/max() below raise ValueError on an empty sequence, so
            # stop cleanly when nothing was detected.
            if boxes.size == 0:
                print("no text boxes detected")
                return

            # Rectangle spanning every box: entries 0/1 give the minimum
            # corner, entries 4/5 the maximum corner.
            box_minx = min([b[0] for b in boxes])
            box_miny = min([b[1] for b in boxes])
            box_maxx = max([b[4] for b in boxes])
            box_maxy = max([b[5] for b in boxes])
            print(box_minx, box_miny)
            print(box_maxx, box_maxy)
            # This slice is a view of ``img``: the outlines drawn on ``img``
            # below are therefore visible in the saved crop as well.
            crop_img = img[box_miny:box_maxy, box_minx:box_maxx]
            print(crop_img.shape)

            for box in boxes:
                cv2.polylines(img,
                              [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                              True,
                              color=(0, 255, 0),
                              thickness=1)
            img = cv2.resize(img,
                             None,
                             None,
                             fx=1.0 / rh,
                             fy=1.0 / rw,
                             interpolation=cv2.INTER_LINEAR)

            cv2.imwrite('rotate_cuted2.png', crop_img[:, :, :])

コード例 #17

0

ファイルを表示

def process():
    """Locate text in each input image, OCR every box and yield progress.

    ``FLAGS.output_path`` is wiped and recreated and the latest checkpoint
    in ``FLAGS.checkpoint_path`` is restored.  For every file returned by
    ``get_images()`` the text boxes are detected, each box is thresholded
    and passed to Tesseract, and four-digit results are appended to
    ``OCR_text/<folder>/whole-<id>.txt`` below the module-level ``root``.

    Yields:
        dict: the same ``output`` dictionary once per image (it is updated
        in place between yields, so copy it if a snapshot is needed) with
        the keys ``path``, ``percentage``, ``locate_time``, ``ocr_time``,
        ``ocr_text`` and ``err``.  ``err`` is True when the image could not
        be read.
    """
    output = {
        'path': None,
        'percentage': 0,
        'locate_time': 0,
        'ocr_time': 0,
        'ocr_text': [],
        'err': False
    }

    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            im_fn_list = get_images()
            start_all = time.time()

            for count, im_fn in enumerate(im_fn_list):
                output["err"] = False
                output["path"] = im_fn
                output["ocr_text"].clear()
                output["percentage"] = count / len(im_fn_list)
                print('===============')
                # e.g. ../four_angles/recording_2019_10_30/bbq/cam_delicacies-17760-17880/73-500_0.jpg
                print(im_fn)
                start = time.time()
                try:
                    # imread yields None for unreadable files, so the
                    # subscript is what raises here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    output["err"] = True
                    yield output
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                # DETECT_MODE can be H / O depending on context
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; ``int`` is equivalent.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                output["locate_time"] = cost_time
                print("cost time: {:.2f}s".format(cost_time))

                # ---- text recognition ----
                text_start = time.time()
                # NOTE(review): grayImage is never used below; the threshold
                # is applied to the colour ``img`` -- confirm the intent.
                grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # Region of interest from box entries 0/1 and 4/5.
                    startX = box[0]
                    startY = box[1]
                    endX = box[4]
                    endY = box[5]
                    _, thresh = cv2.threshold(img, 127, 255,
                                              cv2.THRESH_BINARY_INV)
                    roi = thresh[startY:endY, startX:endX]

                    # Tesseract options: "-l digits" selects the "digits"
                    # language data, "--oem 1" the LSTM engine and "--psm 7"
                    # treats the ROI as a single line of text.
                    config = ("-l digits --oem 1 --psm 7")
                    text = pytesseract.image_to_string(roi, config=config)
                    output["ocr_text"].append(text)

                    # Only four-digit, purely numeric results are recorded.
                    if not text.isdigit():
                        continue
                    print(text)
                    if len(text) != 4:
                        continue

                    data = im_fn.split("/")
                    n = len(data)
                    fn = data[n - 1]  # 73-500_0.jpg
                    # recording_2019_10_30/bbq/cam_bbq-8000-18120
                    folder = data[n - 4] + '/' + data[n - 3] + '/' + data[n - 2]
                    print(folder + '/' + fn)
                    id_num = fn.split("-")[0]  # 73

                    directory = 'OCR_text/' + folder + '/'
                    directory = os.path.join(root, directory)
                    os.makedirs(directory, exist_ok=True)
                    # ``with`` closes the file even if the write fails.
                    with open(directory + 'whole-' + id_num + '.txt',
                              'a') as file_whole:
                        file_whole.write(folder + '/' + fn + ':' + text +
                                         '\n')

                output["ocr_time"] = time.time() - text_start

                yield output
            cost_time_all = (time.time() - start_all)
            print("Total cost time: {:.2f}s".format(cost_time_all))

コード例 #18

0

ファイルを表示

ファイル: demo.py プロジェクト: deeplearningvn/text-detection

def main(argv=None):
    """Detect text boxes in every input image and export or display them.

    The latest checkpoint in ``FLAGS.checkpoint_path`` is restored and each
    file from ``get_images()`` is processed.  When ``FLAGS.output_path`` is
    set, the source image is copied to ``<output_path>/image`` and the
    rescaled box coordinates are written to
    ``<output_path>/label/gt_<name>.txt``; otherwise the boxes are drawn on
    the image and shown in a window until a key is pressed.

    Args:
        argv: Unused.
    """
    print('Mode :%s' % FLAGS.detect_mode)

    # Project modules are imported relative to the working directory.
    sys.path.append(os.getcwd())

    from utils.text_connector.detectors import TextDetector
    from nets import model_train as model
    from utils.rpn_msr.proposal_layer import proposal_layer

    if FLAGS.output_path:
        # Existing output is kept; only the missing directories are created.
        image_path = os.path.join(FLAGS.output_path, "image")
        label_path = os.path.join(FLAGS.output_path, "label")
        for directory in (FLAGS.output_path, image_path, label_path):
            os.makedirs(directory, exist_ok=True)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()

            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                try:
                    im = cv2.imread(im_fn)
                except Exception:
                    im = None
                # cv2.imread signals an unreadable file by returning None
                # rather than raising, so it has to be checked explicitly.
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im, FLAGS.image_size)
                img = cv2.detailEnhance(img)

                # Timing covers inference and box construction only.
                start = time.time()
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                # Line thickness grows with the height of the source image.
                thickness = max(1, int(im.shape[0] / 400))
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE=FLAGS.detect_mode)
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                boxes = np.array(boxes, dtype=np.float64)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Map the coordinates back to the scale of the source image.
                # NOTE(review): both the even (x) and the odd (y) entries are
                # divided by rh and rw is never used -- confirm whether the x
                # entries should be divided by rw instead.
                for box in boxes:
                    box[:8][::2] /= rh
                    box[1:8][::2] /= rh

                basename = os.path.basename(im_fn)
                if FLAGS.output_path:

                    bfn, ext = os.path.splitext(basename)
                    gt_path = os.path.join(FLAGS.output_path, "label",
                                           'gt_' + bfn + '.txt')
                    img_path = os.path.join(FLAGS.output_path, "image",
                                            basename)
                    # Keep the source image untouched next to its label file.
                    shutil.copyfile(im_fn, img_path)
                    with open(gt_path, "w") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(int(box[k])) for k in range(8))
                            # The label is simply the box index modulo 10.
                            line += "," + str(i % 10) + "\r\n"
                            f.write(line)
                else:
                    # Draw the outlines on the source image and show it.
                    for box in boxes:
                        points = [box[:8].astype(np.int32).reshape((-1, 1, 2))]
                        cv2.polylines(im,
                                      points,
                                      True,
                                      color=(0, 255, 0),
                                      thickness=thickness,
                                      lineType=cv2.LINE_AA)
                    cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                    cv2.resizeWindow(basename, w, h)
                    cv2.imshow(basename, im)
                    cv2.waitKey(0)

コード例 #19

0

ファイルを表示

ファイル: demo.py プロジェクト: sinny777/OCR

def main(argv=None):
    """Detect text boxes in every input image and run OCR on each box crop.

    Recreates ``FLAGS.output_path`` from scratch, restores the checkpoint
    recorded in ``FLAGS.checkpoint_path`` and then, for each path returned by
    ``get_images()``:

    * runs the network and ``TextDetector`` to obtain text boxes,
    * prints the result of ``textExtractor.extractData`` for the box crops,
    * writes the annotated image and a ``<name>.txt`` file with one line per
      box (eight coordinates followed by a score) into the output directory.

    Args:
        argv: Unused.
    """
    # Start from an empty output directory; previous results are deleted.
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    textExtractor = TessaractImpl(CONFIG)

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Variables are restored through the ExponentialMovingAverage mapping.
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # Reverse the channel axis of the image loaded by OpenCV.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # cv2.imread yields None for unreadable files, so the
                    # subscript raises.  Catching Exception (not a bare
                    # except) keeps Ctrl-C / SystemExit working.
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                # Column 0 of the proposals is the score, columns 1-4 the box.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; it was an alias of the
                # builtin ``int``, so this keeps the same dtype.
                boxes = np.array(boxes, dtype=int)
                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                dataBoxes = []
                for box in boxes:
                    bbx_data = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    # Draws the outline onto ``img`` in place, so the crop
                    # taken below is cut from the annotated image.
                    cv2.polylines(img, [bbx_data], True, color=(0, 255, 0),
                                  thickness=2)
                    startX, startY, endX, endY = crop_image_box(bbx_data)
                    crop_img = img[startY:endY, startX:endX]
                    dataBoxes.append({"boxImg": crop_img})

                print(textExtractor.extractData(dataBoxes))
                # NOTE(review): OpenCV's fx scales width and fy scales height;
                # if resize_image returns (height_ratio, width_ratio) these two
                # factors are swapped -- confirm against resize_image.
                img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
                    for i, box in enumerate(boxes):
                        # NOTE(review): ``scores`` is indexed per proposal while
                        # ``boxes`` comes from TextDetector.detect; confirm that
                        # scores[i] is the score intended for box i.
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

コード例 #20

0

ファイルを表示

ファイル: predict.py プロジェクト: laugha/cptn-crnn

def main(argv=None):
    """Detect text boxes in every input image and save one crop per box.

    Restores the checkpoint recorded in ``FLAGS.checkpoint_path`` and then,
    for each path returned by ``get_images()``, rotates the image, runs the
    network and ``TextDetector`` and:

    * appends one line per box (image name, eight coordinates, box index,
      score) to ``<FLAGS.output_path>/txt/cpth_result.txt``,
    * writes the crop of each box to ``<FLAGS.output_path>/img/``.

    This function does not create the ``txt`` and ``img`` directories; they
    must already exist.

    Args:
        argv: Unused.
    """
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Variables are restored through the ExponentialMovingAverage mapping.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                # The image is read only here, inside the try: the earlier
                # extra imread + ``.shape`` access crashed on unreadable
                # files before this handler could skip them.
                try:
                    im = cv2.imread(im_fn)[:, :, ::-1]
                    # Rotate vertical images: transpose followed by a flip
                    # around the x-axis is a 90-degree counter-clockwise turn.
                    im = cv2.transpose(im)
                    im = cv2.flip(im, 0)
                except Exception:
                    # cv2.imread yields None for unreadable files.  Catching
                    # Exception (not a bare except) keeps Ctrl-C working.
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })
                # Column 0 of the proposals is the score, columns 1-4 the box.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; it was an alias of the
                # builtin ``int``, so this keeps the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # NOTE(review): OpenCV's fx scales width and fy scales height;
                # if resize_image returns (height_ratio, width_ratio) these two
                # factors are swapped.  The crop arithmetic below uses the
                # same pairing (y / rw, x / rh), so change both together.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)

                image_name = os.path.basename(im_fn)
                stem, ext = os.path.splitext(image_name)
                with open(
                        os.path.join(FLAGS.output_path, 'txt',
                                     "cpth_result.txt"), "a") as f:
                    for i, box in enumerate(boxes):
                        # NOTE(review): ``scores`` is indexed per proposal
                        # while ``boxes`` comes from TextDetector.detect;
                        # confirm scores[i] is the score meant for box i.
                        fields = [image_name]
                        fields.extend(str(box[k]) for k in range(8))
                        fields.append(str(i))
                        fields.append(str(scores[i]))
                        f.write(",".join(fields) + "\r\n")

                        # Map the box from the network-input scale to the
                        # scale of the image resized above.
                        maxy = int(max(box[1:8:2]) / rw)
                        miny = int(min(box[1:8:2]) / rw)
                        maxx = int(max(box[:8:2]) / rh)
                        minx = int(min(box[:8:2]) / rh)
                        img_new = img[miny:maxy, minx:maxx]
                        # Insert the box index before the extension.  Using
                        # splitext (not replace('.jpg', ...)) keeps crops of
                        # non-.jpg inputs from overwriting each other.
                        cv2.imwrite(
                            os.path.join(FLAGS.output_path, 'img',
                                         stem + '_' + str(i) + ext), img_new)

コード例 #21

0

ファイルを表示

ファイル: demo.py プロジェクト: dangvansam98/text-detection-recognize-ctpn-tesseract

def main(argv=None):
    """Detect text boxes in every input image and OCR them with Tesseract.

    Recreates ``FLAGS.output_path`` from scratch, restores the checkpoint
    recorded in ``FLAGS.checkpoint_path`` and then, for each path returned by
    ``get_images()``:

    * runs the network and ``TextDetector`` to obtain text boxes,
    * outlines each box on the image and passes a grayscale, sharpened crop
      to ``pytesseract.image_to_string``,
    * writes the annotated image and a UTF-8 ``<name>.txt`` file with one
      line per box (eight coordinates followed by the recognised text).

    Args:
        argv: Unused.
    """
    # Start from an empty output directory; previous results are deleted.
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    print(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Variables are restored through the ExponentialMovingAverage mapping.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # Reverse the channel axis of the image loaded by OpenCV
                    # (BGR -> RGB); it is reversed back before imwrite below.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # cv2.imread yields None for unreadable files.  Catching
                    # Exception (not a bare except) keeps Ctrl-C working.
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                # Column 0 of the proposals is the score, columns 1-4 the box.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed in NumPy 1.24; it was an alias of the
                # builtin ``int``, so this keeps the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                texts = []
                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)
                    # Pad the crop by 5 px above and below.  Clamp the start
                    # indices at 0: a negative slice start would wrap around
                    # to the end of the array and give a wrong/empty crop.
                    top = max(box[1] - 5, 0)
                    left = max(box[0], 0)
                    crop_img = img[top:box[5] + 5, left:box[4]]
                    # ``img`` is in RGB order here (see the channel reversal
                    # after imread), so use the RGB conversion code.
                    crop_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2GRAY)
                    crop_img = unsharp_mask(crop_img)
                    try:
                        text = pytesseract.image_to_string(
                            crop_img, config='-l vie --psm 13')
                    except Exception:
                        # Best effort: keep going and record a placeholder.
                        print("OCR Error")
                        text = "error"
                    print(text)
                    texts.append(text)

                # NOTE(review): OpenCV's fx scales width and fy scales height;
                # if resize_image returns (height_ratio, width_ratio) these two
                # factors are swapped -- confirm against resize_image.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                with open(os.path.join(
                        FLAGS.output_path,
                        os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w",
                          encoding="UTF-8") as f:
                    for box, text in zip(boxes, texts):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(text) + "\r\n"
                        f.write(line)