Esempio n. 1
0
def process_one(im_fn):
    """Detect text in one image file and write the results to disk.

    The image is read with OpenCV, its channel order is reversed, it is
    resized with ``resize_image`` and then run through the network using the
    ``sess``, ``bbox_pred``, ``cls_prob``, ``input_image`` and
    ``input_im_info`` objects defined outside this function.  Two files are
    written into ``FLAGS.output_path``: the annotated image (same base name
    as the input) and a ``.txt`` file with one comma-separated line per box.

    Args:
        im_fn: Path of the image file to process.

    Returns:
        ``(image, boxes)`` where ``image`` is the resized image without any
        boxes drawn on it and ``boxes`` is an integer array of detected
        boxes, or ``(None, None)`` if the image could not be read.
    """
    print('===============')
    print(im_fn)
    start = time.time()

    # cv2.imread reports an unreadable file by returning None, not by raising.
    try:
        im = cv2.imread(im_fn)
    except Exception:
        im = None
    if im is None:
        print("Error reading image {}!".format(im_fn))
        return None, None
    im = im[:, :, ::-1]  # reverse the channel order (BGR -> RGB)

    img, (rh, rw) = resize_image(im)
    h, w, c = img.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                           feed_dict={
                                               input_image: [img],
                                               input_im_info: im_info
                                           })

    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]

    textdetector = TextDetector(DETECT_MODE='O')
    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
    # ``np.int`` was removed from NumPy; the builtin ``int`` is what it aliased.
    boxes = np.array(boxes, dtype=int)

    cost_time = (time.time() - start)
    print("cost time: {:.2f}s".format(cost_time))

    # Keep an untouched copy to return; ``img`` is drawn on below.
    img2 = img.copy()

    # Draw the detected boxes.
    for box in boxes:
        cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                      True,
                      color=(0, 255, 0),
                      thickness=2)

    # Undo the resize.  ``fx`` scales the horizontal axis and ``fy`` the
    # vertical one, so they pair with the width and height ratios
    # respectively (assumes resize_image returns (height_ratio, width_ratio),
    # as the names ``rh``/``rw`` suggest).
    img = cv2.resize(img,
                     None,
                     None,
                     fx=1.0 / rw,
                     fy=1.0 / rh,
                     interpolation=cv2.INTER_LINEAR)

    base_name = os.path.basename(im_fn)
    cv2.imwrite(os.path.join(FLAGS.output_path, base_name), img[:, :, ::-1])

    txt_path = os.path.join(FLAGS.output_path,
                            os.path.splitext(base_name)[0]) + ".txt"
    with open(txt_path, "w") as f:
        for i, box in enumerate(boxes):
            # NOTE(review): ``scores`` comes from proposal_layer while ``i``
            # indexes the boxes returned by the detector -- confirm that this
            # pairing is intended.
            line = ",".join(str(box[k]) for k in range(8))
            line += "," + str(scores[i]) + "\r\n"
            f.write(line)

    # Return the image without the boxes drawn on it.
    return img2, boxes
def use_ctpn_net(img, filename):
    """Locate the first text box in ``img``, rectify it and recognise it.

    The image is scaled so that its longer side is 640 pixels, run through
    the network (``sess``, ``bbox_pred``, ``cls_prob``, ``input_image`` and
    ``input_im_info`` are defined outside this function) and the first box
    returned by the detector is warped to a 256x32 patch.  The patch is
    written to ``res_detection/<filename>``, passed to
    ``single_recognition``, and the annotated image is written to
    ``res_recognition/<filename>``.

    Args:
        img: Three-dimensional image array (height, width, channels).
        filename: File name used for both output images.

    Returns:
        None.  On failure ``"fail to locate target"`` is printed instead of
        raising.
    """
    h0, w0, _ = img.shape
    # Scale the longer side to 640 while keeping the aspect ratio.
    if h0 > w0:
        w0 = int(1.0 * 640 * w0 / h0)
        h0 = 640
    else:
        h0 = int(1.0 * 640 * h0 / w0)
        w0 = 640
    roi = cv2.resize(img, (w0, h0), interpolation=cv2.INTER_AREA)
    h, w, c = roi.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                           feed_dict={
                                               input_image: [roi],
                                               input_im_info: im_info
                                           })
    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]
    textdetector = TextDetector(DETECT_MODE='O')
    # Efficiency and accuracy can be improved by tuning parameters or by
    # swapping the model.
    boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                roi.shape[:2])

    # Nothing detected: report it explicitly rather than via an IndexError.
    if len(boxes) == 0:
        print("fail to locate target")
        return

    try:
        box = boxes[0]
        # Source corners are taken as points 0, 1, 3, 2 of the box so that
        # they line up with the destination corners listed in ``pts2``.
        pts1 = np.float32([[box[0], box[1]], [box[2], box[3]],
                           [box[6], box[7]], [box[4], box[5]]])
        pts2 = np.float32([[0, 0], [256, 0], [0, 32], [256, 32]])
        M = cv2.getPerspectiveTransform(pts1, pts2)
        image_dst = cv2.warpPerspective(roi, M, (256, 32))
        cv2.imwrite('res_detection/' + filename, image_dst)
        res = single_recognition(image_dst)
        print(res)
        cv2.polylines(roi, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                      True,
                      color=(0, 0, 255),
                      thickness=2)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(roi, res, (int(box[0]), int(box[1]) - 30), font, 1,
                    (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imwrite('res_recognition/' + filename, roi)
    except Exception:
        # Best effort: keep the original behaviour of reporting the failure
        # instead of propagating it, but let KeyboardInterrupt/SystemExit
        # through.
        print("fail to locate target")
Esempio n. 3
0
def ctpn_recognition(test_images_path, app):
    """Detect text boxes in one image and return a crop of the widest one.

    ``params.middle_path`` is deleted and recreated, the model is built and
    restored from the latest checkpoint in ``params.checkpoint_path``, and
    the image at ``test_images_path`` is run through it.  All detected boxes
    are drawn on a copy of the image, which is saved into
    ``params.middle_path`` under the input's base name.

    Args:
        test_images_path: Path of the image file to analyse.
        app: Object exposing ``app.logger.info`` (used to log the restored
            checkpoint path).

    Returns:
        A ``PIL.Image``.  When the widest detected box is taller than 20
        pixels this is the first channel of that box's crop (widened by 5
        pixels on each side); otherwise, including when nothing is detected,
        it is the whole resized image.
    """
    if os.path.exists(params.middle_path):
        shutil.rmtree(params.middle_path)
    os.makedirs(params.middle_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = params.gpu

    # NOTE(review): the default graph is not reset before the model is built
    # here -- confirm that this function is only called once per process, or
    # that variable reuse is handled elsewhere.
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, _, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(params.checkpoint_path)
            model_path = os.path.join(params.checkpoint_path, os.path.basename(
                ckpt_state.model_checkpoint_path))
            app.logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # Reverse the channel order of the image returned by OpenCV.
            im = cv2.imread(test_images_path)[:, :, ::-1]

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={input_image: [img],
                                                              input_im_info: im_info})

            textsegs, _ = proposal_layer(
                cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(
                textsegs, scores[:, np.newaxis], img.shape[:2])
            # ``np.int`` was removed from NumPy; builtin ``int`` is equivalent.
            boxes = np.array(boxes, dtype=int)

            img_copy = img.copy()

            # Find the widest box.  ``>=`` keeps the last box among equally
            # wide ones, and an empty detection result simply leaves
            # ``widest_idx`` as None instead of failing.
            widest_idx = None
            widest_width = None
            widest_height = None
            for i, box in enumerate(boxes):
                width, height = get_wh(box[:8].tolist())
                if widest_idx is None or width >= widest_width:
                    widest_idx, widest_width, widest_height = i, width, height

            crop_widest = widest_idx is not None and widest_height > 20

            # Fallback result when no suitable box exists.
            part_img = img.copy()

            for i, box in enumerate(boxes):
                color = (0, 255, 0)

                if crop_widest and i == widest_idx:
                    color = (255, 0, 0)
                    # Widen the box by 5 px on each side; clamp the left edge
                    # so a negative index cannot wrap around in the slice.
                    box[0] = max(box[0] - 5, 0)
                    box[2] = box[2] + 5
                    part_img = img[box[1]:box[5], box[0]:box[2]][:, :, 0]

                cv2.polylines(img_copy, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=color,
                              thickness=2)

            # ``fx`` scales the horizontal axis and ``fy`` the vertical one
            # (assumes resize_image returns (height_ratio, width_ratio), as
            # the names ``rh``/``rw`` suggest).
            img_copy = cv2.resize(
                img_copy, None, None, fx=1.0 / rw, fy=1.0 / rh, interpolation=cv2.INTER_LINEAR)
            cv2.imwrite(os.path.join(params.middle_path,
                                     os.path.basename(test_images_path)), img_copy[:, :, ::-1])

            part_img = Image.fromarray(part_img.astype('uint8'))

            return part_img
Esempio n. 4
0
def ctpn_pred(input_path, output_path, textloc_output_path, checkpoint_path, gpu):
    """Detect and recognise text in every image found under ``input_path``.

    ``output_path`` is deleted and recreated.  Each image is run through the
    network twice: once as-is (``'ORIG'``) and once after a transpose plus
    horizontal flip (``'ROT'``).  Boxes are scaled back to the coordinates of
    the raw image.  A box is kept only if at most 20% of its bounding
    rectangle overlaps boxes that were already accepted.  For every kept box
    the crop is saved, passed to ``txt_recog`` and a tab-separated line is
    appended to ``<textloc_output_path>/<image base name>_loc.txt``.

    Args:
        input_path: Passed to ``get_images`` to list the images to process.
        output_path: Directory for the cropped and annotated images; it is
            wiped on entry.
        textloc_output_path: Directory holding the ``*_loc.txt`` files.
        checkpoint_path: Directory containing the model checkpoint.
        gpu: Value assigned to the ``CUDA_VISIBLE_DEVICES`` environment
            variable.

    Returns:
        None.
    """
    print("========== detect text using ctpn ==============")
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu
    tf.reset_default_graph()
    with tf.get_default_graph().as_default():

        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, _, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            for im_fn in get_images(input_path):
                print('===============')
                print(im_fn)
                base_name = os.path.splitext(os.path.basename(im_fn))[0]

                # Remove previously detected components from the text file.
                txtfileloc = os.path.join(textloc_output_path, base_name) + "_loc.txt"
                if os.path.isfile(txtfileloc):
                    with open(txtfileloc, 'r+') as f:
                        textlines = txtremove(f, ['ORIG', 'ROT'])
                        f.seek(0)
                        f.write(textlines)
                        f.truncate()

                start = time.time()
                # cv2.imread returns None for an unreadable file instead of
                # raising, so the result has to be checked explicitly.
                try:
                    img_raw = cv2.imread(im_fn)
                except Exception:
                    img_raw = None
                if img_raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                H, W, _ = img_raw.shape
                # White mask; accepted boxes are painted black on it so later
                # boxes can be tested for overlap.
                img_blank = np.ones(shape=[H, W], dtype=np.uint8) * 255
                # Image used to draw the bounding boxes.
                img_draw = img_raw.copy()

                img, (rh, rw) = resize_image(img_raw)
                h, w, c = img.shape
                # Factors mapping resized coordinates back to the raw image.
                fx = 1.0 / rw
                fy = 1.0 / rh
                res = []
                for ifrot in ['ORIG', 'ROT']:
                    if ifrot == 'ROT':
                        im = cv2.flip(cv2.transpose(img), 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        im = img
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                           feed_dict={input_image: [im],
                                                                      input_im_info: im_info})

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], im.shape[:2])
                    # ``np.int`` was removed from NumPy; ``int`` is equivalent.
                    boxes = np.array(boxes, dtype=int)
                    print("Find number of text:", len(boxes))
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for i, box in enumerate(boxes):
                        if ifrot == 'ROT':
                            # Map the box from the rotated frame back to the
                            # frame of the un-rotated resized image.
                            box = np.array([box[3], h - box[2], box[5], h - box[4],
                                            box[7], h - box[6], box[1], h - box[0], box[8]])

                        # Scale x (even indices) and y (odd indices) back to
                        # raw-image coordinates.
                        box[:8:2] = (box[:8:2] * fx).astype(np.int32)
                        box[1:8:2] = (box[1:8:2] * fy).astype(np.int32)
                        loc = [int(v) for v in box[0:-1]]
                        quad = np.array(loc[:8]).astype(np.int32)

                        # Bounding rectangle of the box, clamped so negative
                        # coordinates cannot wrap around in the slices below.
                        x0, y0, w0, h0 = cv2.boundingRect(quad.reshape((-1, 2)))
                        x_lo, y_lo = max(x0, 0), max(y0, 0)
                        x_hi, y_hi = x0 + w0, y0 + h0

                        mask_crop = img_blank[y_lo:y_hi, x_lo:x_hi].copy()
                        hc, wc = mask_crop.shape[:2]
                        area = hc * wc
                        if area == 0:
                            # Box lies entirely outside the image.
                            continue
                        countzero = area - cv2.countNonZero(mask_crop)
                        if countzero * 1.0 / area > 0.2:
                            # Too much overlap with previously accepted boxes.
                            continue

                        cv2.drawContours(img_blank, [quad.reshape((-1, 1, 2))], 0, (0), thickness=-1, lineType=8)
                        cv2.polylines(img_draw, [quad.reshape((-1, 1, 2))], True, color=bbox_color, thickness=2)

                        # Crop the raw image with the rectangle and save it.
                        img_crop = img_raw[y_lo:y_hi, x_lo:x_hi].copy()
                        txtrecog = txt_recog(img_crop)
                        res.append([ifrot] + loc + [txtrecog])
                        cv2.imwrite(os.path.join(output_path, base_name) + "_" + ifrot + "_" + str(format(i, "04")) + ".jpg", img_crop)
                        cv2.putText(img_draw, str(i), (loc[0], loc[1]), cv2.FONT_HERSHEY_SIMPLEX, 1.0, bbox_color, 2, cv2.LINE_AA)

                # Written once per image, after both passes; ``ifrot`` holds
                # the value of the last pass here, which fixes the file suffix.
                cv2.imwrite(os.path.join(output_path, base_name) + "_" + ifrot + ".jpg", img_draw)

                with open(txtfileloc, "a") as f:
                    for ir in res:
                        line = "\t".join(str(ir[k]) for k in range(10))
                        line += "\n"
                        print(line)
                        f.write(line)
Esempio n. 5
0
def main(argv=None):
    """Run text detection on every image returned by ``get_images()``.

    ``FLAGS.output_path`` is deleted and recreated, the model is restored
    from ``FLAGS.checkpoint_path`` and each image is processed twice: as-is
    (``'orig'``) and after a transpose plus horizontal flip (``'rot90'``).
    After each pass the annotated image is saved as
    ``<pass>-<image name>`` and the boxes of that pass are appended to
    ``<image base name>.txt``.

    Args:
        argv: Unused.

    Returns:
        None.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    tf.reset_default_graph()
    with tf.get_default_graph().as_default():

        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, _, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread returns None for an unreadable file.
                try:
                    im = cv2.imread(im_fn)
                except Exception:
                    im = None
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = im[:, :, ::-1]  # reverse the channel order

                im, (rh, rw) = resize_image(im)
                h, w, c = im.shape
                # Boxes are drawn on a separate canvas so that the boxes of
                # the first pass are not part of the pixels fed to the
                # network in the second pass.
                canvas = im.copy()
                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"

                for im_rot in ['orig', 'rot90']:
                    if im_rot == 'rot90':
                        img = cv2.flip(cv2.transpose(im), 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        img = im
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs,
                                                scores[:, np.newaxis],
                                                img.shape[:2])
                    # ``np.int`` was removed from NumPy; ``int`` is equivalent.
                    boxes = np.array(boxes, dtype=int)

                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for box in boxes:
                        if im_rot == 'rot90':
                            # Map the box from the rotated frame back to the
                            # un-rotated image before drawing it.
                            box = np.array([
                                box[3], h - box[2], box[5], h - box[4], box[7],
                                h - box[6], box[1], h - box[0], box[8]
                            ])

                        cv2.polylines(
                            canvas,
                            [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                            True,
                            color=bbox_color,
                            thickness=2)

                    # The canvas accumulates boxes, so the 'rot90' image also
                    # shows the boxes of the 'orig' pass.
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     im_rot + "-" + os.path.basename(im_fn)),
                        canvas[:, :, ::-1])

                    # NOTE(review): the rows written here are taken straight
                    # from ``boxes``; for 'rot90' they are therefore still in
                    # the rotated frame -- confirm this is intended.
                    with open(txt_path, "a") as f:
                        f.write("\n")
                        for i, box in enumerate(boxes):
                            line = ",".join(str(box[k]) for k in range(8))
                            line += "," + str(scores[i]) + "\r\n"
                            f.write(line)
Esempio n. 6
0
def ctpn():
    """Detect text in every image under ``FLAGS.ctpn_input_path``.

    ``FLAGS.ctpn_output_path`` is deleted and recreated.  Each image is run
    through the network twice: as-is (``'orig'``) and after a transpose plus
    horizontal flip (``'rot'``).  Boxes are scaled back to raw-image
    coordinates, drawn on a copy of the raw image, and the bounding-rectangle
    crop of each box is saved.  After each pass the annotated image and a
    text file with that pass's boxes are written to the output directory.

    Returns:
        None.
    """
    if os.path.exists(FLAGS.ctpn_output_path):
        shutil.rmtree(FLAGS.ctpn_output_path)
    os.makedirs(FLAGS.ctpn_output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    tf.reset_default_graph()
    with tf.get_default_graph().as_default():

        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, _, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            for im_fn in get_images(FLAGS.ctpn_input_path):
                print('===============')
                print(im_fn)
                start = time.time()
                # cv2.imread returns None for an unreadable file instead of
                # raising, so the result has to be checked explicitly.
                try:
                    img_raw = cv2.imread(im_fn)
                except Exception:
                    img_raw = None
                if img_raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                file_name = os.path.basename(im_fn)
                # Image used to draw the bounding boxes (same channel order
                # as returned by cv2.imread, i.e. BGR).
                img_draw = img_raw.copy()

                img, (rh, rw) = resize_image(img_raw)
                h, w, c = img.shape
                # Factors mapping resized coordinates back to the raw image.
                fx = 1.0 / rw
                fy = 1.0 / rh

                for ifrot in ['orig', 'rot']:
                    if ifrot == 'rot':
                        im = cv2.flip(cv2.transpose(img), 1)
                        bbox_color = (0, 0, 255)  # red in BGR
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        im = img
                        bbox_color = (0, 255, 0)  # green
                        im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                           feed_dict={input_image: [im],
                                                                      input_im_info: im_info})

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], im.shape[:2])
                    # ``np.int`` was removed from NumPy; ``int`` is equivalent.
                    boxes = np.array(boxes, dtype=int)
                    print(len(boxes))
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for i, box in enumerate(boxes):
                        if ifrot == 'rot':
                            # Map the box from the rotated frame back to the
                            # frame of the un-rotated resized image.
                            box = np.array([box[3], h - box[2], box[5], h - box[4],
                                            box[7], h - box[6], box[1], h - box[0], box[8]])
                        # Scale x (even indices) and y (odd indices) back to
                        # raw-image coordinates.
                        box[:8:2] = (box[:8:2] * fx).astype(np.int32)
                        box[1:8:2] = (box[1:8:2] * fy).astype(np.int32)
                        quad = box[:8].astype(np.int32)

                        cv2.polylines(img_draw, [quad.reshape((-1, 1, 2))], True, color=bbox_color, thickness=2)

                        # Crop the raw image with the bounding rectangle,
                        # clamped so negative coordinates cannot wrap around.
                        x0, y0, w0, h0 = cv2.boundingRect(quad.reshape((-1, 2)))
                        img_crop = img_raw[max(y0, 0):y0 + h0, max(x0, 0):x0 + w0].copy()

                        # cv2.imwrite rejects empty images, so skip boxes that
                        # fall entirely outside the raw image.
                        if img_crop.size:
                            cv2.imwrite(os.path.join(FLAGS.ctpn_output_path, ifrot + str(format(i, "04")) + "-" + file_name), img_crop)
                        cv2.putText(img_draw, str(i), (int(box[0]), int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1.0, bbox_color, 2, cv2.LINE_AA)

                    # ``img_draw`` is already in the channel order cv2.imwrite
                    # expects, so it is written without reversing channels.
                    cv2.imwrite(os.path.join(FLAGS.ctpn_output_path, ifrot + "-" + file_name), img_draw)

                    txt_path = os.path.join(FLAGS.ctpn_output_path, ifrot + "-" + os.path.splitext(file_name)[0]) + ".txt"
                    with open(txt_path, "a") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(box[k]) for k in range(8))
                            line += "," + str(scores[i]) + "\r\n"
                            f.write(line)