Exemple #1
0
def main(argv=None):
    """Run text detection on every input image and print the detected boxes.

    Recreates ``FLAGS.output_path`` from scratch, restores the checkpoint
    recorded in ``FLAGS.checkpoint_path`` through the moving-average
    variable mapping, then runs each image returned by ``get_images()``
    through the network, ``proposal_layer`` and ``TextDetector``.  One
    comma-separated line (eight coordinates plus a score) is printed per box.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore through the EMA name mapping (variables_to_restore).
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread reports an unreadable file by returning None,
                # so test for that explicitly instead of a bare ``except``.
                try:
                    bgr = cv2.imread(im_fn)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                img = bgr[:, :, ::-1]  # reverse the channel axis

                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                # Column 0 of the proposals is the score, columns 1..4 the segment.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # NOTE(review): ``scores`` has one entry per proposal while ``i``
                # indexes the boxes returned by detect(); confirm these line up.
                for i, box in enumerate(boxes):
                    line = ",".join(str(box[k]) for k in range(8))
                    line += "," + str(scores[i]) + "\n"
                    print(line)
Exemple #2
0
    def find(self):
        """Detect text boxes in the image at ``self.img_path``.

        Builds the inference graph in a freshly reset default graph, restores
        the checkpoint recorded in ``FLAGS.checkpoint_path``, resizes the
        image with ``self.resize_image`` and runs the detector.  Each of the
        first eight values of a box is then divided by the resize ratio
        (``rw`` for even indices, ``rh`` for odd indices) and rounded with
        ``self.round_half_up``.

        Returns:
            Integer numpy array holding every box with its last element
            dropped.

        Raises:
            IOError: If the image cannot be read.
        """
        os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
        tf.reset_default_graph()
        with tf.get_default_graph().as_default():
            input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
            input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

            global_step = self.get_global_step()
            bbox_pred, cls_pred, cls_prob = model.model(input_image)
            variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())

            with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
                ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
                model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)

                print('===============')

                # cv2.imread returns None for an unreadable file.  Previously
                # the handler only printed and execution continued with ``im``
                # unbound; fail with an explicit error instead.
                try:
                    bgr = cv2.imread(self.img_path)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    message = "Error reading image {}!".format(self.img_path)
                    print(message)
                    raise IOError(message)
                im = bgr[:, :, ::-1]  # reverse the channel axis

                img, (rh, rw) = self.resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                # Column 0 of the proposals is the score, columns 1..4 the segment.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])

                # Undo the resize: even indices are divided by rw, odd by rh.
                for box in boxes:
                    for box_idx in range(8):
                        ratio = rw if box_idx % 2 == 0 else rh
                        box[box_idx] = self.round_half_up(box[box_idx] / ratio)

                # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
                boxes = np.array([box[:-1] for box in boxes], dtype=int)
                return boxes
def main(argv=None):
    """Detect text in each input image and either save labels or display them.

    When ``FLAGS.output_path`` is set, every source image is copied to
    ``<output_path>/image`` and its boxes are written to
    ``<output_path>/label/gt_<name>.txt`` (eight integer coordinates plus a
    placeholder label ``i % 10`` per line).  Otherwise the boxes are drawn
    on the original image and shown in an OpenCV window until a key is
    pressed.

    Args:
        argv: Unused.
    """

    print('Mode :%s' % FLAGS.detect_mode)

    sys.path.append(os.getcwd())

    # Imported here, after the working directory was added to sys.path.
    from utils.text_connector.detectors import TextDetector
    from nets import model_train as model
    from utils.rpn_msr.proposal_layer import proposal_layer

    if FLAGS.output_path:
        # Existing output is deliberately kept (no rmtree), only missing
        # directories are created.
        for directory in (FLAGS.output_path,
                          os.path.join(FLAGS.output_path, "image"),
                          os.path.join(FLAGS.output_path, "label")):
            os.makedirs(directory, exist_ok=True)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()

            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                # cv2.imread returns None for an unreadable file instead of
                # raising, so the old try/except never skipped anything and
                # None reached resize_image.  The channel order is left as
                # loaded.
                try:
                    im = cv2.imread(im_fn)
                except cv2.error:
                    im = None
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im, FLAGS.image_size)
                img = cv2.detailEnhance(img)

                # process image
                start = time.time()
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                # Column 0 of the proposals is the score, columns 1..4 the segment.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE=FLAGS.detect_mode)
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                boxes = np.array(boxes, dtype=np.float64)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Map the boxes from the resized image back onto ``im``.
                # x values (even indices) use the width ratio and y values
                # (odd indices) the height ratio; previously both used rh and
                # rw was never applied.
                # NOTE(review): assumes resize_image returns
                # (height_ratio, width_ratio) as the names suggest - confirm.
                for box in boxes:
                    box[0:8:2] /= rw
                    box[1:8:2] /= rh

                basename = os.path.basename(im_fn)
                if FLAGS.output_path:

                    bfn, ext = os.path.splitext(basename)
                    gt_path = os.path.join(FLAGS.output_path, "label",
                                           'gt_' + bfn + '.txt')
                    img_path = os.path.join(FLAGS.output_path, "image",
                                            basename)
                    # The untouched source file is copied next to its labels.
                    shutil.copyfile(im_fn, img_path)
                    with open(gt_path, "w") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(int(box[k])) for k in range(8))
                            # store label as 0-9 for simplicity
                            line += "," + str(i % 10) + "\r\n"
                            f.write(line)
                else:
                    # Line width grows with the height of the original image.
                    thickness = max(1, int(im.shape[0] / 400))

                    # draw polyline and show
                    for box in boxes:
                        points = [box[:8].astype(np.int32).reshape((-1, 1, 2))]
                        cv2.polylines(im,
                                      points,
                                      True,
                                      color=(0, 255, 0),
                                      thickness=thickness,
                                      lineType=cv2.LINE_AA)
                    cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                    cv2.resizeWindow(basename, w, h)
                    cv2.imshow(basename, im)
                    cv2.waitKey(0)
Exemple #4
0
def main(argv=None):
    """Detect text regions, OCR each one and save an annotated image.

    Recreates ``FLAGS.output_path``, restores the checkpoint recorded in
    ``FLAGS.checkpoint_path`` and, for every image from ``get_images()``,
    draws each detected box on the resized image, runs
    ``pytesseract.image_to_string`` on the rectangle spanned by the box's
    first and third corner, and writes the recognised text above the box.
    The annotated image and a ``.txt`` file with one line per box (eight
    coordinates plus a score) are written to the output directory.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread reports an unreadable file by returning None,
                # so test for that explicitly instead of a bare ``except``.
                try:
                    bgr = cv2.imread(im_fn)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                # Column 0 of the proposals is the score, columns 1..4 the segment.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                for box in boxes:
                    polygon = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [polygon], True, color=(0, 255, 0),
                                  thickness=2)

                    # Corner 0 and corner 2 of the box delimit the OCR region.
                    # Plain Python ints are used for slicing and for the
                    # putText origin.
                    x0, y0 = int(box[0]), int(box[1])
                    x2, y2 = int(box[4]), int(box[5])
                    roi = img[y0:y2, x0:x2]
                    if roi.size == 0:
                        # Corner order produced an empty rectangle; there is
                        # nothing to recognise for this box.
                        continue

                    text = pytesseract.image_to_string(roi, config=config)

                    text = unidecode.unidecode(text)
                    cv2.putText(img, text, (x0, y0 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

                # NOTE(review): fx is the horizontal factor in cv2.resize, yet it
                # is fed the value named rh (and fy gets rw) - confirm what
                # resize_image returns before swapping.
                img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                # NOTE(review): ``scores`` has one entry per proposal while ``i``
                # indexes the boxes returned by detect(); confirm these line up.
                with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

# if __name__ == '__main__':
    # tf.app.run()

# def delete_prev(path):
    
#     for the_file in os.listdir(path):
#         file_path = os.path.join(path, the_file)
#         try:
#             if os.path.isfile(file_path):
#                 os.unlink(file_path)
#             elif os.path.isdir(file_path): shutil.rmtree(file_path)
#         except Exception as e:
#             print(e)
#             continue

# app = Flask(__name__)
# app._static_folder = os.path.basename('static')

# UPLOAD_FOLDER = os.path.join('main', 'uploads')
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# @app.route('/')
# def hello_world():
#     return render_template('home_al.html')

# @app.route('/upload', methods=['POST', 'GET'])
# def upload_file():

#     if request.method == 'POST':
#         file = request.files['image']
#         filename = file.filename

#         # prepare directory for processing
#         delete_prev(app.config['UPLOAD_FOLDER'])
#         f = os.path.join(app.config['UPLOAD_FOLDER'], filename)

#         # add your custom code to check that the uploaded file is a valid image and not a malicious file (out-of-scope for this post)
#         file.save(f)

#         tf.app.run()

#         print('done')
#         processed_file = os.path.join('data/res', filename)

#         # return render_template('home_al.html', processed_file = processed_file)
#         return redirect(url_for('send_file', filename=filename))
#         print('redirected to', url_for('send_file', filename=filename))
#     else:

#         print('No request')
#         return render_template('home_al.html')

# # @app.route('/show/<filename>')
# # def uploaded_file(filename):
# #     filename = 'http://127.0.0.1:5000/upload/' + filename
# #     return render_template('home_al.html')

# @app.route('/uploaded/<filename>')
# def send_file(filename):
#     return send_from_directory('data/res', filename)

# app.run(debug=True)
Exemple #5
0
def main(argv=None):
    """Detect text, pick one box via ``get_daxie_bbox`` and save its crop.

    Recreates ``FLAGS.output_path``, restores the checkpoint recorded in
    ``FLAGS.checkpoint_path`` and processes every image from
    ``get_images()``.  When ``get_daxie_bbox`` returns a box, the resized
    image is cropped to the rectangle spanned by elements 0, 1, 4 and 5 of
    that box, rotated by 180 degrees if ``is_reverse == 1``, rescaled and
    written to the output directory under the source file name.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread reports an unreadable file by returning None,
                # so test for that explicitly instead of a bare ``except``.
                try:
                    bgr = cv2.imread(im_fn)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                # Column 0 of the proposals is the score, columns 1..4 the segment.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                is_reverse, bbox = get_daxie_bbox(boxes, h)
                if bbox is None:
                    # Nothing selected for this image, so nothing is written.
                    continue

                pil_image = Image.fromarray(
                    cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                pil_image = pil_image.crop(
                    [bbox[0], bbox[1], bbox[4], bbox[5]])
                if is_reverse == 1:
                    pil_image = pil_image.transpose(Image.ROTATE_180)

                img = np.array(pil_image.convert('RGB'))[:, :, ::-1]
                # NOTE(review): fx is the horizontal factor in cv2.resize, yet it
                # is fed the value named rh (and fy gets rw) - confirm what
                # resize_image returns before swapping.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path,
                                 os.path.basename(im_fn)), img[:, :, ::-1])
Exemple #6
0
def main(argv=None):
    """Train the detection model on a single GPU.

    Builds the graph (placeholders, Adam optimiser, losses, moving averages
    of the trainable variables), optionally resumes from the latest
    checkpoint in ``FLAGS.checkpoint_path`` or initialises from
    ``FLAGS.pretrained_model_path``, then runs the training loop up to
    ``FLAGS.max_steps``.  Summaries go to a time-stamped directory under
    ``FLAGS.logs_path``; checkpoints named ``ctpn_<step>.ckpt`` are written
    every ``FLAGS.save_checkpoint_steps`` steps.

    Args:
        argv: Unused.

    Raises:
        IOError: If ``FLAGS.restore`` is set but no checkpoint is found.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    now = datetime.datetime.now()
    log_dir = FLAGS.logs_path + now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(log_dir)
    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)

    input_image = tf.placeholder(tf.float32,
                                 shape=[None, None, None, 3],
                                 name='input_image')
    input_bbox = tf.placeholder(tf.float32, shape=[None, 5], name='input_bbox')
    input_im_info = tf.placeholder(tf.float32,
                                   shape=[None, 3],
                                   name='input_im_info')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    gpu_id = int(FLAGS.gpu)
    with tf.device('/gpu:%d' % gpu_id):
        with tf.name_scope('model_%d' % gpu_id) as scope:
            bbox_pred, cls_pred, cls_prob = model.model(input_image)
            total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss(
                bbox_pred, cls_pred, input_bbox, input_im_info)
            batch_norm_updates_op = tf.group(
                *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
            grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # One no-op that forces the EMA update, the gradient step and the
    # collected update ops to run together.
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            if ckpt is None:
                raise IOError('No checkpoint found in {}'.format(
                    FLAGS.checkpoint_path))
            # Parse the step from the file name only ("ctpn_<step>.ckpt");
            # splitting the whole path on '.' broke for paths such as
            # "./checkpoints/ctpn_100.ckpt".
            ckpt_stem = os.path.splitext(os.path.basename(ckpt))[0]
            restore_step = int(ckpt_stem.split('_')[-1])
            print("continue training from previous checkpoint {}".format(
                restore_step))
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            restore_step = 0
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers)
        start = time.time()
        for step in range(restore_step, FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _, summary_str = sess.run(
                [model_loss, total_loss, train_op, summary_op],
                feed_dict={
                    input_image: data[0],
                    input_bbox: data[1],
                    input_im_info: data[2]
                })

            summary_writer.add_summary(summary_str, global_step=step)

            # Multiply the learning rate by decay_rate every decay_steps steps.
            if step != 0 and step % FLAGS.decay_steps == 0:
                sess.run(
                    tf.assign(learning_rate,
                              learning_rate.eval() * FLAGS.decay_rate))

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'
                    .format(step, ml, tl, avg_time_per_step,
                            learning_rate.eval()))

            if (step + 1) % FLAGS.save_checkpoint_steps == 0:
                filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt')
                filename = os.path.join(FLAGS.checkpoint_path, filename)
                saver.save(sess,
                           filename,
                           write_meta_graph=False,
                           write_state=True)
                print('Write model to: {:s}'.format(filename))

    # Flush pending summary events to disk once training has finished.
    summary_writer.close()
def main(argv=None):
    """Detect text in the top third of each image and save one crop per box.

    Recreates ``FLAGS.output_path``, restores the checkpoint recorded in
    ``FLAGS.checkpoint_path`` and processes every image from
    ``get_images()``.  Only the top third of each image is analysed.  For
    every detected box, the rectangle spanned by elements 0, 1, 4 and 5 is
    cropped from the resized image, rescaled and written as
    ``<name>_<index><ext>``.  A ``<name>.txt`` file receives one line per box
    (eight coordinates plus a score).  The time taken by each stage is
    printed.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                # cv2.imread reports an unreadable file by returning None,
                # so test for that explicitly instead of a bare ``except``.
                try:
                    im = cv2.imread(im_fn)
                except cv2.error:
                    im = None
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                # Keep only the top third (region of interest) and reverse the
                # channel axis.  Floor division is required: a float slice
                # bound raises TypeError on Python 3, which the old bare
                # ``except`` turned into "every image is skipped".
                im = im[:im.shape[0] // 3, :, ::-1]

                start0 = time.time()
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                print("resize_image cost time: {:.2f}s".format(time.time() -
                                                               start0))

                start = time.time()
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })
                print("sess.run cost time: {:.2f}s".format(time.time() -
                                                           start))

                start = time.time()
                # Column 0 of the proposals is the score, columns 1..4 the segment.
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]
                print("proposal_layer cost time: {:.2f}s".format(time.time() -
                                                                 start))

                start = time.time()
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
                boxes = np.array(boxes, dtype=int)
                print("textdetector cost time: {:.2f}s".format(time.time() -
                                                               start))

                print("total cost time: {:.2f}s".format(time.time() - start0))

                stem, ext = os.path.splitext(os.path.basename(im_fn))
                for i, box in enumerate(boxes):
                    # Crop from the full resized image every time; the old
                    # code overwrote ``img`` so later boxes were cut out of
                    # the previous crop.
                    crop = img[box[1]:box[5], box[0]:box[4]]
                    if crop.size == 0:
                        # cv2.resize rejects empty input; skip degenerate boxes.
                        continue
                    # NOTE(review): fx is the horizontal factor in cv2.resize,
                    # yet it is fed the value named rh (and fy gets rw) -
                    # confirm what resize_image returns before swapping.
                    crop = cv2.resize(crop,
                                      None,
                                      None,
                                      fx=1.0 / rh,
                                      fy=1.0 / rw,
                                      interpolation=cv2.INTER_LINEAR)
                    # Insert the box index before the extension only.
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     '{}_{}{}'.format(stem, i, ext)),
                        crop[:, :, ::-1])

                # NOTE(review): ``scores`` has one entry per proposal while ``i``
                # indexes the boxes returned by detect(); confirm these line up.
                with open(
                        os.path.join(FLAGS.output_path, stem) + ".txt",
                        "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
Exemple #8
0
def main(argv=None):
    """Detect the text region of a slice of the input images and save the crops.

    The images processed are ``get_images()[int(a):b]``; ``a``, ``b`` and the
    mode string ``train_or_test_1800`` are module-level names defined outside
    this function. For each image the detector boxes are used to crop the
    resized image, the crop is rescaled with ``fx=1/rh, fy=1/rw`` and written
    to ``FLAGS.output_path`` under the image's base name. If any step of that
    pipeline raises, a hard-coded placeholder image is written instead under
    the name ``"xzywa" + <basename>``.

    Args:
        argv: Unused.
    """
    # The output directory is only wiped and recreated in these two modes.
    if train_or_test_1800 in ('no_seperate_mianzhi_train',
                              'no_seperate_mianzhi_test'):
        if os.path.exists(FLAGS.output_path):
            shutil.rmtree(FLAGS.output_path)
        os.makedirs(FLAGS.output_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average copies of the weights from the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            ii = a  # running counter, only used in the progress banner

            for im_fn in im_fn_list[int(a):b]:  # adjust the slice here
                ii += 1
                print(str(ii) + '===============' + str(ii))
                print(im_fn)
                start = time.time()
                try:
                    # Reverse the channel axis of the loaded image.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # Unreadable file: report and move on. ``Exception`` (not a
                    # bare ``except``) so Ctrl-C still stops the run.
                    print("Error reading image {}!".format(im_fn))
                    continue

                try:
                    img, (rh, rw) = resize_image(im)
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })
                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    # Column 0 is the score; columns 1..4 are the four
                    # coordinates of each of the N proposals for this image.
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')

                    # Original author's note: detect() was modified internally
                    # so that only one box is reported.
                    boxes = textdetector.detect(textsegs,
                                                scores[:, np.newaxis],
                                                img.shape[:2])
                    # ``np.int`` was removed from NumPy; the builtin ``int`` is
                    # the type it used to alias.
                    boxes = np.array(boxes, dtype=int)

                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for box in boxes:
                        # Crop to the box. Each iteration crops the result of
                        # the previous one, so this relies on a single box.
                        img = img[int(box[1]):int(box[5]),
                                  int(box[0]):int(box[2])]

                    # NOTE(review): cv2 takes fx as the horizontal factor, yet
                    # it is given rh here -- confirm against resize_image.
                    img = cv2.resize(img,
                                     None,
                                     None,
                                     fx=1.0 / rh,
                                     fy=1.0 / rw,
                                     interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     os.path.basename(im_fn)), img[:, :, ::-1])
                except Exception as e:
                    # Original author's note: seen with
                    # "Corrupt JPEG data: premature end of data segment".
                    print("Error processing image {}: {}".format(im_fn, e))
                    # Write a fixed placeholder so every input still gets an
                    # output file (author's note: WBNGQ9R7.jpg may be the
                    # image that fails).
                    immmm = cv2.imread(
                        "../../../dataset_warm_up/train_data/13X6EGWI.jpg")
                    if immmm is None:
                        # Do not let a missing placeholder abort the batch.
                        print("Placeholder image could not be read; "
                              "nothing written for {}".format(im_fn))
                    else:
                        # NOTE(review): the placeholder is channel-reversed
                        # although it comes straight from imread -- confirm
                        # that this is intended.
                        cv2.imwrite(
                            os.path.join(
                                FLAGS.output_path,
                                "xzywa" + str(os.path.basename(im_fn))),
                            immmm[:, :, ::-1])
                    print(str(im_fn) + " is broken!!!!!!!!")
Exemple #9
0
def process():
    """Detect text boxes in every input image, OCR them and report progress.

    This is a generator. On first iteration it recreates
    ``FLAGS.output_path``, builds the detection graph and restores the latest
    checkpoint from ``FLAGS.checkpoint_path``. It then walks the files
    returned by ``get_images()``; for each one it runs the detector, runs
    Tesseract on every detected box and, for every all-digit result of
    length 4, appends a line to
    ``<root>/OCR_text/<folder>/whole-<id>.txt`` (``root`` is a module-level
    name defined outside this function).

    Yields:
        dict: The *same* ``output`` dictionary object after every image,
        updated in place, with keys ``path`` (current file), ``percentage``
        (``count / len(im_fn_list)``), ``locate_time`` and ``ocr_time``
        (seconds), ``ocr_text`` (list of OCR strings, cleared for each image)
        and ``err`` (True when the image could not be read). Callers that
        need to keep a result must copy it before resuming the generator.
    """
    output = {
        'path': None,
        'percentage': 0,
        'locate_time': 0,
        'ocr_time': 0,
        'ocr_text': [],
        'err': False
    }

    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average copies of the weights from the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            im_fn_list = get_images()
            start_all = time.time()

            for count, im_fn in enumerate(im_fn_list):
                output["err"] = False
                output["path"] = im_fn
                output["ocr_text"].clear()
                output["percentage"] = count / len(im_fn_list)
                print('===============')
                # Example value of im_fn:
                # ../four_angles/recording_2019_10_30/bbq/cam_delicacies-17760-17880/73-500_0.jpg
                print(im_fn)
                start = time.time()
                try:
                    # Reverse the channel axis of the loaded image.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # ``Exception`` (not a bare ``except``) so that Ctrl-C and
                    # generator close are not swallowed here.
                    print("Error reading image {}!".format(im_fn))
                    output["err"] = True
                    yield output
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                # Column 0 is the score, columns 1..4 the proposal coordinates.
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                # DETECT_MODE can be H / O depending on context.
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed from NumPy; the builtin ``int`` is
                # the type it used to alias.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                output["locate_time"] = cost_time
                print("cost time: {:.2f}s".format(cost_time))

                # ---- Text recognition ----
                text_start = time.time()

                # Tesseract options: "-l digits" selects the digits language
                # data, "--oem 1" the LSTM engine and "--psm 7" treats the
                # region as a single line of text.
                config = ("-l digits --oem 1 --psm 7")

                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # The region of interest spans box[0..1] to box[4..5].
                    x_start, y_start = box[0], box[1]
                    x_end, y_end = box[4], box[5]
                    # NOTE(review): the threshold is recomputed per box on an
                    # image that already carries the outlines drawn so far, so
                    # it cannot be hoisted without changing the OCR input.
                    _, thresh = cv2.threshold(img, 127, 255,
                                              cv2.THRESH_BINARY_INV)
                    roi = thresh[y_start:y_end, x_start:x_end]

                    text = pytesseract.image_to_string(roi, config=config)
                    output["ocr_text"].append(text)

                    # Only all-digit results are printed, and only four-digit
                    # ones are logged to disk.
                    if text.isdigit():
                        print(text)
                        if len(text) == 4:
                            data = im_fn.split("/")
                            n_parts = len(data)
                            fn = data[n_parts - 1]  # e.g. 73-500_0.jpg
                            # Three directory levels above the file, e.g.
                            # recording_2019_10_30/bbq/cam_bbq-8000-18120
                            folder = '/'.join((data[n_parts - 4],
                                               data[n_parts - 3],
                                               data[n_parts - 2]))
                            print(folder + '/' + fn)
                            # Text before the first "-" of the file name,
                            # e.g. "73".
                            id_num = fn.split("-")[0]

                            directory = os.path.join(
                                root, 'OCR_text/' + folder + '/')
                            os.makedirs(directory, exist_ok=True)
                            # ``with`` closes the log even if the write fails.
                            with open(directory + 'whole-' + id_num + '.txt',
                                      'a') as file_whole:
                                file_whole.write(folder + '/' + fn + ':' +
                                                 text + '\n')

                output["ocr_time"] = time.time() - text_start
                yield output
            cost_time_all = (time.time() - start_all)
            print("Total cost time: {:.2f}s".format(cost_time_all))
def main(argv=None):
    """Rotate a fixed input image, detect its text boxes and save the text crop.

    Reads ``hoadontiendien-3.png`` through ``rotate_img`` and writes the
    result to ``rotated2.png``. It then runs the detector restored from
    ``checkpoints_mlt/`` on the resized image and writes the rectangle that
    encloses all detected boxes (with the box outlines drawn in) to
    ``rotate_cuted2.png``. If no box is detected, a message is printed and no
    crop is written.

    Args:
        argv: Unused.
    """
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average copies of the weights from the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        print("init sess")
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state('checkpoints_mlt/')
            model_path = os.path.join(
                'checkpoints_mlt/',
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            print('===============')
            im = rotate_img('hoadontiendien-3.png')
            print(im.shape)

            cv2.imwrite('rotated2.png', im[:, :, :])
            print("write rotate img")
            start = time.time()

            # The resize ratios are not needed: the crop is saved at the
            # resized scale.
            img, _ = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={
                                                       input_image: [img],
                                                       input_im_info: im_info
                                                   })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            # Column 0 is the score, columns 1..4 the proposal coordinates.
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # ``np.int`` was removed from NumPy; the builtin ``int`` is the
            # type it used to alias.
            boxes = np.array(boxes, dtype=int)

            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            if len(boxes) == 0:
                # min()/max() below would raise ValueError on an empty list.
                print("no text boxes detected")
                return

            # Rectangle enclosing every box: indices 0/1 are used as the
            # minimum corner and 4/5 as the maximum corner of each box.
            box_minx = min(b[0] for b in boxes)
            box_miny = min(b[1] for b in boxes)
            box_maxx = max(b[4] for b in boxes)
            box_maxy = max(b[5] for b in boxes)
            print(box_minx, box_miny)
            print(box_maxx, box_maxy)
            # Basic slicing yields a view, so the outlines drawn on ``img``
            # below also appear in ``crop_img``.
            crop_img = img[box_miny:box_maxy, box_minx:box_maxx]
            print(crop_img.shape)

            for box in boxes:
                cv2.polylines(img,
                              [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                              True,
                              color=(0, 255, 0),
                              thickness=1)

            cv2.imwrite('rotate_cuted2.png', crop_img[:, :, :])
def main(im=None):
    """Detect text boxes in ``im`` and return them rescaled by the resize ratios.

    Builds the detection graph, restores the latest checkpoint found in
    ``checkpoints_mlt/`` and runs the detector on the resized image.

    Args:
        im: Image array handed to ``resize_image``. Required despite the
            ``None`` default. NOTE(review): presumably an H x W x 3 array
            like the ones the sibling entry points load with cv2 -- confirm
            with callers.

    Returns:
        list: One ``[box[0], box[1], box[2], box[7]]`` entry per detected
        box, where indices 0 and 2 were divided by ``rh`` and indices 1 and 7
        by ``rw``. The values are integers because they are written back
        into the integer box array (the fractional part is dropped).

    Raises:
        ValueError: If ``im`` is None.
    """
    if im is None:
        # Fail before the expensive graph build and checkpoint restore.
        raise ValueError("main() requires an image array, got None")

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    checkpoint_path = 'checkpoints_mlt/'
    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.compat.v1.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average copies of the weights from the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={
                                                       input_image: [img],
                                                       input_im_info: im_info
                                                   })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            # Column 0 is the score, columns 1..4 the proposal coordinates.
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # ``np.int`` was removed from NumPy; the builtin ``int`` is the
            # type it used to alias.
            boxes = np.array(boxes, dtype=int)

            return_array = []

            for box in boxes:
                # ``box`` is a row of the integer array, so every assignment
                # stores the quotient with its fractional part dropped.
                # NOTE(review): indices 0/2 are scaled by rh and 1/7 by rw;
                # confirm against resize_image which ratio belongs to which
                # axis.
                box[0] = box[0] / rh
                box[2] = box[2] / rh
                box[1] = box[1] / rw
                box[7] = box[7] / rw
                return_array.append([box[0], box[1], box[2], box[7]])
            return return_array
Exemple #12
0
def main(argv=None):
    """Save a padded crop of the widest detected text box of each input image.

    Recreates ``FLAGS.output_path``, restores the latest checkpoint from
    ``FLAGS.checkpoint_path`` and runs the detector on every file returned by
    ``get_images()``. For each image the boxes are scanned in order; whenever
    a box has a larger width/height ratio than any seen before and that ratio
    exceeds 3, its crop (padded by 10 pixels vertically and half the box
    height horizontally) is written to the output directory under the image's
    base name, overwriting any earlier crop of the same image. Images whose
    box count is not exactly one are reported on stdout.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average copies of the weights from the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                try:
                    # Reverse the channel axis of the loaded image.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    # ``Exception`` (not a bare ``except``) so Ctrl-C still
                    # stops the run.
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                # Column 0 is the score, columns 1..4 the proposal coordinates.
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was removed from NumPy; the builtin ``int`` is
                # the type it used to alias.
                boxes = np.array(boxes, dtype=int)

                if len(boxes) != 1:
                    print(im_fn, len(boxes))

                # Largest width/height ratio seen so far for this image.
                flag = -1
                for box in boxes:
                    # The first eight values are four (x, y) corner points.
                    arr = box[:8].astype(np.int32).reshape((-1, 2))
                    x1 = min(arr[:, 0])
                    x2 = max(arr[:, 0])
                    y1 = min(arr[:, 1])
                    y2 = max(arr[:, 1])
                    pad_w = int((y2 - y1) * 0.5)
                    # Clamp the padded start indices at 0: a negative start
                    # would wrap around to the far edge of the image and
                    # yield an empty or wrong crop. Stop indices past the
                    # edge are already truncated by slicing.
                    top = max(y1 - 10, 0)
                    left = max(x1 - pad_w, 0)
                    img_cp = img[top:y2 + 10, left:x2 + pad_w, :]

                    if flag < (x2 - x1) / (y2 - y1):
                        flag = (x2 - x1) / (y2 - y1)

                        if flag > 3:
                            cv2.imwrite(
                                os.path.join(FLAGS.output_path,
                                             os.path.basename(im_fn)),
                                img_cp[:, :, ::-1])
                '''
Exemple #13
0
def main(argv=None):
    """Restore the latest checkpoint and export the network as a SavedModel.

    Recreates ``FLAGS.output_path``, rebuilds the inference graph, loads the
    checkpoint referenced by ``FLAGS.checkpoint_path`` and writes a
    SavedModel to ``<FLAGS.export_model_dir>/<FLAGS.model_version>``. The
    export carries one predict signature, ``predict_images``, mapping the
    input ``images`` to the outputs ``bbox_pred_output``,
    ``cls_pred_output`` and ``cls_prob_output``.

    Args:
        argv: Unused.
    """
    out_dir = FLAGS.output_path
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    graph = tf.get_default_graph()
    with graph.as_default():
        # ---- Inference graph ----
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # The saver restores the moving-average copies of the weights.
        ema = tf.train.ExponentialMovingAverage(0.997, step)
        restorer = tf.train.Saver(ema.variables_to_restore())

        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            # ---- Load the checkpoint ----
            ckpt_dir = FLAGS.checkpoint_path
            latest = tf.train.get_checkpoint_state(ckpt_dir)
            model_path = os.path.join(
                ckpt_dir, os.path.basename(latest.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            restorer.restore(sess, model_path)

            # ---- Export (the code from here on was added for exporting) ----
            # The target directory is <export_model_dir>/<model_version>.
            export_path = os.path.join(
                tf.compat.as_bytes(FLAGS.export_model_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            print('Exporting trained model to', export_path)
            builder = tf.saved_model.builder.SavedModelBuilder(export_path)

            # Wrap the input and output tensors in TensorInfo protobufs.
            to_info = tf.saved_model.utils.build_tensor_info
            signature_inputs = {'images': to_info(input_image)}
            signature_outputs = {
                'bbox_pred_output': to_info(bbox_pred),
                'cls_pred_output': to_info(cls_pred),
                'cls_prob_output': to_info(cls_prob),
            }

            # One signature using the TF Predict API: image batch in, the
            # three network heads out.
            predict_method = (
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
            signature = tf.saved_model.signature_def_utils.build_signature_def(
                inputs=signature_inputs,
                outputs=signature_outputs,
                method_name=predict_method)

            serving_tags = [tf.saved_model.tag_constants.SERVING]
            builder.add_meta_graph_and_variables(
                sess,
                serving_tags,
                signature_def_map={'predict_images': signature})

            # Write the model to disk in text format.
            builder.save(as_text=True)
            print('Done exporting!')
Exemple #14
0
def main(argv):
    """Detect text with CTPN and save every detected box as a PNG crop.

    ``FLAGS.output_path`` is deleted and recreated, the checkpoint listed in
    ``FLAGS.checkpoint_path`` is restored, and each image returned by
    ``get_images()`` is run through the network. For every detected box a
    crop named ``<box index>.png`` is written into ``FLAGS.output_path``.

    Args:
        argv: Command-line arguments handed over by the app runner; unused.
    """
    # Paths of the crops written so far; collected but not returned.
    of_list = []
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average shadow values of the trained variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            print(im_fn_list)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread gives None for an unreadable file, so the
                    # slice raises; the slice itself flips BGR to RGB.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                print("printing im.shape")
                print(im.shape)
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # np.int was only an alias of the builtin int and no longer
                # exists in NumPy >= 1.24.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Unannotated resized frame to crop from. It is the same for
                # every box, so it is computed once instead of per box.
                crop_source, _ = resize_image(im)

                # NOTE(review): crop names only carry the box index, so crops
                # of a later image overwrite those of an earlier one.
                for i, box in enumerate(boxes):
                    corners = box[:8].astype(np.int32)
                    cv2.polylines(
                        img, [corners.reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # The first eight values are consumed as (x, y) pairs by
                    # cv2.polylines above: corners[0]/[1] are the first
                    # corner's x/y.
                    left = corners[0]
                    top = corners[1]
                    box_width = corners[2] - corners[0]
                    box_height = corners[5] - corners[3]

                    crop = crop_source[top:top + box_height,
                                       left:left + box_width, :]
                    # os.path.join keeps the crop inside the output directory
                    # even when the flag has no trailing separator.
                    crop_path = os.path.join(FLAGS.output_path,
                                             str(i) + '.png')
                    # The frame is RGB (see the read above) while cv2.imwrite
                    # expects BGR, so flip the channels back before saving.
                    cv2.imwrite(crop_path, crop[:, :, ::-1])
                    of_list.append(crop_path)
Exemple #15
0
    def start(self):
        """Restore the CTPN checkpoint and process image paths from the queue.

        Builds the inference graph, restores the checkpoint found in
        ``self.checkpoint_path`` and then loops: an image path is taken from
        ``self.workerQueue``, detection is run on it and, when
        ``self.callback`` is set, the wrapped result is passed to it. The loop
        ends when ``self.running`` is false or ``self.is_stop_signal`` accepts
        the dequeued item. With ``self.debug`` enabled, a JSON dump, an
        annotated image and a text file are also written to
        ``self.outputPath``.
        """
        self.running = True

        # NOTE(review): flags are defined on every call; a second start() in
        # the same process would presumably hit a duplicate-flag error.
        tf.app.flags.DEFINE_string('gpu', '0', '')
        # Path the trained model is loaded from.
        tf.app.flags.DEFINE_string('checkpoint_path', self.checkpoint_path, '')

        # Graph
        with tf.compat.v1.get_default_graph().as_default():
            # Placeholder - input image
            input_image = tf.compat.v1.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
            # Placeholder - input image info
            input_im_info = tf.compat.v1.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

            # Create the global_step variable
            global_step = tf.compat.v1.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

            # TensorFlow ops of the model
            bbox_pred, cls_pred, cls_prob = model.model(input_image)

            variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
            saver = tf.compat.v1.train.Saver(variable_averages.variables_to_restore())

            # TensorFlow session configuration
            sessionConfig = tf.compat.v1.ConfigProto(allow_soft_placement=True)
            # Allocate GPU memory on demand.
            sessionConfig.gpu_options.allow_growth = True

            with tf.compat.v1.Session(config=sessionConfig) as sess:

                # Locate the parameters through the checkpoint state file.
                ckpt_state = tf.compat.v1.train.get_checkpoint_state(self.checkpoint_path)

                # Model path
                model_path = os.path.join(self.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))

                logger.info(u'Restore from {}'.format(model_path))

                # Restore the variables
                saver.restore(sess, model_path)

                while self.running:

                    logger.info(u'等待接收图片')

                    imgFilePath = self.workerQueue.get()
                    if self.is_stop_signal(imgFilePath):
                        logger.info(u'接收到队列停止信号')
                        break

                    logger.info(u'开始处理图片: {}'.format(imgFilePath))

                    # Start timing
                    start = time.time()
                    try:
                        # cv2.imread gives None for an unreadable file, so the
                        # slice raises; the slice itself flips BGR to RGB.
                        im = cv2.imread(imgFilePath)[:, :, ::-1]
                    except Exception:
                        logger.exception(sys.exc_info())
                        continue

                    # Shrink the image (original note: at most 600 * 1200).
                    img, (rh, rw) = self.resize_image(im)
                    # Height, width, channel count
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])

                    # Run the network
                    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                        feed_dict={input_image: [img],
                                                                    input_im_info: im_info})

                    # Post-process the raw predictions into scored proposals;
                    # column 0 is the score, columns 1..4 the box coordinates.
                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])

                    # Stop timing
                    logger.info(u'总计耗时: {}'.format(time.time() - start))

                    if self.debug:
                        outputStem = os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0])

                        with open(outputStem + ".json", "w") as f:
                            f.write(json.dumps(self.wrapResult(boxes, scores)))

                        # Integer copy used only for the debug artefacts.
                        # `boxes` itself stays untouched so the callback gets
                        # the same payload whether or not debug is enabled.
                        # (np.int no longer exists in NumPy >= 1.24; it was an
                        # alias of the builtin int.)
                        intBoxes = np.array(boxes, dtype=int)

                        for box in intBoxes:
                            cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                                        thickness=2)
                        img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)

                        # Flip RGB back to BGR for cv2.imwrite.
                        cv2.imwrite(os.path.join(self.outputPath, os.path.basename(imgFilePath)), img[:, :, ::-1])

                        with open(outputStem + ".txt", "w") as f:
                            for i, box in enumerate(intBoxes):
                                line = ",".join(str(box[k]) for k in range(8))
                                line += "," + str(scores[i]) + "\n"
                                f.write(line)

                    if self.callback:
                        self.callback(fileName=imgFilePath, ctpnRes=self.wrapResult(boxes, scores))
Exemple #16
0
def main(argv=None):
    """Detect text with CTPN, OCR every detected box and save the results.

    ``FLAGS.output_path`` is deleted and recreated, the checkpoint listed in
    ``FLAGS.checkpoint_path`` is restored, and each image returned by
    ``get_images()`` is processed. The crops of all detected boxes are handed
    to ``TessaractImpl.extractData`` and its result is printed. The annotated
    image and a ``.txt`` file (eight coordinates plus a score per line) are
    written to ``FLAGS.output_path``.

    Args:
        argv: Command-line arguments handed over by the app runner; unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    textExtractor = TessaractImpl(CONFIG)

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average shadow values of the trained variables.
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread gives None for an unreadable file, so the
                    # slice raises; the slice itself flips BGR to RGB.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # np.int was only an alias of the builtin int and no longer
                # exists in NumPy >= 1.24.
                boxes = np.array(boxes, dtype=int)
                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Crops are sliced from an untouched copy. Slices of `img`
                # are views, so outlines drawn for this and for later boxes
                # would otherwise end up inside the pixels passed to OCR.
                clean_img = img.copy()

                dataBoxes = []
                for box in boxes:
                    bbx_data = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [bbx_data], True, color=(0, 255, 0), thickness=2)
                    startX, startY, endX, endY = crop_image_box(bbx_data)
                    crop_img = clean_img[startY:endY, startX:endX]
                    dataBoxes.append({"boxImg": crop_img})

                print(textExtractor.extractData(dataBoxes))
                img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                # Flip RGB back to BGR for cv2.imwrite.
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
Exemple #17
0
def main(argv=None):
    """Detect text with CTPN on rotated images and save box crops plus a log.

    Every image returned by ``get_images()`` is read, rotated by 90 degrees
    (transpose followed by a vertical flip) and run through the network.
    Each detected box is appended as one line to
    ``<FLAGS.output_path>/txt/cpth_result.txt`` and saved as a crop under
    ``<FLAGS.output_path>/img``. Unlike its siblings, this variant does not
    wipe the output directory.

    Args:
        argv: Command-line arguments handed over by the app runner; unused.
    """
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average shadow values of the trained variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # The result sub-directories are not created anywhere else, and a
            # missing one makes open() fail and cv2.imwrite write nothing.
            txt_dir = os.path.join(FLAGS.output_path, 'txt')
            img_dir = os.path.join(FLAGS.output_path, 'img')
            for out_dir in (txt_dir, img_dir):
                if not os.path.isdir(out_dir):
                    os.makedirs(out_dir)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                # The image is read only inside the try block: an extra
                # unguarded read here used to crash on unreadable files
                # before the handler below could skip them.
                try:
                    # Flip BGR to RGB, then rotate the upright image.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                    im = cv2.transpose(im)
                    im = cv2.flip(im, 0)
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # np.int was only an alias of the builtin int and no longer
                # exists in NumPy >= 1.24.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Undo the network resize; box coordinates are rescaled with
                # the same factors further down.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)

                base_name = os.path.basename(im_fn)
                stem, ext = os.path.splitext(base_name)

                with open(os.path.join(txt_dir, "cpth_result.txt"), "a") as f:
                    for i, box in enumerate(boxes):
                        line = base_name
                        line += ","
                        line += ",".join(str(box[k]) for k in range(8))
                        line += ","
                        line += str(i)
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

                        # Odd positions hold y values, even positions hold x.
                        maxy = int(max(box[1:8:2]) / rw)
                        miny = int(min(box[1:8:2]) / rw)
                        maxx = int(max(box[:8:2]) / rh)
                        minx = int(min(box[:8:2]) / rh)
                        # The frame is RGB (see the read above) while
                        # cv2.imwrite expects BGR, so flip the channels back.
                        img_new = img[miny:maxy, minx:maxx, ::-1]
                        # Insert the box index before the real extension.
                        # Replacing the literal '.jpg' left other extensions
                        # unchanged, so every crop overwrote the same file.
                        crop_name = stem + '_' + str(i) + ext
                        cv2.imwrite(os.path.join(img_dir, crop_name), img_new)
def main(argv=None):
    """Detect text with CTPN, OCR each box with Tesseract and save the results.

    ``FLAGS.output_path`` is deleted and recreated, the checkpoint listed in
    ``FLAGS.checkpoint_path`` is restored, and each image returned by
    ``get_images()`` is processed. Every detected box is cropped, converted
    to grayscale, sharpened and passed to ``pytesseract``. The annotated
    image and a UTF-8 ``.txt`` file (eight coordinates plus the recognised
    text per line) are written to ``FLAGS.output_path``.

    Args:
        argv: Command-line arguments handed over by the app runner; unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    print(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        # Restore the moving-average shadow values of the trained variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread gives None for an unreadable file, so the
                    # slice raises; the slice itself flips BGR to RGB.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # np.int was only an alias of the builtin int and no longer
                # exists in NumPy >= 1.24.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # OCR input is cut from an untouched copy so the green
                # outlines drawn on `img` never reach Tesseract.
                clean_img = img.copy()

                texts = []
                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # Five pixels of vertical margin. Clamp at 0 because a
                    # negative start index would wrap around and yield an
                    # empty or unrelated slice.
                    top = max(box[1] - 5, 0)
                    bottom = box[5] + 5
                    left = max(box[0], 0)
                    right = box[4]
                    crop_img = clean_img[top:bottom, left:right]

                    text = "error"
                    if crop_img.size == 0:
                        # Degenerate box: cvtColor would raise on an empty
                        # image, so record the failure and keep going.
                        print("OCR Error")
                    else:
                        # The frame is RGB (see the read above), hence
                        # RGB2GRAY rather than BGR2GRAY.
                        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2GRAY)
                        crop_img = unsharp_mask(crop_img)
                        try:
                            text = pytesseract.image_to_string(
                                crop_img, config='-l vie --psm 13')
                        except Exception:
                            print("OCR Error")
                            text = "error"
                    print(text)
                    texts.append(text)

                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                # Flip RGB back to BGR for cv2.imwrite.
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                with open(os.path.join(
                        FLAGS.output_path,
                        os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                          "w",
                          encoding="UTF-8") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(texts[i]) + "\r\n"
                        f.write(line)