def resize_image(im, max_side_len=1200):
    '''
    resize the image so both sides are multiples of 32, as required by the network
    :param im: the input image
    :param max_side_len: limit on the longest side, to avoid running out of GPU memory
    :return: the resized image and the resize ratios (ratio_h, ratio_w)
    '''
    h, w, _ = im.shape

    resize_w = w
    resize_h = h

    # limit the max side
    if max(resize_h, resize_w) > max_side_len:
        ratio = float(max_side_len) / max(resize_h, resize_w)
    else:
        ratio = 1.

    resize_h = int(resize_h * ratio)
    resize_w = int(resize_w * ratio)

    resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 + 1) * 32
    resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 + 1) * 32
    logger.info('resize_w:{}, resize_h:{}'.format(resize_w, resize_h))
    im = cv2.resize(im, (int(resize_w), int(resize_h)))

    ratio_h = resize_h / float(h)
    ratio_w = resize_w / float(w)

    return im, (ratio_h, ratio_w)
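A minimal usage sketch ('demo.jpg' and the downstream detector are placeholders); the returned ratios map detections made on the resized image back to original coordinates:

im = cv2.imread('demo.jpg')  # hypothetical input path
im_resized, (ratio_h, ratio_w) = resize_image(im)
# after running the network on im_resized, divide predicted x coordinates
# by ratio_w and y coordinates by ratio_h to get original-image coordinates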
Example 2
def predict(im):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)

            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            start_time = time.time()
            im_resized, (ratio_h, ratio_w) = resize_image(im)
            h, w, _ = im_resized.shape
            timer = {'net': 0, 'pse': 0}
            start = time.time()
            seg_maps = sess.run(seg_maps_pred,
                                feed_dict={input_images: [im_resized]})
            timer['net'] = time.time() - start

            boxes, kernels, timer = detect(seg_maps=seg_maps,
                                           timer=timer,
                                           image_w=w,
                                           image_h=h)

            if boxes is not None:
                boxes = boxes.reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                h, w, _ = im.shape
                boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

            duration = time.time() - start_time
            logger.info('[timing] {}'.format(duration))

            # return boxes
            return im, boxes
Example 3
def generate_seg(im_size, polys, tags, image_name, scale_ratio):
    '''
    label generation
    :param im_size: input image size
    :param polys: input text regions
    :param tags: tags marking text regions to ignore
    :param image_name: image name, used for logging
    :param scale_ratio: ground truth shrink ratios, default [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    :return:
    seg_maps: segmentation maps at the different shrink ratios, one per channel
    training_mask: mask that excludes the ignored text regions
    '''
    h, w = im_size
    # mark different text polys
    seg_maps = np.zeros((h, w, 6), dtype=np.uint8)
    # mask used during training, to ignore some hard areas
    training_mask = np.ones((h, w), dtype=np.uint8)
    ignore_poly_mark = []
    for i in range(len(scale_ratio)):
        seg_map = np.zeros((h, w), dtype=np.uint8)
        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
            poly = poly_tag[0]
            tag = poly_tag[1]

            # ignore text regions tagged '*', '###', '?', etc.
            if i == 0 and tag:
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_poly_mark.append(poly_idx)

            # seg map
            shrinked_polys = []
            if poly_idx not in ignore_poly_mark:
                shrinked_polys = shrink_poly(poly.copy(),
                                             scale_ratio[i])  # compute shrunk polygon coordinates

            if not len(shrinked_polys) and poly_idx not in ignore_poly_mark:
                logger.info(
                    "before shrink, poly area: {}; len(shrinked_polys) is 0, image {}"
                    .format(abs(pyclipper.Area(poly)), image_name))
                # if the poly is too small, then ignore it during training
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_poly_mark.append(poly_idx)
                continue
            for shrinked_poly in shrinked_polys:
                seg_map = cv2.fillPoly(
                    seg_map, [np.array(shrinked_poly).astype(np.int32)], 1)

        seg_maps[..., i] = seg_map
    return seg_maps, training_mask
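shrink_poly is not defined in this snippet; below is a sketch of one common PSENet-style implementation with pyclipper, using the paper's offset d = Area(p) * (1 - r^2) / Perimeter(p). The helper name and the integer rounding are our assumptions:

import numpy as np
import pyclipper

def shrink_poly_sketch(poly, ratio):
    poly = np.asarray(poly, dtype=np.float64)
    area = abs(pyclipper.Area(poly.tolist()))
    perimeter = np.sum(np.linalg.norm(poly - np.roll(poly, 1, axis=0), axis=1))
    if perimeter == 0:
        return []
    d = area * (1 - ratio * ratio) / perimeter
    pco = pyclipper.PyclipperOffset()
    pco.AddPath(poly.round().astype(int).tolist(), pyclipper.JT_ROUND,
                pyclipper.ET_CLOSEDPOLYGON)
    return pco.Execute(-d)  # list of shrunk polygons; may be empty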
Example 4
def generate_seg(im_size, polys, tags, image_name, scale_ratio):
    '''
    :param im_size: input image size
    :param polys: input text regions
    :param tags: tags marking text regions to ignore
    :param image_name: image name, used for logging
    :param scale_ratio: ground truth shrink ratios, default [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    :return:
    seg_maps: segmentation maps at the different shrink ratios, one per channel
    training_mask: mask that excludes the ignored text regions
    '''
    h, w = im_size
    # mark different text polys
    seg_maps = np.zeros((h, w, 6), dtype=np.uint8)
    # mask used during training, to ignore some hard areas
    training_mask = np.ones((h, w), dtype=np.uint8)
    ignore_poly_mark = []
    for i in range(len(scale_ratio)):
        seg_map = np.zeros((h, w), dtype=np.uint8)
        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
            poly = poly_tag[0]
            tag = poly_tag[1]

            # ignore text regions tagged ###
            if i == 0 and tag:
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_poly_mark.append(poly_idx)

            # seg map
            shrinked_polys = []
            if poly_idx not in ignore_poly_mark:
                shrinked_polys = shrink_poly(poly.copy(), scale_ratio[i])

            if not len(shrinked_polys) and poly_idx not in ignore_poly_mark:
                logger.info(
                    "before shrink, poly area: {}; len(shrinked_polys) is 0, image {}"
                    .format(abs(pyclipper.Area(poly)), image_name))
                # if the poly is too small, then ignore it during training
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_poly_mark.append(poly_idx)
                continue
            for shrinked_poly in shrinked_polys:
                seg_map = cv2.fillPoly(
                    seg_map, [np.array(shrinked_poly).astype(np.int32)], 1)

        seg_maps[..., i] = seg_map
    return seg_maps, training_mask
def get_images():
    '''
    find image files in test data path
    :return: list of files found
    '''
    files = []
    exts = ['jpg', 'png', 'jpeg', 'JPG']
    for parent, dirnames, filenames in os.walk(FLAGS.test_data_path):
        for filename in filenames:
            for ext in exts:
                if filename.endswith(ext):
                    files.append(os.path.join(parent, filename))
                    break
    logger.info('Find {} images'.format(len(files)))
    return files
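Note that filename.endswith(ext) also matches names whose stems merely end in those letters (e.g. 'scan.notjpg'); a stricter sketch (the helper name is ours) that compares the actual extension:

def get_images_strict(root, exts=('jpg', 'png', 'jpeg')):
    # hypothetical variant; matches on the real extension only
    files = []
    for parent, _, filenames in os.walk(root):
        for filename in filenames:
            if os.path.splitext(filename)[1].lstrip('.').lower() in exts:
                files.append(os.path.join(parent, filename))
    return files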
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_seg_maps = tf.placeholder(tf.float32,
                                    shape=[None, None, None, 6],
                                    name='input_score_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    # opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_seg_maps_split = tf.split(input_seg_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isegs = input_seg_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isegs, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)
    gpu_options = tf.GPUOptions(allow_growth=True)
    #gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            logger.info('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            logger.debug(ckpt)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_seg_maps: data[2],
                    input_training_masks: data[3]
                })
            if np.isnan(tl):
                logger.error('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                logger.info(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           os.path.join(FLAGS.checkpoint_path, 'model.ckpt'),
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_seg_maps: data[2],
                        input_training_masks: data[3]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
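tower_loss and average_gradients are defined elsewhere in the repository; average_gradients is conventionally the implementation from the TensorFlow multi-GPU CIFAR-10 tutorial. A sketch, assuming every tower returns gradients for the same variables in the same order:

def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_gpu0, var), (grad_gpu1, var), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads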
Example 7
def generator(input_size=512,
              batch_size=32,
              background_ratio=3. / 8,
              random_scale=np.array([0.125, 0.25, 0.5, 1, 2.0, 3.0]),
              vis=False,
              scale_ratio=np.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0])):
    '''
    reference from https://github.com/argman/EAST
    :param input_size: input image size
    :param batch_size: training batch size
    :param background_ratio: fraction of batches cropped as pure background
    :param random_scale: random image rescaling factors
    :param vis: whether to visualize
    :param scale_ratio: ground truth shrink ratios
    :return:
    '''
    image_list = np.array(get_files(['jpg', 'png', 'jpeg', 'JPG']))

    logger.info('{} training images in {}'.format(image_list.shape[0],
                                                  FLAGS.training_data_path))
    index = np.arange(0, image_list.shape[0])

    while True:
        np.random.shuffle(index)
        images = []
        image_fns = []
        seg_maps = []
        training_masks = []
        for i in index:
            try:
                im_fn = image_list[i]
                im = cv2.imread(im_fn)
                if im is None:
                    logger.info('failed to read image: {}'.format(im_fn))
                    continue
                h, w, _ = im.shape
                txt_fn = im_fn.replace(
                    os.path.basename(im_fn).split('.')[1], 'txt')
                if not os.path.exists(txt_fn):
                    continue

                text_polys, text_tags = load_annoataion(txt_fn)
                if text_polys.shape[0] == 0:
                    continue
                text_polys, text_tags = check_and_validate_polys(
                    text_polys, text_tags, (h, w))  # validate polygons

                # randomly rescale the image
                rd_scale = np.random.choice(random_scale)
                im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
                text_polys *= rd_scale
                # randomly crop an area from the image
                if np.random.rand() < background_ratio:
                    # crop background
                    im, text_polys, text_tags = crop_area(
                        im, text_polys, text_tags, crop_background=True)  # random crop
                    if text_polys.shape[0] > 0:
                        # cannot find background
                        continue
                    # pad and resize image
                    new_h, new_w, _ = im.shape
                    #max_h_w_i = np.max([new_h, new_w, input_size])
                    im_padded = np.zeros((new_h, new_w, 3), dtype=np.uint8)
                    im_padded[:new_h, :new_w, :] = im.copy()
                    im = cv2.resize(im_padded, dsize=(input_size, input_size))
                    seg_map_per_image = np.zeros(
                        (input_size, input_size, scale_ratio.shape[0]),
                        dtype=np.uint8)
                    training_mask = np.ones((input_size, input_size),
                                            dtype=np.uint8)
                else:
                    im, text_polys, text_tags = crop_area(
                        im, text_polys, text_tags,
                        crop_background=False)  # random crop
                    if text_polys.shape[0] == 0:
                        continue
                    # h, w, _ = im.shape

                    # pad the image to the training input size or the longer side of image
                    new_h, new_w, _ = im.shape
                    #max_h_w_i = np.max([new_h, new_w, input_size])
                    im_padded = np.zeros((new_h, new_w, 3), dtype=np.uint8)
                    im_padded[:new_h, :new_w, :] = im.copy()
                    im = im_padded
                    # resize the image to input size
                    new_h, new_w, _ = im.shape
                    resize_h = input_size
                    resize_w = input_size
                    im = cv2.resize(im, dsize=(resize_w, resize_h))
                    resize_ratio_3_x = resize_w / float(new_w)
                    resize_ratio_3_y = resize_h / float(new_h)
                    text_polys[:, :, 0] *= resize_ratio_3_x
                    text_polys[:, :, 1] *= resize_ratio_3_y
                    new_h, new_w, _ = im.shape
                    seg_map_per_image, training_mask = generate_seg(
                        (new_h, new_w), text_polys, text_tags, image_list[i],
                        scale_ratio)
                    if not len(seg_map_per_image):
                        logger.info("len(seg_map)==0 image: %d " % i)
                        continue
                # visualize the segmentation results
                if vis:
                    fig, axs = plt.subplots(3, 3, figsize=(20, 30))
                    axs[0, 0].imshow(im[..., ::-1])
                    axs[0, 0].set_xticks([])
                    axs[0, 0].set_yticks([])
                    axs[0, 1].imshow(seg_map_per_image[..., 0])
                    axs[0, 1].set_xticks([])
                    axs[0, 1].set_yticks([])
                    axs[0, 2].imshow(seg_map_per_image[..., 1])
                    axs[0, 2].set_xticks([])
                    axs[0, 2].set_yticks([])
                    axs[1, 0].imshow(seg_map_per_image[..., 2])
                    axs[1, 0].set_xticks([])
                    axs[1, 0].set_yticks([])
                    axs[1, 1].imshow(seg_map_per_image[..., 3])
                    axs[1, 1].set_xticks([])
                    axs[1, 1].set_yticks([])
                    axs[1, 2].imshow(seg_map_per_image[..., 4])
                    axs[1, 2].set_xticks([])
                    axs[1, 2].set_yticks([])
                    axs[2, 0].imshow(seg_map_per_image[..., 5])
                    axs[2, 0].set_xticks([])
                    axs[2, 0].set_yticks([])
                    axs[2, 1].imshow(training_mask)
                    axs[2, 1].set_xticks([])
                    axs[2, 1].set_yticks([])
                    plt.tight_layout()
                    plt.show()
                    plt.close()

                images.append(im[..., ::-1].astype(np.float32))
                image_fns.append(im_fn)
                seg_maps.append(seg_map_per_image[::4, ::4, :].astype(
                    np.float32))  # 1/4-resolution map
                training_masks.append(training_mask[::4, ::4,
                                                    np.newaxis].astype(
                                                        np.float32))  # 1/4-resolution map

                if len(images) == batch_size:
                    yield images, image_fns, seg_maps, training_masks
                    images = []
                    image_fns = []
                    seg_maps = []
                    training_masks = []
            except Exception as e:
                traceback.print_exc()
                continue
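Consuming the generator (a sketch): each yielded item is a 4-tuple of lists with batch_size entries, and the seg maps and masks are already downsampled to 1/4 of the input resolution:

gen = generator(input_size=512, batch_size=32)
images, image_fns, seg_maps, training_masks = next(gen)
assert len(images) == 32 and seg_maps[0].shape[:2] == (128, 128)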
def main(argv=None):

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:  # 17 == errno.EEXIST
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
                # run_metadata = tf.RunMetadata()
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # with open(os.path.join(FLAGS.output_dir, os.path.basename(im_fn).split('.')[0]+'.json'), 'w') as f:
                #     f.write(chrome_trace)

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))

                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue

                            num += 1

                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=2)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

    #===========================================================================================================
    # convert the 8-coordinate detection results to 4-coordinate (axis-aligned) txt files

    path = test_data_path + '/'  # input images
    gt_path = output_dir + '/'  # 8-coordinate txt files
    out_path = APP_ROOT + '/output_label'  # 4-coordinate txt files

    if not os.path.exists(out_path):
        os.makedirs(out_path)
    else:
        shutil.rmtree(out_path)
        os.mkdir(out_path)

    files = os.listdir(path)
    files.sort()
    #files=files[:100]
    for file in files:
        _, basename = os.path.split(file)
        if basename.lower().split('.')[-1] not in ['jpg', 'png', 'jpeg']:
            continue
        stem, ext = os.path.splitext(basename)
        gt_file = os.path.join(gt_path, stem + '.txt')
        img_path = os.path.join(path, file)
        print('Reading image ' + os.path.splitext(file)[0])
        img = cv2.imread(img_path)
        img_size = img.shape
        im_size_min = np.min(img_size[0:2])
        im_size_max = np.max(img_size[0:2])

        with open(gt_file, 'r') as f:
            lines = f.readlines()
        for line in lines:
            splitted_line = line.strip().lower().split(',')
            pt_x = np.zeros((4, 1))
            pt_y = np.zeros((4, 1))
            pt_x[0, 0] = int(float(splitted_line[0]))
            pt_y[0, 0] = int(float(splitted_line[1]))
            pt_x[1, 0] = int(float(splitted_line[2]))
            pt_y[1, 0] = int(float(splitted_line[3]))
            pt_x[2, 0] = int(float(splitted_line[4]))
            pt_y[2, 0] = int(float(splitted_line[5]))
            pt_x[3, 0] = int(float(splitted_line[6]))
            pt_y[3, 0] = int(float(splitted_line[7]))

            ind_x = np.argsort(pt_x, axis=0)
            pt_x = pt_x[ind_x]
            pt_y = pt_y[ind_x]

            if pt_y[0] < pt_y[1]:
                pt1 = (pt_x[0], pt_y[0])
                pt3 = (pt_x[1], pt_y[1])
            else:
                pt1 = (pt_x[1], pt_y[1])
                pt3 = (pt_x[0], pt_y[0])

            if pt_y[2] < pt_y[3]:
                pt2 = (pt_x[2], pt_y[2])
                pt4 = (pt_x[3], pt_y[3])
            else:
                pt2 = (pt_x[3], pt_y[3])
                pt4 = (pt_x[2], pt_y[2])

            xmin = int(min(pt1[0], pt2[0]))
            ymin = int(min(pt1[1], pt2[1]))
            xmax = int(max(pt2[0], pt4[0]))
            ymax = int(max(pt3[1], pt4[1]))

            if xmin < 0:
                xmin = 0
            if xmax > img_size[1] - 1:
                xmax = img_size[1] - 1
            if ymin < 0:
                ymin = 0
            if ymax > img_size[0] - 1:
                ymax = img_size[0] - 1

            with open(os.path.join(out_path, stem) + '.txt', 'a') as f:
                f.write('{} {} {} {}\n'.format(int(xmin), int(ymin),
                                               int(xmax), int(ymax)))
Example 9
def main(argv=None):
    import os
    # os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    t0 = time.time()
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:  # 17 == errno.EEXIST
            raise

    im_fn_list = get_images()
    for im_fn in im_fn_list:
        points_list = []
        tf.reset_default_graph()
        with tf.get_default_graph().as_default():
            input_images = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 3],
                                          name='input_images')
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)
            seg_maps_pred = model.model(input_images, is_training=False)

            variable_averages = tf.train.ExponentialMovingAverage(
                0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())
            with tf.Session(config=tf.ConfigProto(
                    allow_soft_placement=True)) as sess:
                ckpt_state = tf.train.get_checkpoint_state(
                    FLAGS.checkpoint_path)
                model_path = os.path.join(
                    FLAGS.checkpoint_path,
                    os.path.basename(ckpt_state.model_checkpoint_path))

                logger.info('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)

                im = cv2.imread(im_fn)[:, :, ::-1]
                draw_img = im[:, :, ::-1].copy()
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
                # run_metadata = tf.RunMetadata()
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # with open(os.path.join(FLAGS.output_dir, os.path.basename(im_fn).split('.')[0]+'.json'), 'w') as f:
                #     f.write(chrome_trace)

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))

                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue

                            num += 1

                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))

                            yDim, xDim = im[:, :, ::-1].shape[:2]
                            if box[0, 0] > box[2, 0]:  # box point 1 is at the bottom-right; points run clockwise
                                pt1 = (max(1, box[2, 0]), max(1, box[2, 1]))
                                pt2 = (box[3, 0], box[3, 1])
                                pt3 = (min(box[0, 0],
                                           xDim - 2), min(yDim - 2, box[0, 1]))
                                pt4 = (box[1, 0], box[1, 1])
                            else:  # box point 1 is at the bottom-left; points run clockwise
                                pt1 = (max(1, box[1, 0]), max(1, box[2, 1]))
                                pt2 = (box[2, 0], box[2, 1])
                                pt3 = (min(box[3, 0],
                                           xDim - 2), min(yDim - 2, box[3, 1]))
                                pt4 = (box[0, 0], box[0, 1])

                            points = [pt1, pt2, pt3, pt4]
                            points_list.append(points)

                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=2)

        tf.reset_default_graph()
        keras.backend.clear_session()
        input = Input(shape=(32, None, 1), name='the_input')
        y_pred = dense_cnn(input, nclass)
        recognition_model = Model(inputs=input, outputs=y_pred)
        model_path = './recognition/...'
        if os.path.exists(model_path):
            print('loading model')
            recognition_model.load_weights(model_path)
        else:
            print('model does not exist')
            break

        j = 0
        txt_path = os.path.join(FLAGS.output_dir,
                                im_fn.split('/')[-1].split('.')[0])
        with open('{}.txt'.format(txt_path), 'a', encoding='utf-8') as outf:
            for points in points_list:
                j += 1
                pt1 = points[0]
                pt2 = points[1]
                pt3 = points[2]
                pt4 = points[3]
                degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0]))
                text_img = dumpRotateImage(im[:, :, ::-1], degree, pt1, pt2,
                                           pt3, pt4)
                text_img = cv2.cvtColor(text_img, cv2.COLOR_BGR2GRAY)

                text_h, text_w = text_img.shape[:2]
                if text_h // text_w > 1:
                    continue
                dst_h = 32
                dst_w = text_w * dst_h // text_h
                text_img = cv2.resize(text_img, (dst_w, dst_h))
                X = text_img.reshape([1, 32, -1, 1])
                y_pred = recognition_model.predict(X)
                y_pred = y_pred[:, :, :]
                out = _decode(y_pred)
                img_PIL = Image.fromarray(
                    cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB))
                font = ImageFont.truetype('./utils/simsun.ttc', 12)
                fillColor = (255, 0, 0)
                draw = ImageDraw.Draw(img_PIL)
                if out is None:
                    out = ''
                draw.text(pt4, out, font=font, fill=fillColor)
                draw_img = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
                outf.write('{}. \t{}\n'.format(j, out))

            if not FLAGS.no_write_images:
                img_path = os.path.join(FLAGS.output_dir,
                                        os.path.basename(im_fn))
                cv2.imwrite(img_path, draw_img)

    print('total time = ', time.time() - t0)
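_decode is not shown in this snippet; a typical greedy CTC decode for this kind of DenseNet + CTC head looks like the sketch below (char_list and blank_index are assumptions about the recognition model's label set):

def _decode(y_pred, char_list, blank_index):
    # y_pred: (1, time_steps, nclass) softmax output
    best = y_pred.argmax(axis=2)[0]
    out, prev = [], -1
    for c in best:
        if c != prev and c != blank_index:  # collapse repeats, drop blanks
            out.append(char_list[c])
        prev = c
    return ''.join(out)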
Example 10
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:  # 17 == errno.EEXIST
            raise

    if not os.path.isdir(os.path.join(FLAGS.output_dir, "crop")):
        os.makedirs(os.path.join(FLAGS.output_dir, "crop"))

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            # model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            # logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, FLAGS.checkpoint_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
                # run_metadata = tf.RunMetadata()
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # with open(os.path.join(FLAGS.output_dir, os.path.basename(im_fn).split('.')[0]+'.json'), 'w') as f:
                #     f.write(chrome_trace)

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))

                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue

                            num += 1

                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                            if not FLAGS.is_cropping:
                                cv2.polylines(
                                    im[:, :, ::-1],
                                    [box.astype(np.int32).reshape((-1, 1, 2))],
                                    True,
                                    color=(255, 255, 0),
                                    thickness=2)
                            else:
                                lt_x = box[2, 0]
                                lt_y = box[2, 1]
                                rt_x = box[3, 0]
                                rt_y = box[3, 1]
                                lb_x = box[1, 0]
                                lb_y = box[1, 1]
                                rb_x = box[0, 0]
                                rb_y = box[0, 1]
                                if lt_x > lb_x:
                                    lt_x = lb_x
                                if lt_y > rt_y:
                                    lt_y = rt_y
                                if rt_x < rb_x:
                                    rt_x = rb_x
                                if rt_y > lt_y:
                                    rt_y = lt_y
                                if lb_x > lt_x:
                                    lb_x = lt_x
                                if lb_y < rb_y:
                                    lb_y = rb_y
                                if rb_x < rt_x:
                                    rb_x = rt_x
                                if rb_y < lb_y:
                                    rb_y = lb_y


                                # padding = 3
                                # lt_x -= padding
                                # lt_y -= padding
                                # lb_x -= padding
                                # lb_y += padding
                                # rt_x += padding
                                # rt_y -= padding
                                # rb_x += padding
                                # rb_y += padding
                                crop_img = im[int(lt_y):int(lb_y),
                                              int(lt_x):int(rt_x)]
                                cv2.imwrite(
                                    os.path.join(FLAGS.output_dir, "crop",
                                                 ("%d_" % i) +
                                                 os.path.basename(im_fn)),
                                    crop_img[:, :, ::-1])

                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])
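The corner-swapping block above normalizes a possibly skewed quadrilateral before cropping; a sketch of the equivalent tight crop using the min/max of all four corners:

xs, ys = box[:, 0], box[:, 1]
crop_img = im[int(ys.min()):int(ys.max()), int(xs.min()):int(xs.max())]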
Example 11
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    input_dir = '../test_images'
    output_dir = '../test_images_result'
    img_path = '../test_images/IMG_20190107_103022.jpg'
    # img_path = '../test_images/IMG_20190108_162848.jpg'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    g1 = tf.Graph()  # graph for Session 1
    sess1 = tf.Session(graph=g1, config=config)  # Session 1
    # load the first model
    with sess1.as_default():
        with g1.as_default():
            with tf.gfile.FastGFile('../models/ocr_angle.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')
            preprocessing_name1 = 'resnet_v1_50'
            image_preprocessing_fn1 = preprocessing_factory.get_preprocessing(
                preprocessing_name1, is_training=False)
            test_image_size1 = 224
            graph = tf.get_default_graph()
            predictions1 = graph.get_tensor_by_name(
                'resnet_v1_50/predictions/Reshape_1:0')
            tensor_input1 = graph.get_tensor_by_name('Placeholder:0')
            tensor_item1 = tf.placeholder(tf.float32, [None, None, 3])
            processed_image1 = image_preprocessing_fn1(tensor_item1,
                                                       test_image_size1,
                                                       test_image_size1)

    g2 = tf.Graph()
    sess2 = tf.Session(graph=g2, config=config)
    with sess2.as_default():
        with g2.as_default():
            with tf.gfile.FastGFile('../models/pse_invoice.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')

            graph = tf.get_default_graph()
            input_images2 = graph.get_tensor_by_name('input_images:0')
            seg_maps_pred2 = graph.get_tensor_by_name('Sigmoid:0')

    g3 = tf.Graph()
    sess3 = tf.Session(graph=g3, config=config)
    with sess3.as_default():
        with g3.as_default():
            with tf.gfile.FastGFile('../models/ocr_field.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')

            preprocessing_name3 = 'resnet_v1_50'
            image_preprocessing_fn3 = preprocessing_factory.get_preprocessing(
                preprocessing_name3, is_training=False)
            output_height3, output_width3 = 100, 400
            graph = tf.get_default_graph()
            predictions3 = graph.get_tensor_by_name(
                'resnet_v1_50/predictions/Reshape_1:0')
            tensor_input3 = graph.get_tensor_by_name('Placeholder:0')
            tensor_item3 = tf.placeholder(tf.float32, [None, None, 3])
            processed_image3 = image_preprocessing_fn3(tensor_item3,
                                                       output_height3,
                                                       output_width3)

    g4 = tf.Graph()
    sess4 = tf.Session(graph=g4, config=config)
    with sess4.as_default():
        with g4.as_default():
            with tf.gfile.FastGFile('../models/ocr_code.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')

            graph = tf.get_default_graph()
            image_inputs4 = graph.get_tensor_by_name('image_inputs:0')
            logits = graph.get_tensor_by_name('transpose_2:0')
            shape = tf.shape(logits)
            seq_len = tf.reshape(shape[0], [-1])
            seq_len = tf.tile(seq_len, [shape[1]])
            greedy_decoder = tf.nn.ctc_greedy_decoder(logits, seq_len)
            decoded4 = greedy_decoder[0]

    g5 = tf.Graph()
    sess5 = tf.Session(graph=g5, config=config)
    with sess5.as_default():
        with g5.as_default():
            with tf.gfile.FastGFile('../models/ocr_number.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')

            graph = tf.get_default_graph()
            image_inputs5 = graph.get_tensor_by_name('image_inputs:0')
            logits = graph.get_tensor_by_name('transpose_2:0')
            shape = tf.shape(logits)
            seq_len = tf.reshape(shape[0], [-1])
            seq_len = tf.tile(seq_len, [shape[1]])
            greedy_decoder = tf.nn.ctc_greedy_decoder(logits, seq_len)
            decoded5 = greedy_decoder[0]

    g6 = tf.Graph()
    sess6 = tf.Session(graph=g6, config=config)
    with sess6.as_default():
        with g6.as_default():
            with tf.gfile.FastGFile('../models/ocr_price.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')

            graph = tf.get_default_graph()
            image_inputs6 = graph.get_tensor_by_name('image_inputs:0')
            logits = graph.get_tensor_by_name('transpose_2:0')
            shape = tf.shape(logits)
            seq_len = tf.reshape(shape[0], [-1])
            seq_len = tf.tile(seq_len, [shape[1]])
            greedy_decoder = tf.nn.ctc_greedy_decoder(logits, seq_len)
            decoded6 = greedy_decoder[0]

    g7 = tf.Graph()
    sess7 = tf.Session(graph=g7, config=config)
    with sess7.as_default():
        with g7.as_default():
            with tf.gfile.FastGFile('../models/ocr_date.pb', 'rb') as f:
                # create an empty GraphDef with tf.GraphDef()
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                # Imports the graph from graph_def into the current default Graph.
                tf.import_graph_def(graph_def, name='')

            graph = tf.get_default_graph()
            image_inputs7 = graph.get_tensor_by_name('image_inputs:0')
            logits = graph.get_tensor_by_name('transpose_2:0')
            shape = tf.shape(logits)
            seq_len = tf.reshape(shape[0], [-1])
            seq_len = tf.tile(seq_len, [shape[1]])
            greedy_decoder = tf.nn.ctc_greedy_decoder(logits, seq_len)
            decoded7 = greedy_decoder[0]

    # inference: angle classification with model 1
    with sess1.as_default():
        with sess1.graph.as_default():
            sess1.run(tf.global_variables_initializer())
            src = cv2.imread(img_path)
            image = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (test_image_size1, test_image_size1))
            height, width = image.shape[0], image.shape[1]
            image = np.reshape(image, [height, width, 3])
            image = sess1.run(processed_image1,
                              feed_dict={tensor_item1: image})
            logi = sess1.run(predictions1, feed_dict={tensor_input1:
                                                      [image]})[0]
            prediction = np.argmax(logi)
            degree = labels_angle[prediction]
            print(img_path, degree)

    # inference: text detection with model 2
    detected_rects = {}
    with sess2.as_default():
        with sess2.graph.as_default():
            im_src = rotate(src, degree)
            im = copy.deepcopy(im_src)
            im = im[:, :, ::-1]
            # im = cv2.imread(img_path)[:, :, ::-1]
            logger.debug('image file:{}'.format(img_path))
            os.makedirs(output_dir, exist_ok=True)
            start_time = time.time()
            im_resized, (ratio_h, ratio_w) = resize_image(im)
            h, w, _ = im_resized.shape
            timer = {'net': 0, 'pse': 0}
            start = time.time()
            seg_maps = sess2.run(seg_maps_pred2,
                                 feed_dict={input_images2: [im_resized]})
            timer['net'] = time.time() - start

            boxes, kernels, timer = detect(seg_maps=seg_maps,
                                           timer=timer,
                                           image_w=w,
                                           image_h=h)
            logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                img_path, timer['net'] * 1000, timer['pse'] * 1000))

            if boxes is not None:
                boxes = boxes.reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                h, w, _ = im.shape
                boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

            duration = time.time() - start_time
            logger.info('[timing] {}'.format(duration))
            if boxes is not None:
                num = 0
                for i in range(len(boxes)):
                    # to avoid submitting errors
                    box = boxes[i]
                    if np.linalg.norm(box[0] - box[1]) < 2 or np.linalg.norm(
                            box[3] - box[0]) < 2:
                        continue
                    cv2.polylines(im[:, :, ::-1],
                                  [box.astype(np.int32).reshape((-1, 1, 2))],
                                  True,
                                  color=(255, 255, 0),
                                  thickness=2)
                    detected_rects[num] = [
                        box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0],
                        box[2, 1], box[3, 0], box[3, 1]
                    ]
                    num += 1

                # img_text_crop.work(im_src, res_file, output_dir)

            img_path = os.path.join(output_dir, os.path.basename(img_path))
            cv2.imwrite(img_path, im[:, :, ::-1])
            # show_score_geo(im_resized, kernels, im)

    img_items = {}
    img_items_normal = {}
    img_text_crop.work_for_array(im_src, detected_rects, img_items,
                                 img_items_normal)

    ocr_items = {}
    with sess3.as_default():
        with sess3.graph.as_default():
            sess3.run(tf.global_variables_initializer())
            min_value = 5000
            price_index = 0
            for key in img_items_normal.keys():
                image = img_items_normal[key]
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, (output_width3, output_height3))
                height, width = image.shape[0], image.shape[1]
                image = np.reshape(image, [height, width, 3])
                image = sess3.run(processed_image3,
                                  feed_dict={tensor_item3: image})
                logi = sess3.run(predictions3,
                                 feed_dict={tensor_input3: [image]})[0]
                prediction = int(np.argmax(logi))
                if prediction < 4:
                    if prediction != 2:
                        ocr_items[prediction] = img_items[key]
                    else:
                        x = int(detected_rects[key][0])
                        if x < min_value:
                            min_value = x
                            price_index = key
            ocr_items[2] = img_items[price_index]

    with sess4.as_default():
        with sess4.graph.as_default():
            spliter4 = ocr_code_utils.OcrSpliter()
            image = spliter4.ocr_split('', ocr_items[0])
            # image = cv2.imread(path, 0)
            imgw = np.asarray(image, dtype=np.float32)
            image = (imgw - np.mean(imgw)) / np.std(imgw)
            image = np.reshape(image, [image.shape[0], image.shape[1], 1])
            feed = {image_inputs4: [image]}
            predictions_result = sess4.run(decoded4[0], feed_dict=feed)
            predictions_result = sparse_tuple_to(predictions_result,
                                                 max_sequence_length_code)
            predictions_result = predictions_result[0]
            predictions_result = [
                labels_code[label] for label in predictions_result
            ]
            pred = ''
            for ch in predictions_result:
                if ch not in ['<BLANK>']:
                    pred += ch
            print('invoice_code:', pred)

    with sess5.as_default():
        with sess5.graph.as_default():
            spliter5 = ocr_number_utils.OcrSpliter()
            image = spliter5.ocr_split('', ocr_items[1])
            # image = cv2.imread(path, 0)
            imgw = np.asarray(image, dtype=np.float32)
            image = (imgw - np.mean(imgw)) / np.std(imgw)
            image = np.reshape(image, [image.shape[0], image.shape[1], 1])
            feed = {image_inputs5: [image]}
            predictions_result = sess5.run(decoded5[0], feed_dict=feed)
            predictions_result = sparse_tuple_to(predictions_result,
                                                 max_sequence_length_number)
            predictions_result = predictions_result[0]
            predictions_result = [
                labels_number[label] for label in predictions_result
            ]
            pred = ''
            for ch in predictions_result:
                if ch not in ['<BLANK>']:
                    pred += ch
            print('invoice_number:', pred)

    with sess6.as_default():
        with sess6.graph.as_default():
            spliter6 = ocr_price_utils.OcrSpliter()
            image = spliter6.ocr_split('', ocr_items[2])
            # image = cv2.imread(path, 0)
            imgw = np.asarray(image, dtype=np.float32)
            image = (imgw - np.mean(imgw)) / np.std(imgw)
            image = np.reshape(image, [image.shape[0], image.shape[1], 1])
            feed = {image_inputs6: [image]}
            predictions_result = sess6.run(decoded6[0], feed_dict=feed)
            predictions_result = sparse_tuple_to(predictions_result,
                                                 max_sequence_length_price)
            predictions_result = predictions_result[0]
            predictions_result = [
                labels_price[label] for label in predictions_result
            ]
            pred = ''
            for ch in predictions_result:
                if ch not in ['<BLANK>']:
                    pred += ch
            pred = pred[:-2] + '.' + pred[-2:]
            print('invoice_price:', pred)

    with sess7.as_default():
        with sess7.graph.as_default():
            spliter7 = ocr_date_utils.OcrSpliter()
            image = spliter7.ocr_split('', ocr_items[3])
            # image = cv2.imread(path, 0)
            imgw = np.asarray(image, dtype=np.float32)
            image = (imgw - np.mean(imgw)) / np.std(imgw)
            image = np.reshape(image, [image.shape[0], image.shape[1], 1])
            feed = {image_inputs7: [image]}
            predictions_result = sess7.run(decoded7[0], feed_dict=feed)
            predictions_result = sparse_tuple_to(predictions_result,
                                                 max_sequence_length_date)
            predictions_result = predictions_result[0]
            predictions_result = [
                labels_date[label] for label in predictions_result
            ]
            pred = ''
            for ch in predictions_result:
                if ch not in ['<BLANK>']:
                    pred += ch
            print('invoice_date:', pred)

    # close the sessions
    sess1.close()
    sess2.close()
    sess3.close()
    sess4.close()
    sess5.close()
    sess6.close()
    sess7.close()
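sparse_tuple_to is not shown; a common conversion from the SparseTensorValue returned by tf.nn.ctc_greedy_decoder into dense per-sample label lists is sketched below (the fill value is an assumption and should index the '<BLANK>' entry of the label tables):

def sparse_tuple_to(sparse_value, max_sequence_length, blank_index=0):
    # blank_index is assumed to point at the '<BLANK>' label
    batch = int(sparse_value.dense_shape[0])
    dense = np.full((batch, max_sequence_length), blank_index, dtype=np.int64)
    for (row, col), val in zip(sparse_value.indices, sparse_value.values):
        if col < max_sequence_length:
            dense[row, col] = val
    return dense.tolist()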