Beispiel #1
0
def get_predictor(checkpoint_path):
    """Build the inference graph in the default graph and restore its weights.

    Args:
        checkpoint_path: Directory holding the checkpoint state file and the
            checkpoint it points to.

    Returns:
        A tuple ``(sess, f_score, f_geometry, input_images, global_step)``.
        ``sess`` is an open session that the caller is responsible for
        closing; ``f_score`` and ``f_geometry`` are the two outputs of
        ``model.model`` and ``input_images`` is the placeholder to feed.

    Raises:
        IOError: If no checkpoint state can be found in ``checkpoint_path``.
    """
    logger.info('loading model')
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    f_score, f_geometry = model.model(input_images, is_training=False)

    # Restore through the moving-average name mapping rather than the raw
    # variable names.
    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    # Resolve the checkpoint before opening a session so that a missing
    # checkpoint fails with a clear message instead of an AttributeError on
    # None, and without leaving a session behind.
    ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise IOError('No checkpoint found in {}'.format(checkpoint_path))
    # Only the basename is trusted so the checkpoint directory can be moved.
    model_path = os.path.join(
        checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
    logger.info('Restore from {}'.format(model_path))

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        saver.restore(sess, model_path)
    except Exception:
        # Do not leak the session when the restore fails.
        sess.close()
        raise
    return sess, f_score, f_geometry, input_images, global_step
    def east_detect(self):
        """Run text detection on every image returned by ``self.get_images()``.

        A fresh graph and session are created, the model is restored from
        ``self.checkpoint_dir``, and each image is resized, fed through the
        network and post-processed with ``self.detect``. For an image with
        detections, ``<self.output_dir>/<name>.txt`` is written with one line
        of eight comma-separated integer coordinates per kept box, and
        ``self.cut_roi`` is called for each kept box. Boxes with a first or
        last edge shorter than 5 pixels are dropped. Images that cannot be
        read are reported and skipped.

        Raises:
            IOError: If no checkpoint state is found in
                ``self.checkpoint_dir``.
            OSError: If the output directory cannot be created.
        """
        import errno

        try:
            os.makedirs(self.output_dir)
        except OSError as e:
            # An already existing output directory is fine; re-raise the rest.
            if e.errno != errno.EEXIST:
                raise

        with tf.Graph().as_default():
            input_images = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 3],
                                          name='input_images')
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)

            f_score, f_geometry = model.model(input_images, is_training=False)

            variable_averages = tf.train.ExponentialMovingAverage(
                0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())

            # Create the session; the ``with`` block closes it on exit.
            with tf.Session(config=tf.ConfigProto(
                    allow_soft_placement=True)) as sess:
                # Initialise the model parameters from the checkpoint file.
                ckpt_state = tf.train.get_checkpoint_state(self.checkpoint_dir)
                if ckpt_state is None or not ckpt_state.model_checkpoint_path:
                    raise IOError('No checkpoint found in {}'.format(
                        self.checkpoint_dir))
                model_path = os.path.join(
                    self.checkpoint_dir,
                    os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)

                for im_fn in self.get_images():
                    raw = cv2.imread(im_fn)
                    if raw is None:
                        # imread signals an unreadable file with None; skip
                        # it instead of aborting the whole batch.
                        print('Could not read image {}, skipping'.format(
                            im_fn))
                        continue
                    # Reverse the channel axis of the loaded image.
                    im = raw[:, :, ::-1]
                    start_time = time.time()
                    im_resized, (ratio_h, ratio_w) = self.resize_image(im)

                    timer = {'net': 0, 'restore': 0, 'nms': 0}
                    start = time.time()
                    score, geometry = sess.run(
                        [f_score, f_geometry],
                        feed_dict={input_images: [im_resized]})
                    timer['net'] = time.time() - start

                    boxes, timer = self.detect(score_map=score,
                                               geo_map=geometry,
                                               timer=timer)
                    print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.
                          format(im_fn, timer['net'] * 1000,
                                 timer['restore'] * 1000, timer['nms'] * 1000))

                    if boxes is not None:
                        # Keep the 8 corner coordinates and map them back to
                        # the scale of the original image.
                        boxes = boxes[:, :8].reshape((-1, 4, 2))
                        boxes[:, :, 0] /= ratio_w
                        boxes[:, :, 1] /= ratio_h

                    duration = time.time() - start_time
                    print('[timing] {}'.format(duration))

                    if boxes is None:
                        continue

                    # Save the detections to a text file named after the
                    # part of the image file name before its first dot.
                    res_file = os.path.join(
                        self.output_dir, '{}.txt'.format(
                            os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for i, box in enumerate(boxes):
                            # to avoid submitting errors
                            box = self.sort_poly(box.astype(np.int32))
                            too_small = (
                                np.linalg.norm(box[0] - box[1]) < 5
                                or np.linalg.norm(box[3] - box[0]) < 5)
                            if too_small:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            # ``i`` is the index among all boxes, including
                            # the ones skipped above.
                            self.cut_roi(im[:, :, ::-1], box, im_fn, i)
import tensorflow as tf
from east import model
from east.icdar import restore_rectangle
from east import lanms
from east.eval import resize_image, sort_poly, detect

# Directory holding the checkpoint that is restored at import time.
checkpoint_path = "east_icdar2015_resnet_v1_50_rbox/"

# Build the inference graph once, in the default graph, at module import.
input_images = tf.placeholder(tf.float32,
                              shape=[None, None, None, 3],
                              name='input_images')

global_step = tf.get_variable('global_step', [],
                              initializer=tf.constant_initializer(0),
                              trainable=False)
f_score, f_geometry = model.model(input_images, is_training=False)
# Restore through the moving-average name mapping rather than the raw
# variable names.
variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
saver = tf.train.Saver(variable_averages.variables_to_restore())

# Cap this process at 65% of the GPU memory.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.per_process_gpu_memory_fraction = 0.65
sess = tf.Session(config=config)

ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
if ckpt_state is None or not ckpt_state.model_checkpoint_path:
    # Fail with a clear message instead of an AttributeError on None, and
    # release the session that was just opened.
    sess.close()
    raise IOError('No checkpoint found in {}'.format(checkpoint_path))
# Only the basename is trusted so the checkpoint directory can be moved.
model_path = os.path.join(checkpoint_path,
                          os.path.basename(ckpt_state.model_checkpoint_path))
logger.info('Restore from {}'.format(model_path))
saver.restore(sess, model_path)


@functools.lru_cache(maxsize=1)
Beispiel #4
0
def main(argv=None):
    """Detect text boxes in the input images, then recognise the demo image.

    Stage 1 restores the detection model from ``FLAGS.checkpoint_path`` and,
    for every image from ``get_images()``, writes the kept boxes to
    ``<FLAGS.output_dir>/<name>.txt`` (eight comma-separated integers per
    line). Unless ``FLAGS.no_write_images`` is set it also writes the image
    with the boxes drawn on it. Unreadable images are reported and skipped.

    Stage 2 loads the CRNN weights from ``./crnn/crnn.pth``, reads the boxes
    from ``./output/img_demo.txt``, crops each one out of
    ``./test_img/img_demo.jpg``, saves the crop under
    ``./output/word_area_img/`` and writes the recognised text, one line per
    box, to ``./output/output.txt``.

    Args:
        argv: Unused.

    Raises:
        IOError: If no detection checkpoint is found or the demo image
            cannot be read.
    """
    import errno
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # An already existing output directory is fine; re-raise the rest.
        if e.errno != errno.EEXIST:
            raise

    # ---- Stage 1: text detection -------------------------------------------
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None or not ckpt_state.model_checkpoint_path:
                raise IOError('No checkpoint found in {}'.format(
                    FLAGS.checkpoint_path))
            east_model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(east_model_path))
            saver.restore(sess, east_model_path)

            for im_fn in get_images():
                raw = cv2.imread(im_fn)
                if raw is None:
                    # imread signals an unreadable file with None; skip it
                    # instead of aborting the whole batch.
                    print('Could not read image {}, skipping'.format(im_fn))
                    continue
                # Reverse the channel axis of the loaded image.
                im = raw[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    # Keep the 8 corner coordinates and map them back to the
                    # scale of the original image.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            too_small = (
                                np.linalg.norm(box[0] - box[1]) < 5
                                or np.linalg.norm(box[3] - box[0]) < 5)
                            if too_small:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

    # ---- Stage 2: text recognition -----------------------------------------
    # Runs after the TensorFlow session has been closed; it does not use it.
    crnn_model_path = './crnn/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model_crnn = crnn.CRNN(32, 1, 37, 256)
    print('loading pretrained model from %s' % crnn_model_path)
    model_crnn.load_state_dict(torch.load(crnn_model_path))
    # Switch to evaluation mode once instead of once per text box.
    model_crnn.eval()

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    seq = re.compile(",")

    demo_image_path = './test_img/img_demo.jpg'
    img = cv2.imread(demo_image_path)
    if img is None:
        raise IOError('Could not read image {}'.format(demo_image_path))

    with open('./output/img_demo.txt') as f, \
            open('./output/output.txt', 'w') as fp:
        line_id = 0
        for line in f:
            fields = seq.split(line.strip())
            if fields == ['']:
                # Blank line (e.g. from doubled line endings): nothing to
                # parse, and it must not consume a crop number.
                continue
            line_id += 1
            coords = [int(value) for value in fields[:8]]
            # OpenCV contour functions need 32-bit points; the platform
            # default integer may be 64-bit.
            cnt = np.array(coords, dtype=np.int32).reshape((4, 2))
            rect = cv2.minAreaRect(cnt)
            # np.int0 was removed in NumPy 2.0; np.intp is the same type.
            box = cv2.boxPoints(rect).astype(np.intp)
            # Axis-aligned bounding region of the rotated rectangle.
            roi_img = img[min(box[:, 1]):max(box[:, 1]),
                          min(box[:, 0]):max(box[:, 0])]
            crop_path = ('./output/word_area_img/word_area_img' +
                         str(line_id) + '.png')
            cv2.imwrite(crop_path, roi_img)

            # Re-read the saved crop as a grayscale PIL image for the CRNN.
            image = Image.open(crop_path).convert('L')
            image = transformer(image)
            image = image.view(1, *image.size())
            image = Variable(image)
            preds = model_crnn(image)
            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)

            preds_size = Variable(torch.IntTensor([preds.size(0)]))
            raw_pred = converter.decode(preds.data,
                                        preds_size.data,
                                        raw=True)
            sim_pred = converter.decode(preds.data,
                                        preds_size.data,
                                        raw=False)
            print('%-20s => %-20s' % (raw_pred, sim_pred))
            fp.write(sim_pred)
            fp.write('\n')
Beispiel #5
0
def process_images(dir_name, split_names, images_indices, checkpoint_path,
                   crnn_path):
    """Detect and recognise the words in a set of images.

    Two separate graphs and sessions are used: one for the detection model,
    restored from ``checkpoint_path``, and one for the recognition model,
    restored from ``crnn_path``. For each file from ``generate_filename`` the
    detected boxes are cropped, ordered with ``sort_by_pos``, recognised one
    by one, and the words and their boxes are passed to ``write_to_file``.
    Images that cannot be read are reported and skipped. Both sessions are
    closed before returning.

    Args:
        dir_name: Forwarded to ``generate_filename``.
        split_names: Forwarded to ``generate_filename``.
        images_indices: Forwarded to ``generate_filename``.
        checkpoint_path: Directory holding the detection checkpoint.
        crnn_path: Save path of the recognition checkpoint.

    Raises:
        IOError: If no detection checkpoint is found in ``checkpoint_path``.
    """
    # Graph 1: text detection.
    east_graph = tf.Graph()
    with east_graph.as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        east_saver = tf.train.Saver(variable_averages.variables_to_restore())

    # Graph 2: word recognition on one fixed-size crop at a time.
    crnn_graph = tf.Graph()
    with crnn_graph.as_default():
        cropped_image = tf.placeholder(dtype=tf.float32,
                                       shape=[1, 32, 100, 3],
                                       name='cropped_image')
        word_recog = ShadowNet(phase='Test',
                               hidden_nums=256,
                               layers_nums=2,
                               seq_length=25,
                               num_classes=37)
        with tf.variable_scope('shadow'):
            recog = word_recog.build_shadownet(inputdata=cropped_image)
        decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=recog,
                                                   sequence_length=25 *
                                                   np.ones(1),
                                                   merge_repeated=False)
        decoder = data_utils.TextFeatureIO()
        crnn_saver = tf.train.Saver()

    # Resolve the detection checkpoint before opening any session so that a
    # missing checkpoint fails clearly instead of with an AttributeError.
    ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise IOError('No checkpoint found in {}'.format(checkpoint_path))
    model_path = os.path.join(
        checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))

    east_session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True),
                              graph=east_graph)
    crnn_session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True),
                              graph=crnn_graph)
    try:
        # Load both checkpoints.
        with east_graph.as_default():
            with east_session.as_default():
                print('Restore from {}'.format(model_path))
                east_saver.restore(east_session, model_path)

        with crnn_graph.as_default():
            with crnn_session.as_default():
                crnn_saver.restore(crnn_session, save_path=crnn_path)

        for image_name in generate_filename(dir_name, split_names,
                                            images_indices):
            print('processing {}'.format(image_name))
            box_list = []
            smaller_image_list = []
            centers = []
            words_list = []
            final_boxes = []
            # NOTE(review): everything after the first dot is dropped, so a
            # dot in a directory name also truncates the path - confirm this
            # is what write_to_file expects.
            file_name = image_name.split('.')[0]
            file_name = file_name + '.txt'
            print(image_name)

            raw = cv2.imread(image_name)
            if raw is None:
                # imread signals an unreadable file with None; skip it
                # instead of aborting the whole run.
                print('Could not read image {}, skipping'.format(image_name))
                continue
            # Reverse the channel axis of the loaded image.
            im = raw[:, :, ::-1]
            im_resized, (ratio_h, ratio_w) = resize_image(im)

            with east_session.as_default():
                with east_graph.as_default():
                    score, geometry = east_session.run(
                        [f_score, f_geometry],
                        feed_dict={input_images: [im_resized]})
                    boxes = detect(score, geometry)
                    if boxes is None:
                        # No detections: iterate over nothing rather than
                        # over None.
                        boxes = []
                    else:
                        # Keep the 8 corner coordinates and map them back
                        # to the scale of the original image.
                        boxes = boxes[:, :8].reshape((-1, 4, 2))
                        boxes[:, :, 0] /= ratio_w
                        boxes[:, :, 1] /= ratio_h
                    for box in boxes:
                        box = sort_poly(box.astype(np.int32))
                        too_small = (
                            np.linalg.norm(box[0] - box[1]) < 5
                            or np.linalg.norm(box[3] - box[0]) < 5)
                        if too_small:
                            continue
                        x_range, y_range = convert_to_rect(box)
                        smaller_image = im[y_range, x_range, :]
                        smaller_image_list.append(smaller_image)
                        box_list.append(box)
                        centers.append(((y_range.start + y_range.stop) / 2.0,
                                        (x_range.start + x_range.stop) / 2.0))
            print('East done one the image {}'.format(image_name))

            smaller_images_sorted, box_list = sort_by_pos(smaller_image_list,
                                                          box_list, centers,
                                                          im.shape)
            with crnn_session.as_default():
                with crnn_graph.as_default():
                    for box, smaller_image in zip(box_list,
                                                  smaller_images_sorted):
                        # Resize to the placeholder's 100x32 input and flip
                        # the channel order back.
                        smaller_im = cv2.resize(smaller_image, (100, 32))
                        smaller_im = smaller_im[:, :, ::-1]
                        preds = crnn_session.run(
                            decodes, feed_dict={cropped_image: [smaller_im]})
                        preds = decoder.writer.sparse_tensor_to_str(preds[0])
                        if preds[0] is not None:
                            words_list.append(preds[0])
                            final_boxes.append(box)
            print('The words detected are {}'.format(', '.join(words_list)))
            write_to_file(file_name, words_list, final_boxes)
    finally:
        # Release both sessions even when processing fails part-way.
        crnn_session.close()
        east_session.close()