Beispiel #1
0
def generate_book_page_imgs(obj_num=10,
                            text_type="horizontal",
                            page_shape=None):
    """Generate book-page images on disk together with a tags file.

    Every page is created by ``create_book_page`` and saved as
    ``book_page_<i>.jpg``. For each image one tab-separated line
    (image name, JSON-encoded tags) is written to the tags file, which is
    opened in write mode and therefore overwritten on every call.

    Args:
        obj_num: Number of pages to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        page_shape: Shape handed to ``create_book_page``. When ``None`` a new
            random shape is drawn for every page.

    Raises:
        ValueError: If the normalized text type is neither "h" nor "v".
    """
    text_type = check_text_type(text_type)

    if text_type == "h":
        book_page_imgs_dir, book_page_tags_file = BOOK_PAGE_IMGS_H, BOOK_PAGE_TAGS_FILE_H
    elif text_type == "v":
        book_page_imgs_dir, book_page_tags_file = BOOK_PAGE_IMGS_V, BOOK_PAGE_TAGS_FILE_V
    else:
        # Fail explicitly instead of hitting an unbound local further down.
        raise ValueError('text_type should be horizontal or vertical')

    check_or_makedirs(book_page_imgs_dir)

    _shape = page_shape
    with open(book_page_tags_file, "w", encoding="utf-8") as fw:
        for i in range(obj_num):
            # Only randomize when the caller did not pin a shape; the two
            # orientations swap the ranges of the two dimensions.
            if page_shape is None:
                if text_type == "h":
                    _shape = (random.randint(480, 720), random.randint(640, 960))
                else:
                    _shape = (random.randint(640, 960), random.randint(480, 720))

            PIL_page, text_bbox_list, split_pos_list = create_book_page(
                _shape, text_type=text_type)
            image_tags = {
                "text_bbox_list": text_bbox_list,
                "split_pos_list": split_pos_list
            }

            img_name = "book_page_%d.jpg" % i
            save_path = os.path.join(book_page_imgs_dir, img_name)
            PIL_page.save(save_path, format="jpeg")
            fw.write(img_name + "\t" + json.dumps(image_tags) + "\n")

            if i % 50 == 0:
                print("Process bar: %.2f%%" % (i * 100 / obj_num))
                sys.stdout.flush()
def generate_one_text_line_imgs(obj_num=100, text_type="horizontal", text_shape=None):
    """Write single-text-line images and their tags file to disk.

    Each image comes from ``create_one_text_line`` and is stored as
    ``text_line_<i>.jpg``; the tags file receives one tab-separated line per
    image (image name, JSON tags with the character boxes and split
    positions). The tags file is opened in write mode.

    Args:
        obj_num: Number of images to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        text_shape: Shape passed to ``create_one_text_line``; a random shape
            is drawn per image when ``None``.

    Raises:
        ValueError: If the normalized text type is neither "h" nor "v".
    """
    text_type = check_text_type(text_type)
    if text_type not in ("h", "v"):
        raise ValueError
    horizontal = text_type == "h"

    if horizontal:
        out_dir, tags_path = ONE_TEXT_LINE_IMGS_H, ONE_TEXT_LINE_TAGS_FILE_H
    else:
        out_dir, tags_path = ONE_TEXT_LINE_IMGS_V, ONE_TEXT_LINE_TAGS_FILE_V

    check_or_makedirs(out_dir)

    line_shape = text_shape
    with open(tags_path, "w", encoding="utf-8") as tags_fh:
        for idx in range(obj_num):
            if text_shape is None:
                # The vertical case swaps the ranges of the two dimensions.
                if horizontal:
                    line_shape = (random.randint(38, 72), random.randint(540, 1280))
                else:
                    line_shape = (random.randint(540, 1280), random.randint(38, 72))

            line_img, char_boxes, split_positions = create_one_text_line(
                line_shape, text_type=text_type)
            tags = {"char_and_box_list": char_boxes, "split_pos_list": split_positions}

            file_name = "text_line_%d.jpg" % idx
            line_img.save(os.path.join(out_dir, file_name), format="jpeg")
            tags_fh.write(file_name + "\t" + json.dumps(tags) + "\n")

            if idx % 50 == 0:
                print("Process bar: %.2f%%" % (idx * 100 / obj_num))
                sys.stdout.flush()
def generate_two_text_line_imgs(obj_num=100, text_type="horizontal", text_shape=None):
    """Write images for double-line split training and their tags file.

    Images are produced by ``create_two_text_line`` and saved as
    ``text_line_<i>.jpg``; the tags file gets one tab-separated line per
    image (image name, JSON tags holding the split positions).

    Args:
        obj_num: Number of images to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        text_shape: Shape passed to ``create_two_text_line``; drawn at random
            for every image when ``None``.

    Raises:
        ValueError: If the normalized text type is neither "h" nor "v".
    """
    text_type = check_text_type(text_type)

    is_horizontal = text_type == "h"
    if is_horizontal:
        imgs_dir, tags_file = TWO_TEXT_LINE_IMGS_H, TWO_TEXT_LINE_TAGS_FILE_H
    elif text_type == "v":
        imgs_dir, tags_file = TWO_TEXT_LINE_IMGS_V, TWO_TEXT_LINE_TAGS_FILE_V
    else:
        raise ValueError

    check_or_makedirs(imgs_dir)

    shape = text_shape
    with open(tags_file, "w", encoding="utf-8") as writer:
        for index in range(obj_num):
            if text_shape is None:
                # Double-line samples do not need to be very long.
                if is_horizontal:
                    shape = (random.randint(64, 108), random.randint(108, 1024))
                else:
                    shape = (random.randint(108, 1024), random.randint(64, 108))

            # Original author's note: training the double-line splitter needs
            # both double-line samples and single-line (no split) samples.
            text_img, split_positions = create_two_text_line(shape, text_type=text_type)
            tags = {"split_pos_list": split_positions}

            name = "text_line_%d.jpg" % index
            text_img.save(os.path.join(imgs_dir, name), format="jpeg")
            writer.write(name + "\t" + json.dumps(tags) + "\n")

            if index % 50 == 0:
                print("Process bar: %.2f%%" % (index * 100 / obj_num))
                sys.stdout.flush()
Beispiel #4
0
    def detect_image(self, img_path, dest_dir, background="white"):
        """Run the prediction model on one image and save the drawn result.

        The image is loaded as 8-bit grayscale and copied into the top-left
        corner of a float32 canvas whose height and width are rounded up to
        the next multiple of 32; the remaining canvas is filled with the
        background value. The boxes returned by ``self.predict_model`` are
        drawn with ``draw_boxes`` and the result is saved as JPEG under
        ``dest_dir`` using the original file name.

        Args:
            img_path: Path of the image to process. If it does not exist the
                method returns ``None`` without doing anything.
            dest_dir: Directory that receives the annotated image.
            background: Padding colour, ``"white"`` (255) or ``"black"`` (0).

        Raises:
            ValueError: If ``background`` is not one of the supported values.
        """
        if not os.path.exists(img_path):
            return

        # Validate before any filesystem or model work. Previously the
        # exception object was created but never raised, so an invalid value
        # silently left the padding area uninitialised.
        fill_values = {"white": 255, "black": 0}
        if background not in fill_values:
            raise ValueError("Optional image background: 'white', 'black'.")

        img_name = os.path.basename(img_path)
        check_or_makedirs(dest_dir)

        PIL_img = Image.open(img_path)
        if PIL_img.mode != "L":
            PIL_img = PIL_img.convert("L")
        np_img = np.array(PIL_img, dtype=np.uint8)

        h, w = np_img.shape[:2]
        # -x % 32 is the distance from x up to the next multiple of 32.
        new_h = -h % 32 + h
        new_w = -w % 32 + w
        batch_imgs = np.full(shape=(1, new_h, new_w),
                             fill_value=fill_values[background],
                             dtype=np.float32)
        batch_imgs[0, :h, :w] = np_img
        batch_imgs = np.expand_dims(batch_imgs, axis=-1)

        start = timer()  # start time of the prediction
        out_boxes, out_scores, out_classes = self.predict_model.predict(
            x=batch_imgs)
        print('Time {:.2f}s, found {} boxes in {}'.format(
            timer() - start, len(out_boxes), img_name))

        np_img_rgb = draw_boxes(np_img, out_boxes, out_scores, out_classes)
        PIL_img = Image.fromarray(np_img_rgb)
        PIL_img.save(os.path.join(dest_dir, img_name), format="jpeg")
Beispiel #5
0
def generate_mix_text_line_tfrecords(obj_num=100,
                                     text_type="horizontal",
                                     text_shape=None):
    """Generate mixed text-line images straight into TFRecord shards.

    Twenty shard files ``text_lines_<k>.tfrecords`` are opened and every
    generated example is appended to a randomly chosen shard. Each example
    stores the raw image bytes (``PIL.Image.tobytes``), the image height and
    width, and the split positions serialized as int32 bytes.

    Args:
        obj_num: Number of examples to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        text_shape: Shape passed to ``create_mix_text_line``; drawn at random
            for every example when ``None``.

    Raises:
        ValueError: If the normalized text type is neither "h" nor "v".
    """
    text_type = check_text_type(text_type)

    if text_type == "h":
        text_line_tfrecords_dir = MIX_TEXT_LINE_TFRECORDS_H
    elif text_type == "v":
        text_line_tfrecords_dir = MIX_TEXT_LINE_TFRECORDS_V
    else:
        # Fail explicitly instead of hitting an unbound local further down.
        raise ValueError('text_type should be horizontal or vertical')

    check_or_makedirs(text_line_tfrecords_dir)

    # Generated images are written directly into the tfrecords files rather
    # than being saved to disk first and read back, which would be slow.
    writers_list = \
        [tf.io.TFRecordWriter(os.path.join(text_line_tfrecords_dir, "text_lines_%d.tfrecords" % i))
         for i in range(20)]

    try:
        _shape = text_shape
        for i in range(obj_num):
            writer = random.choice(writers_list)

            if text_shape is None:
                if text_type == "h":
                    _shape = (random.randint(54, 108), random.randint(720, 1280))
                else:
                    _shape = (random.randint(720, 1280), random.randint(54, 108))

            PIL_text, _, split_pos_list = create_mix_text_line(_shape,
                                                               text_type=text_type)

            bytes_image = PIL_text.tobytes()  # raw pixel bytes of the image
            split_positions = np.array(split_pos_list, dtype=np.int32).tobytes()

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'bytes_image':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[bytes_image])),
                    'img_height':
                    tf.train.Feature(int64_list=tf.train.Int64List(
                        value=[PIL_text.height])),
                    'img_width':
                    tf.train.Feature(int64_list=tf.train.Int64List(
                        value=[PIL_text.width])),
                    'split_positions':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[split_positions]))
                }))
            writer.write(example.SerializeToString())

            if i % 50 == 0:
                print("Process bar: %.2f%%" % (i * 100 / obj_num))
                sys.stdout.flush()
    finally:
        # Close every shard even when generation fails part-way, so records
        # already written are flushed and file handles are released.
        for writer in writers_list:
            writer.close()
Beispiel #6
0
def main(img_path,
         dest_dir,
         segment_task="book_page",
         text_type="horizontal",
         model_struc="densenet_gru",
         weights_path=""):
    """Predict split lines for images and save the visualizations as JPEG.

    Args:
        img_path: Source handed to ``load_images``, which yields
            ``(image array, image name)`` pairs.
        dest_dir: Output directory; each result is saved as ``<stem>.jpg``.
        segment_task: Segmentation task name used to look up the task
            parameters and paths and to build the model.
        text_type: Text orientation passed to the model and to the image
            preprocessing; its lower-cased first letter decides whether the
            predictions are rotated back.
        model_struc: Model structure name.
        weights_path: Weights file. If it does not exist, the file
            ``<model_struc>_ctpn_finished.h5`` in the task checkpoint
            directory is used instead.

    Raises:
        AssertionError: If neither weights file exists.
    """
    check_or_makedirs(dest_dir)
    K.set_learning_phase(False)
    _, fixed_shape, feat_stride = get_segment_task_params(segment_task)
    _, ckpt_dir, _ = get_segment_task_path(segment_task)
    if not os.path.exists(weights_path):
        weights_path = os.path.join(ckpt_dir,
                                    model_struc + "_ctpn_finished.h5")
        assert os.path.exists(weights_path), \
            "weights file not found: %s" % weights_path

    # Load the model.
    segment_model = work_net(stage="predict",
                             segment_task=segment_task,
                             text_type=text_type,
                             model_struc=model_struc)
    segment_model.load_weights(weights_path, by_name=True)
    print("\nLoad model weights from %s\n" % weights_path)

    # Normalize once into a separate name. Overwriting ``text_type`` inside
    # the loop made every image after the first receive the one-letter form
    # in ``adjust_img_to_fixed_shape`` instead of the caller's value.
    short_text_type = text_type[0].lower()
    needs_angle_restore = (segment_task, short_text_type) in (
        ("book_page", "h"), ("double_line", "h"),
        ("text_line", "v"), ("mix_line", "v"))

    for count, (raw_np_img, img_name) in enumerate(load_images(img_path), start=1):
        np_img, _, scale_ratio = adjust_img_to_fixed_shape(
            raw_np_img,
            fixed_shape=fixed_shape,
            feat_stride=feat_stride,
            segment_task=segment_task,
            text_type=text_type)
        batch_images = np_img[np.newaxis, :, :, :]

        split_positions, scores = segment_model.predict(x=batch_images)  # model prediction

        if needs_angle_restore:
            _, split_positions = restore_original_angle(
                np_img=None, pred_split_positions=split_positions)

        # Map the predictions back to the scale of the raw image.
        split_positions = split_positions / scale_ratio
        image = visualize.draw_split_lines(raw_np_img, split_positions,
                                           scores)  # visualization

        PIL_img = Image.fromarray(image)
        dest_path = os.path.join(dest_dir,
                                 os.path.splitext(img_name)[0] + ".jpg")
        PIL_img.save(dest_path, format="jpeg")
        print(count, "Finished: " + dest_path)
Beispiel #7
0
def main(img_path,
         dest_dir,
         text_type="vertical",
         weights_path=TRAIN_FINISHED_WEIGHTS):
    """Detect text lines in images with the CTPN model and save the drawings.

    Args:
        img_path: Source handed to ``load_images``, which yields
            ``(image array, image name)`` pairs.
        dest_dir: Output directory; each result is saved as ``<stem>.jpg``.
        text_type: Text orientation; must occur as a substring of
            ``weights_path``.
        weights_path: Path of the weights file to load.

    Raises:
        AssertionError: If the weights file does not exist or its path does
            not contain ``text_type``.
    """
    check_or_makedirs(dest_dir)
    K.set_learning_phase(False)
    assert os.path.exists(weights_path) and text_type in weights_path

    # Build the prediction network and restore its weights.
    ctpn_model = work_net("predict",
                          batch_size=1,
                          text_type=text_type,
                          model_struc="densenet_gru")
    ctpn_model.load_weights(weights_path, by_name=True)
    print("\nLoad model weights from %s\n" % weights_path)

    for count, (raw_img, img_name) in enumerate(load_images(img_path), start=1):
        model_img = adjust_img_into_model(raw_img,
                                          text_type=text_type,
                                          fixed_size=BOOK_PAGE_FIXED_SIZE)

        # Model prediction on a batch of one image.
        pred_boxes, pred_scores = ctpn_model.predict(x=model_img[np.newaxis, :, :, :])

        boxes = np_utils.remove_pad(pred_boxes[0])
        scores = np_utils.remove_pad(pred_scores[0])[:, 0]

        # Text-line detector: merges the predicted boxes into text lines.
        detector = TextDetector(DETECT_MODE='H')
        text_lines = detector.detect(boxes, scores, model_img.shape[:2])

        # Visualization.
        drawn_img = visualize.draw_text_lines(model_img, text_lines)
        if text_type.lower() in ("v", "vertical"):
            drawn_img = restore_text_horizontal_to_vertical(drawn_img)

        stem = os.path.splitext(img_name)[0]
        dest_path = os.path.join(dest_dir, stem + ".jpg")
        Image.fromarray(drawn_img).save(dest_path, format="jpeg")
        print(count, "Finished: " + dest_path)
def check_tags(tags_file, segment_task, text_type):
    """Draw the split lines of every annotation line for visual inspection.

    Each line of ``tags_file`` is decoded with ``get_image_and_split_pos``,
    optionally rotated, annotated with ``draw_split_lines`` and saved as
    ``<index>.jpg`` in the ``samples`` directory under
    ``SEGMENT_BOOK_PAGE_ROOT_DIR``.

    Args:
        tags_file: Path of the UTF-8 annotation file, one sample per line.
        segment_task: Segmentation task name; together with the text type it
            decides whether the sample is rotated by 90 degrees.
        text_type: Text orientation; only its lower-cased first letter is
            used.
    """
    with open(tags_file, "r", encoding="utf8") as fr:
        lines = [line.strip() for line in fr]

    save_path = os.path.join(SEGMENT_BOOK_PAGE_ROOT_DIR, "samples")
    check_or_makedirs(save_path)

    # Loop-invariant: computed once, without overwriting the parameter.
    short_text_type = text_type[0].lower()
    needs_rotation = (segment_task, short_text_type) in (
        ("book_page", "h"), ("double_line", "h"),
        ("text_line", "v"), ("mix_line", "v"))

    for i, line in enumerate(lines):
        # NOTE(review): the task is hard-coded to "book_page" here although
        # the function receives ``segment_task`` -- confirm this is intended.
        np_img, split_pos = get_image_and_split_pos(line, segment_task="book_page")

        if needs_rotation:
            np_img, split_pos = rotate_90_degrees(np_img, split_pos)

        np_img = draw_split_lines(np_img, split_pos)
        PIL_img = Image.fromarray(np_img)
        PIL_img.save(os.path.join(save_path, str(i) + ".jpg"))
def get_callbacks(model_struc="densenet_gru", text_type="horizontal"):
    """Build the training callbacks for the CTPN model.

    Args:
        model_struc: Model structure name, used in the checkpoint file name.
        text_type: Text orientation, used in the checkpoint file name.

    Returns:
        A list ``[checkpoint, lr_reducer, tensorboard]``.
    """
    check_or_makedirs(dir_name=CTPN_CKPT_DIR)
    # "{epoch:04d}" is a placeholder for the checkpoint callback and must
    # stay literal in the file name.
    ckpt_name = model_struc + "_" + text_type + "_ctpn_{epoch:04d}.h5"
    ckpt_callback = ModelCheckpoint(
        filepath=os.path.join(CTPN_CKPT_DIR, ckpt_name),
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )

    # The reducer watches the training loss, the checkpoint the validation loss.
    plateau_callback = ReduceLROnPlateau(
        monitor='loss',
        factor=0.1,
        cooldown=0,
        patience=10,
        min_lr=1e-4,
    )

    check_or_makedirs(CTPN_LOGS_DIR)
    tensorboard_callback = TensorBoard(log_dir=CTPN_LOGS_DIR)

    return [ckpt_callback, plateau_callback, tensorboard_callback]
Beispiel #10
0
def train(num_epochs,
          start_epoch=0,
          model_type="horizontal",
          model_struc="resnet_lstm"):
    """Train the CRNN model, optionally resuming from a saved epoch.

    Args:
        num_epochs: Number of epochs to run in this call.
        start_epoch: Epoch to resume from; when greater than 0, weights are
            loaded from the checkpoint directory before training.
        model_type: Model/text orientation; used for the model, the data
            generators and the checkpoint file names.
        model_struc: Model structure name.
    """
    backend.set_learning_phase(True)

    model = CRNN(model_type=model_type, model_struc=model_struc).model_for_training()
    # The loss entry simply returns the model output named "ctc_loss".
    model.compile(
        optimizer=optimizers.Adagrad(learning_rate=0.01),
        loss={"ctc_loss": lambda y_true, out_loss: out_loss},
    )

    name_stem = model_type + "_" + model_struc

    if start_epoch > 0:
        # NOTE(review): this path stops after the epoch number, while the
        # checkpoints below are saved with a "{val_loss:.2f}.tf" suffix --
        # confirm that load_weights resolves such a prefix.
        resume_from = os.path.join(
            CRNN_CKPT_DIR, name_stem + "_crnn_weights_%05d_" % start_epoch)
        model.load_weights(filepath=resume_from)

    check_or_makedirs(CRNN_CKPT_DIR)
    save_pattern = os.path.join(
        CRNN_CKPT_DIR,
        name_stem + "_crnn_weights_{epoch:05d}_{val_loss:.2f}.tf")
    ckpt_callback = callbacks.ModelCheckpoint(
        filepath=save_pattern,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode="min",
    )

    train_batches = create_text_lines_batch(type=model_type,
                                            batch_size=BATCH_SIZE_TEXT_LINE)
    val_batches = load_text_lines_batch(type=model_type,
                                        batch_size=BATCH_SIZE_TEXT_LINE)

    model.fit_generator(
        generator=train_batches,
        steps_per_epoch=100,
        epochs=start_epoch + num_epochs,
        verbose=1,
        callbacks=[ckpt_callback],
        validation_data=val_batches,
        validation_steps=50,
        max_queue_size=50,
        workers=2,
        use_multiprocessing=True,
        initial_epoch=start_epoch,
    )
def convert_annotation(img_sources=None, tfrecords_dir=None, dest_file=None):
    """Write a combined annotation file from tags files or a tfrecords dir.

    Exactly one of ``img_sources`` and ``tfrecords_dir`` must be given.

    Args:
        img_sources: Iterable of ``(src_file, root_dir)`` pairs. Every
            non-blank line of ``src_file`` must be a tab-separated
            ``image name / tags`` pair; the image name is joined onto
            ``root_dir`` and written with the tags unchanged.
        tfrecords_dir: Directory whose ``*.tfrecords`` files are listed, one
            path per output line.
        dest_file: Path of the annotation file to write (overwritten).

    Raises:
        AssertionError: If not exactly one source is given, or if
            ``tfrecords_dir`` does not exist.
        ValueError: If a non-blank tags line contains no tab separator.
    """
    assert [img_sources, tfrecords_dir].count(None) == 1

    check_or_makedirs(os.path.dirname(dest_file))
    with open(dest_file, "w", encoding="utf-8") as fw:
        if img_sources is not None:
            for src_file, root_dir in img_sources:
                with open(src_file, "r", encoding="utf-8") as fr:
                    for line in fr:
                        line = line.strip()
                        if not line:
                            # A blank (e.g. trailing) line carries no sample.
                            continue
                        # Split on the first tab only; the rest is the tags.
                        img_name, tags_str = line.split("\t", 1)
                        img_path = os.path.join(root_dir, img_name)
                        fw.write(img_path + "\t" + tags_str + "\n")

        elif tfrecords_dir is not None:
            assert os.path.exists(tfrecords_dir)
            for file in os.listdir(tfrecords_dir):
                if file.endswith(".tfrecords"):
                    file_path = os.path.join(tfrecords_dir, file)
                    fw.write(file_path + "\n")
def generate_book_page_imgs_with_img(obj_num=10,
                                     text_type="horizontal",
                                     init_num=0,
                                     page_shape=None):
    """Generate book-page images via ``create_book_page_with_img``.

    Pages are saved as ``book_page_<i>.jpg`` with ``i`` running from
    ``init_num`` to ``init_num + obj_num - 1``; one tab-separated line
    (image name, JSON tags) per page is written to the tags file.

    Args:
        obj_num: Number of pages to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        init_num: First index used in the image file names.
        page_shape: Shape passed to ``create_book_page_with_img``. When
            ``None`` a shape is picked from ``BOOK_PAGE_SHAPE_LIST`` for
            every page.

    Raises:
        ValueError: If the normalized text type is neither "h" nor "v".
    """
    text_type = check_text_type(text_type)

    if text_type == "h":
        book_page_imgs_dir, book_page_tags_file = BOOK_PAGE_IMGS_H, BOOK_PAGE_TAGS_FILE_H
    elif text_type == "v":
        book_page_imgs_dir, book_page_tags_file = BOOK_PAGE_IMGS_V, BOOK_PAGE_TAGS_FILE_V
    else:
        raise ValueError('text_type should be horizontal or vertical')

    check_or_makedirs(book_page_imgs_dir)

    _shape = page_shape

    # NOTE(review): the tags file is opened in write mode, so a run with
    # init_num > 0 replaces the tags written by earlier runs -- confirm
    # whether appending is intended.
    with open(book_page_tags_file, "w", encoding="utf-8") as fw:
        for i in range(init_num, init_num + obj_num):
            if page_shape is None:
                _shape = random.choice(BOOK_PAGE_SHAPE_LIST)

            PIL_page, text_bbox_list, split_pos_list = create_book_page_with_img(
                _shape, text_type=text_type)
            image_tags = {
                "text_bbox_list": text_bbox_list,
                "split_pos_list": split_pos_list
            }

            img_name = "book_page_%d.jpg" % i
            save_path = os.path.join(book_page_imgs_dir, img_name)
            PIL_page.save(save_path, format="jpeg")
            fw.write(img_name + "\t" + json.dumps(image_tags) + "\n")

            # Report progress relative to this run; the absolute index
            # would exceed obj_num whenever init_num > 0.
            done = i - init_num
            if done % 50 == 0:
                print(" %d / %d Done" % (done, obj_num))
                sys.stdout.flush()
Beispiel #13
0
def extract_annotation(imgs_dir=None, tfrecords_dir=None, dest_file=None):
    """List image files or tfrecords files into an annotation file.

    Exactly one of ``imgs_dir`` and ``tfrecords_dir`` must be given.

    Args:
        imgs_dir: Directory walked recursively; every file whose lower-cased
            name ends in ``.gif``, ``.jpg`` or ``.png`` is listed.
        tfrecords_dir: Directory whose ``*.tfrecords`` files are listed
            (not recursive).
        dest_file: Path of the annotation file to write, one path per line.

    Raises:
        AssertionError: If not exactly one source is given, or if
            ``tfrecords_dir`` does not exist.
    """
    assert [imgs_dir, tfrecords_dir].count(None) == 1

    image_suffixes = (".gif", ".jpg", ".png")

    check_or_makedirs(os.path.dirname(dest_file))
    with open(dest_file, "w", encoding="utf-8") as out:
        if imgs_dir is not None:
            for folder, _subdirs, names in os.walk(imgs_dir):
                for name in names:
                    if not name.lower().endswith(image_suffixes):
                        continue
                    out.write(os.path.join(folder, name) + "\n")

        elif tfrecords_dir is not None:
            assert os.path.exists(tfrecords_dir)
            for entry in os.listdir(tfrecords_dir):
                if not entry.endswith(".tfrecords"):
                    continue
                out.write(os.path.join(tfrecords_dir, entry) + "\n")
def get_callbacks(model_struc="densenet_gru"):
    """Build the training callbacks for the character recognition model.

    Args:
        model_struc: Model structure name, used in the checkpoint file name.

    Returns:
        A list ``[checkpoint, lr_reducer, tensorboard]``.
    """
    check_or_makedirs(dir_name=CHAR_RECOG_CKPT_DIR)
    # "{epoch:04d}" is a placeholder for the checkpoint callback and must
    # stay literal in the file name.
    ckpt_file = os.path.join(
        CHAR_RECOG_CKPT_DIR,
        "char_recog_with_compo_" + model_struc + "_{epoch:04d}.h5")
    ckpt_callback = ModelCheckpoint(filepath=ckpt_file,
                                    monitor='val_loss',
                                    verbose=1,
                                    save_best_only=True,
                                    save_weights_only=True)

    # Halve the learning rate after 10 epochs without training-loss improvement.
    plateau_callback = ReduceLROnPlateau(monitor='loss',
                                         factor=0.5,
                                         patience=10,
                                         cooldown=0,
                                         min_lr=0)

    check_or_makedirs(CHAR_RECOG_LOGS_DIR)
    tensorboard_callback = TensorBoard(log_dir=CHAR_RECOG_LOGS_DIR)

    return [ckpt_callback, plateau_callback, tensorboard_callback]
def get_callbacks(segment_task, model_struc="densenet_gru"):
    """Build the training callbacks for a segmentation task.

    Args:
        segment_task: Task name; selects the checkpoint and log directories
            through ``get_segment_task_path`` and prefixes the checkpoint
            file name.
        model_struc: Model structure name, used in the checkpoint file name.

    Returns:
        A list ``[checkpoint, lr_reducer, tensorboard]``.
    """
    _, ckpt_dir, logs_dir = get_segment_task_path(segment_task)

    check_or_makedirs(dir_name=ckpt_dir)
    # "{epoch:04d}" is a placeholder for the checkpoint callback and must
    # stay literal in the file name.
    ckpt_name = segment_task + "_segment_" + model_struc + "_{epoch:04d}.h5"
    ckpt_callback = ModelCheckpoint(filepath=os.path.join(ckpt_dir, ckpt_name),
                                    monitor='val_loss',
                                    verbose=1,
                                    save_best_only=True,
                                    save_weights_only=True)

    # Scale the learning rate by 0.6 after 4 epochs without val_loss improvement.
    plateau_callback = ReduceLROnPlateau(monitor='val_loss',
                                         factor=0.6,
                                         cooldown=0,
                                         patience=4,
                                         min_lr=0)

    check_or_makedirs(logs_dir)
    tensorboard_callback = TensorBoard(log_dir=logs_dir)

    return [ckpt_callback, plateau_callback, tensorboard_callback]
Beispiel #16
0
def generate_tfrecords(obj_size=CHAR_IMG_SIZE,
                       num_imgs_per_font=NUM_IMAGES_PER_FONT):
    """Render characters from font files and store augmented images as TFRecords.

    For every font file in ``FONT_FILE_DIR`` and every character yielded by
    ``generate_all_chinese_images_bigger``, ``num_imgs_per_font`` augmented
    images are created and each one is appended to a randomly chosen shard
    out of twenty ``chinese_imgs_<k>_from_font.tfrecords`` files. Each
    example stores the raw image bytes, the image height and width, and the
    UTF-8 encoded character.

    Args:
        obj_size: Size passed to ``get_augmented_image``; the source image is
            rendered at ``int(obj_size * 1.2)``.
        num_imgs_per_font: Number of augmented images per font/character pair.
    """
    print("Get font_file_list ...")
    font_file_list = [
        os.path.join(FONT_FILE_DIR, font_name)
        for font_name in os.listdir(FONT_FILE_DIR)
        if font_name.lower().endswith((".otf", ".ttf", ".ttc", ".fon"))
    ]

    # Create the directory that holds the tfrecords files.
    check_or_makedirs(CHAR_TFRECORDS_DIR)

    # Generated images are written directly into the tfrecords files rather
    # than being saved to disk first and read back, which would be slow.
    # Many augmented images come from one font/character pair; spreading
    # them over random shards keeps training batches diverse.
    writers_list = \
        [tf.io.TFRecordWriter(os.path.join(CHAR_TFRECORDS_DIR, "chinese_imgs_%d_from_font.tfrecords" % i))
         for i in range(20)]

    try:
        print("Begin to generate images ...")
        chinese_char_num = len(CHAR2ID_DICT)
        total_num = len(font_file_list) * chinese_char_num
        count = 0
        for font_file in font_file_list:  # outer loop: fonts

            for chinese_char, bigger_PIL_img in generate_all_chinese_images_bigger(
                    font_file, image_size=int(obj_size * 1.2)):  # inner loop: characters
                # Skip images that are (almost) entirely zero; the original
                # note describes the expected image as white text on black.
                if sum(bigger_PIL_img.getdata()) < 10:
                    continue

                PIL_img_list = \
                    [get_augmented_image(bigger_PIL_img, obj_size, rotation=True, dilate=False, erode=True, reverse_color=True)
                     for _ in range(num_imgs_per_font)]

                # Store the generated character images.
                for PIL_img in PIL_img_list:
                    writer = random.choice(writers_list)

                    bytes_image = PIL_img.tobytes()  # raw pixel bytes of the image
                    bytes_char = chinese_char.encode('utf-8')
                    example = tf.train.Example(features=tf.train.Features(
                        feature={
                            'bytes_image':
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[bytes_image])),
                            'img_height':
                            tf.train.Feature(int64_list=tf.train.Int64List(
                                value=[PIL_img.height])),
                            'img_width':
                            tf.train.Feature(int64_list=tf.train.Int64List(
                                value=[PIL_img.width])),
                            'bytes_char':
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[bytes_char]))
                        }))
                    writer.write(example.SerializeToString())

                # Progress, counted per font/character pair.
                count += 1
                if count % 200 == 0:
                    print("Progress bar: %.2f%%" % (count * 100 / total_num))
                    sys.stdout.flush()
    finally:
        # Close every shard even when generation fails part-way, so records
        # already written are flushed and file handles are released.
        for writer in writers_list:
            writer.close()
Beispiel #17
0
def convert_tfrecords(obj_size=CHAR_IMG_SIZE,
                      num_imgs_per_font=NUM_IMAGES_PER_FONT):
    """Augment external character images and store them as TFRecords.

    Every image path yielded by ``get_external_image_paths`` is loaded,
    augmented ``num_imgs_per_font`` times and each result is appended to a
    randomly chosen shard out of twenty
    ``chinese_imgs_<k>_from_img.tfrecords`` files. The label is the first
    character of the image file name. Each example stores the raw image
    bytes and the UTF-8 encoded character. Images that raise ``OSError``
    while loading are reported and skipped.

    Args:
        obj_size: Size passed to ``get_augmented_image``.
        num_imgs_per_font: Number of augmented images per source image.
    """
    print("Get total images num ...")
    font_images_num_list = [
        len(os.listdir(os.path.join(EXTERNEL_IMAGES_DIR, content)))
        for content in os.listdir(EXTERNEL_IMAGES_DIR)
        if os.path.isdir(os.path.join(EXTERNEL_IMAGES_DIR, content))
    ]

    # Create the directory that holds the tfrecords files.
    check_or_makedirs(CHAR_TFRECORDS_DIR)

    # Converted images are written directly into the tfrecords files rather
    # than being saved to disk first and read back, which would be slow.
    # Many augmented images come from one source image; spreading them over
    # random shards keeps every training batch diverse.
    writers_list = \
        [tf.io.TFRecordWriter(os.path.join(CHAR_TFRECORDS_DIR, "chinese_imgs_%d_from_img.tfrecords" % i))
         for i in range(20)]

    try:
        print("Begin to convert images ...")
        total_num = sum(font_images_num_list)
        count = 0
        for font_type, image_paths_list in get_external_image_paths(
                root_dir=EXTERNEL_IMAGES_DIR):

            for image_path in image_paths_list:
                chinese_char = os.path.basename(image_path)[0]

                # Original author's notes: the external image is adjusted to
                # a square and loaded slightly bigger so rotation loses no
                # information; it must end up as white text on black, which
                # reverse_color can be used to achieve.
                try:
                    bigger_PIL_img = load_external_image_bigger(
                        image_path, white_background=True, reverse_color=True)
                except OSError:
                    print("The image %s result in OSError !" % image_path)
                    continue

                PIL_img_list = \
                    [get_augmented_image(bigger_PIL_img, obj_size, rotation=True, dilate=False, erode=True, reverse_color=True)
                     for _ in range(num_imgs_per_font)]

                # Store the augmented character images in random shards.
                for PIL_img in PIL_img_list:
                    writer = random.choice(writers_list)

                    bytes_image = PIL_img.tobytes()  # raw pixel bytes of the image
                    bytes_char = chinese_char.encode('utf-8')
                    example = tf.train.Example(features=tf.train.Features(
                        feature={
                            'bytes_image':
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[bytes_image])),
                            'bytes_char':
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[bytes_char])),
                        }))
                    writer.write(example.SerializeToString())

                # Progress, counted per successfully loaded source image.
                count += 1
                if count % 200 == 0:
                    print("Progress bar: %.2f%%" % (count * 100 / total_num))
                    sys.stdout.flush()
    finally:
        # Close every shard even when conversion fails part-way, so records
        # already written are flushed and file handles are released.
        for writer in writers_list:
            writer.close()
Beispiel #18
0
def generate_book_page_tfrecords(obj_num=10,
                                 text_type="horizontal",
                                 init_num=0,
                                 page_shape=None):
    """Generate book-page images straight into TFRecord shards.

    Twenty shard files ``book_pages_<k>.tfrecords`` with ``k`` running from
    ``init_num`` to ``init_num + 19`` are opened and every generated example
    is appended to a randomly chosen shard. Each example stores the raw
    image bytes, the image height and width, and the text boxes and split
    positions serialized as int32 bytes.

    Args:
        obj_num: Number of pages to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        init_num: First index used in the shard file names.
        page_shape: Shape passed to ``create_book_page``; drawn at random for
            every page when ``None``.

    Raises:
        ValueError: If the normalized text type is neither "h" nor "v".
    """
    text_type = check_text_type(text_type)

    if text_type == "h":
        book_page_tfrecords_dir = BOOK_PAGE_TFRECORDS_H
    elif text_type == "v":
        book_page_tfrecords_dir = BOOK_PAGE_TFRECORDS_V
    else:
        # Fail explicitly instead of hitting an unbound local further down.
        raise ValueError('text_type should be horizontal or vertical')

    check_or_makedirs(book_page_tfrecords_dir)

    # Generated images are written directly into the tfrecords files rather
    # than being saved to disk first and read back, which would be slow.
    writers_list = \
        [tf.io.TFRecordWriter(os.path.join(book_page_tfrecords_dir, "book_pages_%d.tfrecords" % i))
         for i in range(init_num, init_num+20)]

    try:
        _shape = page_shape
        for i in range(obj_num):
            writer = random.choice(writers_list)
            if page_shape is None:
                if text_type == "h":
                    _shape = (random.randint(480, 720), random.randint(640, 960))
                else:
                    _shape = (random.randint(640, 960), random.randint(480, 720))

            PIL_page, text_bbox_list, split_pos_list = create_book_page(
                _shape, text_type=text_type)

            bytes_image = PIL_page.tobytes()  # raw pixel bytes of the image
            text_boxes = np.array(text_bbox_list, dtype=np.int32).tobytes()
            split_positions = np.array(split_pos_list, dtype=np.int32).tobytes()

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'bytes_image':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[bytes_image])),
                    'img_height':
                    tf.train.Feature(int64_list=tf.train.Int64List(
                        value=[PIL_page.height])),
                    'img_width':
                    tf.train.Feature(int64_list=tf.train.Int64List(
                        value=[PIL_page.width])),
                    'text_boxes':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[text_boxes])),
                    'split_positions':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[split_positions]))
                }))
            writer.write(example.SerializeToString())

            if i % 50 == 0:
                print("Process bar: %.2f%%" % (i * 100 / obj_num))
                sys.stdout.flush()
    finally:
        # Close every shard even when generation fails part-way, so records
        # already written are flushed and file handles are released.
        for writer in writers_list:
            writer.close()
Beispiel #19
0
def main(book_page_dir,
         dest_dir=None,
         is_mix_line=False,
         text_type="vertical",
         model_struc="densenet_gru"):
    """Segment book pages down to characters and recognize their text.

    Each page obtained through ``book_page_dir`` is split into lines by the
    book-page model. With ``is_mix_line`` set, those lines are split further
    by the mix-line, double-line and text-line models. Otherwise they go
    straight to the text-line model and the resulting character crops are
    passed to the recognition model. The text of every page is printed; when
    ``dest_dir`` is given, the page with its split lines drawn
    (``<name>.jpg``) and the text (``<name>.txt``) are saved there as well.

    A page whose processing raises is reported on stdout and skipped, so one
    bad page does not abort the whole run.

    Args:
        book_page_dir: Forwarded as ``img_paths`` to the book-page model.
        dest_dir: Output directory (handed to ``check_or_makedirs``);
            ``None`` disables saving.
        is_mix_line: Whether page lines mix single and double text lines.
            Only accepted together with vertical text.
        text_type: One of ``"v"``, ``"vertical"``, ``"h"``, ``"horizontal"``.
        model_struc: Model structure name passed to every model constructor.

    Raises:
        ValueError: If ``text_type`` is not one of the accepted values, or
            if ``is_mix_line`` is requested for horizontal text.
    """
    is_vertical = text_type in ("v", "vertical")
    is_horizontal = text_type in ("h", "horizontal")

    # Validate the configuration once, before any expensive model loading.
    # It does not depend on the page being processed.
    if not (is_vertical or is_horizontal):
        raise ValueError("Unsupported text_type: %r" % (text_type,))
    if is_mix_line and not is_vertical:
        raise ValueError(
            "Horizontal book page should not exist single-double text line."
        )

    if dest_dir is not None:
        check_or_makedirs(dest_dir)
    K.set_learning_phase(False)

    # Load the models.
    # NOTE(review): the integer ``weights`` values presumably select a saved
    # checkpoint per model -- confirm against SegmentModel / CharRecogModel.
    segment_book_page_model = SegmentModel("book_page",
                                           text_type,
                                           model_struc,
                                           weights=99)
    segment_mix_line_model = SegmentModel("mix_line",
                                          text_type,
                                          model_struc,
                                          weights=65)
    segment_double_line_model = SegmentModel("double_line",
                                             text_type,
                                             model_struc,
                                             weights=72)
    segment_text_line_model = SegmentModel("text_line",
                                           text_type,
                                           model_struc,
                                           weights=42)
    recog_model = CharRecogModel(model_struc, weights=121)

    # Split the book pages into lines.
    np_page_list, page_name_list, page_split_pos_list, _ = \
        segment_book_page_model.segment_predict(img_paths=book_page_dir)

    for i, np_page in enumerate(np_page_list):
        try:
            split_line_dict = {
                "page": [],
                "mix": [],
                "double": [],
                "single": []
            }
            start_coord = np.array([0, 0], dtype=np.int32)

            np_line_list, page_split_lines = extract_slices(
                np_page,
                page_split_pos_list[i],
                start_coord,
                segment_task="book_page",
                text_type=text_type)
            split_line_dict["page"].append(page_split_lines)

            text_list = []
            if is_mix_line:
                np_mix_line_list = np_line_list

                # Split mixed lines into single/double line segments.
                _, _, mix_split_pos_list, _ = \
                    segment_mix_line_model.segment_predict(
                        images=np_mix_line_list)

                for j, np_mix_line in enumerate(np_mix_line_list):
                    np_double_line_list, mix_split_lines = extract_slices(
                        np_mix_line,
                        mix_split_pos_list[j],
                        page_split_lines[j + 1, :2],
                        segment_task="mix_line",
                        text_type=text_type)
                    split_line_dict["mix"].append(mix_split_lines)

                    # Split double lines.
                    _, _, double_split_pos_list, double_scores_list = \
                        segment_double_line_model.segment_predict(
                            images=np_double_line_list)

                    img_w = np_mix_line.shape[1]
                    double_split_pos_list, _ = check_and_correct_double_split(
                        double_split_pos_list, double_scores_list, img_w)

                    for k, np_double_line in enumerate(np_double_line_list):
                        # NOTE(review): this level indexes with ``k`` while
                        # the other levels use ``index + 1`` -- confirm that
                        # the asymmetry is intended.
                        np_text_line_list, double_split_lines = extract_slices(
                            np_double_line,
                            double_split_pos_list[k],
                            mix_split_lines[k, :2],
                            segment_task="double_line",
                            text_type=text_type)
                        split_line_dict["double"].append(double_split_lines)

                        # Split single text lines into characters.
                        _, _, char_split_pos_list, _ = \
                            segment_text_line_model.segment_predict(
                                images=np_text_line_list)

                        assert len(np_text_line_list) in (1, 2)
                        for t, np_text_line in enumerate(np_text_line_list):
                            _, single_split_lines = extract_slices(
                                np_text_line,
                                char_split_pos_list[t],
                                double_split_lines[t + 1, :2],
                                segment_task="text_line",
                                text_type=text_type)
                            split_line_dict["single"].append(
                                single_split_lines)

                # NOTE: character recognition is not performed on this path,
                # so ``text_list`` stays empty and only the split lines are
                # produced for mixed-line pages.
            else:
                np_text_line_list = np_line_list

                # Split single text lines into characters.
                _, _, char_split_pos_list, _ = \
                    segment_text_line_model.segment_predict(
                        images=np_text_line_list)

                for t, np_text_line in enumerate(np_text_line_list):
                    # Vertical pages take the origin from the next split line.
                    _t = t + 1 if is_vertical else t
                    np_char_list, single_split_lines = extract_slices(
                        np_text_line,
                        char_split_pos_list[t],
                        page_split_lines[_t, :2],
                        segment_task="text_line",
                        text_type=text_type)
                    split_line_dict["single"].append(single_split_lines)

                    # Recognize the individual characters.
                    _, _, pred_topk_chars_list = recog_model.char_predict(
                        images=np_char_list)

                    # Keep the top candidate per character, "?" if none.
                    text_str = "".join([
                        chars[0] if len(chars) > 0 else "?"
                        for chars in pred_topk_chars_list
                    ])
                    text_list.extend([text_str, "\n"])

            # Save the annotated page and its text.
            if dest_dir is not None:
                PIL_page_drawn = draw_split_lines(
                    np_page=np_page,
                    split_line_dict=split_line_dict)
                page_name = os.path.splitext(page_name_list[i])[0]
                PIL_page_drawn.save(os.path.join(dest_dir, page_name + ".jpg"),
                                    format="jpeg")
                with open(os.path.join(dest_dir, page_name + ".txt"),
                          "w",
                          encoding="utf8") as fw:
                    fw.write("\n".join(text_list))

            # Print the recognized text.
            print("\n*******************", page_name_list[i],
                  "*******************\n")
            print("\n".join(text_list))

        except Exception as exc:
            # Best effort per page: report the failure instead of hiding it,
            # then move on. KeyboardInterrupt/SystemExit still propagate.
            print("Skip page #%d due to error: %r" % (i, exc))
            continue
Beispiel #20
0
def segment_predict(images=None,
                    img_paths=None,
                    dest_dir=None,
                    segment_model=None,
                    segment_task="book_page",
                    text_type="horizontal",
                    model_struc="densenet_gru",
                    weights=""):
    """Predict split positions for a set of images with a segmentation model.

    Images are taken from ``images`` when given, otherwise loaded from
    ``img_paths`` (one of the two is required). If no ``segment_model`` is
    passed, a prediction network is built and its weights are loaded.
    Predictions are made batch by batch; the split positions are divided by
    each image's resize ratio. When ``dest_dir`` is set, every image is
    saved there as JPEG with its split lines drawn.

    Returns:
        Tuple ``(np_img_list, img_name_list, split_positions_list,
        scores_list)``.
    """
    # Collect the input images; in-memory images win over paths.
    if images is None:
        assert img_paths is not None
        np_img_list, img_name_list = load_images(img_paths)
    else:
        np_img_list = convert_images(images)
        img_name_list = [str(idx) + ".jpg" for idx in range(len(np_img_list))]

    # Book pages get an extra pre-processing pass.
    if segment_task == "book_page":
        np_img_list = book_page_pre_processing(np_img_list)

    # Build and load the model only when the caller did not supply one.
    if segment_model is None:
        K.set_learning_phase(False)
        weights_path = model_weights_path(weights, segment_task, model_struc)

        segment_model = work_net(stage="predict",
                                 segment_task=segment_task,
                                 text_type=text_type,
                                 model_struc=model_struc)
        segment_model.load_weights(weights_path, by_name=True)
        print("\nLoad model weights from %s\n" % weights_path)

    batch_size, fixed_h, feat_stride = get_segment_task_params(segment_task)
    text_type = text_type[0].lower()

    # Task/orientation pairs whose predictions are mapped back with
    # restore_original_angle (and rotated again before drawing).
    is_rotated_case = (segment_task, text_type) in (
        ("book_page", "h"),
        ("double_line", "h"),
        ("text_line", "v"),
        ("mix_line", "v"),
    )

    # Predict batch by batch.
    split_positions_list, scores_list = [], []
    for start in range(0, len(np_img_list), batch_size):
        resized_imgs, scale_ratios = [], []
        for raw_img in np_img_list[start:start + batch_size]:
            resized, _, ratio = adjust_img_to_fixed_height(
                raw_img, None, fixed_h, segment_task, text_type)
            resized_imgs.append(resized)
            scale_ratios.append(ratio)
        batch_images, real_images_width, _ = pack_a_batch(
            resized_imgs, None, feat_stride, background="white")

        # Model prediction.
        nms_split_positions, nms_scores = segment_model.predict(
            x=[batch_images, real_images_width])

        for pos in range(len(batch_images)):
            img_scores = remove_pad_np(nms_scores[pos])[:, 0]
            img_positions = remove_pad_np(nms_split_positions[pos])
            # Undo the resize applied before prediction.
            img_positions = img_positions / scale_ratios[pos]
            if is_rotated_case:
                _, img_positions = restore_original_angle(
                    np_img=None, pred_split_positions=img_positions)
            split_positions_list.append(img_positions)
            scores_list.append(img_scores)

    # Optionally draw the predicted split lines and save the images.
    if dest_dir is not None:
        check_or_makedirs(dest_dir)
        for idx in range(len(np_img_list)):
            canvas, positions = np_img_list[idx], split_positions_list[idx]
            if is_rotated_case:
                canvas, positions = rotate_90_degrees(canvas, positions)

            # Visualization.
            canvas = visualize.draw_split_lines(canvas, positions,
                                                scores_list[idx])

            if is_rotated_case:
                canvas, _ = restore_original_angle(canvas)

            PIL_img = Image.fromarray(canvas)
            stem = os.path.splitext(img_name_list[idx])[0]
            dest_path = os.path.join(dest_dir, stem + ".jpg")
            PIL_img.save(dest_path, format="jpeg")
            print(idx, "Finished: " + dest_path)

    return np_img_list, img_name_list, split_positions_list, scores_list
def generate_one_text_line_tfrecords(obj_num=100,
                                     text_type="horizontal",
                                     init_num=0,
                                     text_shape=None,
                                     edges=False):
    """Generate single text-line samples and write them to TFRecord shards.

    Samples are serialized directly into tfrecords files instead of being
    saved to disk as images first and read back, which would be far slower.
    Twenty shard files named ``text_lines_<n>.tfrecords`` with ``n`` in
    ``[init_num, init_num + 20)`` are opened, and each sample goes to a
    randomly chosen shard.

    Each record stores the raw image bytes (``PIL_text.tobytes()``), the
    image height and width, the UTF-8 encoded characters, and int32 byte
    buffers for the character ids, the ground-truth boxes and the split
    positions.

    Args:
        obj_num: Number of samples to generate.
        text_type: Text orientation; normalized through ``check_text_type``.
        init_num: Index of the first shard file.
        text_shape: Fixed shape handed to ``create_one_text_line``. When
            ``None`` a random shape is drawn for every sample.
        edges: Forwarded to ``create_one_text_line``.

    Raises:
        ValueError: If the normalized ``text_type`` is neither ``"h"`` nor
            ``"v"``.
    """
    text_type = check_text_type(text_type)

    if text_type == "h":
        text_line_tfrecords_dir = ONE_TEXT_LINE_TFRECORDS_H
    elif text_type == "v":
        text_line_tfrecords_dir = ONE_TEXT_LINE_TFRECORDS_V
    else:
        raise ValueError("Unsupported text_type: %r" % (text_type,))

    check_or_makedirs(text_line_tfrecords_dir)

    def _bytes_feature(value):
        # Wrap a single bytes value as a tf.train.Feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        # Wrap a single integer value as a tf.train.Feature.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    writers_list = []
    try:
        for shard_id in range(init_num, init_num + 20):
            shard_path = os.path.join(text_line_tfrecords_dir,
                                      "text_lines_%d.tfrecords" % shard_id)
            writers_list.append(tf.io.TFRecordWriter(shard_path))

        # Generate and store the text-line images.
        _shape = text_shape
        for i in range(obj_num):
            writer = random.choice(writers_list)

            if text_shape is None:
                # (height, width) for horizontal text and the transposed
                # ranges for vertical text.
                if text_type == "h":
                    _shape = (random.randint(38, 72),
                              random.randint(540, 1280))
                else:
                    _shape = (random.randint(540, 1280),
                              random.randint(38, 72))

            PIL_text, char_and_box_list, split_pos_list = \
                create_one_text_line(_shape, text_type=text_type, edges=edges)

            chars = [char for char, _ in char_and_box_list]
            boxes = [gt_box for _, gt_box in char_and_box_list]

            # Raw pixel bytes; height and width are stored alongside them.
            bytes_image = PIL_text.tobytes()
            bytes_chars = "".join(chars).encode("utf-8")
            labels = np.array([CHAR2ID_DICT[char] for char in chars],
                              dtype=np.int32).tobytes()
            gt_boxes = np.array(boxes, dtype=np.int32).tobytes()
            split_positions = np.array(split_pos_list,
                                       dtype=np.int32).tobytes()

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'bytes_image': _bytes_feature(bytes_image),
                    'img_height': _int64_feature(PIL_text.height),
                    'img_width': _int64_feature(PIL_text.width),
                    'bytes_chars': _bytes_feature(bytes_chars),
                    'labels': _bytes_feature(labels),
                    'gt_boxes': _bytes_feature(gt_boxes),
                    'split_positions': _bytes_feature(split_positions),
                }))
            writer.write(example.SerializeToString())

            if i % 50 == 0:
                print("Process bar: %.2f%%" % (i * 100 / obj_num))
                sys.stdout.flush()
    finally:
        # Close every tfrecords writer, even if generation failed midway.
        for writer in writers_list:
            writer.close()
# Script fragment: add noise to an image, then sweep brightness and contrast
# adjustments and save every result as a JPEG.
# `img` and `noise` come from code that is not shown in this excerpt.
img = img + noise
# Clamp the noisy values into [0, 255] before casting to uint8.
img = tf.where(img < 0, 0, img)
img = tf.where(img > 255, 255, img)
img = tf.cast(img, tf.uint8)

for i in range(100):
    print(i, img.dtype)

    # ---- Brightness ----
    # delta runs from -1.0 up to 0.98 in steps of 0.02.
    delta = -1 + i * 2 / 100
    im = tf.image.adjust_brightness(img, delta=delta)
    print(im.dtype)
    np_im = im.numpy().astype(np.uint8)
    p_im = Image.fromarray(np_im)
    check_or_makedirs(os.path.join("..", "tf_image", "brightness"))
    # The delta value is embedded in the output file name.
    im_path = os.path.join("..", "tf_image", "brightness",
                           "delta_" + str(delta) + ".jpg")
    p_im.save(im_path, format="jpeg")

    # ---- Contrast ----
    # contrast_factor runs from 0.3 up to 1.785 in steps of 0.015.
    contrast_factor = 0.3 + i * 1.5 / 100
    im = tf.image.adjust_contrast(img, contrast_factor=contrast_factor)
    print(im.dtype)
    np_im = im.numpy().astype(np.uint8)
    p_im = Image.fromarray(np_im)
    check_or_makedirs(os.path.join("..", "tf_image", "contrast"))
    # The contrast factor is embedded in the output file name.
    im_path = os.path.join("..", "tf_image", "contrast",
                           "contrast_factor_" + str(contrast_factor) + ".jpg")
    p_im.save(im_path, format="jpeg")