コード例 #1
0
class TextRecognizer:
    """Recognize text in images with a tokenizer and an ``HTRModel``.

    Creating an instance calls ``ml_utils.limit_gpu_memory()`` and then
    ``load_model()``, so the tokenizer and the model (with checkpoint weights
    loaded) are available as soon as the constructor returns.
    """

    def __init__(self,
                 checkpoint_path="./py/checkpoint_recognizer.hdf5",
                 input_size=(1024, 128, 1),
                 max_text_length=128,
                 charset_base=string.printable[:95],
                 architecture="flor"):
        """Store the configuration and load the model.

        Args:
            checkpoint_path: Target passed to ``HTRModel.load_checkpoint``.
            input_size: Input size given to the preprocessing step and to
                the model.
            max_text_length: Maximum text length given to the tokenizer.
            charset_base: Characters the tokenizer is built from.
            architecture: Architecture name passed to ``HTRModel``.
        """
        self.tokenizer = None
        self.model = None
        self.checkpoint_path = checkpoint_path
        self.input_size = input_size
        self.max_text_length = max_text_length
        self.charset_base = charset_base
        self.architecture = architecture
        ml_utils.limit_gpu_memory()

        self.load_model()

    def load_model(self):
        """Build the tokenizer and the model, then load the checkpoint."""
        self.tokenizer = Tokenizer(chars=self.charset_base,
                                   max_text_length=self.max_text_length)
        # the model's vocabulary size is taken from the tokenizer
        self.model = HTRModel(architecture=self.architecture,
                              input_size=self.input_size,
                              vocab_size=self.tokenizer.vocab_size,
                              top_paths=10)
        self.model.compile()
        self.model.load_checkpoint(target=self.checkpoint_path)

    def read_all_text_from_images(self, images):
        """Return the recognized text of every image, in input order.

        Args:
            images: Iterable of images accepted by ``read_text_from_image``.

        Returns:
            A list with one recognized string per input image.
        """
        return [self.read_text_from_image(img) for img in images]

    def read_text_from_image(self, img):
        """Return the first decoded candidate for a single image.

        Args:
            img: Image handed to ``pp.preprocess``.

        Returns:
            The decoded text of the first candidate of the first prediction,
            or ``""`` when the model returns no prediction or no candidate.
        """
        img = pp.preprocess(img, input_size=self.input_size)
        x_test = pp.normalization([img])

        # the probabilities returned next to the predictions are not used
        predicts, _ = self.model.predict(x_test, ctc_decode=True)

        # a single image is submitted, so only the first prediction matters;
        # len() is used instead of truthiness in case an array is returned
        if len(predicts) == 0 or len(predicts[0]) == 0:
            return ""

        # decode only the candidate that is actually returned
        return self.tokenizer.decode(predicts[0][0])
コード例 #2
0
    elif args.image:
        # single-image mode: predict the text of the image given by
        # ``args.image`` and print every candidate with its probability
        tokenizer = Tokenizer(chars=charset_base,
                              max_text_length=max_text_length)

        # preprocess the image with the configured input size and normalize
        # it as a one-element batch
        img = pp.preproc(args.image, input_size=input_size)
        x_test = pp.normalization([img])

        # the vocabulary size of the model comes from the tokenizer
        model = HTRModel(architecture=args.arch,
                         input_size=input_size,
                         vocab_size=tokenizer.vocab_size,
                         top_paths=10)

        model.compile()
        model.load_checkpoint(target=target_path)

        # predict() returns the predictions together with their probabilities
        predicts, probabilities = model.predict(x_test, ctc_decode=True)
        # decode every candidate of every prediction with the tokenizer
        predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

        print("\n####################################")
        for i, (pred, prob) in enumerate(zip(predicts, probabilities)):
            print("\nProb.  - Predict")

            # one line per candidate: probability (4 decimals) and text
            for (pd, pb) in zip(pred, prob):
                print(f"{pb:.4f} - {pd}")

            # show the preprocessed image after pp.adjust_to_see()
            cv2.imshow(f"Image {i + 1}", pp.adjust_to_see(img))
        print("\n####################################")
        # wait for a key press so the image windows stay visible
        cv2.waitKey(0)

    else:
        assert os.path.isfile(source_path) or os.path.isfile(target_path)
コード例 #3
0
# get default callbacks and load checkpoint weights file (HDF5) if exists
model.load_checkpoint(target=target_path)

callbacks = model.get_callbacks(logdir=output_path,
                                checkpoint=target_path,
                                verbose=1)

# TODO PREDICT
from data import preproc as pp
# from google.colab.patches import cv2_imshow

# measure the wall-clock time of prediction plus decoding
start_time = datetime.datetime.now()

# predict() function will return the predicts with the probabilities
predicts, _ = model.predict(x=dtgen.next_test_batch(),
                            steps=dtgen.steps['test'],
                            ctc_decode=True,
                            verbose=1)

# decode to string: first candidate of each prediction, and the ground truth
# from bytes (bytes.decode() defaults to UTF-8)
predicts = [dtgen.tokenizer.decode(x[0]) for x in predicts]
ground_truth = [x.decode() for x in dtgen.dataset['test']['gt']]

total_time = datetime.datetime.now() - start_time

# mount predict corpus file; written as UTF-8 explicitly so that text decoded
# from UTF-8 above does not depend on the platform's default encoding
predict_path = os.path.join(output_path, "predict.txt")

with open(predict_path, "w", encoding="utf-8") as lg:
    for pd, gt in zip(predicts, ground_truth):
        # "TE_L" line holds the ground truth, "TE_P" line the prediction
        lg.write(f"TE_L {gt}\nTE_P {pd}\n")

# for i, item in enumerate(dtgen.dataset['test']['dt'][:10]):
#     print("=" * 1024, "\n")
コード例 #4
0
            f"Time per item:           {time_epoch / total_item}\n",
            f"Total epochs:            {len(loss)}",
            f"Best epoch               {min_val_loss_i + 1}\n",
            f"Training loss:           {loss[min_val_loss_i]:.8f}",
            f"Validation loss:         {min_val_loss:.8f}"
        ])

        with open(os.path.join(output_path, "train.txt"), "w") as lg:
            lg.write(t_corpus)
            print(t_corpus)

    elif args.test:
        # test mode: time the prediction, write the label/prediction corpus
        # file and compute the OCR metrics
        start_time = datetime.datetime.now()

        # NOTE(review): input batches and step count come from ``ds``
        # (``valid_steps``) while decoding and ground truth below use
        # ``dtgen`` and its 'test' split -- confirm both cover the same data
        predicts, _ = model.predict(x=ds.getNext(),
                                    steps=ds.valid_steps,
                                    ctc_decode=True,
                                    verbose=1)

        # keep only the first candidate of each prediction, decoded to text
        predicts = [dtgen.tokenizer.decode(x[0]) for x in predicts]

        total_time = datetime.datetime.now() - start_time

        # each pair is written as a "TE_L" (ground truth) line followed by a
        # "TE_P" (prediction) line
        # NOTE(review): the ground truth is formatted as-is; if the entries
        # are bytes they will be written as b'...' -- confirm
        with open(os.path.join(output_path, "predict.txt"), "w") as lg:
            for pd, gt in zip(predicts, dtgen.dataset['test']['gt']):
                lg.write(f"TE_L {gt}\nTE_P {pd}\n")

        # compare predictions with the ground truth; the two normalization
        # options are taken from the command-line arguments
        evaluate = evaluation.ocr_metrics(
            predicts=predicts,
            ground_truth=dtgen.dataset['test']['gt'],
            norm_accentuation=args.norm_accentuation,
            norm_punctuation=args.norm_punctuation)
コード例 #5
0
        # single-image prediction: build the tokenizer and model, predict the
        # text of ``args.image`` and print every candidate with its probability
        tokenizer = Tokenizer(chars=charset_base,
                              max_text_length=max_text_length)

        # preprocess the image with the configured input size and normalize
        # it as a one-element batch
        img = pp.preprocess(args.image, input_size=input_size)
        x_test = pp.normalization([img])

        # the vocabulary size of the model comes from the tokenizer
        model = HTRModel(architecture=args.arch,
                         input_size=input_size,
                         vocab_size=tokenizer.vocab_size,
                         beam_width=10,
                         top_paths=10)

        model.compile(learning_rate=0.001)
        model.load_checkpoint(target=target_path)

        # predict() returns the predictions together with their probabilities
        predicts, probabilities = model.predict(x_test, ctc_decode=True)
        # decode every candidate of every prediction with the tokenizer
        predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

        print("\n####################################")
        for i, (pred, prob) in enumerate(zip(predicts, probabilities)):
            print("\nProb.  - Predict")

            # one line per candidate: probability (4 decimals) and text
            for (pd, pb) in zip(pred, prob):
                print(f"{pb:.4f} - {pd}")

            # show the original image file, re-read from disk with cv2.imread
            cv2.imshow(f"Image {i + 1}", cv2.imread(args.image))
        print("\n####################################")
        # wait for a key press so the image windows stay visible
        cv2.waitKey(0)

    else:
        assert os.path.isfile(source_path) or os.path.isfile(target_path)