class TextRecognizer:
    """Recognize text in images using an ``HTRModel`` and its ``Tokenizer``.

    The tokenizer and model are built, compiled and restored from the
    checkpoint when the instance is created, so one instance can be reused
    for any number of images.
    """

    def __init__(self,
                 checkpoint_path="./py/checkpoint_recognizer.hdf5",
                 input_size=(1024, 128, 1),
                 max_text_length=128,
                 charset_base=string.printable[:95],
                 architecture="flor"):
        """Store the configuration and load the model immediately.

        Args:
            checkpoint_path: Path handed to ``HTRModel.load_checkpoint``.
            input_size: Input size handed to the preprocessing step and to
                ``HTRModel`` (presumably (width, height, channels) --
                confirm against ``pp.preprocess``).
            max_text_length: Maximum text length handed to ``Tokenizer``.
            charset_base: Characters handed to ``Tokenizer``; defaults to
                the first 95 characters of ``string.printable``.
            architecture: Architecture name handed to ``HTRModel``.
        """
        self.tokenizer = None
        self.model = None
        self.checkpoint_path = checkpoint_path
        self.input_size = input_size
        self.max_text_length = max_text_length
        self.charset_base = charset_base
        self.architecture = architecture
        # Called before the model is built, as in the original ordering.
        ml_utils.limit_gpu_memory()

        self.load_model()

    def load_model(self):
        """Build the tokenizer and model, compile it and load the checkpoint.

        Replaces ``self.tokenizer`` and ``self.model`` using the settings
        stored on the instance.
        """
        self.tokenizer = Tokenizer(chars=self.charset_base,
                                   max_text_length=self.max_text_length)
        self.model = HTRModel(architecture=self.architecture,
                              input_size=self.input_size,
                              vocab_size=self.tokenizer.vocab_size,
                              top_paths=10)
        self.model.compile()
        self.model.load_checkpoint(target=self.checkpoint_path)

    def read_all_text_from_images(self, images):
        """Return the recognized text for every image, in input order.

        Args:
            images: Iterable of images, each accepted by
                ``read_text_from_image``.

        Returns:
            A list with one string per input image.
        """
        return [self.read_text_from_image(img) for img in images]

    def read_text_from_image(self, img):
        """Return the recognized text for a single image.

        Args:
            img: Image input accepted by ``pp.preprocess``.

        Returns:
            The decoded first candidate of the first prediction, or ``""``
            when the model yields no prediction or no candidate.
        """
        img = pp.preprocess(img, input_size=self.input_size)
        x_test = pp.normalization([img])

        # The probabilities are returned by the model but not needed here.
        predicts, _ = self.model.predict(x_test, ctc_decode=True)

        best = self._top_candidate(predicts)
        if best is None:
            return ""
        # Only the selected candidate is decoded; the remaining paths were
        # decoded and then discarded before.
        return self.tokenizer.decode(best)

    @staticmethod
    def _top_candidate(predicts):
        """Return the first candidate of the first prediction, or ``None``.

        The candidate at index 0 is presumably the most probable path --
        confirm against ``HTRModel.predict``.
        """
        # len() instead of truthiness: the model may return array-likes
        # whose truth value is ambiguous.
        if len(predicts) == 0:
            return None
        candidates = predicts[0]
        if len(candidates) == 0:
            return None
        return candidates[0]
        tokenizer = Tokenizer(chars=charset_base,
                              max_text_length=max_text_length)

        img = pp.preproc(args.image, input_size=input_size)
        x_test = pp.normalization([img])

        model = HTRModel(architecture=args.arch,
                         input_size=input_size,
                         vocab_size=tokenizer.vocab_size,
                         top_paths=10)

        model.compile()
        model.load_checkpoint(target=target_path)

        predicts, probabilities = model.predict(x_test, ctc_decode=True)
        predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

        print("\n####################################")
        for i, (pred, prob) in enumerate(zip(predicts, probabilities)):
            print("\nProb.  - Predict")

            for (pd, pb) in zip(pred, prob):
                print(f"{pb:.4f} - {pd}")

            cv2.imshow(f"Image {i + 1}", pp.adjust_to_see(img))
        print("\n####################################")
        cv2.waitKey(0)

    else:
        assert os.path.isfile(source_path) or os.path.isfile(target_path)
        os.makedirs(output_path, exist_ok=True)
Example #3
0
                        mask = model.generate_square_subsequent_mask(i + 1).to(
                            'cuda')
                        trg_tensor = torch.LongTensor(out_indexes).unsqueeze(
                            1).to(device)
                        output = model.vocab(
                            model.transformer.decoder(model.query_pos(
                                model.decoder(trg_tensor)),
                                                      memory,
                                                      tgt_mask=mask))
                        out_token = output.argmax(2)[-1].item()
                        out_indexes.append(out_token)
                        # print(output.shape)
                        if out_token == 3:
                            break

                    predicts.append(tokenizer.decode(out_indexes))
                    gt.append(tokenizer.decode(trg.flatten(0, 1)))

            predicts = list(
                map(lambda x: x.replace('SOS', '').replace('EOS', ''),
                    predicts))
            gt = list(
                map(lambda x: x.replace('SOS', '').replace('EOS', ''), gt))

            evaluate = evaluation.ocr_metrics(
                predicts=predicts,
                ground_truth=gt,
                norm_accentuation=args.norm_accentuation,
                norm_punctuation=args.norm_punctuation)
            print(
                "Calculate Character Error Rate {}, Word Error Rate {} and Sequence Error Rate {}"