import string

# Tokenizer, HTRModel, preproc (pp), and ml_utils are project-local modules
# provided by the surrounding repository.

class TextRecognizer:
    def __init__(self, checkpoint_path="./py/checkpoint_recognizer.hdf5",
                 input_size=(1024, 128, 1), max_text_length=128,
                 charset_base=string.printable[:95], architecture="flor"):
        self.tokenizer = None
        self.model = None
        self.checkpoint_path = checkpoint_path
        self.input_size = input_size
        self.max_text_length = max_text_length
        self.charset_base = charset_base
        self.architecture = architecture

        ml_utils.limit_gpu_memory()
        self.load_model()

    def load_model(self):
        self.tokenizer = Tokenizer(chars=self.charset_base,
                                   max_text_length=self.max_text_length)
        self.model = HTRModel(architecture=self.architecture,
                              input_size=self.input_size,
                              vocab_size=self.tokenizer.vocab_size,
                              top_paths=10)
        self.model.compile()
        self.model.load_checkpoint(target=self.checkpoint_path)

    def read_all_text_from_images(self, images):
        return [self.read_text_from_image(img) for img in images]

    def read_text_from_image(self, img):
        img = pp.preprocess(img, input_size=self.input_size)
        x_test = pp.normalization([img])

        # predict() returns the top_paths decoded candidates per image;
        # keep the highest-probability candidate of the single input.
        predicts, probabilities = self.model.predict(x_test, ctc_decode=True)
        predicts = [[self.tokenizer.decode(x) for x in y] for y in predicts]
        return predicts[0][0] if predicts else ""
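For reference, a minimal usage sketch. The module name text_recognizer and the use of OpenCV for image loading are assumptions for illustration, not part of this repo:

import cv2

from text_recognizer import TextRecognizer  # hypothetical module name

# Load the checkpoint once, then reuse the recognizer across images.
recognizer = TextRecognizer()

# Grayscale load matches the single-channel input_size=(1024, 128, 1).
img = cv2.imread("line_sample.png", cv2.IMREAD_GRAYSCALE)
print(recognizer.read_text_from_image(img))

# Batch helper: returns one decoded string per input image.
paths = ["a.png", "b.png"]
print(recognizer.read_all_text_from_images(
    [cv2.imread(p, cv2.IMREAD_GRAYSCALE) for p in paths]))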
    # Fragment: this block sits inside an earlier "if" branch of the CLI
    # dispatch; "elif args.image" below continues that chain.
    predicts = [''] * len(dt)

    if os.path.isfile(predict_file):
        with open(predict_file, "r") as lg:
            predicts = [line[5:] for line in lg if line.startswith("TE_P")]

    for x in range(len(dt)):
        print(f"Image shape:\t{dt[x].shape}")
        print(f"Ground truth:\t{gt[x].decode()}")
        print(f"Predict:\t{predicts[x]}\n")

        cv2.imshow("img", pp.adjust_to_see(dt[x]))
        cv2.waitKey(0)

elif args.image:
    tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

    img = pp.preprocess(args.image, input_size=input_size)
    x_test = pp.normalization([img])

    model = HTRModel(architecture=args.arch,
                     input_size=input_size,
                     vocab_size=tokenizer.vocab_size,
                     top_paths=10)
    model.compile()
    model.load_checkpoint(target=target_path)

    predicts, probabilities = model.predict(x_test, ctc_decode=True)
    predicts = [[tokenizer.decode(x) for x in y] for y in predicts]
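The line[5:] slice strips a 5-character "TE_P " prefix. The layout sketched below is an assumption about the predict log (upstream flor-style scripts write paired label/prediction lines), not something shown in this repo:

# Assumed predict_file layout, one pair of lines per test sample:
#
#   TE_L the quick brown fox      <- ground-truth label
#   TE_P the quick brown fox      <- model prediction
#
# Only "TE_P" lines are kept, and line[5:] drops the "TE_P " prefix.
sample = "TE_P the quick brown fox\n"
assert sample.startswith("TE_P")
print(sample[5:], end="")  # -> "the quick brown fox"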
source_path = os.path.join("..", "data", f"{args.source}.hdf5")
if args.no_aug:
    source_path = os.path.join("..", "data", f"{args.source}_non_augmented.hdf5")

output_path = os.path.join("..", "output", args.source)
target_path = os.path.join(output_path, "checkpoint_weights.pt")

input_size = (128, 128, 1)
max_text_length = 16
charset_base = string.printable[:95]
tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

if args.transform:
    print(f"{args.source} dataset will be transformed...")
    # Dataset and raw_path are defined elsewhere in the script.
    ds = Dataset(source=raw_path, name=args.source)
    ds.read_partitions()

    print("Partitions will be preprocessed...")
    ds.preprocess_partitions(input_size=input_size, no_aug=args.no_aug)

    print("Partitions will be saved...")
    os.makedirs(os.path.dirname(source_path), exist_ok=True)
    for i in ds.partitions:
        with h5py.File(source_path, "a") as hf:
            ...  # truncated in the source
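These fragments assume an args namespace built by the script's CLI. A minimal argparse sketch consistent with the flags used above; flag types and defaults are assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--source", type=str, required=True)   # dataset name, e.g. "iam"
parser.add_argument("--arch", type=str, default="flor")    # HTRModel architecture
parser.add_argument("--image", type=str, default="")       # single image to transcribe
parser.add_argument("--transform", action="store_true")    # build the HDF5 dataset
parser.add_argument("--no_aug", action="store_true")       # skip augmentation
args = parser.parse_args()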