class TextRecognizer:
    """Recognize the text in images with an ``HTRModel`` and its tokenizer.

    Construction is eager: ``__init__`` stores the configuration, calls
    ``ml_utils.limit_gpu_memory()`` and then ``load_model()``, so a fully
    constructed instance is ready to run predictions.

    Attributes:
        tokenizer: ``Tokenizer`` built from ``charset_base`` and
            ``max_text_length`` (``None`` until ``load_model`` runs).
        model: compiled ``HTRModel`` with the checkpoint loaded
            (``None`` until ``load_model`` runs).
        checkpoint_path: path handed to ``HTRModel.load_checkpoint``.
        input_size: input shape handed to the model and to
            ``pp.preprocess``.
        max_text_length: maximum text length handed to the tokenizer.
        charset_base: characters handed to the tokenizer.
        architecture: architecture name handed to ``HTRModel``.
    """

    def __init__(self,
                 checkpoint_path="./py/checkpoint_recognizer.hdf5",
                 input_size=(1024, 128, 1),
                 max_text_length=128,
                 charset_base=string.printable[:95],
                 architecture="flor"):
        """Store the configuration and load the model immediately.

        Args:
            checkpoint_path: checkpoint file passed to
                ``HTRModel.load_checkpoint``.
            input_size: model input shape; also passed to ``pp.preprocess``.
            max_text_length: maximum text length given to the tokenizer.
            charset_base: characters the tokenizer is built from; defaults
                to the first 95 characters of ``string.printable``.
            architecture: architecture name passed to ``HTRModel``.
        """
        self.tokenizer = None
        self.model = None

        self.checkpoint_path = checkpoint_path
        self.input_size = input_size
        self.max_text_length = max_text_length
        self.charset_base = charset_base
        self.architecture = architecture

        # Project helper; presumably caps the GPU memory the backend may
        # claim -- confirm against ml_utils. Kept ahead of load_model() as in
        # the original call order.
        ml_utils.limit_gpu_memory()
        self.load_model()

    def load_model(self):
        """(Re)build the tokenizer and model, then load the checkpoint."""
        self.tokenizer = Tokenizer(chars=self.charset_base,
                                   max_text_length=self.max_text_length)

        self.model = HTRModel(architecture=self.architecture,
                              input_size=self.input_size,
                              vocab_size=self.tokenizer.vocab_size,
                              top_paths=10)

        self.model.compile()
        self.model.load_checkpoint(target=self.checkpoint_path)

    def read_all_text_from_images(self, images):
        """Return the recognized text of every image, in input order.

        Args:
            images: iterable of inputs accepted by ``read_text_from_image``.

        Returns:
            A list with one string per input image.
        """
        return [self.read_text_from_image(img) for img in images]

    def read_text_from_image(self, img):
        """Return the first decoded candidate for a single image.

        Args:
            img: value forwarded to ``pp.preprocess`` together with
                ``self.input_size``.

        Returns:
            The decoded text of the first candidate of the first sample, or
            ``""`` when the model yields no sample or no candidate.
        """
        img = pp.preprocess(img, input_size=self.input_size)
        x_test = pp.normalization([img])

        # Exactly one image is submitted, so only the first sample matters;
        # the probabilities are not used by this method.
        predicts, _ = self.model.predict(x_test, ctc_decode=True)

        # len() rather than truthiness: the containers returned by predict()
        # may be arrays, whose truth value is ambiguous.
        if len(predicts) == 0:
            return ""

        candidates = predicts[0]
        if len(candidates) == 0:
            return ""

        # Decode only the candidate that is actually returned.
        return self.tokenizer.decode(candidates[0])
# Single-image mode: build the tokenizer and model, load the checkpoint,
# predict on the image given by args.image, print every decoded candidate
# with its probability and display the image.
# NOTE(review): the `if` this `elif` belongs to is outside this view.
elif args.image:
    tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

    # NOTE(review): presumably pp.preproc loads the image from the args.image
    # path and fits it to input_size -- confirm against the `pp` module.
    img = pp.preproc(args.image, input_size=input_size)
    # The model is fed a one-element batch.
    x_test = pp.normalization([img])

    model = HTRModel(architecture=args.arch,
                     input_size=input_size,
                     vocab_size=tokenizer.vocab_size,
                     top_paths=10)

    model.compile()
    model.load_checkpoint(target=target_path)

    # With ctc_decode=True, predict() is unpacked into the predictions and
    # their probabilities.
    predicts, probabilities = model.predict(x_test, ctc_decode=True)
    # Decode every candidate of every sample with the tokenizer.
    predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

    print("\n####################################")

    # One report (and one window) per sample in the batch.
    for i, (pred, prob) in enumerate(zip(predicts, probabilities)):
        print("\nProb. - Predict")

        # Candidates and probabilities are paired positionally.
        for (pd, pb) in zip(pred, prob):
            print(f"{pb:.4f} - {pd}")

        # Shows the preprocessed image, passed through pp.adjust_to_see.
        cv2.imshow(f"Image {i + 1}", pp.adjust_to_see(img))
        print("\n####################################")

    # Block until a key is pressed so the window(s) stay visible.
    cv2.waitKey(0)

else:
    # NOTE(review): `assert` statements are removed when Python runs with -O,
    # so this path check is skipped in optimized mode.
    assert os.path.isfile(source_path) or os.path.isfile(target_path)
# get default callbacks and load checkpoint weights file (HDF5) if exists model.load_checkpoint(target=target_path) callbacks = model.get_callbacks(logdir=output_path, checkpoint=target_path, verbose=1) # TODO PREDICT from data import preproc as pp # from google.colab.patches import cv2_imshow start_time = datetime.datetime.now() # predict() function will return the predicts with the probabilities predicts, _ = model.predict(x=dtgen.next_test_batch(), steps=dtgen.steps['test'], ctc_decode=True, verbose=1) # decode to string predicts = [dtgen.tokenizer.decode(x[0]) for x in predicts] ground_truth = [x.decode() for x in dtgen.dataset['test']['gt']] total_time = datetime.datetime.now() - start_time # mount predict corpus file with open(os.path.join(output_path, "predict.txt"), "w") as lg: for pd, gt in zip(predicts, ground_truth): lg.write(f"TE_L {gt}\nTE_P {pd}\n") # for i, item in enumerate(dtgen.dataset['test']['dt'][:10]): # print("=" * 1024, "\n")
f"Time per item: {time_epoch / total_item}\n", f"Total epochs: {len(loss)}", f"Best epoch {min_val_loss_i + 1}\n", f"Training loss: {loss[min_val_loss_i]:.8f}", f"Validation loss: {min_val_loss:.8f}" ]) with open(os.path.join(output_path, "train.txt"), "w") as lg: lg.write(t_corpus) print(t_corpus) elif args.test: start_time = datetime.datetime.now() predicts, _ = model.predict(x=ds.getNext(), steps=ds.valid_steps, ctc_decode=True, verbose=1) predicts = [dtgen.tokenizer.decode(x[0]) for x in predicts] total_time = datetime.datetime.now() - start_time with open(os.path.join(output_path, "predict.txt"), "w") as lg: for pd, gt in zip(predicts, dtgen.dataset['test']['gt']): lg.write(f"TE_L {gt}\nTE_P {pd}\n") evaluate = evaluation.ocr_metrics( predicts=predicts, ground_truth=dtgen.dataset['test']['gt'], norm_accentuation=args.norm_accentuation, norm_punctuation=args.norm_punctuation)
# Single-image mode: build the tokenizer and model, load the checkpoint,
# predict on the image given by args.image, print every decoded candidate
# with its probability and display the source image.
# NOTE(review): the branch header these statements sit under is outside this
# view; they are indented here so the trailing `else:` lines up with it.
    tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

    # NOTE(review): presumably pp.preprocess loads the image from the
    # args.image path and fits it to input_size -- confirm against `pp`.
    img = pp.preprocess(args.image, input_size=input_size)
    # The model is fed a one-element batch.
    x_test = pp.normalization([img])

    model = HTRModel(architecture=args.arch,
                     input_size=input_size,
                     vocab_size=tokenizer.vocab_size,
                     beam_width=10,
                     top_paths=10)

    model.compile(learning_rate=0.001)
    model.load_checkpoint(target=target_path)

    # With ctc_decode=True, predict() is unpacked into the predictions and
    # their probabilities.
    predicts, probabilities = model.predict(x_test, ctc_decode=True)
    # Decode every candidate of every sample with the tokenizer.
    predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

    print("\n####################################")

    # One report (and one window) per sample in the batch.
    for i, (pred, prob) in enumerate(zip(predicts, probabilities)):
        print("\nProb. - Predict")

        # Candidates and probabilities are paired positionally.
        for (pd, pb) in zip(pred, prob):
            print(f"{pb:.4f} - {pd}")

        # Shows the image re-read from disk, not the preprocessed `img`.
        cv2.imshow(f"Image {i + 1}", cv2.imread(args.image))
        print("\n####################################")

    # Block until a key is pressed so the window(s) stay visible.
    cv2.waitKey(0)

else:
    # NOTE(review): `assert` statements are removed when Python runs with -O,
    # so this path check is skipped in optimized mode.
    assert os.path.isfile(source_path) or os.path.isfile(target_path)