def test_model_zoo(self):
    """Evaluate the downloaded ``antiqua_modern`` zoo checkpoints.

    Release ``1.0`` of the ``calamari_models`` archive is fetched from GitHub
    (``wget`` piped into ``tar``) into the directory
    ``model_archive_permanent`` and renamed to ``calamari_models``, unless a
    ``calamari_models`` directory already exists there.  Every
    ``*.ckpt.json`` file below ``calamari_models/antiqua_modern`` is passed to
    ``predict_and_eval_main`` together with the validation generator of
    ``uw3_trainer_params(with_validation=True)``.  The test passes when the
    ``avg_ler`` value of the voted evaluation is below ``0.001``.

    The working directory is changed while the test runs and is always
    restored afterwards, so the ``os.chdir`` cannot leak into other tests.
    """
    version = '1.0'
    url = f"https://github.com/Calamari-OCR/calamari_models/archive/{version}.tar.gz"

    # os.chdir() is process-global.  Remember the starting directory so it can
    # be restored; otherwise later tests run inside the model directory and a
    # repeated run would nest another 'model_archive_permanent' inside it.
    start_dir = os.getcwd()
    with tempfile.TemporaryDirectory() as d:
        # The temporary directory is deliberately overridden by a fixed
        # relative path, so the download is kept between runs.
        d = 'model_archive_permanent'  # for debugging
        os.makedirs(d, exist_ok=True)
        os.chdir(d)
        try:
            if not os.path.exists('calamari_models'):
                # Run through a shell because the command uses a pipe and '&&'.
                check_call([
                    'sh', '-c',
                    ' '.join([
                        'wget', '-q', '-O', '-', url, '|', 'tar', 'xz',
                        '&&',
                        'mv', f'calamari_models-{version}', 'calamari_models',
                    ]),
                ])

            trainer_params = uw3_trainer_params(with_validation=True)
            checkpoints = glob(
                os.path.join('calamari_models', 'antiqua_modern', '*.ckpt.json'))
            # Fail with a clear message instead of evaluating zero models.
            self.assertTrue(
                checkpoints,
                "No antiqua_modern checkpoints found in the model archive")

            args = PredictAndEvalArgs(
                checkpoint=checkpoints,
                predictor=PredictorParams(
                    pipeline=DataPipelineParams(batch_size=5)),
                data=trainer_params.gen.val_gen(),
            )
            full_evaluation = predict_and_eval_main(args)
            self.assertLess(
                full_evaluation['voted']['eval']['avg_ler'], 0.001,
                "The accuracy on the test data must be below 0.1%")
        finally:
            os.chdir(start_dir)
def test_predict_and_eval_uw3_with_voting(self):
    """Run predict-and-eval with the same checkpoint supplied three times.

    ``models/best.ckpt`` (relative to ``this_dir``) is listed three times as
    the checkpoint argument and evaluated on the validation generator of
    ``uw3_trainer_params(with_validation=True)`` with a batch size of 5.
    Nothing is asserted on the result; the test only checks that ``main``
    completes.
    """
    from calamari_ocr.test.test_train_file import uw3_trainer_params

    model_path = os.path.join(this_dir, "models", "best.ckpt")
    params = uw3_trainer_params(with_validation=True)

    predictor = PredictorParams(pipeline=DataPipelineParams(batch_size=5))
    eval_args = PredictAndEvalArgs(
        checkpoint=[model_path] * 3,  # identical model used three times
        predictor=predictor,
        data=params.gen.val_gen(),
    )
    main(eval_args)
def test_predict_and_eval_hdf5(self):
    """Run predict-and-eval with one checkpoint on the hdf5 test parameters.

    ``models/best.ckpt`` (relative to ``this_dir``) is evaluated on the
    validation generator of ``default_trainer_params(with_validation=True)``
    from ``test_train_hdf5``, with the data pipeline set to a single process.
    Nothing is asserted on the result; the test only checks that ``main``
    completes.
    """
    from calamari_ocr.test.test_train_hdf5 import default_trainer_params

    model_path = os.path.join(this_dir, "models", "best.ckpt")
    params = default_trainer_params(with_validation=True)

    predictor = PredictorParams(pipeline=DataPipelineParams(num_processes=1))
    eval_args = PredictAndEvalArgs(
        checkpoint=[model_path],
        predictor=predictor,
        data=params.gen.val_gen(),
    )
    main(eval_args)
def test_model_zoo(self):
    """Evaluate the downloaded ``antiqua_modern`` zoo checkpoints.

    Release ``1.0`` of the ``calamari_models`` archive is fetched from GitHub
    (``wget`` piped into ``tar``) into the directory
    ``model_archive_permanent`` and renamed to ``calamari_models``, unless a
    ``calamari_models`` directory already exists there.  Every
    ``*.ckpt.json`` file below ``calamari_models/antiqua_modern`` is passed to
    ``predict_and_eval_main`` together with the validation generator of
    ``uw3_trainer_params(with_validation=True)``.  The test passes when the
    ``avg_ler`` value of the voted evaluation is below ``0.001``.

    The working directory is changed while the test runs and is always
    restored afterwards, so the ``os.chdir`` cannot leak into other tests.
    """
    version = "1.0"
    url = f"https://github.com/Calamari-OCR/calamari_models/archive/{version}.tar.gz"

    # os.chdir() is process-global.  Remember the starting directory so it can
    # be restored; otherwise later tests run inside the model directory and a
    # repeated run would nest another "model_archive_permanent" inside it.
    start_dir = os.getcwd()
    with tempfile.TemporaryDirectory() as d:
        # The temporary directory is deliberately overridden by a fixed
        # relative path, so the download is kept between runs.
        d = "model_archive_permanent"  # for debugging
        os.makedirs(d, exist_ok=True)
        os.chdir(d)
        try:
            if not os.path.exists("calamari_models"):
                # Run through a shell because the command uses a pipe and "&&".
                check_call(
                    [
                        "sh",
                        "-c",
                        " ".join(
                            [
                                "wget",
                                "-q",
                                "-O",
                                "-",
                                url,
                                "|",
                                "tar",
                                "xz",
                                "&&",
                                "mv",
                                f"calamari_models-{version}",
                                "calamari_models",
                            ]
                        ),
                    ]
                )

            trainer_params = uw3_trainer_params(with_validation=True)
            checkpoints = glob(os.path.join("calamari_models", "antiqua_modern", "*.ckpt.json"))
            # Fail with a clear message instead of evaluating zero models.
            self.assertTrue(
                checkpoints,
                "No antiqua_modern checkpoints found in the model archive",
            )

            args = PredictAndEvalArgs(
                checkpoint=checkpoints,
                predictor=PredictorParams(pipeline=DataPipelineParams(batch_size=5)),
                data=trainer_params.gen.val_gen(),
            )
            full_evaluation = predict_and_eval_main(args)
            self.assertLess(
                full_evaluation["voted"]["eval"]["avg_ler"],
                0.001,
                "The accuracy on the test data must be below 0.1%",
            )
        finally:
            os.chdir(start_dir)