def test_prediction_hdf5(self):
    """Predict on the uw3_50lines HDF5 file, then evaluate with the same file as ground truth.

    No metric is asserted here; the test only checks that both steps run
    without raising.
    """
    h5_path = os.path.join(this_dir, "data", "uw3_50lines", "uw3-50lines.h5")

    prediction_params = predict_args(data=Hdf5(files=[h5_path]))
    run_predict(prediction_params)

    evaluation_params = eval_args(gt_data=Hdf5(files=[h5_path]))
    run_eval(evaluation_params)
def test_eval_pagexml(self):
    """Predict on one avicanon_pagexml page image, then evaluate against its XML file.

    Prediction receives the page image, evaluation receives the matching
    ``008.xml`` as ground truth. No metric is asserted.
    """
    page_image = os.path.join(this_dir, "data", "avicanon_pagexml", "008.nrm.png")
    page_xml = os.path.join(this_dir, "data", "avicanon_pagexml", "008.xml")

    prediction_input = PageXML(images=[page_image])
    run_predict(predict_args(data=prediction_input))

    ground_truth = PageXML(xml_files=[page_xml])
    run_eval(eval_args(gt_data=ground_truth))
def test_prediction_abbyy(self):
    """Predict on the Abbyy sample images, then evaluate against the Abbyy XML files.

    Both inputs are passed as glob patterns (``*.jpg`` / ``*.abbyy.xml``);
    they are handed to ``Abbyy`` unexpanded. No metric is asserted.
    """
    image_pattern = os.path.join(this_dir, "data", "hiltl_die_bank_des_verderbens_abbyyxml", "*.jpg")
    xml_pattern = os.path.join(this_dir, "data", "hiltl_die_bank_des_verderbens_abbyyxml", "*.abbyy.xml")

    prediction_input = Abbyy(images=[image_pattern])
    run_predict(predict_args(data=prediction_input))

    ground_truth = Abbyy(xml_files=[xml_pattern])
    run_eval(eval_args(gt_data=ground_truth))
def test_eval_files_with_different_sources(self):
    """Evaluate with ground truth and predictions supplied as separate file lists.

    Prediction runs with ``pred_extension=".ext-pred.txt"``; evaluation then
    receives the ``*.gt.txt`` files as ground truth and the
    ``*.ext-pred.txt`` files as predictions, and the resulting ``avg_ler``
    must stay below 0.0009.
    """
    image_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.png")
    line_images = sorted(glob_all([image_pattern]))
    prediction_input = FileDataParams(pred_extension=".ext-pred.txt", images=line_images)
    run_predict(predict_args(data=prediction_input))

    # The text files are globbed only after prediction has finished, ground
    # truth first and predictions second.
    gt_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.gt.txt")
    gt_source = FileDataParams(texts=sorted(glob_all([gt_pattern])))

    pred_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.ext-pred.txt")
    pred_source = FileDataParams(texts=sorted(glob_all([pred_pattern])))

    result = run_eval(eval_args(gt_data=gt_source, pred_data=pred_source))
    self.assertLess(result["avg_ler"], 0.0009, msg="Current best model yields about 0.09% CER")
def test_prediction_files_with_different_extension(self):
    """Predict and evaluate line files using a non-default prediction extension.

    Both the prediction input and the ground-truth parameters carry
    ``pred_extension=".ext-pred.txt"``. No metric is asserted.
    """
    image_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.png")
    line_images = sorted(glob_all([image_pattern]))
    prediction_input = FileDataParams(pred_extension=".ext-pred.txt", images=line_images)
    run_predict(predict_args(data=prediction_input))

    gt_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.gt.txt")
    gt_texts = sorted(glob_all([gt_pattern]))
    ground_truth = FileDataParams(pred_extension=".ext-pred.txt", texts=gt_texts)
    run_eval(eval_args(gt_data=ground_truth))
def test_eval_hdf5(self):
    """Predict on the uw3_50lines HDF5 file and check the evaluated error rate.

    The same HDF5 file is used for prediction and as ground truth; the
    reported ``avg_ler`` must stay below 0.006.
    """
    h5_path = os.path.join(this_dir, "data", "uw3_50lines", "uw3-50lines.h5")

    def hdf5_params():
        # A fresh parameter object per step, as each step had its own before.
        return Hdf5(files=[h5_path])

    run_predict(predict_args(data=hdf5_params()))
    result = run_eval(eval_args(gt_data=hdf5_params()))

    self.assertLess(result["avg_ler"], 0.006, msg="Current best model yields about 0.55% CER")
def test_prediction_files(self):
    """Predict on the uw3_50lines test images and evaluate twice.

    The first evaluation uses default arguments; the second sets
    ``xlsx_output`` to a file inside a temporary directory. No metric is
    asserted.
    """
    image_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.png")
    gt_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.gt.txt")

    def ground_truth_args():
        # Glob again on every call so each evaluation gets its own file
        # list and its own argument object.
        return eval_args(gt_data=FileDataParams(texts=sorted(glob_all([gt_pattern]))))

    line_images = sorted(glob_all([image_pattern]))
    run_predict(predict_args(data=FileDataParams(images=line_images)))

    run_eval(ground_truth_args())

    xlsx_args = ground_truth_args()
    with tempfile.TemporaryDirectory() as tmp_dir:
        xlsx_args.xlsx_output = os.path.join(tmp_dir, "output.xlsx")
        run_eval(xlsx_args)
def test_eval_list_files(self):
    """Predict and evaluate using ``test.files`` / ``test.gt.files`` as inputs.

    Both paths are passed through ``glob_all``; presumably they are list
    files naming the actual samples (verify against ``glob_all``). The
    resulting ``avg_ler`` must stay below 0.0009.
    """
    images_list = os.path.join(this_dir, "data", "uw3_50lines", "test.files")
    gt_list = os.path.join(this_dir, "data", "uw3_50lines", "test.gt.files")

    line_images = sorted(glob_all([images_list]))
    prediction_input = FileDataParams(images=line_images)
    run_predict(predict_args(data=prediction_input))

    gt_texts = sorted(glob_all([gt_list]))
    ground_truth = FileDataParams(texts=gt_texts)
    result = run_eval(eval_args(gt_data=ground_truth))

    self.assertLess(result["avg_ler"], 0.0009, msg="Current best model yields about 0.09% CER")
def test_eval_files(self):
    """Predict on the uw3_50lines test images, check the error rate, then export to XLSX.

    The first evaluation must report ``avg_ler`` below 0.0009. A second
    evaluation is run with ``xlsx_output`` pointing into a temporary
    directory; its result is not inspected.
    """
    image_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.png")
    gt_pattern = os.path.join(this_dir, "data", "uw3_50lines", "test", "*.gt.txt")

    def ground_truth_args():
        # Each evaluation gets its own freshly globbed file list and its own
        # argument object.
        gt_texts = sorted(glob_all([gt_pattern]))
        return eval_args(gt_data=FileDataParams(texts=gt_texts))

    line_images = sorted(glob_all([image_pattern]))
    run_predict(predict_args(data=FileDataParams(images=line_images)))

    result = run_eval(ground_truth_args())
    self.assertLess(result["avg_ler"], 0.0009, msg="Current best model yields about 0.09% CER")

    xlsx_args = ground_truth_args()
    with tempfile.TemporaryDirectory() as tmp_dir:
        xlsx_args.xlsx_output = os.path.join(tmp_dir, "output.xlsx")
        run_eval(xlsx_args)