def testrun(args, parser):
    """Handle the ``testrun`` subcommand: reserve, run, then optionally upload.

    The process always terminates through ``sys.exit``: status 0 when every
    requested step succeeded (or when uploading was skipped on request),
    status 1 as soon as any step fails.

    Args:
        args: Parsed command-line namespace. Reads ``config_path``,
            ``testrun_num_pages``, ``filter`` and ``testrun_no_upload``.
        parser: Accepted for signature parity with the other subcommand
            handlers; not used here.
    """
    submitter = EmopSubmit(args.config_path)

    # Guard against launching resource intensive work outside of a cluster
    # job (for example on a login node).
    if not submitter.scheduler.is_job_environment():
        print("Can only use testrun subcommand from within a cluster job environment")
        sys.exit(1)

    # Reserve as many pages as requested with --num-pages.
    reserved_proc_id = submitter.reserve(
        num_pages=args.testrun_num_pages,
        r_filter=args.filter,
    )
    if not reserved_proc_id:
        print("Failed to reserve pages")
        sys.exit(1)

    # Process the pages that were just reserved; the run is always forced.
    if not EmopRun(args.config_path, reserved_proc_id).run(force=True):
        sys.exit(1)

    # --no-upload: stop after a successful run.
    if args.testrun_no_upload:
        sys.exit(0)

    # Upload the results and report the outcome through the exit status.
    uploaded = EmopUpload(args.config_path).upload_proc_id(proc_id=reserved_proc_id)
    sys.exit(0 if uploaded else 1)
def setUp(self):
    """Build the per-test fixture: a mocked Popen and an EmopRun instance."""
    # Stand-in for the object returned by Popen(): communicate() yields two
    # empty strings and the return code is 0.
    fake_process = mock.Mock()
    fake_process.communicate.return_value = ["", ""]
    fake_process.returncode = 0
    self.mock_rv = fake_process

    # Replace Popen as looked up through emop.lib.utilities.subprocess32 and
    # make every call hand back the stand-in above.
    self.popen_patcher = mock.patch("emop.lib.utilities.subprocess32.Popen")
    self.mock_popen = self.popen_patcher.start()
    self.mock_popen.return_value = fake_process

    # Set before EmopRun is constructed.
    # NOTE(review): presumably makes the code believe it runs inside SLURM
    # job "2" - confirm against EmopRun / the scheduler.
    os.environ["SLURM_JOB_ID"] = "2"
    self.run = EmopRun(config_path=default_config_path(), proc_id="0001")
def run(args, parser):
    """Handle the ``run`` subcommand.

    Builds an ``EmopRun`` for ``--proc-id`` and executes it, passing
    ``--force-run`` through as ``force``. The process always terminates
    through ``sys.exit``: status 0 when the run reports success, status 1
    when it reports failure or when the command is invoked outside of a
    cluster job environment.

    Args:
        args: Parsed command-line namespace. Reads ``config_path``,
            ``proc_id`` and ``force_run``.
        parser: Accepted for signature parity with the other subcommand
            handlers; not used here.
    """
    runner = EmopRun(args.config_path, args.proc_id)

    # Guard against launching resource intensive work outside of a cluster
    # job (for example on a login node).
    if not runner.scheduler.is_job_environment():
        print("Can only use run subcommand from within a cluster job environment")
        sys.exit(1)

    succeeded = runner.run(force=args.force_run)
    sys.exit(0 if succeeded else 1)
class TestEmopRun(TestCase):
    """Tests for EmopRun result bookkeeping and the OCR / post-process / job steps.

    For every test, ``Popen`` as looked up through
    ``emop.lib.utilities.subprocess32`` is replaced by a mock, and the
    ``SLURM_JOB_ID`` and ``TMPDIR`` environment variables are overridden.
    All of these overrides are undone after each test.
    """

    def setUp(self):
        # Register the undo step right after start(): tearDown is not invoked
        # when setUp itself raises, cleanups are, so the patch cannot leak
        # into later tests if anything below fails.
        self.popen_patcher = mock.patch("emop.lib.utilities.subprocess32.Popen")
        self.mock_popen = self.popen_patcher.start()
        self.addCleanup(self.popen_patcher.stop)

        # Stand-in for the object returned by Popen(): communicate() yields
        # two empty strings and the return code is 0.
        self.mock_rv = mock.Mock()
        self.mock_rv.communicate.return_value = ["", ""]
        self.mock_rv.returncode = 0
        self.mock_popen.return_value = self.mock_rv

        # Override SLURM_JOB_ID for this test only; the previous environment
        # is restored by the cleanup.
        env_patcher = mock.patch.dict(os.environ, {"SLURM_JOB_ID": "2"})
        env_patcher.start()
        self.addCleanup(env_patcher.stop)

        # NOTE(review): this attribute shadows the test case's own ``run``
        # method on the instance; renaming it would touch every test below,
        # so it is left as is.
        self.run = EmopRun(config_path=default_config_path(), proc_id="0001")

    @pytest.fixture(autouse=True)
    def setup_files(self, tmpdir, monkeypatch):
        # Point TMPDIR at the per-test temporary directory; monkeypatch
        # restores the previous value once the test is finished.
        self.tmpdir = tmpdir
        monkeypatch.setenv("TMPDIR", str(self.tmpdir))

    def test_append_result_failed(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        self.run.payload.save_output = mock.MagicMock()

        self.run.append_result(job=job, results="Test", failed=True)

        # Assert the call first: call_args is None when save_output was never
        # called, and unpacking it would raise instead of failing cleanly.
        self.assertTrue(self.run.payload.save_output.called)
        _, save_kwargs = self.run.payload.save_output.call_args
        job_queues = save_kwargs["data"]["job_queues"]
        # The "2" in the message matches the SLURM_JOB_ID set in setUp.
        expected_failed = {"id": job.id, "results": "SLURM JOB 2: Test"}
        self.assertEqual(1, len(job_queues["failed"]))
        self.assertEqual(0, len(job_queues["completed"]))
        self.assertEqual(expected_failed, job_queues["failed"][0])

    def test_append_result_completed(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        self.run.payload.save_output = mock.MagicMock()

        self.run.append_result(job=job, results=None)

        # Assert the call first: call_args is None when save_output was never
        # called, and unpacking it would raise instead of failing cleanly.
        self.assertTrue(self.run.payload.save_output.called)
        _, save_kwargs = self.run.payload.save_output.call_args
        job_queues = save_kwargs["data"]["job_queues"]
        expected_completed = [job.id]
        self.assertEqual(0, len(job_queues["failed"]))
        self.assertEqual(1, len(job_queues["completed"]))
        self.assertItemsEqual(expected_completed, job_queues["completed"])

    def test_get_results(self):
        self.run.jobs_completed.append(1)
        self.run.jobs_failed.append({"id": 2, "results": "test"})
        self.run.page_results.append({"batch_id": 1, "page_id": 2})
        self.run.postproc_results.append({"batch_job_id": 1, "page_id": 2})
        expected_value = {
            "extra_transfers": [],
            "font_training_results": [],
            "job_queues": {"completed": [1], "failed": [{"id": 2, "results": "test"}]},
            "page_results": [{"batch_id": 1, "page_id": 2}],
            "postproc_results": [{"batch_job_id": 1, "page_id": 2}],
        }
        actual_value = self.run.get_results()
        self.assertEqual(expected_value, actual_value)

    def test_do_process_page_corrector(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        page_corrector = PageCorrector(job=job)
        page_corrector.run = mock.MagicMock()
        results = mock_results_tuple()
        page_corrector.should_run = mock.MagicMock()
        page_corrector.should_run.return_value = True
        page_corrector.run.return_value = results(stdout=None, stderr=None, exitcode=0)
        retval = self.run.do_process(obj=page_corrector, job=job)
        self.assertTrue(page_corrector.run.called)
        self.assertTrue(retval)

    def test_do_process_page_corrector_failed(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        page_corrector = PageCorrector(job=job)
        page_corrector.run = mock.MagicMock()
        results = mock_results_tuple()
        page_corrector.should_run = mock.MagicMock()
        page_corrector.should_run.return_value = True
        # A non-zero exit code with stderr output must be recorded as a failure.
        page_corrector.run.return_value = results(stdout=None, stderr="Test", exitcode=1)
        self.run.append_result = mock.MagicMock()
        retval = self.run.do_process(obj=page_corrector, job=job)
        self.run.append_result.assert_called_with(
            job=job, results="PageCorrector Failed: Test", failed=True
        )
        self.assertFalse(retval)

    def test_do_process_page_corrector_skipped(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        page_corrector = PageCorrector(job=job)
        page_corrector.run = mock.MagicMock()
        results = mock_results_tuple()
        page_corrector.run.return_value = results(stdout=None, stderr="Test", exitcode=1)
        # should_run() answering False means the (failing) run result above
        # must never be recorded.
        flexmock(page_corrector).should_receive("should_run").and_return(False)
        self.run.append_result = mock.MagicMock()
        retval = self.run.do_process(obj=page_corrector, job=job)
        self.assertFalse(self.run.append_result.called)
        self.assertTrue(retval)

    def test_do_process_page_corrector_not_skipped(self):
        settings = default_settings()
        # With skip-existing disabled, should_run() must not be consulted.
        self.run.settings.controller_skip_existing = False
        job = mock_emop_job(settings)
        page_corrector = PageCorrector(job=job)
        page_corrector.run = mock.MagicMock()
        results = mock_results_tuple()
        page_corrector.run.return_value = results(stdout=None, stderr=None, exitcode=0)
        page_corrector.should_run = mock.MagicMock()
        self.run.append_result = mock.MagicMock()
        retval = self.run.do_process(obj=page_corrector, job=job)
        self.assertFalse(page_corrector.should_run.called)
        self.assertTrue(retval)

    def test_do_ocr_tesseract(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        tesseract = Tesseract(job=job)
        results = mock_results_tuple()
        expected_results = results(stdout=None, stderr=None, exitcode=0)
        flexmock(os.path).should_receive("isfile").with_args(job.image_path).and_return(True)
        mock_mkdirs(job.output_dir)
        flexmock(os.path).should_receive("isfile").with_args(job.txt_file).and_return(True)
        flexmock(os.path).should_receive("isfile").with_args(job.hocr_file).and_return(True)
        flexmock(os.path).should_receive("isfile").with_args(job.xml_file).and_return(True)
        flexmock(tesseract).should_receive("run").and_return(expected_results)
        retval = self.run.do_ocr(job=job)
        self.assertTrue(retval)

    def test_do_ocr_tesseract_failed(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        job.page_result.ocr_text_path_exists = False
        job.page_result.ocr_xml_path_exists = False
        tesseract = Tesseract(job=job)
        expected_results = "tesseract OCR Failed: Could not find page image %s" % job.image_path
        flexmock(os.path).should_receive("isfile").with_args(job.txt_file).and_return(False)
        flexmock(os.path).should_receive("isfile").with_args(job.xml_file).and_return(False)
        flexmock(tesseract).should_receive("should_run").and_return(True)
        # The page image is reported as missing, which is what the expected
        # failure message refers to.
        flexmock(os.path).should_receive("isfile").with_args(job.image_path).and_return(False)
        flexmock(tesseract).should_receive("run")
        self.run.append_result = mock.MagicMock()
        retval = self.run.do_ocr(job=job)
        self.run.append_result.assert_called_with(job=job, results=expected_results, failed=True)
        self.assertFalse(retval)

    def test_do_ocr_tesseract_skipped(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        tesseract = Tesseract(job=job)
        flexmock(os.path).should_receive("isfile").with_args(job.txt_file).and_return(True)
        flexmock(os.path).should_receive("isfile").with_args(job.xml_file).and_return(True)
        flexmock(tesseract).should_receive("should_run").and_return(False)
        flexmock(tesseract).should_receive("run")
        self.run.append_result = mock.MagicMock()
        retval = self.run.do_ocr(job=job)
        self.assertFalse(self.run.append_result.called)
        self.assertTrue(retval)

    # Known limitation: this test does not correctly validate that should_run
    # is not called. It still passes when
    # self.run.settings.controller_skip_existing is NOT set to False.
    # NOTE(review): presumably the expectations are attached to the local
    # ``tesseract`` instance rather than the one do_ocr() uses - confirm.
    # Skipping the test (reason "Does not work") was considered and left
    # disabled.
    def test_do_ocr_tesseract_not_skipped(self):
        settings = default_settings()
        self.run.settings.controller_skip_existing = False
        job = mock_emop_job(settings)
        tesseract = Tesseract(job=job)
        flexmock(os.path).should_receive("isdir").with_args(tesseract.output_parent_dir).and_return(True)
        flexmock(os.path).should_receive("isfile").with_args(job.txt_file).and_return(False)
        flexmock(os.path).should_receive("isfile").with_args(job.xml_file).and_return(True)
        flexmock(os.path).should_receive("isfile").with_args(job.hocr_file).and_return(True)
        flexmock(os.path).should_receive("isfile").with_args(job.image_path).and_return(True)
        flexmock(tesseract).should_receive("should_run").never()
        flexmock(tesseract).should_receive("run")
        retval = self.run.do_ocr(job=job)
        self.assertTrue(retval)

    def test_do_postprocesses(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        # Per-processor expectations of the form
        # should_receive("do_process").with_args(obj=<processor>, job=job)
        # for Denoise, MultiColumnSkew, XML_To_Text, PageEvaluator,
        # PageCorrector and JuxtaCompare were tried and did not work (reason
        # unknown), so a single catch-all expectation is used instead.
        flexmock(self.run).should_receive("do_process").and_return(True)
        retval = self.run.do_postprocesses(job=job)
        self.assertTrue(retval)

    def test_do_postprocesses_failed(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        flexmock(self.run).should_receive("do_process").and_return(False)
        retval = self.run.do_postprocesses(job=job)
        self.assertFalse(retval)

    def test_do_job(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        flexmock(self.run).should_receive("do_ocr").and_return(True)
        flexmock(self.run).should_receive("do_postprocesses").and_return(True)
        retval = self.run.do_job(job=job)
        self.assertTrue(retval)

    def test_do_job_failed_ocr(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        flexmock(self.run).should_receive("do_ocr").and_return(False)
        flexmock(self.run).should_receive("do_postprocesses").and_return(True)
        retval = self.run.do_job(job=job)
        self.assertFalse(retval)

    def test_do_job_failed_postprocesses(self):
        settings = default_settings()
        job = mock_emop_job(settings)
        flexmock(self.run).should_receive("do_ocr").and_return(True)
        flexmock(self.run).should_receive("do_postprocesses").and_return(False)
        retval = self.run.do_job(job=job)
        self.assertFalse(retval)