def test_3pg_no_ocr(self, mocks):
     """Process ``FakePdf("...")`` and check the recorded mock call counts.

     Expects 3 ``page_extracted`` calls, 0 ``page_ocrd`` calls and
     3 ``page_text_position_extracted`` calls.
     """
     pdf = FakePdf("...")
     init_doc(pdf)
     trigger_processing()

     # Checked in insertion order, one mock at a time.
     expected_calls = {
         "page_extracted": 3,
         # No pages need to be OCRd when text is extractable
         "page_ocrd": 0,
         "page_text_position_extracted": 3,
     }
     for mock_name, expected in expected_calls.items():
         assert mocks[mock_name].call_count == expected
    def test_redaction(self, mocks):
        """Redact page index 1 of ``FakePdf("...")`` and check the mock call counts.

        The document is processed first and ``reset_mocks`` is called before
        ``trigger_redacting([1])``, so the assertions run against the mocks
        as they stand after the redaction step.
        """
        pdf = FakePdf("...")
        init_doc(pdf)
        trigger_processing()

        # Redact the middle page
        reset_mocks(mocks)
        pages_to_redact = [1]
        trigger_redacting(pages_to_redact)

        # Only 1 page should be processed, but the cache is rebuilt.
        # Checked in insertion order, one mock at a time.
        expected_calls = {
            "cache_miss": 3,
            "page_loaded": 2,  # 1 + 1
            "page_extracted": 1,
            "page_ocrd": 1,
            "page_text_position_extracted": 1,
        }
        for mock_name, expected in expected_calls.items():
            assert mocks[mock_name].call_count == expected
    def test_cache_dirty_batching(self, mocks):
        """Redact page indices 3 and 4 of ``FakePdf(".....")`` and check the mocks.

        The document is processed first and ``reset_mocks`` is called before
        ``trigger_redacting([3, 4])``; the assertions then check a single
        ``cache_written`` call, 5 ``cache_miss`` calls and 2 calls each for
        the extraction, OCR and text-position mocks.
        """
        pdf = FakePdf(".....")
        init_doc(pdf)
        trigger_processing()

        # Redact the last two pages
        reset_mocks(mocks)
        pages_to_redact = [3, 4]
        trigger_redacting(pages_to_redact)

        # Checked in insertion order, one mock at a time.
        expected_calls = {
            # Cache should only be written once
            "cache_written": 1,
            # Cache should be missed once per page (number of pages times)
            "cache_miss": 5,
            # Two pages should be extracted
            "page_extracted": 2,
            "page_ocrd": 2,
            "page_text_position_extracted": 2,
        }
        for mock_name, expected in expected_calls.items():
            assert mocks[mock_name].call_count == expected
    def test_cache_misses(self, mocks):
        """Process ``FakePdf("...")`` and expect 3 ``cache_miss`` calls."""
        pdf = FakePdf("...")
        init_doc(pdf)
        trigger_processing()

        miss_count = mocks["cache_miss"].call_count
        assert miss_count == 3
 def test_5pg_half_ocr(self, mocks):
     """Process ``FakePdf(".o.o.")`` and check the recorded mock call counts.

     Expects 5 ``page_extracted`` calls, 2 ``page_ocrd`` calls and
     5 ``page_text_position_extracted`` calls.
     """
     pdf = FakePdf(".o.o.")
     init_doc(pdf)
     trigger_processing()

     # Checked in insertion order, one mock at a time.
     expected_calls = {
         "page_extracted": 5,
         "page_ocrd": 2,
         "page_text_position_extracted": 5,
     }
     for mock_name, expected in expected_calls.items():
         assert mocks[mock_name].call_count == expected
 def test_3pg_ocr(self, mocks):
     """Process ``FakePdf("ooo")`` and check the recorded mock call counts.

     Expects 3 calls each to ``page_extracted``, ``page_ocrd`` and
     ``page_text_position_extracted``.
     """
     pdf = FakePdf("ooo")
     init_doc(pdf)
     trigger_processing()

     # Checked in insertion order, one mock at a time.
     expected_calls = {
         "page_extracted": 3,
         "page_ocrd": 3,
         "page_text_position_extracted": 3,
     }
     for mock_name, expected in expected_calls.items():
         assert mocks[mock_name].call_count == expected
    def test_cache_batching(self, mocks):
        """Process ``FakePdf(".....")`` and expect exactly 1 ``cache_written`` call."""
        pdf = FakePdf(".....")
        init_doc(pdf)
        trigger_processing()

        # Cache should be written only once
        write_count = mocks["cache_written"].call_count
        assert write_count == 1