def test_ocr_text_color(image, color, expected, region):
    """Check that ``text_color`` is what makes ``expected`` readable.

    Both ``stbt.ocr`` and ``stbt.match_text`` are run on ``region`` of the
    loaded ``image`` in single-line mode: first without a colour hint (the
    text must not be found), then with ``text_color=color`` (it must be).
    """
    frame = load_image(image)
    single_line = stbt.OcrMode.SINGLE_LINE

    # OCR: the expected text only appears once the colour is supplied.
    plain_text = stbt.ocr(frame, region, single_line)
    assert expected not in plain_text
    coloured_text = stbt.ocr(frame, region, single_line, text_color=color)
    assert expected == coloured_text

    # match_text: same expectation as for ocr above.
    plain_match = stbt.match_text(expected, frame, region, single_line)
    assert not plain_match
    coloured_match = stbt.match_text(
        expected, frame, region, single_line, text_color=color)
    assert coloured_match
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check OCR output of ``ocr/ambig.png`` against ``ligature_text``.

    A few known misreadings are patched in the OCR output before the
    comparison. The em-dash phrase must match whether it is spelled with
    an ASCII hyphen or with U+2014.
    """
    frame = load_image('ocr/ambig.png')

    known_misreadings = (
        # tesseract 3.02
        ("horizonta|", "horizontal"),
        # tesseract 4.00 with tessdata 590567f
        ("siIIyness", "sillyness"),
        ("Iigatures", "ligatures"),
    )

    recognised = stbt.ocr(frame)
    for wrong, right in known_misreadings:
        recognised = recognised.replace(wrong, right)
    assert ligature_text == recognised

    hyphen_match = stbt.match_text("em-dash,", frame)
    assert hyphen_match
    em_dash_match = stbt.match_text(u"em\u2014dash,", frame)
    assert em_dash_match
def test_that_match_text_gives_tesseract_a_hint():
    """Check that ``match_text`` finds "ITV Player" where plain OCR can't.

    The test is skipped when plain ``stbt.ocr`` already reads the text, or
    when passing "ITV" via ``tesseract_user_words`` makes no difference.
    """
    frame = load_image("ocr/itv-player.png")
    wanted = "ITV Player"

    unaided = stbt.ocr(frame=frame)
    if wanted in unaided:
        raise SkipTest("Tesseract doesn't need a hint")

    hinted = stbt.ocr(frame=frame, tesseract_user_words=["ITV"])
    if wanted not in hinted:
        raise SkipTest("Giving tesseract a hint doesn't help")

    assert stbt.match_text(wanted, frame=frame)
def test_that_text_region_is_correct_even_with_regions_larger_than_frame():
    """Check the matched region when the search region is extended by 12800.

    Takes the entry at index 6 of ``iterate_menu()``, extends its region's
    right edge by 12800, and verifies that the match still lands inside
    the entry's original region.
    """
    frame = load_image("ocr/menu.png")
    menu_entries = list(iterate_menu())
    text, region, _ = menu_entries[6]

    oversized = region.extend(right=+12800)
    result = stbt.match_text(text, frame=frame, region=oversized)

    assert result
    assert region.contains(result.region)
def test_ocr_doesnt_leak_python_future_newtypes():
    """Check that OCR results are typed exactly ``str`` or ``unicode``.

    Covers both the return value of ``stbt.ocr`` and the ``text`` attribute
    of the ``stbt.match_text`` result. The check is on the type's name.
    """
    frame = load_image("ocr/small.png")

    ocr_text = stbt.ocr(frame)
    ocr_type_name = type(ocr_text).__name__
    assert ocr_type_name in ["str", "unicode"]

    match = stbt.match_text("Small", frame)
    match_type_name = type(match.text).__name__
    assert match_type_name in ["str", "unicode"]
def test_match_text_stringify_result():
    """Check the ``str()`` form of a successful ``match_text`` result.

    The pattern allows an optional ``u`` prefix on the quoted strings and
    any text inside ``Region(...)``; everything else must appear literally.
    """
    frame = load_image("ocr/menu.png")
    result = stbt.match_text(u"Onion Bhaji", frame=frame)
    # The "." in the filename is escaped so that it only matches a literal
    # dot rather than any character.
    expected_pattern = (
        r"TextMatchResult\(time=None, match=True, region=Region\(.*\), "
        r"frame=<Image\(filename=u?'ocr/menu\.png', "
        r"dimensions=1280x720x3\)>, text=u?'Onion Bhaji'\)")
    assert re.match(expected_pattern, str(result))
def test_that_match_text_still_returns_if_region_doesnt_intersect_with_frame(
        region):
    """Check the no-match result ``match_text`` returns for ``region``.

    ``region`` is supplied by the caller (per the test name, a region that
    doesn't intersect the frame). The result must report no match, carry no
    region, and echo the searched text back as a plain string.
    """
    searched = "Onion Bhaji"
    frame = load_image("ocr/menu.png")

    result = stbt.match_text(searched, frame=frame, region=region)

    assert result.match is False
    assert result.region is None
    assert result.text == searched
    # Avoid future.types.newtypes in return values
    text_type_name = type(result.text).__name__
    assert text_type_name in ["str", "unicode"]
def test_ocr_debug():
    """Check which debug files ``ocr`` and ``match_text`` write.

    Runs three ``stbt.ocr`` calls and three ``stbt.match_text`` calls at
    debug level 2 inside a scoped current directory, then compares the
    sorted ``find`` listing of ``stbt-debug`` with the expected tree. The
    calls are kept in this order because the numbered output directories
    in the expected listing follow it.
    """
    # So that the output directory name doesn't depend on how many tests
    # were run before this one.
    ImageLogger._frame_number = itertools.count(1)  # pylint:disable=protected-access

    frame = stbt.load_image("action-panel.png")
    band = stbt.Region(0, 370, right=1280, bottom=410)
    text_color = (235, 235, 235)

    expected_listing = dedent("""\
        stbt-debug
        stbt-debug/00001
        stbt-debug/00001/index.html
        stbt-debug/00001/source.png
        stbt-debug/00001/tessinput.png
        stbt-debug/00001/upsampled.png
        stbt-debug/00002
        stbt-debug/00002/index.html
        stbt-debug/00002/source.png
        stbt-debug/00002/tessinput.png
        stbt-debug/00002/upsampled.png
        stbt-debug/00003
        stbt-debug/00003/index.html
        stbt-debug/00003/source.png
        stbt-debug/00003/tessinput.png
        stbt-debug/00003/text_color_difference.png
        stbt-debug/00003/text_color_threshold.png
        stbt-debug/00003/upsampled.png
        stbt-debug/00004
        stbt-debug/00004/index.html
        stbt-debug/00004/source.png
        stbt-debug/00004/tessinput.png
        stbt-debug/00004/upsampled.png
        stbt-debug/00005
        stbt-debug/00005/index.html
        stbt-debug/00005/source.png
        stbt-debug/00005/tessinput.png
        stbt-debug/00005/upsampled.png
        stbt-debug/00006
        stbt-debug/00006/index.html
        stbt-debug/00006/source.png
        stbt-debug/00006/tessinput.png
        stbt-debug/00006/text_color_difference.png
        stbt-debug/00006/text_color_threshold.png
        stbt-debug/00006/upsampled.png
        """)

    with scoped_curdir(), scoped_debug_level(2):
        stbt.ocr(frame)
        stbt.ocr(frame, region=band)
        stbt.ocr(frame, region=band, text_color=text_color)

        stbt.match_text("Summary", frame)  # no match
        stbt.match_text("Summary", frame, region=band)  # no match
        stbt.match_text("Summary", frame, region=band, text_color=text_color)

        raw_listing = subprocess.check_output(
            "find stbt-debug | sort", shell=True)
        files = raw_listing.decode("utf-8")
        assert files == expected_listing
def test_match_text_case_sensitivity():
    """Check how ``case_sensitive`` affects matching an upper-case query.

    On the menu image loaded with ``cv2.IMREAD_GRAYSCALE``, "ONION BHAJI"
    must match by default and with ``case_sensitive=False``, and must not
    match with ``case_sensitive=True``.
    """
    grey_frame = load_image("ocr/menu.png", cv2.IMREAD_GRAYSCALE)
    query = "ONION BHAJI"

    default_match = stbt.match_text(query, grey_frame)
    assert default_match

    insensitive_match = stbt.match_text(
        query, grey_frame, case_sensitive=False)
    assert insensitive_match

    sensitive_match = stbt.match_text(query, grey_frame, case_sensitive=True)
    assert not sensitive_match
def test_match_text_on_single_channel_image():
    """Check that ``match_text`` finds text in a grayscale-loaded image.

    The menu image is loaded with ``cv2.IMREAD_GRAYSCALE`` before searching
    for "Onion Bhaji".
    """
    grey_frame = load_image("ocr/menu.png", cv2.IMREAD_GRAYSCALE)
    match = stbt.match_text("Onion Bhaji", grey_frame)
    assert match
def test_that_match_text_returns_no_match_for_non_matching_text():
    """Check that searching the menu image for "Noodle Soup" is falsy."""
    menu_frame = load_image("ocr/menu.png")
    match = stbt.match_text(u"Noodle Soup", frame=menu_frame)
    assert not match
def test_match_text_case_sensitivity():
    """Check how ``case_sensitive`` affects matching an upper-case query.

    On the menu image loaded with ``color_channels=1``, "ONION BHAJI" must
    match by default and with ``case_sensitive=False``, and must not match
    with ``case_sensitive=True``.
    """
    single_channel = load_image("ocr/menu.png", color_channels=1)
    query = "ONION BHAJI"

    default_match = stbt.match_text(query, single_channel)
    assert default_match

    insensitive_match = stbt.match_text(
        query, single_channel, case_sensitive=False)
    assert insensitive_match

    sensitive_match = stbt.match_text(
        query, single_channel, case_sensitive=True)
    assert not sensitive_match
def test_that_default_language_is_configurable():
    """Check that the ``ocr.lang`` config value changes the OCR language.

    With the default configuration the name is not matched; with
    ``ocr.lang`` temporarily set to "deu" both ``match_text`` and ``ocr``
    must find it.
    """
    frame = load_image("ocr/unicode.png")
    surname = u"Röthlisberger"

    # reads Réthlisberger
    default_match = stbt.match_text(surname, frame)
    assert not default_match

    german = {"ocr.lang": "deu"}
    with temporary_config(german):
        assert stbt.match_text(surname, frame)
        recognised = stbt.ocr(frame)
        assert surname in recognised
def test_that_match_text_accepts_unicode():
    """Check ``match_text`` with ASCII, non-ASCII and UTF-8 byte queries.

    Every query is searched for in ``ocr/unicode.png`` with
    ``lang='eng+deu'`` and must match.
    """
    frame = load_image("ocr/unicode.png")
    queries = (
        "David",  # ascii
        "Röthlisberger",  # unicode
        "Röthlisberger".encode('utf-8'),  # utf-8 bytes
    )
    for query in queries:
        assert stbt.match_text(query, frame, lang='eng+deu')
def match_text():
    """Search for "RED" in ``frame`` and return the ``stbt.match_text`` result.

    ``frame`` is not a parameter; it is taken from the surrounding scope.
    """
    red_match = stbt.match_text("RED", frame=frame)
    return red_match
def test(text, region, upsample):
    """Check that ``text`` is matched and that the match lies in ``region``.

    The search uses ``frame`` from the surrounding scope with the given
    ``upsample`` setting. ``region`` is not passed to ``match_text``; it is
    only used to verify where the match was found.
    """
    found = stbt.match_text(text, frame=frame, upsample=upsample)
    assert found
    matched_region = found.region  # pylint:disable=no-member
    assert region.contains(matched_region)
def test_that_ocr_region_none_isnt_allowed(region):
    """Check that ``ocr`` and ``match_text`` reject the given ``region``.

    ``region`` is supplied by the caller (per the test name, presumably a
    ``None``-like value). Both calls must raise ``TypeError`` or
    ``ValueError``.
    """
    frame = load_image("ocr/small.png")
    rejected = (TypeError, ValueError)

    with pytest.raises(rejected):
        stbt.ocr(frame=frame, region=region)

    with pytest.raises(rejected):
        stbt.match_text("Small", frame=frame, region=region)
def test_match_text_on_single_channel_image():
    """Check that ``match_text`` finds text in a one-channel image.

    The menu image is loaded with ``color_channels=1`` before searching for
    "Onion Bhaji".
    """
    single_channel = load_image("ocr/menu.png", color_channels=1)
    match = stbt.match_text("Onion Bhaji", single_channel)
    assert match