Ejemplo n.º 1
0
    def __init__(self, graph=None, mask=None, navigate_timeout=20):
        """Initialise an empty keyboard model.

        :param graph: Deprecated. Any value other than ``None`` raises
            ``ValueError``.
        :param mask: A ``numpy.ndarray`` is stored as-is; any other truthy
            value is passed to ``load_image``; a falsy value leaves
            ``self.mask`` as ``None``.
        :param navigate_timeout: Stored unchanged as
            ``self.navigate_timeout``.
        :raises ValueError: If ``graph`` is not ``None``.
        """
        if graph is not None:
            raise ValueError(
                "The `graph` parameter of `stbt.Keyboard` constructor is "
                "deprecated. See the API documentation for details.")

        # Start from an empty directed graph and an empty set of modes.
        self.G = nx.DiGraph()
        self.G_ = None  # navigation without shift transitions that type text
        self.modes = set()

        # Resolve the mask: arrays are used directly, other truthy values
        # are loaded as images.
        if isinstance(mask, numpy.ndarray):
            mask_image = mask
        elif mask:
            mask_image = load_image(mask)
        else:
            mask_image = None
        self.mask = mask_image

        self.navigate_timeout = navigate_timeout

        # Map every direction key to its opposite, in both directions.
        self.symmetrical_keys = {}
        for key, opposite in (("KEY_DOWN", "KEY_UP"),
                              ("KEY_LEFT", "KEY_RIGHT")):
            self.symmetrical_keys[key] = opposite
            self.symmetrical_keys[opposite] = key

        # Bookkeeping flags, all initially False.
        self._any_with_region = False
        self._any_without_region = False
        self._any_with_mode = False
        self._any_without_mode = False
Ejemplo n.º 2
0
def test_that_text_region_is_correct_even_with_regions_larger_than_frame():
    """match_text must report a region inside the menu entry's region even
    when the search region is extended a long way to the right."""
    menu_frame = load_image("ocr/menu.png")
    wanted_text, wanted_region, _multiline = list(iterate_menu())[6]
    oversized_region = wanted_region.extend(right=+12800)

    result = stbt.match_text(
        wanted_text, frame=menu_frame, region=oversized_region)

    assert result
    assert wanted_region.contains(result.region)
Ejemplo n.º 3
0
def test_ocr_doesnt_leak_python_future_newtypes():
    """Text returned by ocr and match_text must be a type named ``str`` or
    ``unicode``."""
    image = load_image("ocr/small.png")

    ocr_text = stbt.ocr(image)
    assert type(ocr_text).__name__ in ["str", "unicode"]

    match = stbt.match_text("Small", image)
    assert type(match.text).__name__ in ["str", "unicode"]
Ejemplo n.º 4
0
def test_that_match_text_gives_tesseract_a_hint():
    """match_text should find "ITV Player" in a frame where plain OCR does
    not, but OCR with "ITV" as a user word does.

    The test is skipped when either precondition does not hold.
    """
    wanted = "ITV Player"
    image = load_image("ocr/itv-player.png")

    unhinted_text = stbt.ocr(frame=image)
    if wanted in unhinted_text:
        raise SkipTest("Tesseract doesn't need a hint")

    hinted_text = stbt.ocr(frame=image, tesseract_user_words=["ITV"])
    if wanted not in hinted_text:
        raise SkipTest("Giving tesseract a hint doesn't help")

    assert stbt.match_text(wanted, frame=image)
Ejemplo n.º 5
0
def test_match_text_stringify_result():
    """str() of a match_text result must follow the expected layout."""
    menu_frame = load_image("ocr/menu.png")
    result = stbt.match_text(u"Onion Bhaji", frame=menu_frame)

    # The optional ``u`` prefixes allow for both ``'...'`` and ``u'...'``
    # string reprs.
    expected_layout = (
        r"TextMatchResult\(time=None, match=True, region=Region\(.*\), "
        r"frame=<Image\(filename=u?'ocr/menu.png', "
        r"dimensions=1280x720x3\)>, text=u?'Onion Bhaji'\)")
    assert re.match(expected_layout, str(result))
Ejemplo n.º 6
0
def test_ocr_text_color(image, color, expected, region):
    """The expected text must only be read or matched when ``text_color``
    is passed."""
    single_line = stbt.OcrMode.SINGLE_LINE
    frame = load_image(image)

    # ocr: not found without the colour hint, exact result with it.
    plain_text = stbt.ocr(frame, region, single_line)
    assert expected not in plain_text
    coloured_text = stbt.ocr(frame, region, single_line, text_color=color)
    assert coloured_text == expected

    # match_text: same expectation.
    plain_match = stbt.match_text(expected, frame, region, single_line)
    assert not plain_match
    coloured_match = stbt.match_text(
        expected, frame, region, single_line, text_color=color)
    assert coloured_match
Ejemplo n.º 7
0
def test_ocr_on_static_images(image, expected_text, region, mode):
    """OCR of ``ocr/<image>`` must equal ``expected_text``.

    ``mode`` is only forwarded to ``stbt.ocr`` when it is not ``None``, so
    that ocr's own default applies otherwise.
    """
    mode_kwargs = {} if mode is None else {"mode": mode}
    frame = load_image("ocr/" + image)
    actual_text = stbt.ocr(frame, region=region, **mode_kwargs)
    assert actual_text == expected_text

    # Don't leak python future newtypes
    assert type(actual_text).__name__ in ["unicode", "str"]
Ejemplo n.º 8
0
def test_tesseract_user_patterns(patterns):
    """With the given user patterns, OCR must read the IP address exactly.

    Skipped when tesseract is older than 3.03.
    """
    # pylint:disable=protected-access
    too_old = _tesseract_version() < LooseVersion('3.03')
    if too_old:
        raise SkipTest('tesseract is too old')

    # Now the real test:
    ip_frame = load_image('ocr/192.168.10.1.png')
    text = stbt.ocr(
        frame=ip_frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=patterns)
    assert text == u'192.168.10.1'
Ejemplo n.º 9
0
def test_that_setting_config_options_has_an_effect():
    """Passing ``tesseract_config`` must change the OCR output."""
    # Tesseract has many complicated config options, so it is hard to write
    # a test proving that a given option has the *correct* effect.  Instead
    # we only check that setting an option has some effect at all, which at
    # least exercises our code that sets config options.  I'm not happy
    # about this and hope to replace it once we have more real-world
    # experience with these settings.

    # On tesseract 3.04 and later "tessedit_create_txt" is additionally set
    # to 0; it is inserted first to keep the key order unchanged.
    hocr_mode_config = {}
    if _tesseract_version() >= LooseVersion('3.04'):
        hocr_mode_config["tessedit_create_txt"] = 0
    hocr_mode_config["tessedit_create_hocr"] = 1

    configured_text = stbt.ocr(frame=load_image('ocr/ambig.png'),
                               tesseract_config=hocr_mode_config)
    default_text = stbt.ocr(frame=load_image('ocr/ambig.png'))
    assert configured_text != default_text
Ejemplo n.º 10
0
def test_that_match_text_still_returns_if_region_doesnt_intersect_with_frame(
        region):
    """For the given region, match_text must return a non-matching result
    with no region, rather than failing."""
    wanted = "Onion Bhaji"
    menu_frame = load_image("ocr/menu.png")

    result = stbt.match_text(wanted, frame=menu_frame, region=region)

    assert result.match is False
    assert result.region is None
    assert result.text == wanted

    # Avoid future.types.newtypes in return values
    text_type_name = type(result.text).__name__
    assert text_type_name in ["str", "unicode"]
Ejemplo n.º 11
0
def test_ocr_text_color_threshold():
    """``text_color_threshold`` must work both as an argument and as the
    ``ocr.text_color_threshold`` config option."""
    frame = load_image("ocr/blue-search-white-guide.png")
    text_color = (220, 220, 220)

    # Without any colour hint the text is not read as "Guide".
    assert stbt.ocr(frame) != "Guide"

    # pylint:disable=fixme
    # TODO: Find an example where text_color_threshold is necessary. Since
    # tesseract 4.0.0 the default text_color_threshold actually works.
    # assert stbt.ocr(frame, text_color=text_color) != "Guide"

    explicit_threshold_text = stbt.ocr(
        frame, text_color=text_color, text_color_threshold=50)
    assert explicit_threshold_text == "Guide"

    with temporary_config({'ocr.text_color_threshold': '50'}):
        configured_threshold_text = stbt.ocr(frame, text_color=text_color)
        assert configured_threshold_text == "Guide"
Ejemplo n.º 12
0
def _test_that_cache_speeds_up_ocr():
    """Check that OCR is much faster with the imgproc cache than without.

    Part 1 times ``stbt.ocr`` on one frame with ``imgproc_cache._cache`` set
    to ``None`` (treated here as "no cache") and again with the original
    cache object restored, and asserts that the uncached run is more than 10
    times slower while giving an identical result of identical type.

    Part 2 runs OCR on the same region of two different frames and asserts
    that the second frame is less than 10 times slower than the primed first
    frame, and that both frames give the same text.
    """
    frame = load_image('red-black.png')

    def ocr():
        return stbt.ocr(frame=frame)

    # pylint:disable=protected-access
    _cache = imgproc_cache._cache
    imgproc_cache._cache = None
    try:
        uncached_result = ocr()
        uncached_time = min(timeit.repeat(ocr, repeat=10, number=1))
    finally:
        # Always put the original cache back, even if OCR raises, so that a
        # failure here cannot leave the module-level cache unset for other
        # tests.
        imgproc_cache._cache = _cache

    cached_result = ocr()  # prime the cache
    cached_time = min(timeit.repeat(ocr, repeat=10, number=1))

    print("ocr with cache: %s" % (cached_time, ))
    print("ocr without cache: %s" % (uncached_time, ))
    assert uncached_time > (cached_time * 10)
    assert type(cached_result) == type(uncached_result)  # pylint:disable=unidiomatic-typecheck
    assert cached_result == uncached_result

    # Same region, two different frames.
    r = stbt.Region(x=0, y=32, right=91, bottom=59)
    frame2 = load_image("red-black-2.png")

    def cached_ocr1():
        return stbt.ocr(frame=frame, region=r)

    def cached_ocr2():
        return stbt.ocr(frame=frame2, region=r)

    cached_ocr1()  # prime the cache
    time1 = timeit.timeit(cached_ocr1, number=1)
    time2 = timeit.timeit(cached_ocr2, number=1)

    print("ocr with cache (frame 1): %s" % (time1, ))
    print("ocr with cache (frame 2): %s" % (time2, ))
    assert time2 < (time1 * 10)
    assert cached_ocr1() == cached_ocr2()
Ejemplo n.º 13
0
def test_that_ocr_engine_has_an_effect():
    """The OCR engine must be selectable by argument and by config.

    Skipped when tesseract is older than 4.0.
    """
    if _tesseract_version() < LooseVersion("4.0"):
        raise SkipTest('tesseract is too old')

    frame = load_image("ocr/ambig.png")
    word = "sillyness"

    # This is a regression in tesseract 4.0's legacy engine, compared to 3.04:
    legacy_text = stbt.ocr(frame, engine=stbt.OcrEngine.TESSERACT)
    assert word not in legacy_text
    default_text = stbt.ocr(frame)
    assert word not in default_text

    # ...but the new LSTM engine does read it correctly:
    lstm_text = stbt.ocr(frame, engine=stbt.OcrEngine.LSTM)
    assert word in lstm_text
    with temporary_config({'ocr.engine': 'LSTM'}):
        assert word in stbt.ocr(frame)
Ejemplo n.º 14
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """OCR output must equal ``ligature_text`` once known misreads are
    patched, and "em-dash," must match with a hyphen or a real em dash."""
    frame = load_image('ocr/ambig.png')

    # Misreads seen with specific tesseract versions, patched before the
    # comparison.
    known_misreads = (
        # tesseract 3.02
        ("horizonta|", "horizontal"),
        # tesseract 4.00 with tessdata 590567f
        ("siIIyness", "sillyness"),
        ("Iigatures", "ligatures"),
    )

    text = stbt.ocr(frame)
    for misread, intended in known_misreads:
        text = text.replace(misread, intended)

    assert ligature_text == text
    assert stbt.match_text("em-dash,", frame)
    assert stbt.match_text(u"em\u2014dash,", frame)
Ejemplo n.º 15
0
def test_corrections(corrections, expected):
    """Check per-call ``corrections``, alone and with global corrections.

    With the global correction ``{'OO': '11'}`` installed, an expected
    result of "OO" becomes "11"; every other expected value is unchanged.
    The global corrections are always reset to ``{}`` afterwards.
    """
    frame = load_image('ocr/00.png')
    print(corrections)

    def read_word():
        return stbt.ocr(frame=frame, mode=stbt.OcrMode.SINGLE_WORD,
                        corrections=corrections)

    assert expected == read_word()

    try:
        stbt.set_global_ocr_corrections({'OO': '11'})
        expected_with_global = "11" if expected == "OO" else expected
        assert expected_with_global == read_word()
    finally:
        stbt.set_global_ocr_corrections({})
Ejemplo n.º 16
0
def test_that_text_location_is_recognised():
    """Generator test: for every single-line menu entry, yield one check
    with ``upsample=True`` and one with ``upsample=False``."""
    frame = load_image("ocr/menu.png")

    def test(text, region, upsample):
        match = stbt.match_text(text, frame=frame, upsample=upsample)
        assert match
        assert region.contains(match.region)  # pylint:disable=no-member

    for text, region, multiline in iterate_menu():
        # Multi-line entries aren't currently supported, so only
        # single-line ones are checked.
        if not multiline:
            for upsample in (True, False):
                yield (test, text, region, upsample)
Ejemplo n.º 17
0
    def __init__(self, graph, mask=None, navigate_timeout=20):
        """Build the keyboard model from a graph or an edge-list.

        :param graph: An ``nx.DiGraph`` is used directly; anything else is
            passed to ``Keyboard.parse_edgelist``.
        :param mask: A ``numpy.ndarray`` is stored as-is; any other truthy
            value is passed to ``load_image``; a falsy value leaves
            ``self.mask`` as ``None``.
        :param navigate_timeout: Stored unchanged as
            ``self.navigate_timeout``.
        """
        self.G = (graph if isinstance(graph, nx.DiGraph)
                  else Keyboard.parse_edgelist(graph))

        # Rename the "SPACE" node to a literal space character, in place.
        try:
            nx.relabel_nodes(self.G, {"SPACE": " "}, copy=False)
        except KeyError:
            # Node SPACE is not in the graph
            pass
        _add_weights(self.G)

        if isinstance(mask, numpy.ndarray):
            mask_image = mask
        elif mask:
            mask_image = load_image(mask)
        else:
            mask_image = None
        self.mask = mask_image

        self.navigate_timeout = navigate_timeout
Ejemplo n.º 18
0
def test_that_ocr_reads_unicode():
    """OCR with lang 'eng+deu' must return the non-ASCII text as ``str``."""
    unicode_frame = load_image('ocr/unicode.png')
    text = stbt.ocr(frame=unicode_frame, lang='eng+deu')
    assert isinstance(text, str)
    assert text == u'£500\nDavid Röthlisberger'
Ejemplo n.º 19
0
def test_that_ocr_can_read_small_text():
    """OCR must read both lines of the small text image exactly."""
    expected = u'Small anti-aliased text is hard to read\nunless you magnify'
    small_frame = load_image('ocr/small.png')
    assert stbt.ocr(frame=small_frame) == expected
Ejemplo n.º 20
0
def test_that_ocr_region_none_isnt_allowed():
    """``stbt.ocr`` must raise ``TypeError`` when called with
    ``region=None``."""
    # Load the image outside the ``pytest.raises`` block so that only a
    # TypeError raised by ``stbt.ocr`` itself can satisfy the test; a
    # TypeError from image loading now fails the test instead of passing it.
    frame = load_image("ocr/small.png")
    with pytest.raises(TypeError):
        stbt.ocr(frame=frame, region=None)
Ejemplo n.º 21
0
def test_that_ocr_region_none_isnt_allowed(region):
    """ocr and match_text must each reject the given region with a
    ``TypeError`` or a ``ValueError``."""
    rejection_errors = (TypeError, ValueError)
    frame = load_image("ocr/small.png")

    with pytest.raises(rejection_errors):
        stbt.ocr(frame=frame, region=region)

    with pytest.raises(rejection_errors):
        stbt.match_text("Small", frame=frame, region=region)
Ejemplo n.º 22
0
def test_that_match_text_accepts_unicode():
    """match_text must accept ASCII text, non-ASCII text and UTF-8 bytes."""
    frame = load_image("ocr/unicode.png")
    surname = "Röthlisberger"

    # ascii
    assert stbt.match_text("David", frame, lang='eng+deu')
    # unicode
    assert stbt.match_text(surname, frame, lang='eng+deu')
    # utf-8 bytes
    surname_bytes = surname.encode('utf-8')
    assert stbt.match_text(surname_bytes, frame, lang='eng+deu')
Ejemplo n.º 23
0
def test_ocr_on_static_images(image, expected_text, region, mode):
    """OCR of ``ocr/<image>`` must equal ``expected_text``.

    ``mode`` is only forwarded to ``stbt.ocr`` when it is not ``None``.
    """
    mode_kwargs = {} if mode is None else {"mode": mode}
    frame = load_image("ocr/" + image)
    actual_text = stbt.ocr(frame, region=region, **mode_kwargs)
    assert actual_text == expected_text
Ejemplo n.º 24
0
def test_char_whitelist():
    """A digits-only ``char_whitelist`` must make OCR read "00"."""
    # Without char_whitelist tesseract reads "OO" (the letter oh).
    zeros_frame = load_image('ocr/00.png')
    text = stbt.ocr(frame=zeros_frame,
                    mode=stbt.OcrMode.SINGLE_WORD,
                    char_whitelist="0123456789")
    assert text == u'00'
Ejemplo n.º 25
0
def test_that_default_language_is_configurable():
    """Setting ``ocr.lang`` to "deu" must let ocr and match_text read the
    umlaut that is not matched by default."""
    frame = load_image("ocr/unicode.png")
    surname = u"Röthlisberger"

    default_match = stbt.match_text(surname, frame)  # reads Réthlisberger
    assert not default_match

    with temporary_config({"ocr.lang": "deu"}):
        assert stbt.match_text(surname, frame)
        assert surname in stbt.ocr(frame)
Ejemplo n.º 26
0
def test_user_dictionary_with_non_english_language(words):
    """User words must work together with ``lang="deu"``: the IP address
    must be read exactly."""
    ip_frame = load_image('ocr/192.168.10.1.png')
    text = stbt.ocr(
        frame=ip_frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=words)
    assert text == u'192.168.10.1'
Ejemplo n.º 27
0
def test_ocr_on_text_next_to_image_match():
    """OCR on the 150-pixel-wide region to the right of the matched button
    must read "YOUVIEW MENU"."""
    frame = load_image("action-panel.png")
    button = stbt.match("action-panel-blue-button.png", frame)
    label_region = button.region.right_of(width=150)
    assert stbt.ocr(frame, region=label_region) == "YOUVIEW MENU"
Ejemplo n.º 28
0
def test_match_text_case_sensitivity():
    """Upper-case text must match by default and with
    ``case_sensitive=False``, but not with ``case_sensitive=True``."""
    frame = load_image("ocr/menu.png", cv2.IMREAD_GRAYSCALE)
    upper_text = "ONION BHAJI"

    default_match = stbt.match_text(upper_text, frame)
    assert default_match

    insensitive_match = stbt.match_text(
        upper_text, frame, case_sensitive=False)
    assert insensitive_match

    sensitive_match = stbt.match_text(upper_text, frame, case_sensitive=True)
    assert not sensitive_match
Ejemplo n.º 29
0
def test_match_text_on_single_channel_image():
    """match_text must work on an image loaded with
    ``cv2.IMREAD_GRAYSCALE``."""
    grey_frame = load_image("ocr/menu.png", cv2.IMREAD_GRAYSCALE)
    result = stbt.match_text("Onion Bhaji", grey_frame)
    assert result
Ejemplo n.º 30
0
def test_that_match_text_returns_no_match_for_non_matching_text():
    """match_text for "Noodle Soup" on the menu frame must be falsy."""
    menu_frame = load_image("ocr/menu.png")
    result = stbt.match_text(u"Noodle Soup", frame=menu_frame)
    assert not result