def __init__(self, menu_regions, item_class=MenuItem, frame=None):
    """Initialise the object from a frame of video.

    :param menu_regions: Stored unchanged as ``self.menu_regions``.
    :param item_class: Stored unchanged as ``self.item_class``. Defaults to
        ``MenuItem``.
    :param frame: The frame handed to the super-class's constructor. If it
        is ``None``, a frame is grabbed with ``stbt.get_frame()``.
    """
    current_frame = stbt.get_frame() if frame is None else frame
    super(InteractiveFrameObject, self).__init__(current_frame)
    self.menu_regions = menu_regions
    self.item_class = item_class
    # Starts out empty; nothing in this constructor fills it.
    self.items = []
def __init__(self, frame=None):
    """Store the frame this object wraps and create its private state.

    :param frame: The video frame kept as ``self._frame``. If ``None``, a
        frame is grabbed with ``stbt.get_frame()``.
    """
    if frame is not None:
        captured = frame
    else:
        # ``stbt`` is imported at call time, only when a frame is needed.
        import stbt
        captured = stbt.get_frame()

    self.__frame_object_cache = {}
    self.__local = threading.local()
    self._frame = captured
def __init__(self, frame=None):
    """Build the object from a frame of video.

    ``frame`` is optional: when it is not given, a frame is grabbed from the
    device-under-test with ``stbt.get_frame()``.

    Derived classes that override this constructor (for example to accept
    additional parameters) should still accept an optional ``frame``
    parameter and pass it on to the super-class's constructor.
    """
    source = frame
    if source is None:
        # ``stbt`` is imported at call time, only when a frame is needed.
        import stbt
        source = stbt.get_frame()

    self.__frame_object_cache = dict()
    self.__local = threading.local()
    self._frame = source
def _match_all(image, frame, match_parameters, region):
    """
    Generator that yields a sequence of zero or more truthy MatchResults,
    followed by a falsey MatchResult.

    :param image: The reference image, as accepted by ``_load_image``.
    :param frame: The frame to search. If ``None``, a frame is grabbed with
        ``stbt.get_frame()``.
    :param match_parameters: If ``None``, ``MatchParameters()`` is used.
    :param region: The search is limited to the intersection of this region
        with the frame's own region.

    :raises ValueError: If ``Region.intersect`` returns ``None`` for the
        frame's region and ``region``. Because this is a generator, the
        exception is raised when iteration starts.
    """
    if match_parameters is None:
        match_parameters = MatchParameters()

    template = _load_image(image)

    if frame is None:
        import stbt
        frame = stbt.get_frame()

    imglog = ImageLogger("match", match_parameters=match_parameters,
                         template_name=template.friendly_name)

    input_region = Region.intersect(_image_region(frame), region)
    if input_region is None:
        # Fail with a clear message instead of handing ``None`` to ``crop``
        # and reading ``.x`` / ``.y`` from it below.
        raise ValueError("frame with dimensions %r doesn't contain %r"
                         % (frame.shape, region))

    # pylint:disable=undefined-loop-variable
    try:
        for (matched, match_region, first_pass_matched,
             first_pass_certainty) in _find_matches(
                 crop(frame, input_region), template.image,
                 match_parameters, imglog):

            # Matches are found in the cropped frame; move them back into
            # the coordinates of the full frame.
            match_region = Region.from_extents(*match_region) \
                                 .translate(input_region.x, input_region.y)
            result = MatchResult(
                getattr(frame, "time", None), matched, match_region,
                first_pass_certainty, frame,
                (template.relative_filename or template.image),
                first_pass_matched)
            imglog.append(matches=result)
            draw_on(frame, result, label="match(%r)" %
                    os.path.basename(template.friendly_name))
            yield result

    finally:
        # Debug logging is best-effort: a failure here must not replace
        # the outcome of the match itself.
        try:
            _log_match_image_debug(imglog)
        except Exception:  # pylint:disable=broad-except
            pass
def test_smoke_trigger_open():
    """Open MySky app.

    Calls ``itu.go_to_channel`` with ``CHANNEL_SKY_ONE_HD``, waits up to 15
    seconds for the trigger icon, presses ``KEY_GREEN``, sleeps 5 seconds
    and then writes 10 frames to ``trigger_screen_<n>.jpg``.
    ``itu.clear_test()`` is called before the test and again afterwards,
    whether or not the test passed.
    """
    itu.clear_test()
    try:
        itu.go_to_channel(interactive_constants.CHANNEL_SKY_ONE_HD)

        trigger = wait_for_match(
            TRIGGER_IMAGE, timeout_secs=15, region=TRIGGER_REGION)
        assert trigger.match, '[Trigger] Could not find trigger icon'

        stbt.press('KEY_GREEN')
        sleep(5)

        # Save images for testing manually later
        for index in range(10):
            snapshot = stbt.get_frame()
            cv2.imwrite('trigger_screen_{0}.jpg'.format(index), snapshot)
    finally:
        itu.clear_test()
def find_selected_tile(frame=None):
    """Locate the selected tile in the grid of players on the Roku Home
    screen.

    The frame is combined (bitwise AND) with the image
    ``roku-tile-selection-mask.png`` and the result is searched for
    ``roku-tile-selection.png``. The value returned is whatever ``match``
    returns for that search.

    :param frame: The frame to search. If ``None``, a frame is grabbed with
        ``stbt.get_frame()``.

    >>> from utils import load_image
    >>> print find_selected_tile(frame=load_image("selftest-screenshots/roku-bbc-iplayer-tile-selected.png"))
    MatchResult(..., match=True, region=Region(x=328, y=151, width=214, height=166), ...)
    >>> print find_selected_tile(frame=load_image("selftest-screenshots/roku-home.png")).match
    False
    """
    import cv2
    from stbt import get_frame
    from pagebase import load_image

    current = get_frame() if frame is None else frame
    selection_mask = load_image(
        "selftest-screenshots/roku-tile-selection-mask.png")
    masked = cv2.bitwise_and(current, selection_mask)
    return match("selftest-screenshots/roku-tile-selection.png", masked)
def _direction(target_image, frame=None):
    """Return the key we should press to move the selection one step closer
    to the target tile.

    :param target_image: Reference image of the target tile, passed to
        ``match``.
    :param frame: The frame to analyse. If ``None``, a frame is grabbed with
        ``stbt.get_frame()``.

    :returns: ``"KEY_DOWN"``, ``"KEY_RIGHT"``, ``"KEY_LEFT"`` or
        ``"KEY_UP"``; or ``False`` if the selection already contains the
        target tile. The directions are tried in that order, so a target
        that is both above and to one side is approached sideways first.

    :raises AssertionError: If the selected tile or the target tile can't
        be found in the frame, or if none of the checks above applies.

    >>> from utils import load_image
    >>> _direction("images/roku-bbc-iplayer-tile.png", frame=load_image("selftest-screenshots/roku-angry-birds-tile-selected.png"))
    'KEY_DOWN'
    >>> _direction("images/roku-bbc-iplayer-tile.png", frame=load_image("selftest-screenshots/roku-bbc-sport-tile-selected.png"))
    'KEY_RIGHT'
    >>> _direction("images/roku-bbc-iplayer-tile.png", frame=load_image("selftest-screenshots/roku-amazon-video-tile-selected.png"))
    'KEY_LEFT'
    >>> _direction("images/roku-bbc-iplayer-tile.png", frame=load_image("selftest-screenshots/roku-bbc-iplayer-tile-selected.png"))
    False
    """
    from stbt import get_frame

    if frame is None:
        frame = get_frame()

    selection = find_selected_tile(frame)
    assert selection, "Fail/ Didn't find currently selected tile"
    target = match(target_image, frame)
    assert target, "The target tile isn't visible"

    if selection.region.contains(target.region):
        # Already selected
        return False
    if target.region.y > selection.region.bottom:
        return "KEY_DOWN"
    if target.region.x > selection.region.right:
        return "KEY_RIGHT"
    if target.region.right < selection.region.x:
        return "KEY_LEFT"
    if target.region.bottom < selection.region.y:
        # Mirror image of the KEY_DOWN check. It is tested last so that
        # every case that already returned a key keeps returning that key.
        return "KEY_UP"

    assert False, "Couldn't figure out how to get from %s to %s" % (
        selection.region, target.region)
def _match_all(image, frame, match_parameters, region):
    """
    Generator that yields a sequence of zero or more truthy MatchResults,
    followed by a falsey MatchResult.

    :param image: The reference image, as accepted by ``_load_image``.
    :param frame: The frame to search. If ``None``, a frame is grabbed with
        ``stbt.get_frame()``.
    :param match_parameters: If ``None``, ``MatchParameters()`` is used.
    :param region: The search is limited to the intersection of this region
        with the frame's own region.

    :raises ValueError: If the reference image or the frame has an
        unsupported shape or number of channels; if the frame or the search
        region is smaller than the reference image; if the region doesn't
        intersect the frame; or if the reference image has an alpha channel
        that can't be honoured (OpenCV older than 3.0, or a match method
        other than SQDIFF / CCORR_NORMED).
    """
    if match_parameters is None:
        match_parameters = MatchParameters()

    if frame is None:
        import stbt
        frame = stbt.get_frame()

    template = _load_image(image)

    # Normalise single channel images to shape (h, w, 1) rather than just
    # (h, w). This is done on views, so the shape of the objects we were
    # given is left alone.
    reference = template.image.view()
    if len(reference.shape) == 2:
        reference.shape += (1,)

    frame = frame.view()
    if len(frame.shape) == 2:
        frame.shape += (1,)

    if len(reference.shape) != 3:
        raise ValueError(
            "Invalid shape for image: %r. Shape must have 2 or 3 elements" %
            (template.image.shape,))
    if len(frame.shape) != 3:
        raise ValueError(
            "Invalid shape for frame: %r. Shape must have 2 or 3 elements" %
            (frame.shape,))

    channels = reference.shape[2]
    if channels not in [1, 3, 4]:
        raise ValueError("Expected 3-channel image, got %d channels: %s"
                         % (channels, template.absolute_filename))

    if (frame.shape[0] < reference.shape[0] or
            frame.shape[1] < reference.shape[1]):
        raise ValueError("Frame %r must be larger than reference image %r"
                         % (frame.shape, reference.shape))
    if reference.shape[0] < 1 or reference.shape[1] < 1:
        raise ValueError("Reference image %r must contain some data"
                         % (reference.shape,))
    # Allowed (frame channels, reference channels) combinations.
    if (frame.shape[2], channels) not in [(1, 1), (3, 3), (3, 4)]:
        raise ValueError(
            "Frame %r and reference image %r must have the same number of "
            "channels" % (frame.shape, reference.shape))

    if channels == 4:
        if cv2_compat.version < [3, 0, 0]:
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires OpenCV 3.0 or greater (you have %s)."
                % (template.relative_filename, cv2_compat.version))
        supported_methods = (MatchMethod.SQDIFF, MatchMethod.CCORR_NORMED)
        if match_parameters.match_method not in supported_methods:
            # See `matchTemplateMask`:
            # https://github.com/opencv/opencv/blob/3.2.0/modules/imgproc/src/templmatch.cpp#L840-L917
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires match_method SQDIFF or CCORR_NORMED "
                "(you specified %s)."
                % (template.relative_filename, match_parameters.match_method))

    input_region = Region.intersect(_image_region(frame), region)
    if input_region is None:
        raise ValueError("frame with dimensions %r doesn't contain %r"
                         % (frame.shape, region))
    if (input_region.height < reference.shape[0] or
            input_region.width < reference.shape[1]):
        raise ValueError("%r must be larger than reference image %r"
                         % (input_region, reference.shape))

    imglog = ImageLogger(
        "match", match_parameters=match_parameters,
        template_name=template.friendly_name,
        input_region=input_region)

    # pylint:disable=undefined-loop-variable
    try:
        for candidate in _find_matches(
                crop(frame, input_region), reference, match_parameters,
                imglog):
            (matched, match_region, first_pass_matched,
             first_pass_certainty) = candidate

            # Matches are found in the cropped frame; move them back into
            # the coordinates of the full frame.
            match_region = Region.from_extents(*match_region) \
                                 .translate(input_region)
            result = MatchResult(
                getattr(frame, "time", None), matched, match_region,
                first_pass_certainty, frame,
                (template.relative_filename or template.image),
                first_pass_matched)
            imglog.append(matches=result)
            draw_on(frame, result, label="match(%s)" % template.short_repr())
            yield result

    finally:
        # Debug logging is best-effort: errors from it are ignored.
        try:
            _log_match_image_debug(imglog)
        except Exception:  # pylint:disable=broad-except
            pass
def _match_all(image, frame, match_parameters, region):
    """
    Generator that yields a sequence of zero or more truthy MatchResults,
    followed by a falsey MatchResult.

    :param image: The reference image, as accepted by ``_load_image``. If it
        has 4 channels and any pixel's alpha is below 255, a transparency
        mask is built from the alpha channel and passed to
        ``_find_matches``. The reference image itself is not modified.
    :param frame: The frame to search. If ``None``, a frame is grabbed with
        ``stbt.get_frame()``.
    :param match_parameters: If ``None``, ``MatchParameters()`` is used.
    :param region: The search is limited to the intersection of this region
        with the frame's own region.

    :raises ValueError: If the reference image has an unsupported number of
        channels; if the frame is smaller than the reference image or has a
        different number of channels; if the region doesn't intersect the
        frame; or if a transparency mask is needed but can't be honoured
        (OpenCV older than 3.0, or a match method other than SQDIFF /
        CCORR_NORMED).
    """
    if match_parameters is None:
        match_parameters = MatchParameters()

    if frame is None:
        import stbt
        frame = stbt.get_frame()

    template = _load_image(image)

    t = template.image
    mask = None
    if len(t.shape) == 2 or t.shape[2] == 1 or t.shape[2] == 3:
        pass
    elif t.shape[2] == 4:
        # Create transparency mask from alpha channel
        alpha = t[:, :, 3]
        transparent = alpha < 255
        if numpy.any(transparent):
            # ``alpha`` is a view into the template's pixels, so edit a
            # copy: writing through the view would overwrite the alpha
            # channel of the image we were given.
            mask = alpha.copy()
            mask[transparent] = 0
            # OpenCV wants mask to match template's number of channels
            mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
        # The colour channels are what gets matched, with or without a mask.
        t = t[:, :, 0:3]
    else:
        raise ValueError("Expected 3-channel image, got %d channels: %s"
                         % (t.shape[2], template.absolute_filename))

    if any(frame.shape[x] < t.shape[x] for x in (0, 1)):
        raise ValueError("Frame %r must be larger than reference image %r"
                         % (frame.shape, t.shape))
    if any(t.shape[x] < 1 for x in (0, 1)):
        raise ValueError("Reference image %r must contain some data"
                         % (t.shape, ))
    if (len(frame.shape) != len(t.shape) or
            len(frame.shape) == 3 and frame.shape[2] != t.shape[2]):
        raise ValueError(
            "Frame %r and reference image %r must have the same number of "
            "channels" % (frame.shape, t.shape))

    if mask is not None:
        if cv2_compat.version < [3, 0, 0]:
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires OpenCV 3.0 or greater (you have %s)."
                % (template.relative_filename, cv2_compat.version))
        if match_parameters.match_method not in (MatchMethod.SQDIFF,
                                                 MatchMethod.CCORR_NORMED):
            # See `matchTemplateMask`:
            # https://github.com/opencv/opencv/blob/3.2.0/modules/imgproc/src/templmatch.cpp#L840-L917
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires match_method SQDIFF or CCORR_NORMED "
                "(you specified %s)."
                % (template.relative_filename, match_parameters.match_method))

    imglog = ImageLogger("match", match_parameters=match_parameters,
                         template_name=template.friendly_name)

    input_region = Region.intersect(_image_region(frame), region)
    if input_region is None:
        raise ValueError("frame with dimensions %r doesn't contain %r"
                         % (frame.shape, region))

    # pylint:disable=undefined-loop-variable
    try:
        for (matched, match_region, first_pass_matched,
             first_pass_certainty) in _find_matches(
                 crop(frame, input_region), t, mask, match_parameters,
                 imglog):

            # Matches are found in the cropped frame; move them back into
            # the coordinates of the full frame.
            match_region = Region.from_extents(*match_region) \
                                 .translate(input_region.x, input_region.y)
            result = MatchResult(
                getattr(frame, "time", None), matched, match_region,
                first_pass_certainty, frame,
                (template.relative_filename or template.image),
                first_pass_matched)
            imglog.append(matches=result)
            draw_on(frame, result, label="match(%r)" %
                    os.path.basename(template.friendly_name))
            yield result

    finally:
        # Debug logging is best-effort: errors from it are ignored.
        try:
            _log_match_image_debug(imglog)
        except Exception:  # pylint:disable=broad-except
            pass
def ocr(frame=None, region=Region.ALL,
        mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD,
        lang=None, tesseract_config=None, tesseract_user_words=None,
        tesseract_user_patterns=None, upsample=True, text_color=None,
        text_color_threshold=None, engine=None):
    r"""Read the text in a video frame and return it as a Unicode string.

    The recognition is done by the open-source "Tesseract" OCR (Optical
    Character Recognition) engine.

    :param frame: The video frame to process, as an image in OpenCV format
        (for example as returned by `frames` and `get_frame`). If it is not
        given, a new frame is grabbed from the device-under-test.

    :param region: Restrict the search to this region of the video frame.
    :type region: `Region`

    :param mode: Tesseract's layout analysis mode.
    :type mode: `OcrMode`

    :param str lang:
        The three-letter
        `ISO-639-3 <http://www.loc.gov/standards/iso639-2/php/code_list.php>`__
        code of the language you are trying to read, for example "eng" for
        English or "deu" for German. Join several codes with '+' when the
        text may be in a mixture of languages: "eng+deu" means English and
        German.

        The default is "eng" (English). The global default can be changed
        by setting ``lang`` in the ``[ocr]`` section of :ref:`.stbt.conf`.

        The tesseract language pack may need to be installed; see the
        installation instructions `here
        <https://stb-tester.com/manual/troubleshooting#install-ocr-language-pack>`__.

    :param dict tesseract_config:
        Configuration passed down to the underlying OCR engine. See the
        `tesseract documentation
        <https://github.com/tesseract-ocr/tesseract/wiki/ControlParams>`__
        for details.

    :type tesseract_user_words: unicode string, or list of unicode strings
    :param tesseract_user_words:
        Words to add to the tesseract dictionary. A single string is
        treated as a list containing that one string. To replace the
        tesseract system dictionary altogether, also set
        ``tesseract_config={'load_system_dawg': False, 'load_freq_dawg':
        False}``.

    :type tesseract_user_patterns: unicode string, or list of unicode strings
    :param tesseract_user_patterns:
        Patterns to add to the tesseract dictionary. A single string is
        treated as a list containing that one string. The tesseract pattern
        language corresponds roughly to the following regular expressions::

            tesseract  regex
            =========  ===========
            \c         [a-zA-Z]
            \d         [0-9]
            \n         [a-zA-Z0-9]
            \p         [:punct:]
            \a         [a-z]
            \A         [A-Z]
            \*         *

    :param bool upsample:
        Upsample the image 3x before passing it to tesseract. This helps to
        preserve information in the text's anti-aliasing that would
        otherwise be lost when tesseract binarises the image. The default is
        ``True``; only disable it if you do your own pre-processing on the
        image.

    :type text_color: 3-element tuple of integers between 0 and 255, BGR order
    :param text_color:
        Color of the text. Specifying this can improve OCR results when
        tesseract's default thresholding algorithm doesn't detect the text,
        for example white text on a light-colored background or text on a
        translucent overlay.

    :param int text_color_threshold:
        The threshold to use with ``text_color``, between 0 and 255. The
        default is 25. The global default can be changed by setting
        ``text_color_threshold`` in the ``[ocr]`` section of
        :ref:`.stbt.conf`.

    :param engine:
        The OCR engine to use. The default is ``OcrEngine.TESSERACT``. The
        global default can be changed by setting ``engine`` in the ``[ocr]``
        section of :ref:`.stbt.conf`.
    :type engine: `OcrEngine`

    :raises TypeError: If ``region`` is ``None``.

    | Added in v28: The ``upsample`` and ``text_color`` parameters.
    | Added in v29: The ``text_color_threshold`` parameter.
    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    """
    if frame is None:
        import stbt
        frame = stbt.get_frame()

    if region is None:
        raise TypeError(
            "Passing region=None to ocr is deprecated since v0.21. "
            "In a future version, region=None will mean an empty region "
            "instead. To OCR an entire video frame, use "
            "`region=Region.ALL`.")

    # A bare string stands for a one-element list.
    string_types = (str, unicode)
    user_words = tesseract_user_words
    if isinstance(user_words, string_types):
        user_words = [user_words]
    user_patterns = tesseract_user_patterns
    if isinstance(user_patterns, string_types):
        user_patterns = [user_patterns]

    imglog = ImageLogger("ocr")

    # Note that ``_tesseract`` takes the patterns before the words.
    raw_text, ocr_region = _tesseract(
        frame, region, mode, lang, tesseract_config, user_patterns,
        user_words, upsample, text_color, text_color_threshold, engine,
        imglog)
    cleaned = raw_text.strip().translate(_ocr_transtab)
    debug(u"OCR in region %s read '%s'." % (ocr_region, cleaned))
    _log_ocr_image_debug(imglog, cleaned)
    return cleaned
def soak_remote_control(
        key_next="KEY_RIGHT", key_prev="KEY_LEFT", region=stbt.Region.ALL,
        mask=None, count=100):
    """
    Soak-test a remote control by pressing ``key_next`` and ``key_prev``
    (KEY_RIGHT and KEY_LEFT by default) over and over, checking that every
    press has an effect.

    After each pair of presses we check that we are back where we started.
    This should be sufficient to detect missed keypresses and intermittent
    double presses.

    :param key_next: The key that moves forwards.
    :param key_prev: The key that moves back again.
    :param region: Only this region of the frame is compared.
    :param mask: A mask image, loaded with ``stbt.load_image`` as greyscale.
        If ``None``, an all-white mask with the size of the region is used.
        Use ``region`` and/or ``mask`` to exclude parts of the page that
        might change from press to press, such as picture-in-picture video
        or clocks.
    :param count: Total number of presses; ``count // 2`` next/prev pairs
        are performed.

    :raises RuntimeError: If pressing ``key_prev`` during setup has no
        detectable effect.
    :raises AssertionError: If a press is missed, or the expected screen
        isn't found after a press.

    As a side effect this prints the region and the mask's shape, and writes
    ``right_template.png`` and ``left_template.png`` to the current working
    directory.
    """
    if mask is not None:
        pixel_mask = stbt.load_image(mask, cv2.IMREAD_GRAYSCALE)
    else:
        all_white = numpy.ones(
            stbt.get_frame().shape[:2], dtype=numpy.uint8) * 255
        pixel_mask = stbt.crop(all_white, region)

    def grab_template():
        # The current frame, cropped to the region, with the mask added as
        # an alpha channel (transparency).
        return numpy.append(
            stbt.crop(stbt.get_frame(), region),
            pixel_mask[:, :, numpy.newaxis], axis=2)

    # Get in a position where we'll be able to press left later. Note: no
    # assertion - it's ok if we can't move right right now
    stbt.press(key_next)
    print(region, pixel_mask.shape)
    stbt.press_and_wait(key_next, region=region, mask=pixel_mask)  # pylint:disable=stbt-unused-return-value

    # Grab reference images of the left and right position. We need these
    # to check that we've actually moved, and haven't moved too far.
    right_template = grab_template()
    cv2.imwrite("right_template.png", right_template)

    setup_transition = stbt.press_and_wait(
        key_prev, region=region, mask=pixel_mask)
    if setup_transition.status == stbt.TransitionStatus.START_TIMEOUT:
        raise RuntimeError(
            "No movement after pressing %r during setup" % (key_prev, ))
    if stbt.match(right_template, region=region):
        raise RuntimeError(
            "Setup error: No detectable differences after pressing %r"
            % (key_prev, ))

    left_template = grab_template()
    cv2.imwrite("left_template.png", left_template)

    # Error messages:
    missed_press = "Missed keypress: No change after pressing %s"
    double_press = \
        "Didn't find expected screen after pressing %s (double keypress?)"

    # Now we perform the actual test: each key together with the screen we
    # expect to see after pressing it.
    steps = [(key_next, right_template), (key_prev, left_template)]
    for _ in range(count // 2):
        for key, expected_screen in steps:
            assert stbt.press_and_wait(
                key, region=region, mask=pixel_mask), missed_press % (key,)
            assert stbt.match(expected_screen, region=region), \
                double_press % (key,)
def is_screen_black(frame=None, mask=None, threshold=None, region=Region.ALL):
    """Check whether a video frame shows a black screen.

    :type frame: `stbt.Frame` or `numpy.ndarray`
    :param frame:
        The video frame to check, as an image in OpenCV format (for example
        as returned by `frames` and `get_frame`). If it is not given, a new
        frame is grabbed from the device-under-test.

    :type mask: str or `numpy.ndarray`
    :param mask:
        A black & white image that specifies which part of the image to
        analyse. White pixels select the area to analyse; black pixels
        select the area to ignore. The mask must be the same size as the
        video frame.

        This can be a string (a filename that will be resolved as per
        `load_image`) or a single-channel image in OpenCV format.

    :param int threshold:
        Even when a video frame appears to be black, the intensity of its
        pixels is not always 0. To differentiate almost-black from non-black
        pixels, a binary threshold is applied to the frame. The
        ``threshold`` value is in the range 0 (black) to 255 (white). The
        global default can be changed by setting ``threshold`` in the
        ``[is_screen_black]`` section of :ref:`.stbt.conf`.

    :type region: `Region`
    :param region:
        Only analyze the specified region of the video frame.

        If you specify both ``region`` and ``mask``, the mask must be the
        same size as the region.

    :returns:
        An object that will evaluate to true if the frame was black, or
        false if not black. The object has the following attributes:

        * **black** (*bool*) – True if the frame was black.
        * **frame** (`stbt.Frame`) – The video frame that was analysed.

    | Added in v28: The ``region`` parameter.
    | Added in v29: Return an object with a frame attribute, instead of bool.
    """
    if threshold is None:
        threshold = get_config('is_screen_black', 'threshold', type_=int)

    if frame is None:
        import stbt
        frame = stbt.get_frame()

    # Without a mask we still want an object with ``image`` and
    # ``friendly_name`` attributes, so that the code below is uniform.
    if mask is not None:
        mask = _load_image(mask, cv2.IMREAD_GRAYSCALE)
    else:
        mask = _ImageFromUser(None, None, None)

    imglog = ImageLogger("is_screen_black", region=region,
                         threshold=threshold)
    imglog.imwrite("source", frame)

    analysed_region = Region.intersect(_image_region(frame), region)
    greyframe = cv2.cvtColor(crop(frame, analysed_region),
                             cv2.COLOR_BGR2GRAY)
    if mask.image is not None:
        imglog.imwrite("mask", mask.image)
        # Written in place: ``greyframe`` is the destination too.
        cv2.bitwise_and(greyframe, mask.image, dst=greyframe)

    # The frame counts as black if no pixel is brighter than the threshold.
    max_intensity = greyframe.max()
    result = _IsScreenBlackResult(bool(max_intensity <= threshold), frame)

    found = "Found" if result.black else "Didn't find"
    debug("is_screen_black: {found} black screen using mask={mask}, "
          "threshold={threshold}, region={region}: "
          "{result}, maximum_intensity={maxVal}".format(
              found=found,
              mask=mask.friendly_name,
              threshold=threshold,
              region=region,
              result=result,
              maxVal=max_intensity))

    if imglog.enabled:
        imglog.imwrite("grey", greyframe)
        thresholded = cv2.threshold(greyframe, threshold, 255,
                                    cv2.THRESH_BINARY)[1]
        imglog.imwrite("non_black", thresholded)
        imglog.set(maxVal=max_intensity,
                   non_black_region=pixel_bounding_box(thresholded))
    _log_image_debug(imglog, result)

    return result
def find_selection_horizontal_repeat(
        frame, background, region=stbt.Region.ALL, match_threshold=0.95):
    """Find the selected menu item by looking for the specified background.

    This is an example to demonstrate that you can implement your own custom
    image processing with OpenCV.

    :param frame: An OpenCV image, as returned by `stbt.get_frame` or
        `cv2.imread`. If `None`, will pull a new frame from the system under
        test.
    :param background: The path to a 1-pixel-wide image of your
        system-under-test's menu selection/highlight.
    :param region: If specified, restrict the search to this region of the
        frame.
    :param match_threshold: The best correlation between the frame and the
        background must be at least this for a selection to be reported.

    :returns: A `Selection` object representing the selected item. It is
        built from the selection's region (``None`` if nothing was found),
        the cropped frame and the background image.

    Example::

        >>> frame = load_image("selftest-screenshots/roku-home.png")
        >>> find_selection_horizontal_repeat(
        ...     frame, "images/roku-menu-selection-background.png")
        Selection(region=Region(x=119, y=162, right=479, bottom=204), text=u'Home')

    """
    if frame is None:
        frame = stbt.get_frame()
    cropped = crop(frame, region)

    bg = load_image(background)

    # TM_SQDIFF_NORMED gives 0 for a perfect match; invert it so that
    # bigger means better.
    correlation = 1 - cv2.matchTemplate(cropped, bg, cv2.TM_SQDIFF_NORMED)
    best = cv2.minMaxLoc(correlation)[1]

    selection_region = None
    if best >= match_threshold:
        # Find y coordinate: the row whose best correlation is the highest.
        row_best = numpy.amax(correlation, axis=1)
        y = cv2.minMaxLoc(row_best)[3][1]

        # Got the y position, now work out the horizontal extents
        line_uint8 = numpy.uint8(correlation[y, :]*255)
        binary = cv2.threshold(
            line_uint8, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
        matching_columns = binary.flatten().nonzero()[0]
        extents = list(_combine_neighbouring_extents(
            list(_zeros_to_extents(matching_columns))))

        if extents:
            x, right = max(extents, key=lambda a: a[1] - a[0])

            # The coordinates so far are relative to the cropped frame.
            selection_region = stbt.Region(
                x, y, right=right, bottom=y + bg.shape[0])
            selection_region = selection_region.translate(
                x=max(0, region.x), y=max(0, region.y))
            if selection_region.width > 10:
                # Remove the rounded corners of the selection; after
                # subtracting the background they look like single-quotes
                # to the OCR engine.
                selection_region = selection_region.extend(x=5, right=-5)

    if not selection_region:
        stbt.debug(
            "find_selection didn't find match (%.2f) above the threshold (%.2f)"
            % (best, match_threshold))

    return Selection(selection_region, cropped, bg)
def ocr(frame=None, region=Region.ALL,
        mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD,
        lang=None, tesseract_config=None, tesseract_user_words=None,
        tesseract_user_patterns=None, upsample=True, text_color=None,
        text_color_threshold=None, engine=None):
    r"""Return the text present in the video frame as a Unicode string.

    OCR (Optical Character Recognition) is performed with the "Tesseract"
    open-source OCR engine.

    :param frame: If given, this is the video frame that is processed;
        otherwise a new frame is grabbed from the device-under-test. It is
        an image in OpenCV format (for example as returned by `frames` and
        `get_frame`).

    :param region: Only this region of the video frame is searched.
    :type region: `Region`

    :param mode: Tesseract's layout analysis mode.
    :type mode: `OcrMode`

    :param str lang:
        The language you are attempting to read, as a three-letter
        `ISO-639-3 <http://www.loc.gov/standards/iso639-2/php/code_list.php>`__
        language code; for example "eng" for English or "deu" for German.
        Several languages can be given by joining them with '+'; for example
        "eng+deu" means that the text may be in a mixture of English and
        German.

        This defaults to "eng" (English). To override the global default,
        set ``lang`` in the ``[ocr]`` section of :ref:`.stbt.conf`.

        You may need to install the tesseract language pack; see
        installation instructions `here
        <https://stb-tester.com/manual/troubleshooting#install-ocr-language-pack>`__.

    :param dict tesseract_config:
        Allows passing configuration down to the underlying OCR engine; the
        `tesseract documentation
        <https://github.com/tesseract-ocr/tesseract/wiki/ControlParams>`__
        has the details.

    :type tesseract_user_words: unicode string, or list of unicode strings
    :param tesseract_user_words:
        List of words to be added to the tesseract dictionary; a single
        string is wrapped in a list. To replace the tesseract system
        dictionary altogether, also set
        ``tesseract_config={'load_system_dawg': False, 'load_freq_dawg':
        False}``.

    :type tesseract_user_patterns: unicode string, or list of unicode strings
    :param tesseract_user_patterns:
        List of patterns to add to the tesseract dictionary; a single string
        is wrapped in a list. The tesseract pattern language corresponds
        roughly to the following regular expressions::

            tesseract  regex
            =========  ===========
            \c         [a-zA-Z]
            \d         [0-9]
            \n         [a-zA-Z0-9]
            \p         [:punct:]
            \a         [a-z]
            \A         [A-Z]
            \*         *

    :param bool upsample:
        Upsample the image 3x before passing it to tesseract, which helps to
        preserve information in the text's anti-aliasing that would
        otherwise be lost when tesseract binarises the image. Defaults to
        ``True``; you should only disable it if you are doing your own
        pre-processing on the image.

    :type text_color: 3-element tuple of integers between 0 and 255, BGR order
    :param text_color:
        Color of the text. Giving it can improve OCR results when
        tesseract's default thresholding algorithm doesn't detect the text,
        for example white text on a light-colored background or text on a
        translucent overlay.

    :param int text_color_threshold:
        The threshold to use with ``text_color``, between 0 and 255.
        Defaults to 25. To override the global default, set
        ``text_color_threshold`` in the ``[ocr]`` section of
        :ref:`.stbt.conf`.

    :param engine:
        The OCR engine to use. Defaults to ``OcrEngine.TESSERACT``. To
        override the global default, set ``engine`` in the ``[ocr]`` section
        of :ref:`.stbt.conf`.
    :type engine: `OcrEngine`

    :raises TypeError: If ``region`` is ``None``.

    | Added in v28: The ``upsample`` and ``text_color`` parameters.
    | Added in v29: The ``text_color_threshold`` parameter.
    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    """
    if frame is None:
        import stbt
        frame = stbt.get_frame()

    if region is None:
        raise TypeError(
            "Passing region=None to ocr is deprecated since v0.21. "
            "In a future version, region=None will mean an empty region "
            "instead. To OCR an entire video frame, use "
            "`region=Region.ALL`.")

    def as_list(value):
        # A bare string stands for a one-element list; anything else
        # (including None) is passed through untouched.
        if isinstance(value, (str, unicode)):
            return [value]
        return value

    tesseract_user_words = as_list(tesseract_user_words)
    tesseract_user_patterns = as_list(tesseract_user_patterns)

    imglog = ImageLogger("ocr")

    # Note that ``_tesseract`` takes the patterns before the words.
    recognised, region = _tesseract(
        frame, region, mode, lang, tesseract_config,
        tesseract_user_patterns, tesseract_user_words, upsample, text_color,
        text_color_threshold, engine, imglog)
    recognised = recognised.strip()
    recognised = recognised.translate(_ocr_transtab)
    debug(u"OCR in region %s read '%s'." % (region, recognised))
    _log_ocr_image_debug(imglog, recognised)
    return recognised
def match_text(text, frame=None, region=Region.ALL,
               mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD, lang=None,
               tesseract_config=None, case_sensitive=False, upsample=True,
               text_color=None, text_color_threshold=None, engine=None):
    """Look for the specified text in a single video frame.

    Use it as an alternative to `match` when you want to search for text
    instead of an image.

    :param unicode text: The text to search for.
    :param frame: See `ocr`.
    :param region: See `ocr`.
    :param mode: See `ocr`.
    :param lang: See `ocr`.
    :param tesseract_config: See `ocr`.
    :param upsample: See `ocr`.
    :param text_color: See `ocr`.
    :param text_color_threshold: See `ocr`.
    :param engine: See `ocr`.
    :param bool case_sensitive: Ignore case if False (the default).

    :returns:
      A `TextMatchResult`, which will evaluate to True if the text was
      found, false otherwise.

    For example, to select a button in a vertical menu by name (in this case
    "TV Guide")::

        m = stbt.match_text("TV Guide")
        assert m.match
        while not stbt.match('selected-button.png').region.contains(m.region):
            stbt.press('KEY_DOWN')

    | Added in v28: The ``upsample`` and ``text_color`` parameters.
    | Added in v29: The ``text_color_threshold`` parameter.
    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    """
    import lxml.etree

    if frame is None:
        import stbt
        frame = stbt.get_frame()

    # Work on a copy of the caller's configuration; hOCR output is what
    # gives us the position of each word.
    _config = dict(tesseract_config or {})
    _config['tessedit_create_hocr'] = 1

    rts = getattr(frame, "time", None)
    imglog = ImageLogger("match_text")

    # The words of ``text`` are passed in the user-words position.
    xml, region = _tesseract(
        frame, region, mode, lang, _config, None, text.split(), upsample,
        text_color, text_color_threshold, engine, imglog)

    # Until the phrase is found the result is "no match, no region".
    hocr = None
    found = False
    box = None
    if xml != '':
        hocr = lxml.etree.fromstring(xml.encode('utf-8'))
        phrase = _hocr_find_phrase(
            hocr, _to_unicode(text).split(), case_sensitive)
        if phrase:
            # Find bounding box
            for _, elem in phrase:
                box = Region.bounding_box(box, _hocr_elem_region(elem))
            # _tesseract crops to region and scales up by a factor of 3 so
            # we must undo this transformation here.
            scale = 3 if upsample else 1
            box = Region.from_extents(
                region.x + box.x // scale, region.y + box.y // scale,
                region.x + box.right // scale,
                region.y + box.bottom // scale)
            found = True

    result = TextMatchResult(rts, found, box, frame, text)

    if result.match:
        message = "match_text: Match found: %s"
    else:
        message = "match_text: No match found: %s"
    debug(message % str(result))

    imglog.set(text=text, case_sensitive=case_sensitive,
               result=result, hocr=hocr)
    _log_ocr_image_debug(imglog)

    return result
def ocr(frame=None, region=Region.ALL,
        mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD,
        lang=None, tesseract_config=None, tesseract_user_words=None,
        tesseract_user_patterns=None, upsample=True, text_color=None,
        text_color_threshold=None, engine=None, char_whitelist=None,
        corrections=None):
    r"""Read the text in a video frame and return it as a Unicode string.

    OCR (Optical Character Recognition) is performed with the "Tesseract"
    open-source OCR engine.

    :param frame:
        The video frame to process, as an image in OpenCV format (for
        example as returned by `frames` and `get_frame`). If not specified,
        a new frame is grabbed from the device-under-test.

    :param region: Restrict the search to this region of the video frame.
    :type region: `Region`

    :param mode: Tesseract's layout analysis mode.
    :type mode: `OcrMode`

    :param str lang:
        The three-letter
        `ISO-639-3
        <http://www.loc.gov/standards/iso639-2/php/code_list.php>`__
        language code of the language you are attempting to read; for
        example "eng" for English or "deu" for German.

        Join several codes with '+' to specify more than one language; for
        example "eng+deu" means that the text to be read may be in a mixture
        of English and German.

        This defaults to "eng" (English). You can override the global
        default value by setting ``lang`` in the ``[ocr]`` section of
        :ref:`.stbt.conf`.

        You may need to install the tesseract language pack; see
        installation instructions `here
        <https://stb-tester.com/manual/troubleshooting#install-ocr-language-pack>`__.

    :param dict tesseract_config:
        Configuration that is passed down to the underlying OCR engine.
        See the `tesseract documentation
        <https://github.com/tesseract-ocr/tesseract/wiki/ControlParams>`__
        for details.

    :type tesseract_user_words: unicode string, or list of unicode strings
    :param tesseract_user_words:
        List of words to be added to the tesseract dictionary. To replace
        the tesseract system dictionary altogether, also set
        ``tesseract_config={'load_system_dawg': False, 'load_freq_dawg':
        False}``.

    :type tesseract_user_patterns: unicode string, or list of unicode strings
    :param tesseract_user_patterns:
        List of patterns to add to the tesseract dictionary. The tesseract
        pattern language corresponds roughly to the following regular
        expressions::

            tesseract  regex
            =========  ===========
            \c         [a-zA-Z]
            \d         [0-9]
            \n         [a-zA-Z0-9]
            \p         [:punct:]
            \a         [a-z]
            \A         [A-Z]
            \*         *

    :param bool upsample:
        Upsample the image 3x before passing it to tesseract. This helps to
        preserve information in the text's anti-aliasing that would
        otherwise be lost when tesseract binarises the image. This defaults
        to ``True``; you should only disable it if you are doing your own
        pre-processing on the image.

    :type text_color: 3-element tuple of integers between 0 and 255, BGR order
    :param text_color:
        Color of the text. Specifying this can improve OCR results when
        tesseract's default thresholding algorithm doesn't detect the text,
        for example white text on a light-colored background or text on a
        translucent overlay.

    :param int text_color_threshold:
        The threshold to use with ``text_color``, between 0 and 255.
        Defaults to 25. You can override the global default value by
        setting ``text_color_threshold`` in the ``[ocr]`` section of
        :ref:`.stbt.conf`.

    :param engine:
        The OCR engine to use. Defaults to ``OcrEngine.TESSERACT``. You can
        override the global default value by setting ``engine`` in the
        ``[ocr]`` section of :ref:`.stbt.conf`.
    :type engine: `OcrEngine`

    :type char_whitelist: unicode string
    :param char_whitelist:
        String of characters that are allowed. Useful when you know that
        the text is only going to contain numbers or IP addresses, for
        example so that tesseract won't think that a zero is the letter o.
        Note that Tesseract 4.0's LSTM engine ignores ``char_whitelist``.

    :param dict corrections:
        Dictionary of corrections to replace known OCR mis-reads. Each key
        of the dict is the text to search for; the value is the corrected
        string to replace the matching key. If the key is a string, it is
        treated as plain text and it will only match at word boundaries
        (for example the string ``"he saw"`` won't match ``"the saw"`` nor
        ``"he saws"``). If the key is a regular expression pattern (created
        with `re.compile`) it can match anywhere, and the replacement
        string can contain backreferences such as ``"\1"`` which are
        replaced with the corresponding group in the pattern (same as
        Python's `re.sub`). Example::

            corrections={'bad': 'good',
                         re.compile(r'[oO]'): '0'}

        Plain strings are replaced first (in the order they are specified),
        followed by regular expressions (in the order they are specified).

        The default value for this parameter can be set with
        `stbt.set_global_ocr_corrections`. If global corrections have been
        set *and* this ``corrections`` parameter is specified, the
        corrections in this parameter are applied first.

    :raises TypeError: If ``region`` is None.

    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    | Added in v31: The ``char_whitelist`` parameter.
    | Added in v32: The ``corrections`` parameter.
    """
    if frame is None:
        import stbt
        frame = stbt.get_frame()

    if region is None:
        message = (
            "Passing region=None to ocr is deprecated since v0.21. "
            "In a future version, region=None will mean an empty region "
            "instead. To OCR an entire video frame, use "
            "`region=Region.ALL`.")
        raise TypeError(message)

    # A single string is accepted as shorthand for a one-element list.
    single_string = (bytes, str)
    if isinstance(tesseract_user_words, single_string):
        tesseract_user_words = [tesseract_user_words]
    if isinstance(tesseract_user_patterns, single_string):
        tesseract_user_patterns = [tesseract_user_patterns]

    imglog = ImageLogger("ocr", result=None)

    raw_text, region = _tesseract(
        frame, region, mode, lang, tesseract_config,
        tesseract_user_patterns, tesseract_user_words, upsample,
        text_color, text_color_threshold, engine, char_whitelist, imglog)

    # Trim surrounding whitespace and apply the module's translation table
    # before the caller-supplied (and any global) corrections.
    cleaned = raw_text.strip().translate(_ocr_transtab)
    text = apply_ocr_corrections(cleaned, corrections)

    debug(u"OCR in region %s read '%s'." % (region, text))
    _log_ocr_image_debug(imglog, text)
    return text
def find_selection_horizontal_repeat(
        frame, background, region=stbt.Region.ALL, match_threshold=0.95):
    """Locate the selected menu item by searching for its background image.

    This is an example to demonstrate that you can implement your own custom
    image processing with OpenCV.

    :param frame: An OpenCV image, as returned by `stbt.get_frame` or
        `cv2.imread`. If `None`, will pull a new frame from the system under
        test.
    :param background: The path to a 1-pixel-wide image of your
        system-under-test's menu selection/highlight.
    :param region: If specified, restrict the search to this region of the
        frame.
    :param match_threshold: Minimum correlation (the best value of
        ``1 - TM_SQDIFF_NORMED``) required to report a selection.

    :returns: A `Selection` object representing the selected item. It is
        built with a region of ``None`` when nothing was found.

    Example::

        >>> frame = load_image("../selftest/screenshots/roku-home.png")
        >>> find_selection_horizontal_repeat(
        ...     frame, "images/roku-menu-selection-background.png")
        Selection(region=Region(x=116, y=157, right=483, bottom=208), text=u'Home')

    """
    if frame is None:
        frame = stbt.get_frame()
    frame = crop(frame, region)

    bg = load_image(background)

    # TM_SQDIFF_NORMED is a difference measure; subtract it from 1 so that
    # larger values mean a better match.
    similarity = 1 - cv2.matchTemplate(frame, bg, cv2.TM_SQDIFF_NORMED)
    best_score = cv2.minMaxLoc(similarity)[1]

    selection_region = None
    if best_score >= match_threshold:
        # The row whose best correlation is highest gives the y coordinate.
        best_per_row = numpy.amax(similarity, axis=1)
        y = cv2.minMaxLoc(best_per_row)[3][1]

        # With y known, binarise that row of correlations (Otsu threshold)
        # to work out the horizontal extents of the selection.
        row_as_uint8 = numpy.uint8(similarity[y, :] * 255)
        thresholded = cv2.threshold(
            row_as_uint8, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
        matching_columns = thresholded.flatten().nonzero()[0]

        extents = list(_combine_neighbouring_extents(
            list(_zeros_to_extents(matching_columns))))
        if extents:
            # Keep the widest run of matching columns.
            x, right = max(extents, key=lambda extent: extent[1] - extent[0])
            in_crop = stbt.Region(
                x, y, right=right, bottom=y + bg.shape[0])
            # Translate from the cropped frame's coordinates by the origin
            # of `region` (clamped at 0).
            selection_region = in_crop.translate(
                x=max(0, region.x), y=max(0, region.y))
            if selection_region.width > 10:
                # Remove the rounded corners of the selection; after
                # subtracting the background they look like single-quotes
                # to the OCR engine.
                selection_region = selection_region.extend(x=5, right=-5)

    if not selection_region:
        stbt.debug(
            "find_selection didn't find match (%.2f) above the threshold (%.2f)"
            % (best_score, match_threshold))

    return Selection(selection_region, frame, bg)
def match_text(text, frame=None, region=Region.ALL,
               mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD, lang=None,
               tesseract_config=None, case_sensitive=False, upsample=True,
               text_color=None, text_color_threshold=None, engine=None):
    """Use OCR to look for ``text`` in a single video frame.

    This is an alternative to `match` for when you want to locate a piece
    of text rather than a reference image.

    :param unicode text: The text to look for.
    :param frame: See `ocr`.
    :param region: See `ocr`.
    :param mode: See `ocr`.
    :param lang: See `ocr`.
    :param tesseract_config: See `ocr`.
    :param upsample: See `ocr`.
    :param text_color: See `ocr`.
    :param text_color_threshold: See `ocr`.
    :param engine: See `ocr`.
    :param bool case_sensitive: Case is ignored when this is False (the
        default).

    :returns:
        A `TextMatchResult`. It evaluates to True if the text was found
        and to False otherwise.

    For example, to select a button in a vertical menu by name (in this case
    "TV Guide")::

        m = stbt.match_text("TV Guide")
        assert m.match
        while not stbt.match('selected-button.png').region.contains(m.region):
            stbt.press('KEY_DOWN')

    | Added in v28: The ``upsample`` and ``text_color`` parameters.
    | Added in v29: The ``text_color_threshold`` parameter.
    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    """
    import lxml.etree
    if frame is None:
        import stbt
        frame = stbt.get_frame()

    # Work on a copy so the caller's dict is never modified, and request
    # hOCR (XML) output so that word positions are available.
    hocr_config = dict(tesseract_config or {})
    hocr_config['tessedit_create_hocr'] = 1

    timestamp = getattr(frame, "time", None)
    imglog = ImageLogger("match_text")

    # The words being searched for are passed in the user-words position.
    # NOTE(review): `ocr` in this file passes one more positional argument
    # (char_whitelist) before imglog -- confirm that this call matches the
    # signature of `_tesseract`.
    xml, region = _tesseract(
        frame, region, mode, lang, hocr_config, None, text.split(),
        upsample, text_color, text_color_threshold, engine, imglog)

    hocr = None
    found = False
    text_region = None
    if xml != '':
        hocr = lxml.etree.fromstring(xml.encode('utf-8'))
        phrase = _hocr_find_phrase(
            hocr, _to_unicode(text).split(), case_sensitive)
        if phrase:
            # Grow a single bounding box around every matched element.
            bounds = None
            for _, element in phrase:
                bounds = Region.bounding_box(
                    bounds, _hocr_elem_region(element))

            # _tesseract crops to `region` and (when upsampling) scales the
            # image up by a factor of 3, so map the box back into the
            # coordinate space of the original frame.
            scale = 3 if upsample else 1
            text_region = Region.from_extents(
                region.x + bounds.x // scale,
                region.y + bounds.y // scale,
                region.x + bounds.right // scale,
                region.y + bounds.bottom // scale)
            found = True

    result = TextMatchResult(timestamp, found, text_region, frame, text)

    message = ("match_text: Match found: %s" if result.match
               else "match_text: No match found: %s")
    debug(message % str(result))

    imglog.set(text=text, case_sensitive=case_sensitive,
               result=result, hocr=hocr)
    _log_ocr_image_debug(imglog)

    return result
def _match_all(image, frame, match_parameters, region):
    """
    Generator that yields each match of ``image`` within ``frame``.

    It yields a sequence of zero or more truthy MatchResults, followed by a
    falsey MatchResult.

    ``match_parameters`` defaults to ``MatchParameters()`` and ``frame``
    defaults to a frame grabbed with ``stbt.get_frame()``. Because this is a
    generator, the argument validation below only runs (and only raises
    ``ValueError``) once iteration starts.
    """
    if match_parameters is None:
        match_parameters = MatchParameters()

    if frame is None:
        import stbt
        frame = stbt.get_frame()

    template = _load_image(image)

    # Normalise single channel images to shape (h, w, 1) rather than just
    # (h, w). The reshaping is done on views of the arrays.
    tmpl = template.image.view()
    if len(tmpl.shape) == 2:
        tmpl.shape += (1,)

    frame = frame.view()
    if len(frame.shape) == 2:
        frame.shape += (1,)

    if len(tmpl.shape) != 3:
        raise ValueError(
            "Invalid shape for image: %r. Shape must have 2 or 3 elements" %
            (template.image.shape,))
    if len(frame.shape) != 3:
        raise ValueError(
            "Invalid shape for frame: %r. Shape must have 2 or 3 elements" %
            (frame.shape,))

    tmpl_channels = tmpl.shape[2]
    if tmpl_channels not in [1, 3, 4]:
        raise ValueError("Expected 3-channel image, got %d channels: %s"
                         % (tmpl_channels, template.absolute_filename))

    if frame.shape[0] < tmpl.shape[0] or frame.shape[1] < tmpl.shape[1]:
        raise ValueError("Frame %r must be larger than reference image %r"
                         % (frame.shape, tmpl.shape))
    if tmpl.shape[0] < 1 or tmpl.shape[1] < 1:
        raise ValueError("Reference image %r must contain some data"
                         % (tmpl.shape,))

    # Allowed (frame channels, reference image channels) combinations.
    if (frame.shape[2], tmpl_channels) not in [(1, 1), (3, 3), (3, 4)]:
        raise ValueError(
            "Frame %r and reference image %r must have the same number of "
            "channels" % (frame.shape, tmpl.shape))

    if tmpl_channels == 4:
        if cv2_compat.version < [3, 0, 0]:
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires OpenCV 3.0 or greater (you have %s)."
                % (template.relative_filename, cv2_compat.version))
        supported_with_alpha = (MatchMethod.SQDIFF, MatchMethod.CCORR_NORMED)
        if match_parameters.match_method not in supported_with_alpha:
            # See `matchTemplateMask`:
            # https://github.com/opencv/opencv/blob/3.2.0/modules/imgproc/src/templmatch.cpp#L840-L917
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires match_method SQDIFF or CCORR_NORMED "
                "(you specified %s)."
                % (template.relative_filename, match_parameters.match_method))

    input_region = Region.intersect(_image_region(frame), region)
    if input_region is None:
        raise ValueError("frame with dimensions %r doesn't contain %r"
                         % (frame.shape, region))
    if (input_region.height < tmpl.shape[0] or
            input_region.width < tmpl.shape[1]):
        raise ValueError("%r must be larger than reference image %r"
                         % (input_region, tmpl.shape))

    imglog = ImageLogger(
        "match", match_parameters=match_parameters,
        template_name=template.friendly_name,
        input_region=input_region)

    # pylint:disable=undefined-loop-variable
    try:
        for candidate in _find_matches(
                crop(frame, input_region), tmpl, match_parameters, imglog):
            (matched, match_region, first_pass_matched,
             first_pass_certainty) = candidate

            # Matches are found within the cropped frame; translate them
            # back by the origin of the searched region.
            match_region = Region.from_extents(*match_region).translate(
                input_region.x, input_region.y)

            result = MatchResult(
                getattr(frame, "time", None), matched, match_region,
                first_pass_certainty, frame,
                (template.relative_filename or template.image),
                first_pass_matched)
            imglog.append(matches=result)

            label = "match(%r)" % os.path.basename(template.friendly_name)
            draw_on(frame, result, label=label)
            yield result

    finally:
        # Debug logging is best-effort: any failure while writing it is
        # deliberately ignored.
        try:
            _log_match_image_debug(imglog)
        except Exception:  # pylint:disable=broad-except
            pass