def _find_size(self, base: Geometry, size: SizeLocator):
    """Find region of fixed size around base, or origin if no base defined.

    :param base: Anchor for the result. A ``Region`` contributes its
        ``center``; an ``Undefined`` value means "no anchor"; any other
        value is used directly and must expose ``x`` and ``y``.
    :param size: Locator providing the ``width`` and ``height`` of the result
    :return: List containing the single resulting region
    """
    if isinstance(base, Undefined):
        # No anchor: place the region at the origin. Wrapped in a list so
        # both branches return the same shape to the caller.
        return [Region.from_size(0, 0, size.width, size.height)]

    center = base.center if isinstance(base, Region) else base

    # Shift by half of the size (floor division) so that the result is
    # centered on the anchor point.
    left = center.x - size.width // 2
    top = center.y - size.height // 2

    return [Region.from_size(left, top, size.width, size.height)]
def _iter_matches(self, image, template) -> Region:
    """Brute-force search for template image in larger image.

    Both images are converted to grayscale and split into rows of pixel
    values. An optimized string search finds candidate positions of the
    template's first row on an image row, and the remaining template rows
    are then compared against the image rows directly below the candidate.

    This is a generator: it yields a ``Region`` for every exact match
    (the ``-> Region`` annotation describes the yielded items).

    TODO: Generalize string-search algorithm to work in two dimensions

    :param image: Image to search from
    :param template: Image to search for
    """
    image = ImageOps.grayscale(image)
    template = ImageOps.grayscale(template)

    template_width, template_height = template.size
    template_rows = chunks(tuple(template.getdata()), template_width)

    image_width, _ = image.size
    image_rows = chunks(tuple(image.getdata()), image_width)

    if len(template_rows) == 0:
        # Nothing to search for
        return

    first_row = template_rows[0]
    remaining_rows = template_rows[1:]

    # Number of vertical positions where the whole template still fits
    # inside the image. Includes the bottom-most position, and is zero or
    # negative (no iterations) when the template is taller than the image.
    position_count = len(image_rows) - len(template_rows) + 1

    for image_y in range(position_count):
        for image_x in self._search_string(image_rows[image_y], first_row):
            # The first template row matched on image row `image_y`, so
            # the rest must match the rows starting from `image_y + 1`.
            mismatch = any(
                template_row
                != image_rows[match_y][image_x : image_x + template_width]
                for match_y, template_row in enumerate(remaining_rows, image_y + 1)
            )
            if not mismatch:
                yield Region.from_size(
                    image_x, image_y, template_width, template_height
                )
def get_display_dimensions(self) -> Region:
    """Return the bounds of the current virtual display.

    The virtual display is the combined area of all physical monitors,
    read here from the first entry of the mss monitor list.
    """
    with mss.mss() as capture:
        virtual = capture.monitors[0]
        left, top = virtual["left"], virtual["top"]
        width, height = virtual["width"], virtual["height"]
        return Region.from_size(left, top, width, height)
def _match_template(image: Image.Image, template: Image.Image, tolerance: float) -> Iterator[Region]:
    """Use opencv's matchTemplate() to slide the `template` over `image` to
    calculate correlation coefficients, and then filter with a tolerance
    to find all relevant global maximums.

    :param image: Image to search from
    :param template: Image to search for
    :param tolerance: Minimum correlation coefficient accepted as a match
    :return: Iterator of matching regions, best match first
    """
    template_width, template_height = template.size

    # The RGB -> BGR conversion below needs three-channel input, so bring
    # every other mode (RGBA, grayscale, palette, ...) to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")
    if template.mode != "RGB":
        template = template.convert("RGB")

    image = numpy.array(image)
    template = numpy.array(template)

    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    template = cv2.cvtColor(template, cv2.COLOR_RGB2BGR)

    # Template matching result is a single channel array of shape:
    # Width:  Image width  - template width  + 1
    # Height: Image height - template height + 1
    coefficients = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
    coeff_height, coeff_width = coefficients.shape

    # Value written over already reported matches. It has to be below the
    # tolerance, otherwise suppressed cells would be reported again and
    # the loop would never end (plain zero is not enough if tolerance <= 0).
    suppressed = 0 if tolerance > 0 else tolerance - 1.0

    while True:
        # The point (match_x, match_y) is the top-left of the best match
        _, match_coeff, _, (match_x, match_y) = cv2.minMaxLoc(coefficients)
        if match_coeff < tolerance:
            break

        # Suppress values for a template-sized region around the best match
        # to prevent duplicate matches for the same element.
        left = clamp(0, match_x - template_width // 2, coeff_width)
        top = clamp(0, match_y - template_height // 2, coeff_height)
        right = clamp(0, match_x + template_width // 2, coeff_width)
        bottom = clamp(0, match_y + template_height // 2, coeff_height)

        # Always cover at least the match itself: for a template that is
        # one pixel wide or high the half-size is zero, which would leave
        # the slice empty and the same maximum would be found forever.
        right = max(right, match_x + 1)
        bottom = max(bottom, match_y + 1)

        coefficients[top:bottom, left:right] = suppressed

        yield Region.from_size(match_x, match_y, template_width, template_height)
def find(image: Union[Image.Image, Path], text: str, confidence: float = DEFAULT_CONFIDENCE):
    """Scan image for text and return a list of regions
    that contain it (or something close to it).

    :param image: Path to image or Image object
    :param text: Text to find in image
    :param confidence: Minimum confidence for text similarity,
        clamped to the range 1-100
    :raises ValueError: If the search text is empty after stripping
    :raises EnvironmentError: If tesseract is not installed
    """
    image = to_image(image)
    confidence = clamp(1, float(confidence), 100)

    text = str(text).strip()
    if not text:
        raise ValueError("Empty search string")

    try:
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
    except TesseractNotFoundError as err:
        raise EnvironmentError(INSTALL_PROMPT) from err

    # Group recognized words by the line they belong to
    lines = defaultdict(list)
    for word in _iter_rows(data):
        # Only word-level rows (level 5) carry text that can be matched
        if word["level"] != 5:
            continue

        if not word["text"].strip():
            continue

        key = "{:d}-{:d}-{:d}".format(word["block_num"], word["par_num"], word["line_num"])
        region = Region.from_size(word["left"], word["top"], word["width"], word["height"])

        # NOTE: Currently ignoring confidence in tesseract results
        # NOTE: The number of collected words is intentionally not checked
        #       against word["word_num"]: blank words are skipped above, so
        #       the two legitimately differ on lines that contain them.
        lines[key].append({"text": word["text"], "region": region})

    return _match_lines(lines.values(), text, confidence)
def _dict_lines(data: Dict) -> List:
    """Group the words of tesseract's dictionary output by text line.

    :param data: Tesseract output in dictionary form, iterated row by row
        with ``_iter_rows``
    :return: List of lines, each a list of ``{"text", "region"}``
        dictionaries in the order the words were reported
    """
    lines = defaultdict(list)
    for word in _iter_rows(data):
        # Only word-level rows (level 5) carry text that can be matched
        if word["level"] != 5:
            continue

        if not word["text"].strip():
            continue

        key = "{:d}-{:d}-{:d}".format(
            word["block_num"], word["par_num"], word["line_num"]
        )
        region = Region.from_size(
            word["left"], word["top"], word["width"], word["height"]
        )

        # NOTE: Currently ignoring confidence in tesseract results
        # NOTE: The number of collected words is intentionally not checked
        #       against word["word_num"]: blank words are skipped above, so
        #       the two legitimately differ on lines that contain them.
        lines[key].append({"text": word["text"], "region": region})

    return list(lines.values())
def _find_from_displays(
    self, finder: Callable[[Image.Image], List[Region]]
) -> List[Region]:
    """Run the finder against a capture of every display and collect
    the found regions, mapped to the combined virtual display.

    The search duration, the number of matches, and a preview image of
    each match are written to the log.

    :param finder: Callable that searches an image
    :return: List of all found regions
    """
    found = []
    previews = []

    began = time.time()
    for display in screen.displays():
        capture = screen.grab(display)
        hits = finder(capture)

        # Preview crops are taken in the coordinates of the capture, from
        # each hit resized by 5 (presumably padding; see Region.resize)
        previews.extend(capture.crop(hit.resize(5).as_tuple()) for hit in hits)

        # Map hits from the capture's own bounds to the display's position
        width, height = capture.size
        bounds = Region.from_size(0, 0, width, height)
        found.extend(transform(hits, bounds, display))
    elapsed = time.time() - began

    suffix = "" if len(found) == 1 else "es"
    self.logger.info("Searched in %.2f seconds", elapsed)
    self.logger.info("Found %d match%s", len(found), suffix)

    for hit, preview in zip(found, previews):
        screen.log_image(preview, size=400)
        self.logger.info(hit)

    return found
def _iter_match_pillow(self, image, template, tolerance):
    """Brute-force search for template image in larger image.

    Both images are converted to grayscale and split into rows of pixel
    values. An optimized string search finds candidate positions of the
    template's first row on an image row, and the remaining template rows
    are then compared against the image rows directly below the candidate.

    This is a generator that yields a ``Region`` for every exact match.

    TODO: Generalize string-search algorithm to work in two dimensions

    :param image: Image to search from
    :param template: Image to search for
    :param tolerance: Not supported by this search method; a warning is
        logged the first time a value other than ``None`` is given
    """
    if tolerance is not None and not self._tolerance_warned:
        # Warn only once per instance
        self._tolerance_warned = True
        self.logger.warning(
            "Template matching tolerance not supported for current search method"
        )

    image = ImageOps.grayscale(image)
    template = ImageOps.grayscale(template)

    template_width, template_height = template.size
    template_rows = chunks(tuple(template.getdata()), template_width)

    image_width, _ = image.size
    image_rows = chunks(tuple(image.getdata()), image_width)

    if len(template_rows) == 0:
        # Nothing to search for
        return

    first_row = template_rows[0]
    remaining_rows = template_rows[1:]

    # Number of vertical positions where the whole template still fits
    # inside the image. Includes the bottom-most position, and is zero or
    # negative (no iterations) when the template is taller than the image.
    position_count = len(image_rows) - len(template_rows) + 1

    for image_y in range(position_count):
        for image_x in self._search_string(image_rows[image_y], first_row):
            # The first template row matched on image row `image_y`, so
            # the rest must match the rows starting from `image_y + 1`.
            mismatch = any(
                template_row
                != image_rows[match_y][image_x:image_x + template_width]
                for match_y, template_row in enumerate(remaining_rows, image_y + 1)
            )
            if not mismatch:
                yield Region.from_size(image_x, image_y, template_width, template_height)
def _monitor_to_region(monitor: Dict) -> Region:
    """Build a Region from the position and size of an mss monitor.

    :param monitor: Mapping with ``left``, ``top``, ``width`` and ``height``
    """
    left, top, width, height = (
        monitor[key] for key in ("left", "top", "width", "height")
    )
    return Region.from_size(left, top, width, height)