Ejemplo n.º 1
0
def find(
    image: Union[Image.Image, Path],
    template: Union[Image.Image, Path],
    region: Optional[Region] = None,
    limit: Optional[int] = None,
    confidence: float = DEFAULT_CONFIDENCE,
) -> List[Region]:
    """Search the given image for occurrences of the template.

    :param image:       Path to image or Image instance, used to search from
    :param template:    Path to image or Image instance, used to search with
    :param region:      Area to search from. Can speed up search significantly.
    :param limit:       Limit returned results to maximum of `limit`.
    :param confidence:  Confidence for matching, value between 1 and 100
    :return:            List of matching regions, in full-image coordinates
    :raises ValueError: Template does not fit inside the searched area
    :raises ImageNotFoundError: No match was found
    """
    # Normalize both inputs to Pillow images before doing anything else
    haystack = to_image(image)
    needle = to_image(template)

    # The matcher works with a tolerance instead of a confidence
    tolerance = _to_tolerance(confidence)

    # Restrict the search area when a region was given
    area = None
    if region is not None:
        area = geometry.to_region(region)
        haystack = haystack.crop(area.as_tuple())

    # The template has to fit inside the (possibly cropped) image
    fits_horizontally = template_fits = needle.size[0] <= haystack.size[0]
    if fits_horizontally:
        template_fits = needle.size[1] <= haystack.size[1]
    if not template_fits:
        raise ValueError("Template is larger than search region")

    # Collect matches until the user limit or the failsafe limit is hit
    found: List[Region] = []
    for candidate in _match_template(haystack, needle, tolerance):
        found.append(candidate)
        count = len(found)
        if limit is not None and count >= int(limit):
            break
        if count >= LIMIT_FAILSAFE:
            LOGGER.warning("Reached maximum of %d matches", LIMIT_FAILSAFE)
            break

    if not found:
        raise ImageNotFoundError("No matches for given template")

    # Without cropping the coordinates are already relative to the full image
    if region is None:
        return found

    # Shift matches from cropped coordinates back to full-size coordinates
    return [hit.move(area.left, area.top) for hit in found]
Ejemplo n.º 2
0
def read(image: Union[Image.Image, Path]):
    """Run OCR on the image and return all recognized text as one string.

    The recognized text is returned with surrounding whitespace stripped.

    :param image: Path to image or Image object
    :raises EnvironmentError: Tesseract could not be found
    """
    picture = to_image(image)
    try:
        recognized = pytesseract.image_to_string(picture)
    except TesseractNotFoundError as err:
        raise EnvironmentError(INSTALL_PROMPT) from err
    else:
        return recognized.strip()
Ejemplo n.º 3
0
def find(image: Union[Image.Image, Path],
         text: str,
         confidence: float = DEFAULT_CONFIDENCE):
    """Scan image for text and return a list of regions
    that contain it (or something close to it).

    :param image: Path to image or Image object
    :param text: Text to find in image
    :param confidence: Minimum confidence for text similarity,
                       clamped to the range 1-100
    :return: Matches found by comparing the recognized lines to ``text``
    :raises ValueError: The search string is empty after stripping
    :raises EnvironmentError: Tesseract could not be found
    """
    image = to_image(image)
    confidence = clamp(1, float(confidence), 100)

    text = str(text).strip()
    if not text:
        raise ValueError("Empty search string")

    try:
        data = pytesseract.image_to_data(image,
                                         output_type=pytesseract.Output.DICT)
    except TesseractNotFoundError as err:
        raise EnvironmentError(INSTALL_PROMPT) from err

    # Group recognized words by the block/paragraph/line they belong to
    grouped = defaultdict(list)
    for word in _iter_rows(data):
        # Only level 5 rows are kept (word-level rows in Tesseract output)
        if word["level"] != 5:
            continue

        # Blank words carry no searchable text
        if not word["text"].strip():
            continue

        key = "{:d}-{:d}-{:d}".format(word["block_num"], word["par_num"],
                                      word["line_num"])
        region = Region.from_size(word["left"], word["top"], word["width"],
                                  word["height"])

        # NOTE: Currently ignoring confidence in tesseract results
        #
        # The position of a word in its line can not be asserted to equal
        # `word_num`: skipped blank words still consume a word number, so
        # the check would fail on valid OCR output. Keep the number instead
        # and use it to order the words afterwards.
        entry = {"text": word["text"], "region": region}
        grouped[key].append((word["word_num"], entry))

    # Stable sort: words already in order keep their relative position
    lines = [
        [entry for _, entry in sorted(words, key=lambda item: item[0])]
        for words in grouped.values()
    ]

    matches = _match_lines(lines, text, confidence)
    return matches
Ejemplo n.º 4
0
def find(
    image: Union[Image.Image, Path],
    text: str,
    confidence: float = DEFAULT_CONFIDENCE,
    region: Optional[Region] = None,
):
    """Run OCR on the image and return the matches for the given text
    (or something close to it).

    :param image: Path to image or Image object
    :param text: Text to find in image
    :param confidence: Minimum confidence for text similarity,
                       clamped to the range 1-100
    :param region: Optional area to limit the search to; match regions
                   are moved back to full-image coordinates
    :raises ValueError: The search string is empty after stripping
    :raises EnvironmentError: Tesseract could not be found
    """
    source = to_image(image)
    threshold = clamp(1, float(confidence), 100)

    needle = str(text).strip()
    if needle == "":
        raise ValueError("Empty search string")

    # Crop to the requested area, remembering it for the offset fix below
    area = None
    if region is not None:
        area = geometry.to_region(region)
        source = source.crop(area.as_tuple())

    try:
        ocr_data = pytesseract.image_to_data(
            source, output_type=pytesseract.Output.DICT
        )
    except TesseractNotFoundError as err:
        raise EnvironmentError(INSTALL_PROMPT) from err

    matches = _match_lines(_dict_lines(ocr_data), needle, threshold)

    # Without cropping the regions are already in full-image coordinates
    if region is None:
        return matches

    # Shift each match from cropped coordinates back to the full image
    for hit in matches:
        hit["region"] = hit["region"].move(area.left, area.top)

    return matches