# Open the local image in binary mode; the context manager guarantees the
# handle is closed once the service call has consumed the stream.
with open(local_image_path, "rb") as local_image:
    print("\nRecognizing printed text with OCR on a local image ...\n")
    ocr_result = computervision_client.recognize_printed_text_in_stream(
        local_image)

# NOTE(review): the loop variable is deliberately not called `region`, because
# a module-level name `region` is read elsewhere in this file to build the
# service endpoint and would be overwritten by this loop.
for ocr_region in ocr_result.regions:
    for line in ocr_region.lines:
        print(f"Bounding box: {line.bounding_box}")
        # Every word is followed by a single space (including the last one).
        line_text = "".join(word.text + " " for word in line.words)
        print(line_text + "\n")
# END - Recognize printed text with OCR in a local image

# Recognize printed text with OCR in a remote image by:
#   1. Calling the Computer Vision service's recognize_printed_text with the:
#      - image
#   2. Displaying the lines of text and their bounding boxes.
remote_image_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/printed_text.jpg"
print("\nRecognizing printed text with OCR on a remote image ...\n")
ocr_result = computervision_client.recognize_printed_text(remote_image_url)

for ocr_region in ocr_result.regions:
    for line in ocr_region.lines:
        print(f"Bounding box: {line.bounding_box}")
        line_text = "".join(word.text + " " for word in line.words)
        print(line_text + "\n")
# END - Recognize printed text with OCR in a remote image
class ImageExtractor(BaseExtractor):
    """Extract text from images with Optical Character Recognition (OCR).

    The OCR itself is delegated to the Azure Computer Vision service. If the
    service client cannot be created, the instance stays usable but
    ``can_handle`` always returns ``False``.
    """

    # Class-level default; ``__init__`` assigns the per-instance client.
    # ``None`` means the client is not available.
    vision_client = None

    def __init__(self):
        """Create the Azure Computer Vision client.

        ``key`` and ``region`` are names defined elsewhere in this module.
        Any failure is logged as a warning and leaves ``vision_client`` set
        to ``None`` instead of raising (best-effort initialisation).
        """
        try:
            credentials = CognitiveServicesCredentials(key)
            self.vision_client = ComputerVisionClient(
                endpoint="https://" + region + ".api.cognitive.microsoft.com/",
                credentials=credentials,
            )
        except Exception as e:
            log.warning(
                f"Can't init Azure ComputerVisionClient (make sure env vars are correct): {str(e)}"
            )
            self.vision_client = None

    def _extract_text_from_image(self, filename_or_url: str, language: str = "en"):
        """Run OCR on a local file or a public URL and return the text.

        Args:
            filename_or_url: Path of an existing local file, or otherwise a
                value that is passed to the service as an image URL.
            language: Currently unused. The service is always called with
                ``language="unk"``.
                NOTE(review): presumably "unk" lets the service detect the
                language itself - confirm before wiring this parameter in.

        Returns:
            A ``(fulltext, meta)`` tuple. ``fulltext`` holds the
            spell-corrected words: each word is followed by a space, each
            line by ``"\\n"`` and each region by ``"\\n\\n"``. ``meta`` is a
            dict with the keys ``language``, ``text_angle`` and
            ``orientation`` copied from the OCR result.
        """
        # Example public image URL:
        # https://upload.wikimedia.org/wikipedia/commons/thumb/1/12/Broadway_and_Times_Square_by_night.jpg/450px-Broadway_and_Times_Square_by_night.jpg
        # API docs: https://azuresdkdocs.blob.core.windows.net/$web/python/azure-cognitiveservices-vision-computervision/0.7.0/azure.cognitiveservices.vision.computervision.models.html#azure.cognitiveservices.vision.computervision.models.OcrResult

        # Raw response from Azure Cognitive Service
        if os.path.isfile(filename_or_url):
            # Process a local file; ``with`` closes the handle even when the
            # service call raises.
            with open(filename_or_url, "rb") as local_image:
                ocr_result = self.vision_client.recognize_printed_text_in_stream(
                    local_image, detect_orientation=True, language="unk")
        else:
            # Process a public URL
            ocr_result = self.vision_client.recognize_printed_text(
                url=filename_or_url, detect_orientation=True, language="unk")

        # Transfer all data into meta
        meta = {
            "language": ocr_result.language,
            "text_angle": ocr_result.text_angle,
            "orientation": ocr_result.orientation,
        }

        # TODO improve word list w. medical dictionary + more languages.
        # FIXME: not very good results right now
        spellcheck = Spellchecker(language=ocr_result.language)

        # Now extract the text from all regions. Pieces are collected in a
        # list and joined once, instead of repeated string concatenation.
        # The loop variable is not named ``region`` so that it does not
        # shadow the module-level ``region`` used in ``__init__``.
        parts = []
        for text_region in ocr_result.regions:
            for line in text_region.lines:
                for word in line.words:
                    corrected = spellcheck.correct_word(word.text)
                    if corrected != word.text:
                        log.debug(
                            f"auto-corrected word: {word.text} -> {corrected}")
                    parts.append(corrected + " ")
                parts.append("\n")
            parts.append("\n\n")
        fulltext = "".join(parts)

        # Return multiple values
        return fulltext, meta

    def can_handle(self, request: ExtractorRequest) -> bool:
        """Tell whether this extractor can process ``request``.

        Returns ``False`` (and logs a warning) when the Azure client is not
        initialised. Otherwise returns ``True`` if the request's ``url`` or
        ``filename`` is set and ``_is_supported_content_type`` accepts it.
        """
        if not self.vision_client:
            log.warning(
                "Can't handle input, as the Azure Computer Vision Client hasn't been initialized"
            )
            return False

        # bool() keeps the declared return type: the bare and/or chain could
        # otherwise yield None or an empty string.
        return bool(
            (request.url and _is_supported_content_type(request.url))
            or (request.filename
                and _is_supported_content_type(request.filename))
        )

    def extract(self, request: ExtractorRequest) -> ExtractorResponse:
        """Extract the text of the image referenced by ``request``.

        ``request.url`` takes precedence over ``request.filename``. Errors
        are not raised: they are logged and returned in the response's
        ``error`` field.
        """
        log.info("Extracting text from image (Azure Computer Vision OCR) ...")

        try:
            if request.url:
                fulltext, meta = self._extract_text_from_image(request.url)
            else:
                fulltext, meta = self._extract_text_from_image(
                    request.filename)

            # TODO get some meta data as well
            meta = {**meta, "source": "image", "extractor": "az-vision"}
            return ExtractorResponse(text=fulltext, meta=meta)
        except Exception as e:
            msg = f"Error extracting text using Azure Computer Vision: '{str(e)}'"
            log.error(msg)
            return ExtractorResponse(error=msg)