def result(self, path):
    '''Returns all the results from calling the service on the 'path'.  The
    results are returned as a TRResult named tuple.

    Successful results are cached in self._results, keyed by 'path'; a
    repeated call for the same path returns the cached value without
    contacting the service again.

    If the image cannot be read, the error value produced by
    self._image_from_file() is returned as-is.  A NetworkFailure is reported
    through a TRResult whose 'error' field holds the failure message.  Any
    other error reported by net() is raised, and ServiceFailure is raised if
    the submission response has no Operation-Location header.
    '''
    # Check if we already processed it.
    if path in self._results:
        if __debug__: log('returning already-known result for {}', path)
        return self._results[path]

    # Read the image and proceed with contacting the service.
    (image, error) = self._image_from_file(path)
    if error:
        return error

    base_url = 'https://westus.api.cognitive.microsoft.com/vision/v2.0/'
    url = base_url + 'recognizeText'
    params = {'mode': 'Handwritten'}
    headers = {'Ocp-Apim-Subscription-Key': self._credentials,
               'Content-Type': 'application/octet-stream'}

    # The Microsoft API for extracting text requires two phases: one call
    # to submit the image for processing, then polling to wait until the
    # text is ready to be retrieved.
    if __debug__: log('sending file to MS cloud service')
    response, error = net('post', url, headers=headers, params=params,
                          data=image)
    if isinstance(error, NetworkFailure):
        if __debug__: log('network exception: {}', str(error))
        # Supply 'boxes' explicitly so this error result has the same fields
        # as the one built on the success path below.
        return TRResult(path=path, data={}, boxes=[], text='',
                        error=str(error))
    elif isinstance(error, RateLimitExceeded):
        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
        # The headers should have a Retry-After number in seconds.
        sleep_time = 30
        if 'Retry-After' in response.headers:
            sleep_time = int(response.headers['Retry-After'])
        if __debug__: log('sleeping for {} s and retrying', sleep_time)
        sleep(sleep_time)
        return self.result(path)        # Recursive invocation
    elif error:
        raise error

    if 'Operation-Location' in response.headers:
        polling_url = response.headers['Operation-Location']
    else:
        if __debug__: log('no operation-location in response headers')
        raise ServiceFailure('Unexpected response from Microsoft server')

    if __debug__: log('polling MS for results ...')
    analysis = {}
    poll = True
    while poll:
        # I never have seen results returned in 1 second, and meanwhile
        # the repeated polling counts against your rate limit.  So, wait
        # for 2 s to reduce the number of calls.
        sleep(2)
        response, error = net('get', polling_url, polling=True,
                              headers=headers)
        if isinstance(error, NetworkFailure):
            if __debug__: log('network exception: {}', str(error))
            return TRResult(path=path, data={}, boxes=[], text='',
                            error=str(error))
        elif isinstance(error, RateLimitExceeded):
            # Pause to let the server reset its timers.  It seems that MS
            # doesn't send back a Retry-After header when rated limited
            # during polling, but I'm going to check it anyway, in case.
            sleep_time = 30
            if 'Retry-After' in response.headers:
                sleep_time = int(response.headers['Retry-After'])
            if __debug__: log('sleeping for {} s and retrying', sleep_time)
            sleep(sleep_time)
            # The rate-limited response is not a poll result, so don't try
            # to parse it; just poll again.
            continue
        elif error:
            raise error

        # Sometimes the response comes back without content.  I don't know
        # if that's a bug in the Azure system or not.  It's not clear what
        # else should be done except keep going.
        if response.text:
            analysis = response.json()
            if 'recognitionResult' in analysis:
                poll = False
            if analysis.get('status') == 'Failed':
                poll = False
        else:
            if __debug__: log('received empty result from Microsoft.')

    if __debug__: log('results received.')

    # Have to extract the text into a single string.  'lines' starts out
    # empty so that a poll that ended with status 'Failed' (and therefore
    # has no 'recognitionResult') yields empty text and no boxes instead of
    # failing on an undefined variable.
    full_text = ''
    lines = []
    if 'recognitionResult' in analysis:
        lines = analysis['recognitionResult']['lines']
        # Order by the second, then the first, bounding box coordinate.
        sorted_lines = sorted(lines, key=lambda x: (x['boundingBox'][1],
                                                    x['boundingBox'][0]))
        full_text = '\n'.join(x['text'] for x in sorted_lines)

    # Create our particular box structure for annotations.  The Microsoft
    # structure is like this: data['recognitionResult']['lines'] contains
    # a list of dict with keys 'words', 'boundingBox', and 'text'.
    boxes = [TextBox(boundingBox=chunk['boundingBox'], text=chunk['text'])
             for chunk in lines]

    # Put it all together.
    self._results[path] = TRResult(path=path, data=analysis,
                                   text=full_text, boxes=boxes, error=None)
    return self._results[path]
def result(self, path):
    '''Returns the results from calling the service on the 'path'.  The
    results are returned as a TRResult named tuple.

    If the image cannot be read, the error value produced by
    self._image_from_file() is returned as-is.  AuthFailure is raised when
    Google reports a permission problem.  Any other exception (apart from
    the KeyError special case noted below) is reported through a TRResult
    whose 'error' field holds a message.
    '''
    # Read the image and proceed with contacting the service.
    (image, error) = self._image_from_file(path)
    if error:
        return error

    try:
        if __debug__: log('building Google vision API object')
        client = gv.ImageAnnotatorClient()
        image = gv.types.Image(content=image)
        context = gv.types.ImageContext(
            language_hints=['en-t-i0-handwrit'])

        # Iterate over the known API calls and store each result.
        result = dict.fromkeys(self._known_features)
        for feature in self._known_features:
            if __debug__: log('sending image to Google for {} ...', feature)
            api_call = getattr(client, feature)
            response = api_call(image=image, image_context=context)
            if __debug__: log('received result.')
            result[feature] = MessageToDict(response)

        # Extract text and bounding boxes into our format.
        # Their structure looks like this:
        #
        # result['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0].keys()
        # --> dict_keys(['boundingBox', 'confidence', 'paragraphs', 'blockType'])
        #
        # result['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0]['paragraphs'][0].keys()
        # --> dict_keys(['boundingBox', 'words', 'confidence'])
        #
        # https://cloud.google.com/vision/docs/reference/rest/v1/images/annotate#Block
        full_text = ''
        boxes = []
        detection = result['document_text_detection']
        if 'fullTextAnnotation' in detection:
            fta = detection['fullTextAnnotation']
            full_text = fta['text']
            # Only the first page of the annotation is examined.
            for block in fta['pages'][0]['blocks']:
                for para in block['paragraphs']:
                    for word in para['words']:
                        # A word's text is the concatenation of its symbols.
                        word_text = ''.join(symbol['text']
                                            for symbol in word['symbols'])
                        vertices = word['boundingBox']['vertices']
                        corners = corner_list(vertices)
                        if not corners:
                            # Something is wrong with the vertex list.
                            # Skip it and continue.
                            if __debug__: log('bad bb for {}: {}', word_text, vertices)
                            continue
                        boxes.append(TextBox(boundingBox=corners,
                                             text=word_text))
        return TRResult(path=path, data=result, boxes=boxes,
                        text=full_text, error=None)
    except google.api_core.exceptions.PermissionDenied as ex:
        message = 'Authentication failure for Google service -- {}'.format(ex)
        raise AuthFailure(message)
    except KeyboardInterrupt:
        raise
    except KeyError:
        # Can happen if you control-C in the middle of the Google call.
        # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
        # printed to the terminal and we end up here.
        raise KeyboardInterrupt
    except Exception as ex:
        message = 'Error: {} -- {}'.format(str(ex), path)
        return TRResult(path=path, data={}, boxes=[], text='', error=message)
def amazon_result(self, file_path, variant, api_method, image_keyword,
                  response_key, value_key, block_key):
    '''Returns the results from calling the service on the 'file_path'.  The
    results are returned as a TRResult named tuple.

    Parameters:
      file_path:     path of the image file; also the key under which the
                     result is cached in self._results.
      variant:       service name passed to boto3.client().
      api_method:    name of the client method to call.
      image_keyword: name of the keyword argument under which the image
                     bytes are passed to that method, as {'Bytes': image}.
      response_key:  key in the response that holds the list of blocks.
      value_key:     key in each block whose value is compared to "WORD".
      block_key:     key in each "WORD" block that holds the text.

    Successful results are cached in self._results.  KeyboardInterrupt is
    propagated; any other exception raised while calling the service or
    processing its response is reported through a TRResult whose 'error'
    field holds a message.
    '''
    # Check if we already processed it.
    if file_path in self._results:
        if __debug__: log('returning already-known result for {}', file_path)
        return self._results[file_path]

    # Read the image and proceed with contacting the service.
    (image, error) = self._image_from_file(file_path)
    if error:
        return TRResult(path=file_path, data={}, boxes=[], text='',
                        error=error)

    if __debug__: log('setting up Amazon client function "{}"', variant)
    creds = self._credentials
    try:
        client = boto3.client(
            variant,
            region_name=creds['region_name'],
            aws_access_key_id=creds['aws_access_key_id'],
            aws_secret_access_key=creds['aws_secret_access_key'])
        if __debug__: log('calling Amazon API function')
        response = getattr(client, api_method)(
            **{image_keyword: {'Bytes': image}})
        if __debug__: log('received {} blocks', len(response[response_key]))

        words = []
        boxes = []
        # The image dimensions are handed to corner_list() along with each
        # block's polygon.
        width, height = imagesize.get(file_path)
        for block in response[response_key]:
            if block.get(value_key) != "WORD":
                continue
            text = block[block_key]
            words.append(text)
            polygon = block['Geometry']['Polygon']
            corners = corner_list(polygon, width, height)
            if corners:
                boxes.append(TextBox(boundingBox=corners, text=text))
            else:
                # Something's wrong with the vertex list.  Skip & continue.
                # (Log the polygon itself: no other name for the bounding
                # box exists in this function, and an undefined name here
                # would be caught below and discard the whole result.)
                if __debug__: log('bad bb for {}: {}', text, polygon)

        # Every word is followed by a single space, including the last one.
        full_text = ''.join(word + ' ' for word in words)

        result = TRResult(path=file_path, data=response, boxes=boxes,
                          text=full_text, error=None)
        self._results[file_path] = result
        return result
    except KeyboardInterrupt:
        raise
    except Exception as ex:
        text = 'Error: {} -- {}'.format(str(ex), file_path)
        return TRResult(path=file_path, data={}, boxes=[], text='',
                        error=text)