def _api(self, get_or_post, url, headers, data = None, polling = False,
         path = None):
    '''Call the Microsoft service at 'url' and return the response object.

    The request is made with net() from handprint.network, using the HTTP
    method named by 'get_or_post' and passing along 'headers', 'data' and
    'polling'.  If the outcome is a RateLimitExceeded error, this waits and
    repeats the request until the outcome is something else.

    The optional 'path' is only used to label the TRResult returned when a
    network failure occurs.  It defaults to None so that callers written
    for the original 5-argument form keep working.

    Returns the response object on success.  Returns a TRResult (a tuple)
    with the 'error' field set if net() reported a NetworkFailure.  Raises
    ServiceFailure with Microsoft's error code and message when the failed
    response contains them; otherwise raises the error reported by net().
    '''
    from handprint.network import net

    while True:
        response, error = net(get_or_post, url, headers = headers,
                              data = data, polling = polling)
        if isinstance(error, NetworkFailure):
            if __debug__: log(f'network exception: {str(error)}')
            return TRResult(path = path, data = {}, text = '', boxes = [],
                            error = str(error))
        if not isinstance(error, RateLimitExceeded):
            break

        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
        # The headers have a Retry-After number in seconds in some cases
        # but not others, so we default to something just in case.  The
        # default is also used if the header value is not a plain integer.
        sleep_time = 20
        try:
            sleep_time = int(response.headers['Retry-After'])
        except (AttributeError, KeyError, TypeError, ValueError):
            pass
        if __debug__: log(f'sleeping for {sleep_time} s and retrying')
        wait(sleep_time)

    if not error:
        return response

    if isinstance(error, ServiceFailure):
        # If it was an error generated by the Microsoft service, there may
        # be additional details in the response body.  Only the attempt to
        # read the body is guarded; the more detailed exception is raised
        # outside the 'try' so that it cannot be swallowed.
        try:
            details = response.json().get('error', None)
        except (AttributeError, ValueError):
            details = None
        if isinstance(details, dict) and 'code' in details:
            code = details['code']
            message = details.get('message', '')
            raise ServiceFailure(
                f'Microsoft returned error code {code} -- {message}') from error
    raise error
def result(self, path, result = None):
    '''Return the result for the file at 'path' as a TRResult named tuple.

    If 'result' is not given (or is empty), it is obtained by calling
    self._result_from_api(path).  If the value in hand is a tuple, it is
    returned unchanged; the API helper reports errors that way.  Otherwise
    the value is treated as the dictionary of data returned by Microsoft,
    and the text and bounding boxes are extracted from its first page.

    The returned TRResult has 'data' set to the dictionary, 'text' set to
    the line texts joined by newlines (ordered top-to-bottom and then
    left-to-right by the first point of each line's bounding box), and
    'boxes' holding one Box per line followed by one Box per word in it.
    '''
    if not result:
        result = self._result_from_api(path)
    if isinstance(result, tuple):
        return result

    analysis = result['analyzeResult'] if 'analyzeResult' in result else {}
    pages = analysis.get('readResults') or []
    # We only return the 1st page.  FIXME: should check if > 1.
    # An empty page list (or a page without lines) yields an empty result
    # instead of an IndexError/KeyError.
    lines = (pages[0].get('lines') or []) if pages else []

    sorted_lines = sorted(lines, key = lambda x: (x['boundingBox'][1],
                                                  x['boundingBox'][0]))
    full_text = '\n'.join(x['text'] for x in sorted_lines)

    # Create our particular box structure for annotations.  Each item in
    # 'lines' is a dict with keys 'words', 'boundingBox', and 'text'.
    boxes = []
    for line in lines:
        # Microsoft doesn't put confidence scores on the lines.
        boxes.append(Box(kind = 'line', bb = line['boundingBox'],
                         text = '', score = 1.0))
        for word in line['words']:
            boxes.append(Box(kind = 'word', bb = word['boundingBox'],
                             text = word['text'], score = word['confidence']))

    # Put it all together.
    return TRResult(path = path, data = result, text = full_text,
                    boxes = boxes, error = None)
def result(self, path):
    '''Return the results of calling Google's service on the file 'path'.

    The image is read with self._image_from_file(); if that reports an
    error, the error value is returned as-is.  Otherwise every API method
    named in self._known_features is called on the image, and the text and
    word bounding boxes are taken from the 'document_text_detection' result.

    Returns a TRResult named tuple.  Its 'error' field is None on success
    and a message string if the service call or the interpretation of the
    response failed.  Raises AuthFailure if Google denies permission, and
    KeyboardInterrupt if the call is interrupted.
    '''
    # Read the image and proceed with contacting the service.
    (image, error) = self._image_from_file(path)
    if error:
        return error

    # Only the calls to Google are inside this 'try'.  The KeyError clause
    # below is meant for interrupted API calls, and must not be triggered by
    # a missing key while reading the response further down.
    try:
        if __debug__: log('building Google vision API object')
        client = gv.ImageAnnotatorClient()
        image = gv.types.Image(content=image)
        context = gv.types.ImageContext(
            language_hints=['en-t-i0-handwrit'])

        # Iterate over the known API calls and store each result.
        result = dict.fromkeys(self._known_features)
        for feature in self._known_features:
            if __debug__: log('sending image to Google for {} ...', feature)
            response = getattr(client, feature)(image=image,
                                                image_context=context)
            if __debug__: log('received result.')
            result[feature] = MessageToDict(response)
    except google.api_core.exceptions.PermissionDenied as ex:
        text = 'Authentication failure for Google service -- {}'.format(ex)
        raise AuthFailure(text)
    except KeyboardInterrupt:
        raise
    except KeyError:
        # Can happen if you control-C in the middle of the Google call.
        # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
        # printed to the terminal and we end up here.
        raise KeyboardInterrupt
    except Exception as ex:
        text = 'Error: {} -- {}'.format(str(ex), path)
        return TRResult(path=path, data={}, boxes=[], text='', error=text)

    # Extract text and bounding boxes into our format.
    # Their structure looks like this:
    #
    # result['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0].keys()
    # --> dict_keys(['boundingBox', 'confidence', 'paragraphs', 'blockType'])
    #
    # result['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0]['paragraphs'][0].keys()
    # --> dict_keys(['boundingBox', 'words', 'confidence'])
    #
    # https://cloud.google.com/vision/docs/reference/rest/v1/images/annotate#Block
    full_text = ''
    boxes = []
    try:
        detection = result.get('document_text_detection') or {}
        if 'fullTextAnnotation' in detection:
            fta = detection['fullTextAnnotation']
            full_text = fta['text']
            for block in fta['pages'][0]['blocks']:
                for para in block['paragraphs']:
                    for word in para['words']:
                        text = ''.join(symbol['text']
                                       for symbol in word['symbols'])
                        bb = word['boundingBox']['vertices']
                        corners = corner_list(bb)
                        if corners:
                            boxes.append(
                                TextBox(boundingBox=corners, text=text))
                        else:
                            # Something is wrong with the vertex list.
                            # Skip it and continue.
                            if __debug__: log('bad bb for {}: {}', text, bb)
    except (KeyError, IndexError, TypeError) as ex:
        # The response did not have the expected structure.  Report it as
        # an error for this file, in the same format as other errors.
        text = 'Error: {} -- {}'.format(str(ex), path)
        return TRResult(path=path, data={}, boxes=[], text='', error=text)

    return TRResult(path=path, data=result, boxes=boxes,
                    text=full_text, error=None)
def result(self, path):
    '''Return all the results of calling Microsoft's service on 'path'.

    Results are remembered in self._results, keyed by 'path'; a second call
    for the same path returns the stored value without contacting the
    service.  If self._image_from_file() reports an error, that error value
    is returned as-is.

    Returns a TRResult named tuple.  Network failures are reported through
    the 'error' field of the returned tuple.  When the rate limit is
    exceeded, this sleeps and tries again.  Other errors reported by net()
    are raised, and ServiceFailure is raised if the service's reply to the
    submission lacks an 'Operation-Location' header.
    '''
    # Check if we already processed it.
    if path in self._results:
        if __debug__: log('returning already-known result for {}', path)
        return self._results[path]

    # Read the image and proceed with contacting the service.
    (image, error) = self._image_from_file(path)
    if error:
        return error

    base_url = 'https://westus.api.cognitive.microsoft.com/vision/v2.0/'
    url = base_url + 'recognizeText'
    params = {'mode': 'Handwritten'}
    headers = {'Ocp-Apim-Subscription-Key': self._credentials,
               'Content-Type': 'application/octet-stream'}

    # The Microsoft API for extracting text requires two phases: one call
    # to submit the image for processing, then polling to wait until the
    # text is ready to be retrieved.
    if __debug__: log('sending file to MS cloud service')
    response, error = net('post', url, headers = headers, params = params,
                          data = image)
    if isinstance(error, NetworkFailure):
        if __debug__: log('network exception: {}', str(error))
        return TRResult(path = path, data = {}, text = '', boxes = [],
                        error = str(error))
    elif isinstance(error, RateLimitExceeded):
        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
        # The headers should have a Retry-After number in seconds.
        sleep_time = 30
        if 'Retry-After' in response.headers:
            sleep_time = int(response.headers['Retry-After'])
        if __debug__: log('sleeping for {} s and retrying', sleep_time)
        sleep(sleep_time)
        return self.result(path)        # Recursive invocation
    elif error:
        raise error

    if 'Operation-Location' in response.headers:
        polling_url = response.headers['Operation-Location']
    else:
        if __debug__: log('no operation-location in response headers')
        raise ServiceFailure('Unexpected response from Microsoft server')

    if __debug__: log('polling MS for results ...')
    analysis = {}
    poll = True
    while poll:
        # I never have seen results returned in 1 second, and meanwhile
        # the repeated polling counts against your rate limit.  So, wait
        # for 2 s to reduce the number of calls.
        sleep(2)
        response, error = net('get', polling_url, polling = True,
                              headers = headers)
        if isinstance(error, NetworkFailure):
            if __debug__: log('network exception: {}', str(error))
            return TRResult(path = path, data = {}, text = '', boxes = [],
                            error = str(error))
        elif isinstance(error, RateLimitExceeded):
            # Pause to let the server reset its timers.  It seems that MS
            # doesn't send back a Retry-After header when rated limited
            # during polling, but I'm going to check it anyway, in case.
            sleep_time = 30
            if 'Retry-After' in response.headers:
                sleep_time = int(response.headers['Retry-After'])
            if __debug__: log('sleeping for {} s and retrying', sleep_time)
            sleep(sleep_time)
            # This response is the rate-limit notice, not an analysis, so
            # don't try to interpret its body; just poll again.
            continue
        elif error:
            raise error

        # Sometimes the response comes back without content.  I don't know
        # if that's a bug in the Azure system or not.  It's not clear what
        # else should be done except keep going.
        if response.text:
            analysis = response.json()
            if 'recognitionResult' in analysis:
                poll = False
            if 'status' in analysis and analysis['status'] == 'Failed':
                poll = False
        else:
            if __debug__: log('received empty result from Microsoft.')
    if __debug__: log('results received.')

    # Have to extract the text into a single string.  'lines' starts out
    # empty so that an analysis without a 'recognitionResult' (e.g., one
    # whose status is 'Failed') produces an empty result, not a NameError.
    lines = []
    full_text = ''
    if 'recognitionResult' in analysis:
        lines = analysis['recognitionResult']['lines']
        sorted_lines = sorted(lines, key = lambda x: (x['boundingBox'][1],
                                                      x['boundingBox'][0]))
        full_text = '\n'.join(x['text'] for x in sorted_lines)

    # Create our particular box structure for annotations.  The Microsoft
    # structure is like this: data['recognitionResult']['lines'] contains
    # a list of dict with keys 'words', 'boundingBox', and 'text'.
    boxes = [TextBox(boundingBox = chunk['boundingBox'], text = chunk['text'])
             for chunk in lines]

    # Put it all together.
    self._results[path] = TRResult(path = path, data = analysis,
                                   text = full_text, boxes = boxes,
                                   error = None)
    return self._results[path]
def _result_from_api(self, path):
    '''Send the file at 'path' to Microsoft and return the analysis data.

    The image is read with self._image_from_file(); if that reports an
    error, the error value is returned as-is.  The image is then posted to
    the service's read/analyze URL (built from the 'endpoint' and
    'subscription_key' entries of self._credentials), and the URL given in
    the 'Operation-Location' response header is polled until the reported
    status is no longer 'notStarted' or 'running'.

    Returns the dictionary of JSON data from the final poll if the status
    is 'succeeded'.  In other cases (a failure reported by self._api(), a
    'failed' or unexpected status, or a reply that can't be interpreted),
    returns a TRResult with the 'error' field set.  Raises ServiceFailure if
    the reply to the submission has no 'Operation-Location' header.
    '''
    def labeled(failure):
        # A tuple from _api() is an error result.  Make sure it names the
        # file we are working on, if it is a named tuple that allows it.
        if hasattr(failure, '_replace'):
            return failure._replace(path = path)
        return failure

    # Read the image and proceed with contacting the service.
    (image, error) = self._image_from_file(path)
    if error:
        return error

    endpoint = self._credentials['endpoint']
    key = self._credentials['subscription_key']
    url = f'{endpoint}/vision/v3.2/read/analyze'
    headers = {'Ocp-Apim-Subscription-Key': key,
               'Content-Type': 'application/octet-stream'}

    # The Microsoft API requires 2 phases: first submit the image for
    # processing, then wait & poll until the text is ready to be retrieved.
    if __debug__: log(f'contacting Microsoft for {relative(path)}')
    response = self._api('post', url, headers, image)
    if isinstance(response, tuple):
        return labeled(response)    # If get back a tuple, it's an error.

    if 'Operation-Location' in response.headers:
        poll_url = response.headers['Operation-Location']
    else:
        if __debug__: log('no operation-location in response headers')
        raise ServiceFailure('Unexpected response from Microsoft server')

    if __debug__: log('polling MS for results ...')
    bad_format = 'Error: Microsoft results not in expected format'
    analysis = {}
    while True:
        raise_for_interrupts()
        # Have never seen results returned in 1 s, and meanwhile, polling
        # still counts against our rate limit.  Wait 2 s to reduce calls.
        wait(2)
        response = self._api('get', poll_url, headers = headers,
                             polling = True)
        if isinstance(response, tuple):
            return labeled(response)    # If get back a tuple, it's an error.

        # Sometimes the response has no content.  I don't know why.
        # It's not clear what else can be done except to keep trying.
        if not response.text:
            if __debug__: log('received empty result from Microsoft.')
            continue

        try:
            analysis = response.json()
        except ValueError:
            # The body is not JSON at all; report it like other replies
            # that we cannot interpret, rather than raising from here.
            if __debug__: log('Microsoft response is not valid JSON')
            analysis = None
        if not isinstance(analysis, dict) or 'status' not in analysis:
            # No status key in JSON results means something's wrong.
            return TRResult(path = path, data = {}, text = '', boxes = [],
                            error = bad_format)

        status = analysis['status']
        if status in ('notStarted', 'running'):
            if __debug__: log('Microsoft still processing image')
            continue
        if status == 'succeeded':
            if __debug__: log('Microsoft returned success code')
            break
        if status == 'failed':
            text = 'Microsoft analysis failed'
        else:
            text = 'Error: Microsoft returned unexpected result'
        return TRResult(path = path, data = {}, text = '', boxes = [],
                        error = text)

    if __debug__: log(f'results received from Microsoft for {relative(path)}')
    return analysis
def amazon_result(self, file_path, variant, method, image_keyword,
                  result_key, value_key, block_key, result):
    '''Return the result for 'file_path' as a TRResult named tuple.

    If 'result' is empty, the Amazon client named by 'variant' is created
    from self._credentials and its API function 'method' is called with
    the image bytes passed under the keyword 'image_keyword'.  Otherwise
    'result' is used as the service's response and no call is made.

    The list of blocks is read from result[result_key].  A block's kind is
    the lower-cased value of block[value_key]; blocks of kind 'word' and
    'line' produce Box objects whose text is block[block_key], and the
    text of each 'line' block is added to the full text on its own line.

    Raises AuthFailure for credentials problems, CorruptedContent if Amazon
    reports that the document is unsupported or bad, and ServiceFailure for
    other errors coming from the service call.
    '''
    # Delay loading the API packages until needed because they take time to
    # load.  Doing this speeds up overall application start time.
    import boto3
    import botocore

    if not result:
        # If any exceptions occur, let them be passed to caller.
        (image, error) = self._image_from_file(file_path)
        if error:
            return TRResult(path=file_path, data={}, boxes=[],
                            text='', error=error)
        try:
            if __debug__: log(f'setting up Amazon client function "{variant}"')
            creds = self._credentials
            session = boto3.session.Session()
            client = session.client(
                variant, region_name=creds['region_name'],
                aws_access_key_id=creds['aws_access_key_id'],
                aws_secret_access_key=creds['aws_secret_access_key'])
            if __debug__: log('calling Amazon API function')
            result = getattr(client, method)(**{image_keyword: {'Bytes': image}})
            if __debug__: log(f'received {len(result[result_key])} blocks')
        except botocore.exceptions.EndpointConnectionError as ex:
            raise AuthFailure(
                f'Problem with credentials file -- {str(ex)}') from ex
        except KeyboardInterrupt:
            raise
        except KeyError as ex:
            missing = ",".join(str(arg) for arg in ex.args)
            msg = f'Amazon credentials file is missing {missing}'
            raise AuthFailure(msg) from ex
        except Exception as ex:
            # These are set before looking for details, so that the
            # fallback message below can always be built.
            path = relative(file_path)
            details = str(ex)
            response = getattr(ex, 'response', None)
            if response and 'Error' in response:
                info = response['Error']
                code = info.get('Code', '')
                details = info.get('Message', details)
                if code in ['UnsupportedDocumentException',
                            'BadDocumentException']:
                    msg = f'Amazon {variant} reports bad or corrupted image in {path}'
                    raise CorruptedContent(msg) from ex
                elif code in ['InvalidSignatureException',
                              'UnrecognizedClientException']:
                    raise AuthFailure(
                        f'Problem with credentials file -- {details}') from ex
            # Fallback if we can't get details.
            if __debug__: log(f'Amazon returned exception {str(ex)}')
            msg = f'Amazon {variant} failure for {path} -- {details}'
            raise ServiceFailure(msg) from ex

    raise_for_interrupts()
    text_lines = []
    boxes = []
    width, height = imagesize.get(file_path)
    if __debug__: log(f'parsing Amazon result for {relative(file_path)}')
    for block in result[result_key]:
        if value_key not in block:
            continue
        kind = block[value_key].lower()
        if kind not in ('word', 'line'):
            continue
        text = block[block_key]
        polygon = block['Geometry']['Polygon']
        corners = corner_list(polygon, width, height)
        if corners:
            boxes.append(Box(kind=kind, bb=corners, text=text,
                             score=block['Confidence'] / 100))
        else:
            # Something's wrong with the vertex list.  Skip & continue.
            if __debug__: log(f'bad bb for {text}: {polygon}')
        if kind == 'line':
            if 'Text' in block:
                text_lines.append(block['Text'])
            elif 'DetectedText' in block:
                text_lines.append(block['DetectedText'])

    # Every line of text, including the last, is followed by a newline.
    full_text = ''.join(line + '\n' for line in text_lines)
    return TRResult(path=file_path, data=result, boxes=boxes,
                    text=full_text, error=None)
def amazon_result(self, file_path, variant, api_method, image_keyword,
                  response_key, value_key, block_key):
    '''Return the results for 'file_path' as a TRResult named tuple.

    Results are remembered in self._results, keyed by 'file_path'; a second
    call for the same file returns the stored value without contacting the
    service.  Otherwise the Amazon client named by 'variant' is created
    from self._credentials and its API function 'api_method' is called with
    the image bytes passed under the keyword 'image_keyword'.

    The list of blocks is read from response[response_key].  Each block
    whose block[value_key] equals "WORD" contributes block[block_key] to
    the text (every word is followed by one space) and, if its polygon can
    be converted by corner_list(), a TextBox.

    Errors other than KeyboardInterrupt are not raised; they are reported
    through the 'error' field of the returned TRResult.
    '''
    # Check if we already processed it.
    if file_path in self._results:
        if __debug__: log('returning already-known result for {}', file_path)
        return self._results[file_path]

    # Read the image and proceed with contacting the service.
    # If any exceptions occur, let them be passed to caller.
    (image, error) = self._image_from_file(file_path)
    if error:
        return TRResult(path=file_path, data={}, boxes=[],
                        text='', error=error)

    if __debug__: log('setting up Amazon client function "{}"', variant)
    creds = self._credentials
    try:
        client = boto3.client(
            variant, region_name=creds['region_name'],
            aws_access_key_id=creds['aws_access_key_id'],
            aws_secret_access_key=creds['aws_secret_access_key'])
        if __debug__: log('calling Amazon API function')
        response = getattr(client, api_method)(**{image_keyword: {'Bytes': image}})
        if __debug__: log('received {} blocks', len(response[response_key]))

        words = []
        boxes = []
        width, height = imagesize.get(file_path)
        for block in response[response_key]:
            if value_key not in block or block[value_key] != "WORD":
                continue
            text = block[block_key]
            words.append(text)
            polygon = block['Geometry']['Polygon']
            corners = corner_list(polygon, width, height)
            if corners:
                boxes.append(TextBox(boundingBox=corners, text=text))
            else:
                # Something's wrong with the vertex list.  Skip & continue.
                if __debug__: log('bad bb for {}: {}', text, polygon)

        # Each word, including the last, is followed by a single space.
        full_text = ''.join(word + ' ' for word in words)
        result = TRResult(path=file_path, data=response, boxes=boxes,
                          text=full_text, error=None)
        self._results[file_path] = result
        return result
    except KeyboardInterrupt:
        raise
    except Exception as ex:
        text = 'Error: {} -- {}'.format(str(ex), file_path)
        return TRResult(path=file_path, data={}, boxes=[], text='', error=text)
def result(self, path, result=None):
    '''Return the result for the file at 'path' as a TRResult named tuple.

    If 'result' is empty, the image is read with self._image_from_file()
    (whose error value, if any, is returned as-is) and sent to Google's
    document text detection; the response is converted to a dictionary by
    dict_from_response().  Otherwise 'result' is used as that dictionary
    and no call is made.

    The returned TRResult holds the dictionary in 'data', the text of the
    full text annotation in 'text', and in 'boxes' one Box of kind 'para'
    per paragraph plus one Box of kind 'word' per word, all taken from the
    first page.  If there are no pages, 'boxes' is empty.

    Raises AuthFailure for permission or credentials problems and
    ServiceFailure if the service is unavailable.  Other errors during the
    call to Google are reported via the 'error' field of the TRResult.
    '''
    # Delay loading the API packages until needed because they take time to
    # load.  Doing this speeds up overall application start time.
    from google.cloud import vision_v1 as gv
    from google.api_core.exceptions import PermissionDenied, ServiceUnavailable
    from google.auth.exceptions import DefaultCredentialsError

    if not result:
        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(path)
        if error:
            return error

        if __debug__: log(f'building Google API object for {relative(path)}')
        try:
            client = gv.ImageAnnotatorClient()
            params = gv.TextDetectionParams(
                mapping={'enable_text_detection_confidence_score': True})
            context = gv.ImageContext(language_hints=['en-t-i0-handwrit'],
                                      text_detection_params=params)
            img = gv.Image(content=image)
            if __debug__: log(f'sending image to Google for {relative(path)} ...')
            response = client.document_text_detection(image=img,
                                                      image_context=context)
            if __debug__: log(f'received result from Google for {relative(path)}')
            result = dict_from_response(response)
        except PermissionDenied as ex:
            text = 'Authentication failure for Google service -- {}'.format(ex)
            raise AuthFailure(text)
        except DefaultCredentialsError as ex:
            text = 'Credentials file error for Google service -- {}'.format(ex)
            raise AuthFailure(text)
        except ServiceUnavailable as ex:
            text = 'Network, service, or Google configuration error -- {}'.format(ex)
            raise ServiceFailure(text)
        except KeyboardInterrupt:
            raise
        except KeyError:
            # Can happen if you control-C in the middle of the Google call.
            # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
            # printed to the terminal and we end up here.
            raise KeyboardInterrupt
        except Exception as ex:
            text = 'Error: {} -- {}'.format(str(ex), path)
            return TRResult(path=path, data={}, boxes=[], text='', error=text)

    raise_for_interrupts()
    boxes = []
    # See this page for more information about the structure:
    # https://cloud.google.com/vision/docs/handwriting#python
    annotation = result.get('full_text_annotation') or {}
    pages = annotation.get('pages') or []
    if len(pages) > 1:
        warn('More than one page received from Google; using only first.')
    # With no pages at all there is nothing to index, so there are no boxes.
    blocks = pages[0]['blocks'] if pages else []
    for block in blocks:
        for para in block['paragraphs']:
            corners = corner_list(para['bounding_box']['vertices'])
            boxes.append(Box(bb=corners, kind='para', text='',
                             score=para['confidence']))
            for word in para['words']:
                text = ''.join(symbol['text'] for symbol in word['symbols'])
                vertices = word['bounding_box']['vertices']
                corners = corner_list(vertices)
                if corners:
                    # NOTE(review): the word's score is the confidence of
                    # its paragraph, as in the original -- confirm whether
                    # word['confidence'] was intended.
                    boxes.append(Box(bb=corners, kind='word', text=text,
                                     score=para['confidence']))
                else:
                    # Something is wrong with the vertex list.
                    # Skip it and continue.
                    if __debug__: log(f'bad bb for {text}: {vertices}')
    full_text = annotation.get('text', '')
    return TRResult(path=path, data=result, boxes=boxes,
                    text=full_text, error=None)