Example #1
 def _api(self, get_or_post, url, headers, data = None, polling = False,
          path = None):
     from handprint.network import net
     response, error = net(get_or_post, url, headers = headers,
                           data = data, polling = polling)
     if isinstance(error, NetworkFailure):
         if __debug__: log(f'network exception: {str(error)}')
         return TRResult(path = path, data = {}, text = '', boxes = [],
                         error = str(error))
     elif isinstance(error, RateLimitExceeded):
         # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
         # The headers have a Retry-After number in seconds in some cases
         # but not others, so we default to something just in case.
         sleep_time = 20
         if 'Retry-After' in response.headers:
             sleep_time = int(response.headers['Retry-After'])
         if __debug__: log(f'sleeping for {sleep_time} s and retrying')
         wait(sleep_time)
         return self._api(get_or_post, url, headers, data, polling, path) # Recurse
     elif error:
         if isinstance(error, ServiceFailure):
             # If it was an error generated by the Microsoft service, there
             # will be additional details in the response.  Check for it.
             try:
                 json_response = response.json()
                 details = json_response.get('error') if json_response else None
                 if details and 'code' in details:
                     # Replace the generic failure with one that carries the
                     # error code and message reported by the service.
                     error = ServiceFailure('Microsoft returned error code '
                                            + details['code'] + ' -- '
                                            + details.get('message', ''))
             except ValueError:
                 # The response body was not valid JSON; fall through and
                 # raise the original error unchanged.
                 pass
         raise error
     else:
         return response
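Every snippet on this page builds and returns a TRResult value, but its definition is not shown. A minimal sketch consistent with the keyword arguments used above (an assumption about the container, not the project's actual declaration):

from collections import namedtuple

# Assumed result container; the field names mirror the keyword arguments
# used throughout these snippets.
TRResult = namedtuple('TRResult', 'path data text boxes error')

# Hypothetical example of the error value _api() returns on a network failure:
#   TRResult(path = 'page-1.jpg', data = {}, text = '', boxes = [],
#            error = 'connection timed out')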
Example #2
    def result(self, path, result = None):
        '''Returns the result from calling the service on the 'path'.
        The result is returned as a TRResult named tuple.
        '''
        if not result:
            result = self._result_from_api(path)
            if isinstance(result, tuple):
                return result

        lines = []
        full_text = ''
        if 'analyzeResult' in result:
            analysis = result['analyzeResult']
            if 'readResults' in analysis:
                # We only return the 1st page.  FIXME: should check if > 1.
                lines = analysis['readResults'][0]['lines']
                sorted_lines = sorted(lines, key = lambda x: (x['boundingBox'][1],
                                                              x['boundingBox'][0]))
                full_text = '\n'.join(x['text'] for x in sorted_lines)

        # Create our particular box structure for annotations.  The Microsoft
        # structure is like this: result['analyzeResult']['readResults'] holds
        # a list of pages, each with a 'lines' list of dicts whose keys
        # include 'boundingBox', 'text', and 'words'.

        boxes = []
        for line in lines:
            # Microsoft doesn't put confidence scores on the lines.
            boxes.append(Box(kind = 'line', bb = line['boundingBox'], text = '',
                             score = 1.0))
            for word in line['words']:
                boxes.append(Box(kind = 'word', bb = word['boundingBox'],
                                 text = word['text'], score = word['confidence']))

        # Put it all together.
        return TRResult(path = path, data = result, text = full_text,
                        boxes = boxes, error = None)
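The Box records built above are another container whose definition is not included here. A minimal sketch inferred from the Box(kind = ..., bb = ..., text = ..., score = ...) calls (again an assumption, not the project's real declaration):

from collections import namedtuple

# Assumed annotation container: kind is 'line' or 'word', bb is the bounding
# box, text is the recognized string, and score is a confidence in [0, 1].
Box = namedtuple('Box', 'kind bb text score')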
Example #3
    def result(self, path):
        '''Returns the results from calling the service on the 'path'.  The
        results are returned as a TRResult named tuple.
        '''
        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(path)
        if error:
            return error

        try:
            if __debug__: log('building Google vision API object')
            client = gv.ImageAnnotatorClient()
            image = gv.types.Image(content=image)
            context = gv.types.ImageContext(
                language_hints=['en-t-i0-handwrit'])

            # Iterate over the known API calls and store each result.
            result = dict.fromkeys(self._known_features)
            for feature in self._known_features:
                if __debug__:
                    log('sending image to Google for {} ...', feature)
                response = getattr(client, feature)(image=image,
                                                    image_context=context)
                if __debug__: log('received result.')
                result[feature] = MessageToDict(response)

            # Extract text and bounding boxes into our format.
            # Their structure looks like this:
            #
            # result['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0].keys()
            #   --> dict_keys(['boundingBox', 'confidence', 'paragraphs', 'blockType'])
            #
            # result['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0]['paragraphs'][0].keys()
            #   --> dict_keys(['boundingBox', 'words', 'confidence'])
            #
            # https://cloud.google.com/vision/docs/reference/rest/v1/images/annotate#Block

            full_text = ''
            boxes = []
            if 'fullTextAnnotation' in result['document_text_detection']:
                fta = result['document_text_detection']['fullTextAnnotation']
                full_text = fta['text']
                for block in fta['pages'][0]['blocks']:
                    for para in block['paragraphs']:
                        for word in para['words']:
                            text = ''
                            for symbol in word['symbols']:
                                text += symbol['text']
                            bb = word['boundingBox']['vertices']
                            corners = corner_list(bb)
                            if corners:
                                boxes.append(
                                    TextBox(boundingBox=corners, text=text))
                            else:
                                # Something is wrong with the vertex list.
                                # Skip it and continue.
                                if __debug__:
                                    log('bad bb for {}: {}', text, bb)

            return TRResult(path=path,
                            data=result,
                            boxes=boxes,
                            text=full_text,
                            error=None)
        except google.api_core.exceptions.PermissionDenied as ex:
            text = 'Authentication failure for Google service -- {}'.format(ex)
            raise AuthFailure(text)
        except KeyboardInterrupt as ex:
            raise
        except Exception as ex:
            if isinstance(ex, KeyError):
                # Can happen if you control-C in the middle of the Google call.
                # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
                # printed to the terminal and we end up here.
                raise KeyboardInterrupt
            else:
                text = 'Error: {} -- {}'.format(str(ex), path)
                return TRResult(path=path,
                                data={},
                                boxes=[],
                                text='',
                                error=text)
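This snippet relies on two helpers whose definitions are not included: TextBox and corner_list(). The following is a plausible sketch only, assuming Google-style vertices (dicts with 'x' and 'y' keys, where zero-valued coordinates may be omitted); it is not handprint's actual implementation:

from collections import namedtuple

# Assumed container for annotated text regions.
TextBox = namedtuple('TextBox', 'boundingBox text')

def corner_list(vertices):
    # Hypothetical helper: return the four (x, y) corner pairs of a bounding
    # polygon, or None when the vertex list is malformed (which is what the
    # 'bad bb' branch above guards against).
    if not vertices or len(vertices) != 4:
        return None
    if not all('x' in v and 'y' in v for v in vertices):
        return None
    return [(v['x'], v['y']) for v in vertices]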
Example #4
    def result(self, path):
        '''Returns all the results from calling the service on the 'path'. The
        results are returned as a TRResult named tuple.
        '''
        # Check if we already processed it.
        if path in self._results:
            if __debug__: log('returning already-known result for {}', path)
            return self._results[path]

        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(path)
        if error:
            return error

        base_url = 'https://westus.api.cognitive.microsoft.com/vision/v2.0/'
        url = base_url + 'recognizeText'
        params  = {'mode': 'Handwritten'}
        headers = {'Ocp-Apim-Subscription-Key': self._credentials,
                   'Content-Type': 'application/octet-stream'}

        # The Microsoft API for extracting text requires two phases: one call
        # to submit the image for processing, then polling to wait until the
        # text is ready to be retrieved.

        if __debug__: log('sending file to MS cloud service')
        response, error = net('post', url, headers = headers, params = params, data = image)
        if isinstance(error, NetworkFailure):
            if __debug__: log('network exception: {}', str(error))
            return TRResult(path = path, data = {}, text = '', boxes = [],
                            error = str(error))
        elif isinstance(error, RateLimitExceeded):
            # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
            # The headers should have a Retry-After number in seconds.
            sleep_time = 30
            if 'Retry-After' in response.headers:
                sleep_time = int(response.headers['Retry-After'])
            if __debug__: log('sleeping for {} s and retrying', sleep_time)
            sleep(sleep_time)
            return self.result(path)    # Recursive invocation
        elif error:
            raise error

        if 'Operation-Location' in response.headers:
            polling_url = response.headers['Operation-Location']
        else:
            if __debug__: log('no operation-location in response headers')
            raise ServiceFailure('Unexpected response from Microsoft server')

        if __debug__: log('polling MS for results ...')
        analysis = {}
        poll = True
        while poll:
            # I have never seen results returned in 1 second, and meanwhile
            # the repeated polling counts against your rate limit.  So, wait
            # for 2 s to reduce the number of calls.
            sleep(2)
            response, error = net('get', polling_url, polling = True, headers = headers)
            if isinstance(error, NetworkFailure):
                if __debug__: log('network exception: {}', str(error))
                return TRResult(path = path, data = {}, text = '', boxes = [],
                                error = str(error))
            elif isinstance(error, RateLimitExceeded):
                # Pause to let the server reset its timers.  It seems that MS
                # doesn't send back a Retry-After header when rate limited
                # during polling, but check it anyway, just in case.
                sleep_time = 30
                if 'Retry-After' in response.headers:
                    sleep_time = int(response.headers['Retry-After'])
                if __debug__: log('sleeping for {} s and retrying', sleep_time)
                sleep(sleep_time)
            elif error:
                raise error

            # Sometimes the response comes back without content.  I don't know
            # if that's a bug in the Azure system or not.  It's not clear what
            # else should be done except keep going.
            if response.text:
                analysis = response.json()
                if 'recognitionResult' in analysis:
                    poll = False
                if 'status' in analysis and analysis['status'] == 'Failed':
                    poll = False
            else:
                if __debug__: log('received empty result from Microsoft.')
        if __debug__: log('results received.')

        # Have to extract the text into a single string.
        full_text = ''
        lines = []
        if 'recognitionResult' in analysis:
            lines = analysis['recognitionResult']['lines']
            sorted_lines = sorted(lines, key = lambda x: (x['boundingBox'][1], x['boundingBox'][0]))
            full_text = '\n'.join(x['text'] for x in sorted_lines)

        # Create our particular box structure for annotations.  The Microsoft
        # structure is like this: data['recognitionResult']['lines'] contains
        # a list of dict with keys 'words', 'boundingBox', and 'text'.

        boxes = []
        for chunk in lines:
            boxes.append(TextBox(boundingBox = chunk['boundingBox'], text = chunk['text']))

        # Put it all together.
        self._results[path] = TRResult(path = path, data = analysis,
                                       text = full_text, boxes = boxes,
                                       error = None)
        return self._results[path]
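The net() function used here is a wrapper that returns a (response, error) pair instead of raising. A rough stand-in with that contract, useful for reading these snippets in isolation (the status-code mapping and the exception classes below are assumptions for illustration, not handprint's real implementation):

import requests

class NetworkFailure(Exception): pass        # stand-ins for handprint's
class RateLimitExceeded(Exception): pass     # exception classes used above
class ServiceFailure(Exception): pass

def net(get_or_post, url, headers = None, params = None, data = None,
        polling = False):
    # Hypothetical version of handprint.network.net: run the request and map
    # failures onto the (response, error) convention the callers rely on.
    # The 'polling' flag is accepted for signature compatibility only.
    try:
        response = requests.request(get_or_post, url, headers = headers,
                                    params = params, data = data, timeout = 30)
    except requests.exceptions.RequestException as ex:
        return None, NetworkFailure(str(ex))
    if response.status_code == 429:
        return response, RateLimitExceeded('Server replied with code 429')
    if response.status_code >= 400:
        return response, ServiceFailure(f'Server replied with code {response.status_code}')
    return response, None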
Example #5
    def _result_from_api(self, path):
        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(path)
        if error:
            return error

        endpoint = self._credentials['endpoint']
        key = self._credentials['subscription_key']
        url = f'{endpoint}/vision/v3.2/read/analyze'
        headers = {'Ocp-Apim-Subscription-Key': key,
                   'Content-Type': 'application/octet-stream'}

        # The Microsoft API requires 2 phases: first submit the image for
        # processing, then wait & poll until the text is ready to be retrieved.

        if __debug__: log(f'contacting Microsoft for {relative(path)}')
        response = self._api('post', url, headers, image, path = path)
        if isinstance(response, tuple):
            return response             # If we get back a tuple, it's an error.

        if 'Operation-Location' in response.headers:
            poll_url = response.headers['Operation-Location']
        else:
            if __debug__: log('no operation-location in response headers')
            raise ServiceFailure('Unexpected response from Microsoft server')
        if __debug__: log('polling MS for results ...')
        analysis = {}
        poll = True
        while poll:
            raise_for_interrupts()
            # Have never seen results returned in 1 s, and meanwhile, polling
            # still counts against our rate limit.  Wait 2 s to reduce calls.
            wait(2)
            response = self._api('get', poll_url, headers = headers,
                                 polling = True, path = path)
            if isinstance(response, tuple):
                return response         # If we get back a tuple, it's an error.

            # Sometimes the response has no content.  I don't know why.
            # It's not clear what else can be done except to keep trying.
            if not response.text:
                if __debug__: log('received empty result from Microsoft.')
                continue

            analysis = response.json()
            if 'status' in analysis:
                if analysis['status'] in ('notStarted', 'running'):
                    if __debug__: log('Microsoft still processing image')
                    poll = True
                elif analysis['status'] == 'succeeded':
                    if __debug__: log('Microsoft returned success code')
                    poll = False
                else:
                    if analysis['status'] == 'failed':
                        text = 'Microsoft analysis failed'
                    else:
                        text = 'Error: Microsoft returned unexpected result'
                    return TRResult(path = path, data = {}, text = '',
                                    boxes = [], error = text)
            else:
                # No status key in JSON results means something's wrong.
                text = 'Error: Microsoft results not in expected format'
                return TRResult(path = path, data = {}, text = '',
                                boxes = [], error = text)

        if __debug__: log(f'results received from Microsoft for {relative(path)}')
        return analysis
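Combining this with the result() method shown in example #2, a caller would do something like the following (the instance name and file path are illustrative only):

# Hypothetical usage; 'service' is an instance of the Microsoft service class
# that defines result() and _result_from_api().
res = service.result('page-1.jpg')
if res.error:
    print('recognition failed:', res.error)
else:
    print(res.text)
    print(len(res.boxes), 'text boxes')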
Example #6
File: amazon.py Project: ccarvel/handprint
    def amazon_result(self, file_path, variant, method, image_keyword,
                      result_key, value_key, block_key, result):
        '''Returns the result from calling the service on the 'file_path'.
        The result is returned as a TRResult named tuple.
        '''

        # Delay loading the API packages until needed because they take time to
        # load.  Doing this speeds up overall application start time.
        import boto3
        import botocore

        if not result:
            # If any exceptions occur, let them be passed to caller.
            (image, error) = self._image_from_file(file_path)
            if error:
                return TRResult(path=file_path,
                                data={},
                                boxes=[],
                                text='',
                                error=error)
            try:
                if __debug__:
                    log(f'setting up Amazon client function "{variant}"')
                creds = self._credentials
                session = boto3.session.Session()
                client = session.client(
                    variant,
                    region_name=creds['region_name'],
                    aws_access_key_id=creds['aws_access_key_id'],
                    aws_secret_access_key=creds['aws_secret_access_key'])
                if __debug__: log('calling Amazon API function')
                result = getattr(client, method)(**{
                    image_keyword: {
                        'Bytes': image
                    }
                })
                if __debug__: log(f'received {len(result[result_key])} blocks')
            except botocore.exceptions.EndpointConnectionError as ex:
                raise AuthFailure(
                    f'Problem with credentials file -- {str(ex)}')
            except KeyboardInterrupt as ex:
                raise
            except KeyError as ex:
                msg = f'Amazon credentials file is missing {",".join(ex.args)}'
                raise AuthFailure(msg)
            except Exception as ex:
                path = relative(file_path)
                if getattr(ex, 'response', False) and 'Error' in ex.response:
                    error = ex.response['Error']
                    code = error['Code']
                    text = error['Message']
                    if code in [
                            'UnsupportedDocumentException',
                            'BadDocumentException'
                    ]:
                        msg = f'Amazon {variant} reports bad or corrupted image in {path}'
                        raise CorruptedContent(msg)
                    elif code in [
                            'InvalidSignatureException',
                            'UnrecognizedClientException'
                    ]:
                        raise AuthFailure(
                            f'Problem with credentials file -- {text}')
                    msg = f'Amazon {variant} failure for {path} -- {text}'
                else:
                    # Fallback if we can't get details from the exception.
                    msg = f'Amazon {variant} failure for {path} -- {str(ex)}'
                if __debug__: log(f'Amazon returned exception {str(ex)}')
                raise ServiceFailure(msg)

        raise_for_interrupts()
        full_text = ''
        boxes = []
        width, height = imagesize.get(file_path)
        if __debug__: log(f'parsing Amazon result for {relative(file_path)}')
        for block in result[result_key]:
            if value_key not in block:
                continue
            kind = block[value_key].lower()
            if kind in ['word', 'line']:
                text = block[block_key]
                corners = corner_list(block['Geometry']['Polygon'], width,
                                      height)
                if corners:
                    boxes.append(
                        Box(kind=kind,
                            bb=corners,
                            text=text,
                            score=block['Confidence'] / 100))
                else:
                    # Something's wrong with the vertex list. Skip & continue.
                    if __debug__:
                        log(f'bad bb for {text}: {block["Geometry"]["Polygon"]}')
            if kind == "line":
                if 'Text' in block:
                    full_text += block['Text'] + '\n'
                elif 'DetectedText' in block:
                    full_text += block['DetectedText'] + '\n'
        return TRResult(path=file_path,
                        data=result,
                        boxes=boxes,
                        text=full_text,
                        error=None)
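Because amazon_result() is fully parameterized, the same code can drive more than one AWS endpoint. As an illustration only (the keyword values below are assumptions based on the documented boto3 response shapes for Textract's detect_document_text and Rekognition's detect_text, not values taken from this snippet):

# Hypothetical invocations from elsewhere in the same class.
textract_res = self.amazon_result(file_path, 'textract',
                                  method = 'detect_document_text',
                                  image_keyword = 'Document',
                                  result_key = 'Blocks',
                                  value_key = 'BlockType',
                                  block_key = 'Text',
                                  result = None)
rekognition_res = self.amazon_result(file_path, 'rekognition',
                                     method = 'detect_text',
                                     image_keyword = 'Image',
                                     result_key = 'TextDetections',
                                     value_key = 'Type',
                                     block_key = 'DetectedText',
                                     result = None)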
Example #7
File: amazon.py Project: sahwar/handprint
    def amazon_result(self, file_path, variant, api_method, image_keyword,
                      response_key, value_key, block_key):
        '''Returns the results from calling the service on the 'file_path'.
        The results are returned as a TRResult named tuple.
        '''
        # Check if we already processed it.
        if file_path in self._results:
            if __debug__:
                log('returning already-known result for {}', file_path)
            return self._results[file_path]

        # Read the image and proceed with contacting the service.
        # If any exceptions occur, let them be passed to caller.
        (image, error) = self._image_from_file(file_path)
        if error:
            return TRResult(path=file_path,
                            data={},
                            boxes=[],
                            text='',
                            error=error)

        if __debug__: log('setting up Amazon client function "{}"', variant)
        creds = self._credentials
        try:
            client = boto3.client(
                variant,
                region_name=creds['region_name'],
                aws_access_key_id=creds['aws_access_key_id'],
                aws_secret_access_key=creds['aws_secret_access_key'])
            if __debug__: log('calling Amazon API function')
            response = getattr(client, api_method)(**{
                image_keyword: {
                    'Bytes': image
                }
            })
            if __debug__:
                log('received {} blocks', len(response[response_key]))
            full_text = ''
            boxes = []
            width, height = imagesize.get(file_path)
            for block in response[response_key]:
                if value_key in block and block[value_key] == "WORD":
                    text = block[block_key]
                    full_text += (text + ' ')
                    corners = corner_list(block['Geometry']['Polygon'], width,
                                          height)
                    if corners:
                        boxes.append(TextBox(boundingBox=corners, text=text))
                    else:
                        # Something's wrong with the vertex list. Skip & continue.
                        if __debug__:
                            log('bad bb for {}: {}', text,
                                block['Geometry']['Polygon'])

            result = TRResult(path=file_path,
                              data=response,
                              boxes=boxes,
                              text=full_text,
                              error=None)
            self._results[file_path] = result
            return result
        except KeyboardInterrupt as ex:
            raise
        except Exception as ex:
            text = 'Error: {} -- {}'.format(str(ex), file_path)
            return TRResult(path=file_path,
                            data={},
                            boxes=[],
                            text='',
                            error=text)
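Both Amazon variants pass the image width and height to corner_list(), because AWS reports polygon points as fractions of the image dimensions. A plausible sketch of such a helper (an assumption, not handprint's actual implementation):

def corner_list(polygon, width, height):
    # Hypothetical helper: AWS polygon points are dicts with 'X' and 'Y'
    # values in the range 0-1, so scale them to pixel coordinates.  Return a
    # falsy value when the polygon is not the expected quadrilateral.
    if not polygon or len(polygon) != 4:
        return []
    return [(round(pt['X'] * width), round(pt['Y'] * height)) for pt in polygon]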
Example #8
    def result(self, path, result=None):
        '''Returns the result from calling the service on the 'path'.
        The result is returned as a TRResult named tuple.
        '''

        # Delay loading the API packages until needed because they take time to
        # load.  Doing this speeds up overall application start time.
        import google
        from google.cloud import vision_v1 as gv
        from google.api_core.exceptions import PermissionDenied
        from google.protobuf.json_format import MessageToDict

        if not result:
            # Read the image and proceed with contacting the service.
            (image, error) = self._image_from_file(path)
            if error:
                return error

            if __debug__:
                log(f'building Google API object for {relative(path)}')
            try:
                client = gv.ImageAnnotatorClient()
                params = gv.TextDetectionParams(
                    mapping={'enable_text_detection_confidence_score': True})
                context = gv.ImageContext(language_hints=['en-t-i0-handwrit'],
                                          text_detection_params=params)
                img = gv.Image(content=image)
                if __debug__:
                    log(f'sending image to Google for {relative(path)} ...')
                response = client.document_text_detection(
                    image=img, image_context=context)
                if __debug__:
                    log(f'received result from Google for {relative(path)}')
                result = dict_from_response(response)
            except google.api_core.exceptions.PermissionDenied as ex:
                text = 'Authentication failure for Google service -- {}'.format(
                    ex)
                raise AuthFailure(text)
            except google.auth.exceptions.DefaultCredentialsError as ex:
                text = 'Credentials file error for Google service -- {}'.format(
                    ex)
                raise AuthFailure(text)
            except google.api_core.exceptions.ServiceUnavailable as ex:
                text = 'Network, service, or Google configuration error -- {}'.format(
                    ex)
                raise ServiceFailure(text)
            except KeyboardInterrupt as ex:
                raise
            except Exception as ex:
                if isinstance(ex, KeyError):
                    # Can happen if you control-C in the middle of the Google call.
                    # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
                    # printed to the terminal and we end up here.
                    raise KeyboardInterrupt
                else:
                    text = 'Error: {} -- {}'.format(str(ex), path)
                    return TRResult(path=path,
                                    data={},
                                    boxes=[],
                                    text='',
                                    error=text)

        raise_for_interrupts()
        boxes = []
        # See this page for more information about the structure:
        # https://cloud.google.com/vision/docs/handwriting#python
        if len(result['full_text_annotation']['pages']) > 1:
            warn('More than one page received from Google; using only first.')
        for block in result['full_text_annotation']['pages'][0]['blocks']:
            for para in block['paragraphs']:
                corners = corner_list(para['bounding_box']['vertices'])
                boxes.append(
                    Box(bb=corners,
                        kind='para',
                        text='',
                        score=para['confidence']))
                for word in para['words']:
                    text = ''
                    for symbol in word['symbols']:
                        text += symbol['text']
                    corners = corner_list(word['bounding_box']['vertices'])
                    if corners:
                        boxes.append(
                            Box(bb=corners,
                                kind='word',
                                text=text,
                                score=para['confidence']))
                    else:
                        # Something is wrong with the vertex list.
                        # Skip it and continue.
                        if __debug__:
                            log(f'bad bb for {text}: '
                                f'{word["bounding_box"]["vertices"]}')
        full_text = result['full_text_annotation']['text']
        return TRResult(path=path,
                        data=result,
                        boxes=boxes,
                        text=full_text,
                        error=None)
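The dict_from_response() helper used above is not shown. With the proto-plus message classes in google-cloud-vision 2.x, one way to obtain a dict with the snake_case keys this code reads ('full_text_annotation', 'bounding_box', and so on) is the sketch below; it is an assumption about the helper, not the project's actual code:

def dict_from_response(response):
    # Assumed implementation: proto-plus messages provide to_dict(), which by
    # default preserves the original proto field names (snake_case).
    from google.cloud import vision_v1 as gv
    return gv.AnnotateImageResponse.to_dict(response)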