Example #1
0
    def result(self, path):
        '''Returns all the results from calling the service on the 'path'. The
        results are returned as a TRResult named tuple.

        Results are cached in self._results, keyed by 'path'; a path that
        already has a cached result is answered from the cache without
        contacting the service again.

        If the image cannot be read, the error value produced by
        self._image_from_file() is returned as-is.  If a network failure
        occurs, the return value is a TRResult whose 'error' field holds the
        failure message.  Raises ServiceFailure if the service's reply lacks
        an Operation-Location header, and re-raises any other error reported
        by net().
        '''
        # Check if we already processed it.
        if path in self._results:
            if __debug__: log('returning already-known result for {}', path)
            return self._results[path]

        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(path)
        if error:
            return error

        base_url = 'https://westus.api.cognitive.microsoft.com/vision/v2.0/'
        url = base_url + 'recognizeText'
        params  = {'mode': 'Handwritten'}
        headers = {'Ocp-Apim-Subscription-Key': self._credentials,
                   'Content-Type': 'application/octet-stream'}

        # The Microsoft API for extracting text requires two phases: one call
        # to submit the image for processing, then polling to wait until the
        # text is ready to be retrieved.

        if __debug__: log('sending file to MS cloud service')
        response, error = net('post', url, headers = headers, params = params, data = image)
        if isinstance(error, NetworkFailure):
            if __debug__: log('network exception: {}', str(error))
            # Pass 'boxes' explicitly so this result has the same shape as
            # every other TRResult built for recognition services.
            return TRResult(path = path, data = {}, text = '', boxes = [],
                            error = str(error))
        elif isinstance(error, RateLimitExceeded):
            # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
            # The headers should have a Retry-After number in seconds.
            sleep_time = 30
            if 'Retry-After' in response.headers:
                sleep_time = int(response.headers['Retry-After'])
            if __debug__: log('sleeping for {} s and retrying', sleep_time)
            sleep(sleep_time)
            return self.result(path)    # Recursive invocation
        elif error:
            raise error

        if 'Operation-Location' in response.headers:
            polling_url = response.headers['Operation-Location']
        else:
            if __debug__: log('no operation-location in response headers')
            raise ServiceFailure('Unexpected response from Microsoft server')

        if __debug__: log('polling MS for results ...')
        analysis = {}
        poll = True
        while poll:
            # I never have seen results returned in 1 second, and meanwhile
            # the repeated polling counts against your rate limit.  So, wait
            # for 2 s to reduce the number of calls.
            sleep(2)
            response, error = net('get', polling_url, polling = True, headers = headers)
            if isinstance(error, NetworkFailure):
                if __debug__: log('network exception: {}', str(error))
                return TRResult(path = path, data = {}, text = '', boxes = [],
                                error = str(error))
            elif isinstance(error, RateLimitExceeded):
                # Pause to let the server reset its timers.  It seems that MS
                # doesn't send back a Retry-After header when rated limited
                # during polling, but I'm going to check it anyway, in case.
                sleep_time = 30
                if 'Retry-After' in response.headers:
                    sleep_time = int(response.headers['Retry-After'])
                if __debug__: log('sleeping for {} s and retrying', sleep_time)
                sleep(sleep_time)
            elif error:
                raise error

            # Sometimes the response comes back without content.  I don't know
            # if that's a bug in the Azure system or not.  It's not clear what
            # else should be done except keep going.
            if response.text:
                analysis = response.json()
                if 'recognitionResult' in analysis:
                    poll = False
                if 'status' in analysis and analysis['status'] == 'Failed':
                    poll = False
            else:
                if __debug__: log('received empty result from Microsoft.')
        if __debug__: log('results received.')

        # Have to extract the text into a single string.  'lines' starts out
        # empty so that a reply without 'recognitionResult' (e.g., one whose
        # status is 'Failed') yields empty text and no boxes instead of
        # hitting an unbound variable below.
        lines = []
        full_text = ''
        if 'recognitionResult' in analysis:
            lines = analysis['recognitionResult']['lines']
            # Order the lines by the 2nd, then the 1st, bounding box value.
            sorted_lines = sorted(lines, key = lambda x: (x['boundingBox'][1], x['boundingBox'][0]))
            full_text = '\n'.join(x['text'] for x in sorted_lines)

        # Create our particular box structure for annotations.  The Microsoft
        # structure is like this: data['recognitionResult']['lines'] contains
        # a list of dict with keys 'words', 'boundingBox', and 'text'.

        boxes = [TextBox(boundingBox = chunk['boundingBox'], text = chunk['text'])
                 for chunk in lines]

        # Put it all together.
        self._results[path] = TRResult(path = path, data = analysis,
                                       text = full_text, boxes = boxes,
                                       error = None)
        return self._results[path]
Example #2
0
    def result(self, path):
        '''Sends the image at 'path' to the Google vision service and returns
        the outcome as a TRResult named tuple.

        Every API call named in self._known_features is invoked on the image
        and its reply is stored (converted to a dict) under that feature's
        name in the 'data' field.  The text and the per-word boxes come from
        the 'document_text_detection' reply.

        If the image cannot be read, the error value produced by
        self._image_from_file() is returned as-is.  Raises AuthFailure when
        Google denies permission.  Other exceptions are reported through the
        'error' field of the returned TRResult, except KeyError, which is
        converted to KeyboardInterrupt (see the handler below).
        '''
        # Load the image first; without it there is nothing to send.
        image, error = self._image_from_file(path)
        if error:
            return error

        try:
            if __debug__: log('building Google vision API object')
            client = gv.ImageAnnotatorClient()
            image = gv.types.Image(content=image)
            context = gv.types.ImageContext(
                language_hints=['en-t-i0-handwrit'])

            # Call each known API function in turn, keeping every reply.
            replies = dict.fromkeys(self._known_features)
            for feature in self._known_features:
                if __debug__:
                    log('sending image to Google for {} ...', feature)
                api_call = getattr(client, feature)
                response = api_call(image=image, image_context=context)
                if __debug__: log('received result.')
                replies[feature] = MessageToDict(response)

            # Convert Google's structure into our text + boxes format.  The
            # nesting is pages -> blocks -> paragraphs -> words -> symbols:
            #
            # replies['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0].keys()
            #   --> dict_keys(['boundingBox', 'confidence', 'paragraphs', 'blockType'])
            #
            # replies['document_text_detection']['fullTextAnnotation']['pages'][0]['blocks'][0]['paragraphs'][0].keys()
            #   --> dict_keys(['boundingBox', 'words', 'confidence'])
            #
            # https://cloud.google.com/vision/docs/reference/rest/v1/images/annotate#Block

            boxes = []
            full_text = ''
            detection = replies['document_text_detection']
            if 'fullTextAnnotation' in detection:
                annotation = detection['fullTextAnnotation']
                full_text = annotation['text']
                # Walk every word of the first page, in document order.
                words = (word
                         for block in annotation['pages'][0]['blocks']
                         for para in block['paragraphs']
                         for word in para['words'])
                for word in words:
                    word_text = ''.join(symbol['text']
                                        for symbol in word['symbols'])
                    vertices = word['boundingBox']['vertices']
                    corners = corner_list(vertices)
                    if not corners:
                        # The vertex list is unusable; skip this word.
                        if __debug__:
                            log('bad bb for {}: {}', word_text, vertices)
                        continue
                    boxes.append(TextBox(boundingBox=corners, text=word_text))

            return TRResult(path=path, data=replies, boxes=boxes,
                            text=full_text, error=None)
        except google.api_core.exceptions.PermissionDenied as ex:
            message = 'Authentication failure for Google service -- {}'.format(ex)
            raise AuthFailure(message)
        except KeyboardInterrupt:
            raise
        except Exception as ex:
            if isinstance(ex, KeyError):
                # Can happen if you control-C in the middle of the Google call.
                # Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
                # printed to the terminal and we end up here.
                raise KeyboardInterrupt
            message = 'Error: {} -- {}'.format(str(ex), path)
            return TRResult(path=path, data={}, boxes=[], text='',
                            error=message)
Example #3
0
    def amazon_result(self, file_path, variant, api_method, image_keyword,
                      response_key, value_key, block_key):
        '''Returns the results from calling the service on the 'file_path'.
        The results are returned as a TRResult named tuple.

        Arguments:
          file_path:     path of the image file; also the key used for
                         caching results in self._results.
          variant:       service name handed to boto3.client().
          api_method:    name of the client method to invoke.
          image_keyword: name of the keyword argument under which the image
                         bytes are passed to that method.
          response_key:  key in the response that holds the list of blocks.
          value_key:     key in each block whose value must equal "WORD" for
                         the block to be used.
          block_key:     key in each block that holds the recognized text.

        Failures to read the image, and any exception other than
        KeyboardInterrupt raised while calling the service or processing its
        response, are reported through the 'error' field of the returned
        TRResult; only successful results are cached.
        '''
        # Check if we already processed it.
        if file_path in self._results:
            if __debug__:
                log('returning already-known result for {}', file_path)
            return self._results[file_path]

        # Read the image and proceed with contacting the service.
        (image, error) = self._image_from_file(file_path)
        if error:
            return TRResult(path=file_path,
                            data={},
                            boxes=[],
                            text='',
                            error=error)

        if __debug__: log('setting up Amazon client function "{}"', variant)
        creds = self._credentials
        try:
            client = boto3.client(
                variant,
                region_name=creds['region_name'],
                aws_access_key_id=creds['aws_access_key_id'],
                aws_secret_access_key=creds['aws_secret_access_key'])
            if __debug__: log('calling Amazon API function')
            response = getattr(client, api_method)(**{
                image_keyword: {
                    'Bytes': image
                }
            })
            if __debug__:
                log('received {} blocks', len(response[response_key]))
            words = []
            boxes = []
            # The image dimensions are handed to corner_list() together with
            # each block's polygon.
            width, height = imagesize.get(file_path)
            for block in response[response_key]:
                if block.get(value_key) != "WORD":
                    continue
                text = block[block_key]
                words.append(text)
                polygon = block['Geometry']['Polygon']
                corners = corner_list(polygon, width, height)
                if corners:
                    boxes.append(TextBox(boundingBox=corners, text=text))
                else:
                    # Something's wrong with the vertex list. Skip & continue.
                    # (Log the polygon itself: the previous code referenced an
                    # undefined name here, which raised NameError and turned
                    # one bad box into a failure for the whole image.)
                    if __debug__: log('bad bb for {}: {}', text, polygon)

            # Each word is followed by a single space, including the last.
            full_text = ''.join(word + ' ' for word in words)

            result = TRResult(path=file_path,
                              data=response,
                              boxes=boxes,
                              text=full_text,
                              error=None)
            self._results[file_path] = result
            return result
        except KeyboardInterrupt:
            raise
        except Exception as ex:
            text = 'Error: {} -- {}'.format(str(ex), file_path)
            return TRResult(path=file_path,
                            data={},
                            boxes=[],
                            text='',
                            error=text)