Exemple #1
0
    def extract_text(self, data, languages=None):
        """Run OCR on image bytes through the gRPC recognition service.

        Results are cached in ``kv`` under a key derived from the SHA-1 of
        the ``data`` passed in (hashed before ``ensure_size`` is applied), so
        a repeated call with the same bytes skips the remote call.

        :param data: raw image bytes.
        :param languages: language hint(s); normalised with ``ensure_list``
            and sent along with the image.
        :return: the recognised text as ``str`` (``''`` when the service
            returns no text), or ``None`` when ``ensure_size`` returns
            ``None`` or all 1000 attempts fail.
        """
        key = make_key('ocr', sha1(data).hexdigest())
        cached = kv.get(key)
        if cached is not None:
            return cached.decode('utf-8')

        data = self.ensure_size(data)
        if data is None:
            return None

        # The request is identical for every attempt, so build it once
        # rather than re-wrapping the image bytes on each retry.
        image = Image(data=data, languages=ensure_list(languages))

        for attempt in range(1000):
            try:
                # The stub is created per attempt because reset_channel()
                # is called after a failure and self.channel is read here.
                service = RecognizeTextStub(self.channel)
                response = service.Recognize(image)
            except self.Error as e:
                # NOTE(review): RESOURCE_EXHAUSTED is retried immediately,
                # with no backoff and no channel reset -- confirm that up to
                # 1000 back-to-back attempts is the intended behaviour.
                if e.code() == self.Status.RESOURCE_EXHAUSTED:
                    continue
                log.warning("gRPC [%s]: %s", e.code(), e.details())
                backoff(failures=attempt)
                self.reset_channel()
            else:
                text = response.text or ''
                log.info('OCR: %s chars', len(text))
                # Stored as UTF-8 bytes; the cache-hit path above decodes.
                kv.set(key, text.encode('utf-8'))
                return text

        # Every attempt failed.
        return None
Exemple #2
0
    def extract_text(self, data, languages=None):
        """Run OCR on image bytes through the gRPC recognition service.

        Inputs whose length is not strictly between ``MIN_SIZE`` and
        ``MAX_SIZE`` are rejected up front. Results are cached in ``kv``
        under a key derived from the SHA-1 of ``data``.

        :param data: raw image bytes.
        :param languages: language hint(s); normalised with ``ensure_list``
            and sent along with the image.
        :return: the recognised text, or ``None`` when the size check fails,
            the service reports an error that is not in
            ``self.TEMPORARY_ERRORS``, or ``service_retries()`` is exhausted.
        """
        if not MIN_SIZE < len(data) < MAX_SIZE:
            log.info('OCR: file size out of range (%d)', len(data))
            return None

        key = make_key('ocr', sha1(data).hexdigest())
        # A single get() replaces exists() + get(): one round trip instead of
        # two, and no window in which the key can vanish between the calls
        # (which used to return None without ever running OCR).
        cached = kv.get(key)
        if cached is not None:
            text = cached.decode('utf-8')
            log.info('OCR: %s chars cached', len(text))
            return text

        # The request is identical for every attempt, so build it once.
        image = Image(data=data, languages=ensure_list(languages))

        for attempt in service_retries():
            try:
                # The stub is created per attempt because reset_channel()
                # is called after a failure and self.channel is read here.
                service = RecognizeTextStub(self.channel)
                response = service.Recognize(image)
            except self.Error as e:
                # Log before deciding whether to retry, so that permanent
                # failures are no longer dropped silently.
                log.warning("gRPC [%s]: %s", e.code(), e.details())
                if e.code() not in self.TEMPORARY_ERRORS:
                    return None
                self.reset_channel()
                backoff(failures=attempt)
            else:
                text = response.text
                if text is not None:
                    log.info('OCR: %s chars (from %s bytes)', len(text),
                             len(data))
                kv.set(key, text)
                return text

        # Retries exhausted without a successful response.
        return None
Exemple #3
0
    def extract_text(self, data, languages=None):
        """Recognise text in image bytes with the Vision API client.

        Results are cached in ``kv`` under a key derived from the SHA-1 of
        the ``data`` passed in (hashed before ``ensure_size`` is applied).

        :param data: raw image bytes.
        :param languages: accepted but not used by this implementation.
        :return: the recognised text, or ``None`` when ``ensure_size``
            returns ``None``.
        """
        key = make_key('ocr', sha1(data).hexdigest())
        cached = kv.get(key)
        if cached is not None:
            # The store may hand back raw bytes; decode so that a cache hit
            # returns the same type as a fresh result and the log below
            # counts characters rather than bytes.
            if isinstance(cached, bytes):
                cached = cached.decode('utf-8')
            log.info('Vision API: %s chars cached', len(cached))
            return cached

        data = self.ensure_size(data)
        if data is None:
            return None

        image = types.Image(content=data)
        res = self.client.document_text_detection(image)
        ann = res.full_text_annotation
        log.info('Vision API: %s chars recognized', len(ann.text))
        kv.set(key, ann.text)
        return ann.text
Exemple #4
0
    def extract_text(self, data, languages=None):
        """Recognise text in image bytes with the Vision API client.

        Inputs whose length is not strictly between ``MIN_SIZE`` and
        ``MAX_SIZE`` are rejected up front. Results are cached in ``kv``
        under a key derived from the SHA-1 of ``data``.

        :param data: raw image bytes.
        :param languages: accepted but not used by this implementation.
        :return: the recognised text, or ``None`` when the size check fails.
        """
        if not MIN_SIZE < len(data) < MAX_SIZE:
            log.info('OCR: file size out of range (%d)', len(data))
            return None

        key = make_key('ocr', sha1(data).hexdigest())
        # A single get() replaces exists() + get(): one round trip instead of
        # two, and no window in which the key can vanish between the calls
        # (which used to return None without ever calling the API).
        cached = kv.get(key)
        if cached is not None:
            text = cached.decode('utf-8')
            log.info('Vision API: %s chars cached', len(text))
            return text

        image = types.Image(content=data)
        res = self.client.document_text_detection(image)
        ann = res.full_text_annotation
        log.info('Vision API: %s chars recognized', len(ann.text))
        kv.set(key, ann.text)
        return ann.text
Exemple #5
0
 def key(self, *parts):
     """Build a key from ``self.prefix`` followed by ``parts``."""
     # Prepend the instance prefix, then hand all segments to make_key.
     segments = (self.prefix,) + parts
     return make_key(*segments)
Exemple #6
0
def place_key(name):
    """Return the key that ``make_key`` builds from ``PLACE_KEY`` and ``name``."""
    key = make_key(PLACE_KEY, name)
    return key