def extract_text(self, data, languages=None):
    """Recognise text in ``data`` via the gRPC OCR service, with caching.

    The cache key is built from the SHA-1 hex digest of ``data``. A cached
    value is decoded from UTF-8 and returned without contacting the
    service. Otherwise ``data`` is passed through ``self.ensure_size`` and
    up to 1000 recognition attempts are made.

    :param data: payload to recognise (hashed, so expected to be bytes-like).
    :param languages: language hint(s), normalised with ``ensure_list``.
    :return: the recognised text (``''`` when the response text is empty),
        or ``None`` if ``self.ensure_size`` returns ``None`` or no attempt
        succeeds.
    """
    cache_key = make_key('ocr', sha1(data).hexdigest())
    cached = kv.get(cache_key)
    if cached is not None:
        return cached.decode('utf-8')

    data = self.ensure_size(data)
    if data is None:
        return

    for attempt in range(1000):
        try:
            # The stub is built on every attempt from the current
            # ``self.channel`` (``reset_channel`` runs after failures).
            stub = RecognizeTextStub(self.channel)
            languages = ensure_list(languages)
            reply = stub.Recognize(Image(data=data, languages=languages))
            recognized = reply.text or ''
            log.info('OCR: %s chars', len(recognized))
            kv.set(cache_key, recognized.encode('utf-8'))
            return recognized
        except self.Error as err:
            # RESOURCE_EXHAUSTED is retried straight away: no warning, no
            # backoff and no channel reset. Every other error gets all three.
            if err.code() != self.Status.RESOURCE_EXHAUSTED:
                log.warning("gRPC [%s]: %s", err.code(), err.details())
                backoff(failures=attempt)
                self.reset_channel()
def extract_text(self, data, languages=None):
    """Recognise text in ``data`` via the gRPC OCR service, with caching.

    Payloads whose length is not strictly between ``MIN_SIZE`` and
    ``MAX_SIZE`` are skipped. Results are cached in ``kv`` under a key
    built from the SHA-1 hex digest of ``data``.

    :param data: payload to recognise (hashed, so expected to be bytes-like).
    :param languages: language hint(s), normalised with ``ensure_list``.
    :return: the recognised text, or ``None`` when the payload size is out
        of range, the service fails with an error that is not in
        ``self.TEMPORARY_ERRORS``, or the retries are used up.
    """
    size = len(data)
    if not MIN_SIZE < size < MAX_SIZE:
        log.info('OCR: file size out of range (%d)', size)
        return None

    key = make_key('ocr', sha1(data).hexdigest())
    # One lookup both tests for and fetches the cached value. A separate
    # exists() + get() costs a second round trip and can see the key
    # vanish between the two calls.
    cached = kv.get(key)
    if cached is not None:
        text = cached.decode('utf-8')
        log.info('OCR: %s chars cached', len(text))
        return text

    # Loop-invariant, so normalised once rather than on every attempt.
    languages = ensure_list(languages)
    for attempt in service_retries():
        try:
            # Built per attempt from the current ``self.channel``, which
            # ``reset_channel`` is asked to refresh after a failure.
            service = RecognizeTextStub(self.channel)
            image = Image(data=data, languages=languages)
            response = service.Recognize(image)
            text = response.text
            if text is not None:
                log.info('OCR: %s chars (from %s bytes)', len(text), size)
            kv.set(key, text)
            return text
        except self.Error as e:
            # Log every failure, including the non-temporary ones that end
            # the call, so they are not dropped silently.
            log.warning("gRPC [%s]: %s", e.code(), e.details())
            if e.code() not in self.TEMPORARY_ERRORS:
                return None
            self.reset_channel()
            backoff(failures=attempt)
    return None
def extract_text(self, data, languages=None):
    """Recognise text in ``data`` with the Vision API client, with caching.

    Results are cached in ``kv`` under a key built from the SHA-1 hex
    digest of ``data``. On a cache miss the payload is passed through
    ``self.ensure_size`` before being sent to
    ``self.client.document_text_detection``.

    :param data: payload to recognise (hashed, so expected to be bytes-like).
    :param languages: accepted for interface compatibility; not used here.
    :return: the recognised text as ``str``, or ``None`` if
        ``self.ensure_size`` returns ``None``.
    """
    key = make_key('ocr', sha1(data).hexdigest())
    cached = kv.get(key)
    if cached is not None:
        # Return the same type on a cache hit as on a fresh recognition.
        # NOTE(review): assumes kv.get may hand back UTF-8 bytes (as a
        # Redis client without response decoding would) - confirm.
        if isinstance(cached, bytes):
            cached = cached.decode('utf-8')
        log.info('Vision API: %s chars cached', len(cached))
        return cached

    data = self.ensure_size(data)
    if data is None:
        return None

    image = types.Image(content=data)
    res = self.client.document_text_detection(image)
    text = res.full_text_annotation.text
    log.info('Vision API: %s chars recognized', len(text))
    kv.set(key, text)
    return text
def extract_text(self, data, languages=None):
    """Recognise text in ``data`` with the Vision API client, with caching.

    Payloads whose length is not strictly between ``MIN_SIZE`` and
    ``MAX_SIZE`` are skipped. Results are cached in ``kv`` under a key
    built from the SHA-1 hex digest of ``data``.

    :param data: payload to recognise (hashed, so expected to be bytes-like).
    :param languages: accepted for interface compatibility; not used here.
    :return: the recognised text, or ``None`` when the payload size is out
        of range.
    """
    size = len(data)
    if not MIN_SIZE < size < MAX_SIZE:
        log.info('OCR: file size out of range (%d)', size)
        return None

    key = make_key('ocr', sha1(data).hexdigest())
    # One lookup both tests for and fetches the cached value. A separate
    # exists() + get() costs a second round trip and can see the key
    # vanish between the two calls.
    cached = kv.get(key)
    if cached is not None:
        text = cached.decode('utf-8')
        log.info('Vision API: %s chars cached', len(text))
        return text

    image = types.Image(content=data)
    res = self.client.document_text_detection(image)
    text = res.full_text_annotation.text
    log.info('Vision API: %s chars recognized', len(text))
    kv.set(key, text)
    return text
def key(self, *parts):
    """Return ``make_key`` applied to ``self.prefix`` followed by ``parts``."""
    prefixed = (self.prefix,) + parts
    return make_key(*prefixed)
def place_key(name):
    """Return the key ``make_key`` builds from ``PLACE_KEY`` and ``name``."""
    key_parts = (PLACE_KEY, name)
    return make_key(*key_parts)