def extract_text(self, text, languages):
    for attempt in service_retries():
        try:
            service = EntityExtractStub(self.channel)
            req = Text(text=text, languages=languages)
            for res in service.Extract(req):
                clazz = self.TYPES.get(res.type)
                yield (res.text, clazz, res.start, res.end)
            break
        except self.Error as e:
            # Give up on permanent errors; reset and retry transient ones.
            if e.code() not in self.TEMPORARY_ERRORS:
                return
            self.reset_channel()
            log.warning("gRPC [%s]: %s", e.code(), e.details())
            backoff(failures=attempt)
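# A minimal sketch of the retry helpers the loop above relies on.
# service_retries() and backoff() are not defined in this snippet; these
# hypothetical implementations only illustrate the assumed contract:
# a bounded sequence of attempt counters, and a sleep that grows with
# the number of consecutive failures.
import time


def service_retries(max_attempts=5):
    # Yield attempt counters for a bounded retry loop.
    return range(max_attempts)


def backoff(failures=0):
    # Sleep a little longer after each consecutive failure.
    time.sleep(failures * 2)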
def extract(self, collector, document):
    DocumentTagCollector(document, 'polyglot').save()
    DocumentTagCollector(document, 'spacy').save()
    try:
        service = EntityExtractStub(self.channel)
        texts = self.text_iterator(document)
        entities = service.Extract(texts)
        for entity in entities.entities:
            type_ = self.TYPES.get(entity.type)
            if type_ is None:
                continue
            collector.emit(entity.label, type_, weight=entity.weight)
        log.info('Extracted %s entities.', len(collector))
    except self.Error as e:
        log.warning("gRPC [%s]: %s", e.code(), e.details())
def extract(self, collector, document):
    DocumentTagCollector(document, 'polyglot').save()
    DocumentTagCollector(document, 'spacy').save()
    try:
        service = EntityExtractStub(self.channel)
        texts = self.text_iterator(document)
        entities = service.Extract(texts)
        for entity in entities.entities:
            type_ = self.TYPES.get(entity.type)
            if type_ is None:
                continue
            collector.emit(entity.label, type_, weight=entity.weight)
        log.info('Extracted %s entities.', len(collector))
    except self.Error as exc:
        log.exception("gRPC Error: %s", self.SERVICE)
        self.reset_channel()
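# A possible shape for the text_iterator() helper used by the two
# methods above (hypothetical; it would live on the same class, and the
# streaming Extract call only needs an iterable of Text messages):
def text_iterator(self, document):
    languages = list(document.languages)
    for text in document.texts:
        yield Text(text=text, languages=languages)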
def extract(self, text, languages):
    if text is None or len(text) < self.MIN_LENGTH:
        return
    texts = textwrap.wrap(text, self.MAX_LENGTH)
    for text in texts:
        for attempt in range(10):
            try:
                service = EntityExtractStub(self.channel)
                req = Text(text=text, languages=languages)
                for res in service.Extract(req):
                    clazz = self.TYPES.get(res.type)
                    yield (res.text, clazz, res.start, res.end)
                break
            except self.Error as e:
                if e.code() == self.Status.RESOURCE_EXHAUSTED:
                    continue
                log.warning("gRPC [%s]: %s", e.code(), e.details())
                backoff(failures=attempt)
                self.reset_channel()
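# textwrap.wrap() as used above splits on whitespace, so each chunk sent
# to the service is at most MAX_LENGTH characters; a quick standalone
# check of that behaviour:
import textwrap

chunks = textwrap.wrap('one two three four five', 10)
assert all(len(chunk) <= 10 for chunk in chunks)
print(chunks)  # ['one two', 'three four', 'five']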
def extract(self, collector, document):
    DocumentTagCollector(document, 'polyglot').save()
    DocumentTagCollector(document, 'spacy').save()
    try:
        service = EntityExtractStub(self.channel)
        texts = self.text_iterator(document)
        entities = service.Extract(texts)
        for entity in entities.entities:
            if entity.type == ExtractedEntity.COUNTRY:
                document.add_country(entity.label)
            if entity.type == ExtractedEntity.LANGUAGE:
                document.add_language(entity.label)
            type_ = self.TYPES.get(entity.type)
            # log.info('%s: %s', entity.label, type_)
            if type_ is not None:
                collector.emit(entity.label, type_, weight=entity.weight)
        log.info('Extracted %s entities.', len(collector))
    except self.Error as e:
        log.warning("gRPC [%s]: %s", e.code(), e.details())
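# The TYPES mapping used throughout is assumed to translate the
# service's ExtractedEntity enum into local tag types, with entries
# deliberately missing for types that should be skipped. The values
# below are hypothetical placeholders, not the real mapping:
# TYPES = {
#     ExtractedEntity.PERSON: DocumentTag.TYPE_PERSON,
#     ExtractedEntity.ORGANIZATION: DocumentTag.TYPE_ORGANIZATION,
#     ExtractedEntity.LOCATION: DocumentTag.TYPE_LOCATION,
# }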
def extract(self, collector, document):
    languages = list(document.languages)
    if not len(languages):
        languages = [settings.DEFAULT_LANGUAGE]
    try:
        channel = grpc.insecure_channel(self.SERVICE)
        service = EntityExtractStub(channel)
        for text in document.texts:
            if len(text) <= self.MIN_LENGTH:
                continue
            text = Text(text=text, languages=languages)
            for entity in service.Extract(text):
                type_ = self.TYPES.get(entity.type)
                if type_ is None:
                    continue
                collector.emit(entity.label, type_)
        log.info('%s Extracted %s entities.', self.SERVICE, len(collector))
    except grpc.RpcError as exc:
        log.warning("gRPC Error: %s", exc)
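# A minimal sketch of the channel state the earlier revisions assume.
# self.channel, self.Error and reset_channel() are not defined in the
# snippets above; this hypothetical base class shows one way they could
# fit together, with reset_channel() recreating the channel so the next
# attempt reconnects after a transient gRPC failure:
import grpc


class ServiceClient:
    SERVICE = 'localhost:50000'
    Error = grpc.RpcError

    def __init__(self):
        self.channel = grpc.insecure_channel(self.SERVICE)

    def reset_channel(self):
        # Recreate the channel after a transient gRPC failure.
        self.channel = grpc.insecure_channel(self.SERVICE)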
import statistics
import time

import grpc

from alephclient.services.entityextract_pb2_grpc import EntityExtractStub
from alephclient.services.common_pb2 import Text

URL = 'localhost:50000'

channel = grpc.insecure_channel(URL)
service = EntityExtractStub(channel)


def generate():
    with open('tests/fixtures/pace.txt', 'r', encoding='utf-8') as fh:
        for line in fh:
            yield Text(text=line, languages=['en'])


times = []
for i in range(1):
    start = time.time()
    entities = service.Extract(generate())
    for entity in entities.entities:
        print((entity.label, entity.weight, entity.type))
    end = time.time()
    times.append(end - start)

print(statistics.mean(times))
import statistics
import time

import grpc
# from threading import Thread

from alephclient.services.common_pb2 import Text
from alephclient.services.entityextract_pb2_grpc import EntityExtractStub

URL = 'localhost:50000'
TEXT = 'There was Joseph Stalin working at the Kremlin in Moscow'

channel = grpc.insecure_channel(URL)
service = EntityExtractStub(channel)

times = []
for i in range(100):
    start = time.time()
    req = Text(text=TEXT, languages=['en'])
    for ent in service.Extract(req):
        print(ent.text)
    end = time.time()
    times.append(end - start)

print(statistics.mean(times))


# def target():
#     channel = grpc.insecure_channel(URL)
#     service = EntityExtractStub(channel)
#     for i in range(300):
#         req = Text(text=TEXT, languages=['en'])
#         for ent in service.Extract(req):
#             # print(ent.text)
#             pass
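# If the commented-out threaded variant above is revived, workers could
# be launched like this (a sketch; the thread count is arbitrary and
# each worker opens its own channel, as target() already does):
# threads = [Thread(target=target) for _ in range(4)]
# for thread in threads:
#     thread.start()
# for thread in threads:
#     thread.join()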