예제 #1
0
 def extract(self, collector, document):
     """Send a document's text to the entity service and collect the results.

     The value of ``self.text_iterator(document)`` is passed to the
     service's ``Extract`` call. Each entity in the response whose type
     has a mapping in ``self.TYPES`` is emitted on ``collector`` with its
     label and weight; entities without a mapping are skipped.

     gRPC failures (``self.Error``) are logged as warnings and swallowed.
     """
     # A tag collector is created and saved for both legacy origins before
     # extraction starts (presumably to reset their stored tags - confirm).
     for origin in ('polyglot', 'spacy'):
         DocumentTagCollector(document, origin).save()
     try:
         stub = EntityExtractStub(self.channel)
         response = stub.Extract(self.text_iterator(document))
         for found in response.entities:
             mapped = self.TYPES.get(found.type)
             if mapped is not None:
                 collector.emit(found.label, mapped, weight=found.weight)
         log.info('Extracted %s entities.', len(collector))
     except self.Error as err:
         log.warning("gRPC [%s]: %s", err.code(), err.details())
예제 #2
0
 def extract_text(self, text, languages):
     """Yield entities found in ``text`` by the entity extraction service.

     Each result is yielded as ``(text, type, start, end)``, where ``type``
     is the ``self.TYPES`` mapping of the service's type (``None`` when the
     type has no mapping).

     The call is attempted once per value produced by ``service_retries()``.
     On a gRPC error whose code is in ``self.TEMPORARY_ERRORS`` the channel
     is reset and the call is retried after a backoff; any other gRPC error
     ends the generator.

     NOTE: a retry restarts the ``Extract`` call from scratch, so results
     already yielded before a mid-stream failure can be yielded again.
     """
     for attempt in service_retries():
         try:
             service = EntityExtractStub(self.channel)
             req = Text(text=text, languages=languages)
             for res in service.Extract(req):
                 clazz = self.TYPES.get(res.type)
                 yield (res.text, clazz, res.start, res.end)
             break
         except self.Error as e:
             # Log every failure, including the permanent ones that make us
             # give up; otherwise those errors disappear without a trace.
             log.warning("gRPC [%s]: %s", e.code(), e.details())
             if e.code() not in self.TEMPORARY_ERRORS:
                 return
             self.reset_channel()
             backoff(failures=attempt)
예제 #3
0
 def extract(self, collector, document):
     """Extract entities for a document and emit them on ``collector``.

     The result of ``self.text_iterator(document)`` is handed to the
     service's ``Extract`` call. Entities whose type has no entry in
     ``self.TYPES`` are ignored; the rest are emitted with their label and
     weight.

     On a gRPC failure (``self.Error``) the traceback is logged and the
     channel is reset; the error is not re-raised.
     """
     DocumentTagCollector(document, 'polyglot').save()
     DocumentTagCollector(document, 'spacy').save()
     try:
         client = EntityExtractStub(self.channel)
         result = client.Extract(self.text_iterator(document))
         # Pair each entity with its mapped type lazily, one at a time.
         typed = ((item, self.TYPES.get(item.type)) for item in result.entities)
         for item, kind in typed:
             if kind is None:
                 continue
             collector.emit(item.label, kind, weight=item.weight)
         log.info('Extracted %s entities.', len(collector))
     except self.Error:
         log.exception("gRPC Error: %s", self.SERVICE)
         self.reset_channel()
예제 #4
0
파일: extract.py 프로젝트: jbaehne/aleph
 def extract(self, text, languages):
     """Yield ``(text, type, start, end)`` tuples for entities in ``text``.

     Nothing is yielded when ``text`` is ``None`` or shorter than
     ``self.MIN_LENGTH``. Otherwise the text is wrapped into pieces of at
     most ``self.MAX_LENGTH`` characters and each piece is sent to the
     service, with up to ten attempts per piece.

     A ``RESOURCE_EXHAUSTED`` error is retried immediately; any other gRPC
     error is logged, followed by a backoff and a channel reset, before the
     next attempt.
     """
     too_short = text is None or len(text) < self.MIN_LENGTH
     if too_short:
         return
     for chunk in textwrap.wrap(text, self.MAX_LENGTH):
         for attempt in range(10):
             try:
                 stub = EntityExtractStub(self.channel)
                 request = Text(text=chunk, languages=languages)
                 for hit in stub.Extract(request):
                     kind = self.TYPES.get(hit.type)
                     yield (hit.text, kind, hit.start, hit.end)
                 # The whole piece was processed; stop retrying it.
                 break
             except self.Error as err:
                 if err.code() != self.Status.RESOURCE_EXHAUSTED:
                     log.warning("gRPC [%s]: %s", err.code(), err.details())
                     backoff(failures=attempt)
                     self.reset_channel()
예제 #5
0
 def extract(self, collector, document):
     """Extract entities for a document, recording countries and languages.

     The result of ``self.text_iterator(document)`` is passed to the
     service's ``Extract`` call. For every returned entity:

     * a ``COUNTRY`` entity's label is added to the document's countries;
     * a ``LANGUAGE`` entity's label is added to the document's languages;
     * if its type has a mapping in ``self.TYPES``, it is emitted on
       ``collector`` with its label and weight.

     gRPC failures (``self.Error``) are logged as warnings and swallowed.
     """
     DocumentTagCollector(document, 'polyglot').save()
     DocumentTagCollector(document, 'spacy').save()
     try:
         stub = EntityExtractStub(self.channel)
         reply = stub.Extract(self.text_iterator(document))
         for hit in reply.entities:
             kind = hit.type
             if kind == ExtractedEntity.COUNTRY:
                 document.add_country(hit.label)
             if kind == ExtractedEntity.LANGUAGE:
                 document.add_language(hit.label)
             tag_type = self.TYPES.get(kind)
             if tag_type is None:
                 continue
             collector.emit(hit.label, tag_type, weight=hit.weight)
         log.info('Extracted %s entities.', len(collector))
     except self.Error as err:
         log.warning("gRPC [%s]: %s", err.code(), err.details())
예제 #6
0
    def extract(self, collector, document):
        """Extract entities from each text of a document via the gRPC service.

        Uses the document's languages, falling back to
        ``settings.DEFAULT_LANGUAGE`` when the document has none. Texts whose
        length is at most ``self.MIN_LENGTH`` are skipped. Every returned
        entity whose type has a mapping in ``self.TYPES`` is emitted on
        ``collector``.

        gRPC failures (``grpc.RpcError``) are logged as warnings and
        swallowed.
        """
        languages = list(document.languages) or [settings.DEFAULT_LANGUAGE]

        try:
            # A fresh channel is opened on every call, so it must be closed
            # again; the context manager does that even when an RPC fails.
            with grpc.insecure_channel(self.SERVICE) as channel:
                service = EntityExtractStub(channel)
                for text in document.texts:
                    if len(text) <= self.MIN_LENGTH:
                        continue

                    request = Text(text=text, languages=languages)
                    for entity in service.Extract(request):
                        type_ = self.TYPES.get(entity.type)
                        if type_ is not None:
                            collector.emit(entity.label, type_)

            log.info('%s Extracted %s entities.', self.SERVICE, len(collector))
        except grpc.RpcError as exc:
            log.warning("gRPC Error: %s", exc)
예제 #7
0
import statistics
import grpc
import time
from alephclient.services.entityextract_pb2_grpc import EntityExtractStub
from alephclient.services.common_pb2 import Text

# Target address handed to grpc.insecure_channel() below.
URL = 'localhost:50000'

# Insecure channel and entity-extraction stub, both created at import time
# and shared by the rest of the script.
channel = grpc.insecure_channel(URL)
service = EntityExtractStub(channel)


def generate():
    """Yield one English ``Text`` request per line of the pace.txt fixture."""
    with open('tests/fixtures/pace.txt', 'r', encoding='utf-8') as fixture:
        yield from (Text(text=row, languages=['en']) for row in fixture)


# Time each streaming Extract call over the fixture and print the mean
# duration in seconds.
times = []
for i in range(1):
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    start = time.perf_counter()
    entities = service.Extract(generate())
    for entity in entities.entities:
        print((entity.label, entity.weight, entity.type))
    end = time.perf_counter()
    times.append(end - start)

print(statistics.mean(times))
예제 #8
0
import grpc
import time
import statistics
# from threading import Thread
from alephclient.services.common_pb2 import Text
from alephclient.services.entityextract_pb2_grpc import EntityExtractStub

# Benchmark: send the same sentence to the entity extraction service 100
# times and print the mean round-trip duration in seconds.
URL = 'localhost:50000'
TEXT = 'There was Joseph Stalin working at the Kremlin in Moscow'
channel = grpc.insecure_channel(URL)
service = EntityExtractStub(channel)
times = []
for i in range(100):
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    start = time.perf_counter()
    # Despite its name, `image` holds the Text request for this call.
    image = Text(text=TEXT, languages=['en'])
    for ent in service.Extract(image):
        print(ent.text)
    end = time.perf_counter()
    times.append(end - start)

print(statistics.mean(times))


# def target():
#     channel = grpc.insecure_channel(URL)
#     service = EntityExtractStub(channel)
#     for i in range(300):
#         image = Text(text=TEXT, languages=['en'])
#         for ent in service.Extract(image):
#             # print(ent.text)
#             pass
예제 #9
0
 def get_service(self):
     """Return an entity extraction stub on a channel cached on the class.

     The channel is created on first use from ``self.SERVICE`` and stored
     as ``_channel`` on the instance's class, so later calls reuse it
     instead of opening a new one.
     """
     cls = type(self)
     # Store the channel on the class. The previous code only bound a local
     # name, so nothing was ever cached, and a pre-set `_channel` led to an
     # unbound local at the return statement.
     if getattr(cls, '_channel', None) is None:
         cls._channel = grpc.insecure_channel(self.SERVICE)
     return EntityExtractStub(cls._channel)