def doc_for_text(self, text):
    # Both branches send the same tokenizer request; only the RPC differs.
    request = nlp_messages.NlTokenizerRequest(
        text=nlp_messages.NlText(text=text))
    if self.component_config.get("crf_lexical"):
        return self.nlp.Tokenizer(request)
    return self.nlp.EntityExtractor(request)
def process(self, message, **kwargs):
    # type: (Message, **Any) -> None
    # Can't use the existing doc here (spacy_doc on the message)
    # because its tokens are lower-cased, which is bad for NER.
    hanlp = kwargs.get("hanlp", None)
    if hanlp is None:
        return
    request = nlp_messages.NlText(text=message.text)
    doc = hanlp.ParseAmountTerms(request)
    extracted = self.add_extractor_name(self.extract_entities(doc))
    message.set("entities",
                message.get("entities", []) + extracted,
                add_to_output=True)
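# The extract_entities helper referenced above is not shown here. A minimal
# sketch of what it might look like, assuming the parse response exposes an
# `entities` collection with `text`, `label`, and character-offset fields
# (all field names are assumptions about the proto schema, not confirmed):
def extract_entities(self, doc):
    # Map each entity message to a Rasa-style entity dict;
    # add_extractor_name() then stamps the "extractor" key onto each dict.
    return [
        {
            "start": ent.start,
            "end": ent.end,
            "value": ent.text,
            "entity": ent.label,
        }
        for ent in doc.entities
    ]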
def run():
    serv = ServiceClient(nlp_service, 'CabochaNlpProcsStub',
                         'localhost', 50051)
    # Insert example metadata
    metadata = [('ip', '127.0.0.1')]
    # "お皿を二枚ください。" -- "Two plates, please."
    response = serv.Tokenizer(
        nlp_messages.NlText(text="お皿を二枚ください。"),
        metadata=metadata
    )
    if response:
        print("response:")
        tokens = tokenize_msg("お皿を二枚ください。", response)
        for t in tokens:
            print(t.text, t.offset)
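# tokenize_msg is referenced above but not defined here. A minimal sketch,
# assuming the tokenizer response exposes an iterable `tokens` whose items
# carry the surface form in a `text` field (a hypothetical schema):
from collections import namedtuple

Token = namedtuple("Token", ["text", "offset"])

def tokenize_msg(text, response):
    # Recover character offsets by locating each surface form in the
    # original text, scanning left to right so repeated tokens resolve
    # to successive positions.
    tokens, pos = [], 0
    for tok in response.tokens:
        offset = text.find(tok.text, pos)
        if offset == -1:
            continue  # token not found verbatim; skip it
        tokens.append(Token(tok.text, offset))
        pos = offset + len(tok.text)
    return tokens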
def doc_for_text(self, text):
    request = nlp_messages.NlText(text=text)
    response = self.nlp.Tokenizer(request)
    return response
def extract(self, text):
    request = nlp_messages.NlTokenizerRequest(
        text=nlp_messages.NlText(text=text))
    response = self.client.EntityExtractor(request)
    return response
def get_deps(self, text):
    request = nlp_messages.NlTexts(texts=[nlp_messages.NlText(text=text)])
    response = self.client.GetDependencyGraph(request)
    return response
def tokenize(self, text):
    request = nlp_messages.NlTokenizerRequest(
        text=nlp_messages.NlText(text=text))
    response = self.client.Tokenizer(request)
    return response
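# The methods above assume self.client (or self.nlp) is already bound to a
# service stub. A minimal sketch of that wiring with plain grpcio, reusing
# the CabochaNlpProcsStub name from run() above; the NlpClient class name
# and the host/port defaults are assumptions:
import grpc

class NlpClient(object):
    def __init__(self, host='localhost', port=50051):
        # Insecure channel, matching the localhost:50051 endpoint in run().
        channel = grpc.insecure_channel('%s:%d' % (host, port))
        self.client = nlp_service.CabochaNlpProcsStub(channel)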