def _create_dependencies(input_pack, tokens, result):
    # Relation labels and 1-based head indices predicted by the parser.
    deps = result['predicted_dependencies']
    heads = result['predicted_heads']
    for i, token in enumerate(tokens):
        # Link each token to its predicted head and record the relation type.
        relation = Dependency(input_pack,
                              parent=tokens[heads[i] - 1],
                              child=token)
        relation.set_fields(rel_type=deps[i])
        input_pack.add_or_get_entry(relation)
def _process(self, input_pack: DataPack):
    doc = input_pack.text
    end_pos = 0

    # sentence parsing
    sentences = self.nlp(doc).sentences  # type: ignore

    # Iterating through stanfordnlp sentence objects
    for sentence in sentences:
        begin_pos = doc.find(sentence.words[0].text, end_pos)
        end_pos = doc.find(sentence.words[-1].text, begin_pos) + len(
            sentence.words[-1].text)
        sentence_entry = Sentence(input_pack, begin_pos, end_pos)
        input_pack.add_or_get_entry(sentence_entry)

        tokens: List[Token] = []
        if "tokenize" in self.processors:
            offset = sentence_entry.span.begin
            end_pos_word = 0

            # Iterating through stanfordnlp word objects
            for word in sentence.words:
                begin_pos_word = sentence_entry.text. \
                    find(word.text, end_pos_word)
                end_pos_word = begin_pos_word + len(word.text)
                token = Token(input_pack,
                              begin_pos_word + offset,
                              end_pos_word + offset)

                if "pos" in self.processors:
                    token.set_fields(pos=word.pos)
                    token.set_fields(upos=word.upos)
                    token.set_fields(xpos=word.xpos)

                if "lemma" in self.processors:
                    token.set_fields(lemma=word.lemma)

                tokens.append(token)
                input_pack.add_or_get_entry(token)

        # For each sentence, get the dependency relations among tokens
        if "depparse" in self.processors:
            # Iterating through token entries in current sentence
            for token, word in zip(tokens, sentence.words):
                child = token  # current token
                parent = tokens[word.governor - 1]  # Root token
                relation_entry = Dependency(input_pack, parent, child)
                relation_entry.set_fields(
                    rel_type=word.dependency_relation)
                input_pack.add_or_get_entry(relation_entry)
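
# Usage sketch (illustrative, not part of the original file): a minimal example
# of how a processor with a _process method like the one above is typically
# driven inside a Forte pipeline and how the resulting Sentence/Token entries
# can be read back from the DataPack. The reader class, processor class name,
# and construction arguments below are assumptions for illustration and may
# differ from the actual project layout.
#
#     from forte.pipeline import Pipeline
#     from forte.data.readers import StringReader
#
#     pipeline = Pipeline()
#     pipeline.set_reader(StringReader())
#     pipeline.add_processor(StanfordNLPProcessor(models_path))  # hypothetical
#     pipeline.initialize()
#
#     pack = pipeline.process("This processor wraps stanfordnlp output.")
#     for sent in pack.get(Sentence):
#         for tok in pack.get(Token, sent):
#             print(tok.text, tok.pos)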