def _process(self, input_pack: MultiPack):
    """Run extractive question answering over every document pack in the
    multi pack and link each predicted answer span back to the question.

    For each pack other than the question pack, the text of its single
    ``self.configs.entry_type`` entry is used as a QA context. The full
    text of the question pack is paired with every context, the configured
    extractor (a HuggingFace-style QA pipeline) is invoked in one batched
    call, and a ``Phrase`` covering each predicted answer span is created
    in the corresponding document pack. A ``MultiPackLink`` from the first
    ``Sentence`` of the question pack to each answer phrase is added to
    ``input_pack``.

    Args:
        input_pack: The multi pack holding one question pack (named by
            ``self.configs.question_pack_name``) and one or more
            document packs to search for answers.
    """
    context_list = []
    doc_id_list = []
    for doc_id in input_pack.pack_names:
        # Skip the question itself; only documents are QA contexts.
        if doc_id == self.configs.question_pack_name:
            continue
        pack = input_pack.get_pack(doc_id)
        context_list.append(pack.get_single(self.configs.entry_type).text)
        doc_id_list.append(doc_id)

    # Nothing to answer against; the extractor would fail on empty input.
    if not context_list:
        return

    question_pack = input_pack.get_pack(self.configs.question_pack_name)
    first_question = question_pack.get_single(Sentence)
    # NOTE(review): the whole question-pack text (not just the first
    # sentence) is used as the question for every context — confirm this
    # is intended when the question pack holds multiple sentences.
    question_list = [question_pack.text] * len(context_list)

    result_collection = self.extractor(
        context=context_list,
        question=question_list,
        max_answer_len=self.configs.max_answer_len,
        handle_impossible_answer=self.configs.handle_impossible_answer,
    )

    # Results come back in the same order as the contexts, so the i-th
    # result belongs to the i-th document pack.
    for i, result in enumerate(result_collection):
        start = result["start"]
        end = result["end"]
        doc_pack = input_pack.get_pack(doc_id_list[i])
        ans_phrase = Phrase(pack=doc_pack, begin=start, end=end)
        input_pack.add_entry(
            MultiPackLink(input_pack, first_question, ans_phrase))
def pack(self, data_pack: MultiPack, output_dict):
    """
    Write the prediction results back to datapack. If :attr:`_overwrite`
    is `True`, write the predicted ner to the original tokens.
    Otherwise, create a new set of tokens and write the predicted ner
    to the new tokens (usually use this configuration for evaluation.)

    Args:
        data_pack: The multi pack containing both the input pack (read
            via ``self.input_pack_name``) and the output pack (written
            via ``self.output_pack_name``).
        output_dict: Prediction results; must contain
            ``"input_sents_tids"`` (tids of source sentences in the
            input pack) and ``"output_sents"`` (generated sentence
            strings), aligned by position.
    """
    assert output_dict is not None
    output_pack = data_pack.get_pack(self.output_pack_name)

    input_sent_tids = output_dict["input_sents_tids"]
    output_sentences = output_dict["output_sents"]

    # Accumulate new text locally; the pack text is committed once at
    # the end via set_text.
    text = output_pack.text
    input_pack = data_pack.get_pack(self.input_pack_name)
    for input_id, output_sentence in zip(input_sent_tids, output_sentences):
        # BUG FIX: offsets must track the growing local buffer, not
        # output_pack.text, which does not change until set_text() is
        # called after the loop. Using the pack text gave every
        # generated Sentence the same begin offset.
        offset = len(text)
        sent = Sentence(output_pack, offset, offset + len(output_sentence))
        text += output_sentence + "\n"

        input_sent = input_pack.get_entry(input_id)
        cross_link = MultiPackLink(data_pack, input_sent, sent)
        data_pack.add_entry(cross_link)
        # We may also consider adding two link with opposite directions
        # Here the unidirectional link indicates the generation dependency
    output_pack.set_text(text)
def test_multipack_entries(self):
    """
    Test some multi pack entry.

    Exercises token creation in each member pack, cross-pack linking of
    identical tokens, two equivalent ways of iterating the links, and
    deletion of a link.
    """
    # Expected (parent, child) pairs for tokens shared by both packs.
    expected_pairs = [
        ("This", "This"), ("pack", "pack"), ("contains", "contains"),
        ("some", "some"), ("sample", "sample"), ("data.", "data.")]

    # 1. Add tokens to each pack.
    for pack in self.multi_pack.packs:
        _space_token(pack)

    left_tokens = [t.text for t in self.multi_pack.packs[0].get(Token)]
    right_tokens = [t.text for t in self.multi_pack.packs[1].get(Token)]

    self.assertListEqual(
        left_tokens,
        ["This", "pack", "contains", "some", "sample", "data."])
    self.assertListEqual(
        right_tokens,
        ["This", "pack", "contains", "some", "other", "sample", "data."])

    # 2. Link the same words from two packs.
    token: Annotation
    left_tokens = {
        token.text: token for token in self.multi_pack.packs[0].get(Token)}
    right_tokens = {
        token.text: token for token in self.multi_pack.packs[1].get(Token)}

    for key, lt in left_tokens.items():
        if key in right_tokens:
            rt = right_tokens[key]
            self.multi_pack.add_entry(MultiPackLink(
                self.multi_pack, lt, rt))

    # One way to link tokens: iterate the links attribute directly.
    linked_tokens = [
        (link.get_parent().text, link.get_child().text)
        for link in self.multi_pack.links]
    self.assertListEqual(linked_tokens, expected_pairs)

    # Another way to get the links: via the generic get() interface.
    linked_tokens = [
        (link.get_parent().text, link.get_child().text)
        for link in self.multi_pack.get(MultiPackLink)]
    self.assertListEqual(linked_tokens, expected_pairs)

    # 3. Test deletion

    # Delete the second link; the remaining links keep their order.
    self.multi_pack.delete_entry(self.multi_pack.links[1])

    linked_tokens = [
        (link.get_parent().text, link.get_child().text)
        for link in self.multi_pack.links]
    self.assertListEqual(
        linked_tokens,
        [("This", "This"), ("contains", "contains"), ("some", "some"),
         ("sample", "sample"), ("data.", "data.")])