def _parse_pack(self, nli_instance) -> Iterator[PackType]:
    """Build one data pack from a single NLI example.

    The pack text is the premise followed by the hypothesis, each
    terminated by a newline. A ``Premise`` and a ``Hypothesis`` annotation
    cover the two sentences (newlines excluded) and an ``NLIPair`` links
    them, with ``entailment`` holding a one-hot encoding of the label.

    Args:
        nli_instance: A 4-item sequence ``(pair_id, source, target, label)``
            where ``label`` is 0 (entailment), 1 (neutral) or
            2 (contradiction).

    Yields:
        The single pack built from the instance.

    Raises:
        ValueError: If ``label`` is not 0, 1 or 2.
    """
    pair_id, premise_text, hypothesis_text, label = nli_instance

    pack = DataPack(pair_id)
    full_text = premise_text + "\n" + hypothesis_text + "\n"
    pack.set_text(full_text)

    # The hypothesis starts right after the newline that ends the premise
    # and stops before the trailing newline of the whole text.
    hypothesis_begin = len(premise_text) + 1
    hypothesis_end = len(full_text) - 1
    premise = Premise(pack, 0, len(premise_text))
    hypothesis = Hypothesis(pack, hypothesis_begin, hypothesis_end)

    pair = NLIPair(pack)
    pair.set_parent(premise)
    pair.set_child(hypothesis)

    pair.entailment = dict.fromkeys(
        ("entailment", "neutral", "contradiction"), 0
    )
    label_relations = (
        (2, "contradiction"),
        (0, "entailment"),
        (1, "neutral"),
    )
    for value, relation in label_relations:
        if label == value:
            pair.entailment[relation] = 1
            break
    else:
        raise ValueError("Unknown label value.")

    yield pack
def _parse_pack(
    self, doc_data: Tuple[Dict[str, str], Dict[str, List[state_type]]]
) -> Iterator[DataPack]:
    """Build one data pack for a wiki document.

    Args:
        doc_data: A pair ``(str_data, node_data)``. ``str_data`` supplies
            the ``'doc_name'``, ``'text'`` and ``'oldid'`` values;
            ``node_data`` supplies the ``'struct'`` and ``'links'`` lists.

    Yields:
        A pack holding the document text, a ``WikiPage`` entry spanning the
        whole text and, when present, the structure and anchor-link
        information.
    """
    str_data, node_data = doc_data

    pack = DataPack()

    # Use the redirect target as the document name when one is recorded.
    raw_name: str = str_data['doc_name']
    doc_name: str = (
        self.redirects[raw_name] if raw_name in self.redirects else raw_name
    )

    full_text: str = str_data['text']
    pack.set_text(full_text)

    page = WikiPage(pack, 0, len(full_text))
    pack.add_entry(page)
    page.set_page_id(str_data['oldid'])
    page.set_page_name(doc_name)

    struct_nodes = node_data['struct']
    if not struct_nodes:
        logging.warning('Structure info for %s not found.', doc_name)
    else:
        add_struct(pack, struct_nodes)

    link_nodes = node_data['links']
    if not link_nodes:
        logging.warning('Links for [%s] not found.', doc_name)
    else:
        add_anchor_links(pack, link_nodes, self.redirects)

    pack.meta.doc_id = doc_name

    yield pack
def _process(self, input_pack: MultiPack):
    r"""Retrieve documents for the query stored in ``input_pack``.

    The query is read from the pack named ``self.config.query_pack_name``
    and passed to ``self.index.search``. Each hit becomes a new pack whose
    text is the hit's ``self.config.field`` value, covered by a single
    ``Document`` entry, and the hit's score is recorded on the query
    through ``update_results``. The new packs are added to ``input_pack``
    under names of the form ``<response_pack_name_prefix>_<hit index>``.

    Args:
        input_pack: A multipack containing the query as a pack.
    """
    query_pack = input_pack.get_pack(self.config.query_pack_name)

    # Only the first Query entry found in the query pack is used.
    query_entries = list(query_pack.get_entries(Query))
    first_query = query_entries[0]

    search_response = self.index.search(first_query.value)

    retrieved = {}
    for rank, hit in enumerate(search_response["hits"]["hits"]):
        source = hit["_source"]
        doc_id = source["doc_id"]
        first_query.update_results({doc_id: hit["_score"]})

        result_pack = DataPack(doc_id=doc_id)
        content = source[self.config.field]
        doc_entry = Document(pack=result_pack, begin=0, end=len(content))
        result_pack.add_entry(doc_entry)
        result_pack.set_text(content)

        pack_name = f"{self.config.response_pack_name_prefix}_{rank}"
        retrieved[pack_name] = result_pack

    input_pack.update_pack(retrieved)
def _process(self, input_pack: MultiPack):
    """Translate the text of the input pack and store it as a new pack.

    The text of the pack named ``self.in_pack_name`` is posted to
    ``self.microsoft_translate_url`` (API version 3.0) with
    ``self.src_language`` as source and ``self.target_language`` as target.
    The first translation of the first response item becomes the text of a
    new pack, covered by one ``Document`` and one ``Utterance`` entry, which
    is added to ``input_pack`` under ``self.out_pack_name``.

    Args:
        input_pack: The multipack holding the text to translate.

    Raises:
        RuntimeError: If the service answers with a status other than 200.
            The message is the service's own error message when the body
            carries one, otherwise the status code and the raw body.
    """
    query = input_pack.get_pack(self.in_pack_name).text

    params = '?' + urlencode(
        {
            'api-version': '3.0',
            'from': self.src_language,
            'to': [self.target_language]
        }, doseq=True)
    microsoft_constructed_url = self.microsoft_translate_url + params

    # NOTE(review): no timeout is passed, so this call can block for as
    # long as the server keeps the connection open -- consider adding one.
    response = requests.post(microsoft_constructed_url,
                             headers=self.microsoft_headers,
                             json=[{
                                 "text": query
                             }])

    if response.status_code != 200:
        # An error body is not guaranteed to be JSON shaped like
        # {"error": {"message": ...}} (e.g. a proxy's HTML error page), so
        # fall back to the raw response instead of masking the HTTP
        # failure with a decoding or lookup error.
        try:
            message = response.json()['error']['message']
        except (ValueError, KeyError, TypeError):
            message = (
                f"Translation request failed with HTTP status "
                f"{response.status_code}: {response.text}"
            )
        raise RuntimeError(message)

    text = response.json()[0]["translations"][0]["text"]

    pack = DataPack()
    document = Document(pack, 0, len(text))
    utterance = Utterance(pack, 0, len(text))
    pack.add_entry(document)
    pack.add_entry(utterance)
    pack.set_text(text=text)

    input_pack.update_pack({self.out_pack_name: pack})
def _process(self, input_pack: DataPack):
    """Append the wiki page title to the pack text and annotate it.

    The title is the ``page_name`` of the pack's single ``WikiPage`` entry
    with underscores replaced by spaces. It is added on a new line after
    the existing text, and a ``WikiArticleTitle`` entry is created over
    exactly the appended title.

    Args:
        input_pack: The pack to extend; its text is replaced in place.
    """
    page_title = input_pack.get_single(WikiPage).page_name.replace("_", " ")

    body = input_pack.text
    # The title begins one character after the old text because of the
    # newline separator.
    title_begin = len(body) + 1
    title_end = title_begin + len(page_title)

    input_pack.set_text(body + "\n" + page_title)
    WikiArticleTitle(input_pack, title_begin, title_end)
def create_nli(pack: DataPack, premise_text, hypothesis_text):
    """Fill ``pack`` with a premise/hypothesis pair.

    The pack text is set to the premise and the hypothesis, each followed
    by a newline. A ``Premise`` and a ``Hypothesis`` entry cover the two
    sentences (newlines excluded) and an ``NLIPair`` links the premise as
    parent to the hypothesis as child.

    Args:
        pack: The pack that receives the text and the entries.
        premise_text: Text of the premise.
        hypothesis_text: Text of the hypothesis.
    """
    # The trailing empty item yields the final newline.
    full_text = "\n".join((premise_text, hypothesis_text, ""))
    pack.set_text(full_text)

    premise_end = len(premise_text)
    # Skip the newline that separates the two sentences.
    hypothesis_begin = premise_end + 1
    hypothesis_end = hypothesis_begin + len(hypothesis_text)

    premise = Premise(pack, 0, premise_end)
    hypothesis = Hypothesis(pack, hypothesis_begin, hypothesis_end)

    nli_pair = NLIPair(pack)
    nli_pair.set_parent(premise)
    nli_pair.set_child(hypothesis)
def _process(self, input_pack: MultiPack):
    """Search the index with the pack's query and attach the results.

    The first ``Query`` entry of the pack named
    ``self.config.query_pack_name`` is searched with ``self.index.search``
    for ``self.k`` results. Each retrieved document text becomes a new pack
    covered by a single ``Document`` entry, and the packs are added to
    ``input_pack`` under the names taken from
    ``self.config.response_pack_name``.

    Args:
        input_pack: A multipack containing the query as a pack.
    """
    query_pack = input_pack.get_pack(self.config.query_pack_name)
    first_query = list(query_pack.get_entries(Query))[0]
    search_results = self.index.search(first_query.value, self.k)

    # Flatten the nested results, keeping the item at index 1 of each
    # record as the document text.
    documents = []
    for result in search_results:
        documents.extend(record[1] for record in result)

    response_packs = {}
    for position, doc_text in enumerate(documents):
        response_pack = DataPack()
        doc_entry = Document(
            pack=response_pack, begin=0, end=len(doc_text)
        )
        response_pack.add_entry(doc_entry)
        response_pack.set_text(doc_text)
        # NOTE(review): indexing by position presumes response_pack_name
        # is a sequence with one name per retrieved document -- confirm
        # against the processor's configuration.
        response_packs[self.config.response_pack_name[position]] = (
            response_pack
        )

    input_pack.update_pack(response_packs)
def _process(self, input_pack: MultiPack):
    """Search the index with the pack's query and attach the results.

    The first ``Query`` entry of the pack named ``"pack"`` is searched with
    ``self.index.search`` for ``self.k`` results. Each retrieved document
    text becomes a new pack covered by a single ``Document`` entry, and the
    packs are added to ``input_pack`` as ``doc_0``, ``doc_1``, ...

    Args:
        input_pack: A multipack containing the query in the pack ``"pack"``.
    """
    query_pack = input_pack.get_pack("pack")
    first_query = list(query_pack.get_entries(Query))[0]
    search_results = self.index.search(first_query.value, self.k)

    # Flatten the nested results, keeping the item at index 1 of each
    # record as the document text.
    documents = []
    for result in search_results:
        documents.extend(record[1] for record in result)

    response_packs = {}
    for index, doc_text in enumerate(documents):
        response_pack = DataPack()
        doc_entry = Document(
            pack=response_pack, begin=0, end=len(doc_text)
        )
        response_pack.add_entry(doc_entry)
        response_pack.set_text(doc_text)
        response_packs[f"doc_{index}"] = response_pack

    input_pack.update_pack(response_packs)
def _process(self, input_pack: DataPack):
    """Lower-case the pack text without changing its length.

    ``str.lower()`` is used as-is whenever it preserves the number of
    characters. A few code points expand when lower-cased (U+0130 becomes
    ``"i"`` followed by U+0307), which would shift every character offset
    after them. In that case the text is lower-cased one code point at a
    time and each expansion is cut to its first code point, so the result
    always has the same length as the original text.

    Args:
        input_pack: The pack whose text is replaced by its lower-cased
            form.
    """
    original = input_pack.text
    lowered = original.lower()

    if len(lowered) != len(original):
        # Per-character fallback: it gives up the context-sensitive rules
        # of str.lower() (such as the Greek final sigma) in exchange for a
        # strict one-to-one mapping of positions.
        lowered = "".join(char.lower()[:1] for char in original)

    input_pack.set_text(lowered)