Beispiel #1
0
    def _parse_pack(self, nli_instance) -> Iterator[PackType]:
        """Turn one NLI instance into a data pack.

        The pack text is the premise followed by the hypothesis, each
        terminated by a newline. A ``Premise`` and a ``Hypothesis`` annotation
        cover the two sentences, and an ``NLIPair`` links them with a one-hot
        ``entailment`` dictionary.

        Args:
            nli_instance: A 4-item sequence unpacked as
                ``(pair_id, source, target, label)``.

        Yields:
            The populated pack.

        Raises:
            ValueError: If ``label`` is not equal to 0, 1 or 2.
        """
        pair_id, premise_text, hypothesis_text, label = nli_instance

        pack = DataPack(pair_id)
        pack.set_text(premise_text + "\n" + hypothesis_text + "\n")

        # The hypothesis starts right after the newline that ends the premise
        # and stops before the trailing newline of the pack text.
        premise_end = len(premise_text)
        hypothesis_begin = premise_end + 1
        hypothesis_end = hypothesis_begin + len(hypothesis_text)

        premise = Premise(pack, 0, premise_end)
        hypothesis = Hypothesis(pack, hypothesis_begin, hypothesis_end)

        pair = NLIPair(pack)
        pair.set_parent(premise)
        pair.set_child(hypothesis)

        pair.entailment = {"entailment": 0, "neutral": 0, "contradiction": 0}

        # Label values are compared in this fixed order; the first match wins.
        label_names = ((2, "contradiction"), (0, "entailment"), (1, "neutral"))
        for value, relation in label_names:
            if label == value:
                pair.entailment[relation] = 1
                break
        else:
            raise ValueError("Unknown label value.")

        yield pack
    def _parse_pack(
        self, doc_data: Tuple[Dict[str, str], Dict[str, List[state_type]]]
    ) -> Iterator[DataPack]:
        """Build a data pack for one Wikipedia page.

        Args:
            doc_data: A pair ``(str_data, node_data)``. ``str_data`` is read
                for the keys ``'doc_name'``, ``'text'`` and ``'oldid'``;
                ``node_data`` is read for the keys ``'struct'`` and
                ``'links'``.

        Yields:
            A pack holding the page text, a ``WikiPage`` annotation spanning
            the whole text, and whatever ``add_struct`` and
            ``add_anchor_links`` attach to it.
        """
        fields, nodes = doc_data

        pack = DataPack()

        # Follow a redirect when one is registered for this page name.
        raw_name: str = fields['doc_name']
        page_name: str = self.redirects.get(raw_name, raw_name)

        body: str = fields['text']
        pack.set_text(body)

        page = WikiPage(pack, 0, len(body))
        pack.add_entry(page)
        page.set_page_id(fields['oldid'])
        page.set_page_name(page_name)

        # Missing structure or link data is only reported, not treated as an
        # error.
        structure = nodes['struct']
        if len(structure) == 0:
            logging.warning('Structure info for %s not found.', page_name)
        else:
            add_struct(pack, structure)

        links = nodes['links']
        if len(links) == 0:
            logging.warning('Links for [%s] not found.', page_name)
        else:
            add_anchor_links(pack, links, self.redirects)

        pack.meta.doc_id = page_name

        yield pack
Beispiel #3
0
    def _process(self, input_pack: MultiPack):
        r"""Searches ElasticSearch indexer to fetch documents for a query. This
        query should be contained in the input multipack with name
        `self.config.query_pack_name`.

        This method adds new packs to `input_pack` containing the retrieved
        results. Each result is added as a `ft.onto.base_ontology.Document`.
        The score of every hit is also recorded on the query entry, keyed by
        the hit's ``doc_id``.

        Args:
             input_pack: A multipack containing query as a pack.

        Raises:
            IndexError: If the query pack contains no `Query` entry.
        """
        query_pack = input_pack.get_pack(self.config.query_pack_name)

        # ElasticSearchQueryCreator adds a Query entry to query pack. We now
        # fetch it as the first element.
        first_query = list(query_pack.get_entries(Query))[0]
        results = self.index.search(first_query.value)
        hits = results["hits"]["hits"]

        packs = {}
        for idx, hit in enumerate(hits):
            source = hit["_source"]
            doc_id = source["doc_id"]
            first_query.update_results({doc_id: hit["_score"]})
            content = source[self.config.field]

            pack = DataPack(doc_id=doc_id)
            # Set the text before creating the annotation so that its span
            # always refers to text that is already present in the pack.
            pack.set_text(content)
            document = Document(pack=pack, begin=0, end=len(content))
            pack.add_entry(document)

            packs[f"{self.config.response_pack_name_prefix}_{idx}"] = pack

        input_pack.update_pack(packs)
    def _process(self, input_pack: MultiPack):
        """Translate the text of the input pack and store it as a new pack.

        The text of the pack named ``self.in_pack_name`` is posted to the
        translation endpoint at ``self.microsoft_translate_url``. The first
        translation in the response becomes the text of a new pack, covered by
        one ``Document`` and one ``Utterance`` annotation, which is added to
        ``input_pack`` under ``self.out_pack_name``.

        Args:
            input_pack: The multipack holding the text to translate.

        Raises:
            RuntimeError: If the service answers with a status code other
                than 200.
        """
        query = input_pack.get_pack(self.in_pack_name).text
        params = '?' + urlencode(
            {
                'api-version': '3.0',
                'from': self.src_language,
                'to': [self.target_language]
            },
            doseq=True)
        microsoft_constructed_url = self.microsoft_translate_url + params

        # NOTE(review): no timeout is passed, so this call can block for as
        # long as the server keeps the connection open.
        response = requests.post(microsoft_constructed_url,
                                 headers=self.microsoft_headers,
                                 json=[{
                                     "text": query
                                 }])

        if response.status_code != 200:
            # The error body is not guaranteed to be JSON of the expected
            # shape (e.g. a proxy error page), so fall back to the status code
            # rather than masking the failure with a decode/key error.
            try:
                message = response.json()['error']['message']
            except (ValueError, KeyError, TypeError):
                message = (
                    "Translation request failed with HTTP status "
                    f"{response.status_code}."
                )
            raise RuntimeError(message)

        text = response.json()[0]["translations"][0]["text"]

        pack = DataPack()
        # Set the text before creating the annotations so that their spans
        # always refer to text that is already present in the pack.
        pack.set_text(text=text)

        document = Document(pack, 0, len(text))
        utterance = Utterance(pack, 0, len(text))
        pack.add_entry(document)
        pack.add_entry(utterance)

        input_pack.update_pack({self.out_pack_name: pack})
Beispiel #5
0
 def _process(self, input_pack: DataPack):
     """Append the page title to the pack text and annotate it.

     The title is the ``page_name`` of the pack's single ``WikiPage`` with
     underscores replaced by spaces. It is appended on a new line, and a
     ``WikiArticleTitle`` annotation is created over the appended span.

     Args:
         input_pack: The pack to extend.
     """
     wiki_page = input_pack.get_single(WikiPage)
     title = wiki_page.page_name.replace("_", " ")

     body = input_pack.text
     # Skip the newline separator inserted between the body and the title.
     begin = len(body) + 1
     end = begin + len(title)

     input_pack.set_text(body + "\n" + title)
     WikiArticleTitle(input_pack, begin, end)
Beispiel #6
0
def create_nli(pack: DataPack, premise_text, hypothesis_text):
    """Populate ``pack`` with a premise/hypothesis pair.

    The pack text becomes the premise and the hypothesis, each terminated by
    a newline. A ``Premise`` and a ``Hypothesis`` annotation cover the two
    sentences and an ``NLIPair`` links them as parent and child.

    Args:
        pack: The pack to fill.
        premise_text: Text of the premise.
        hypothesis_text: Text of the hypothesis.
    """
    pack.set_text(premise_text + "\n" + hypothesis_text + "\n")

    # The hypothesis starts right after the newline that ends the premise and
    # stops before the trailing newline of the pack text.
    premise_end = len(premise_text)
    hypothesis_begin = premise_end + 1
    hypothesis_end = hypothesis_begin + len(hypothesis_text)

    premise = Premise(pack, 0, premise_end)
    hypothesis = Hypothesis(pack, hypothesis_begin, hypothesis_end)

    nli_pair = NLIPair(pack)
    nli_pair.set_parent(premise)
    nli_pair.set_child(hypothesis)
Beispiel #7
0
    def _process(self, input_pack: MultiPack):
        """Search the index for the query and add the results as new packs.

        The query is the first ``Query`` entry of the pack named
        ``self.config.query_pack_name``. Every retrieved document becomes a
        new pack with a ``Document`` annotation spanning its whole text; the
        i-th pack is stored under ``self.config.response_pack_name[i]``.

        Args:
            input_pack: A multipack containing the query pack.

        Raises:
            IndexError: If the query pack contains no ``Query`` entry, or if
                more documents are retrieved than
                ``self.config.response_pack_name`` has items.
        """
        query_pack = input_pack.get_pack(self.config.query_pack_name)
        first_query = list(query_pack.get_entries(Query))[0]
        results = self.index.search(first_query.value, self.k)
        # Flatten the nested results, keeping the second item of each result.
        # NOTE(review): presumably each item is an (id, text) pair - confirm
        # against the indexer.
        documents = [r[1] for result in results for r in result]

        packs = {}
        for i, doc in enumerate(documents):
            pack = DataPack()
            # Set the text before creating the annotation so that its span
            # always refers to text that is already present in the pack.
            pack.set_text(doc)
            document = Document(pack=pack, begin=0, end=len(doc))
            pack.add_entry(document)
            # NOTE(review): presumably response_pack_name is a sequence of
            # pack names, one per expected result - confirm against config.
            packs[self.config.response_pack_name[i]] = pack

        input_pack.update_pack(packs)
Beispiel #8
0
    def _process(self, input_pack: MultiPack):
        """Search the index for the query and add the results as new packs.

        The query is the first ``Query`` entry of the pack named ``"pack"``.
        Every retrieved document becomes a new pack with a ``Document``
        annotation spanning its whole text; the packs are stored under the
        names ``doc_0``, ``doc_1``, ... in retrieval order.

        Args:
            input_pack: A multipack containing the query pack.

        Raises:
            IndexError: If the query pack contains no ``Query`` entry.
        """
        query_pack = input_pack.get_pack("pack")
        first_query = list(query_pack.get_entries(Query))[0]
        results = self.index.search(first_query.value, self.k)
        # Flatten the nested results, keeping the second item of each result.
        # NOTE(review): presumably each item is an (id, text) pair - confirm
        # against the indexer.
        documents = [r[1] for result in results for r in result]

        packs = {}
        for counter, doc in enumerate(documents):
            pack = DataPack()
            # Set the text before creating the annotation so that its span
            # always refers to text that is already present in the pack.
            pack.set_text(doc)
            document = Document(pack=pack, begin=0, end=len(doc))
            pack.add_entry(document)
            packs[f"doc_{counter}"] = pack

        input_pack.update_pack(packs)
 def _process(self, input_pack: DataPack):
     """Replace the pack text with its lower-cased form.

     The length of the text is preserved so that character offsets into the
     pack text stay valid after the replacement.

     Args:
         input_pack: The pack whose text is lower-cased in place.
     """
     text = input_pack.text
     lowered = text.lower()
     if len(lowered) != len(text):
         # A few code points (e.g. U+0130) lower-case to more than one
         # character, which would shift every later offset. In that case
         # lower-case character by character and leave the expanding
         # characters unchanged.
         lowered = "".join(
             ch.lower() if len(ch.lower()) == 1 else ch for ch in text
         )
     input_pack.set_text(lowered)