コード例 #1
0
ファイル: reader.py プロジェクト: williamwhe/forte
 def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
     """Turn one tab-separated record into a single-pack ``MultiPack``.

     The record is split on tab characters. The first column is used as
     the ``doc_id`` of a new ``DataPack`` and the second column as its
     text; any further columns are ignored. A ``Document`` covering the
     full length of the text is added to the pack, and the pack is stored
     in the ``MultiPack`` under ``self.config.pack_name``.

     Args:
         data_source: A record with at least two tab-separated columns.

     Yields:
         Exactly one ``MultiPack`` containing the new ``DataPack``.

     Raises:
         IndexError: If ``data_source`` contains no tab character, since
             there is then no second column to read.
     """
     columns = data_source.split("\t")
     doc_pack = DataPack(doc_id=columns[0])
     wrapper = MultiPack()

     body = columns[1]
     # The document entry is added first and the text is set afterwards.
     doc_pack.add_entry(Document(pack=doc_pack, begin=0, end=len(body)))
     doc_pack.set_text(body)

     wrapper.update_pack({self.config.pack_name: doc_pack})
     yield wrapper
コード例 #2
0
    def _process(self, input_pack: MultiPack):
        """Search the index with the first query and add the hits as packs.

        The pack named ``self.configs.query_pack_name`` is fetched from
        ``input_pack`` and the ``value`` of its first ``Query`` entry is
        passed to ``self.index.search`` together with ``self.k``. Element 1
        of every hit in the nested search result is used as the text of a
        new pack added to ``input_pack``, and each new pack also gets a
        ``Document`` spanning its whole text. Finally the new packs are
        passed to ``input_pack.update_pack`` keyed by
        ``self.configs.response_pack_name_prefix`` followed by ``_<index>``.

        Args:
            input_pack: The multi pack holding the query pack; it is
                modified in place.

        Raises:
            IndexError: If the query pack yields no ``Query`` entry.
        """
        source_pack = input_pack.get_pack(self.configs.query_pack_name)
        queries = list(source_pack.get(Query))
        hits = self.index.search(queries[0].value, self.k)

        # Flatten the nested result, keeping element 1 of every hit
        # (presumably the document text, since it is used as pack text
        # below -- confirm against the index implementation).
        texts = []
        for group in hits:
            for hit in group:
                texts.append(hit[1])

        response_packs = {}
        for position, text in enumerate(texts):
            new_pack = input_pack.add_pack()
            new_pack.set_text(text)

            # The Document is constructed but not kept here; presumably
            # construction attaches it to the pack -- TODO confirm.
            Document(new_pack, 0, len(text))
            name = self.configs.response_pack_name_prefix + f"_{position}"
            response_packs[name] = new_pack

        input_pack.update_pack(response_packs)
コード例 #3
0
 def _parse_pack(self,
                 file_path: str) -> Iterator[DataPack]:  # type: ignore
     """Yield one single-sentence ``MultiPack`` per non-blank line.

     The file is opened as UTF-8 text. Every line is stripped of
     surrounding whitespace, and lines that are empty afterwards are
     skipped. For each remaining line, a ``DataPack`` whose ``doc_id`` is
     ``file_path`` receives a ``Sentence`` spanning the whole line and the
     line itself as text, ``self.count`` is incremented, and the pack is
     yielded inside a ``MultiPack`` under the name ``"pack"``.

     Args:
         file_path: Path of the text file to read.

     Yields:
         One ``MultiPack`` per non-blank line. Note that the signature is
         annotated as ``Iterator[DataPack]`` although the yielded objects
         are ``MultiPack`` instances.
     """
     with open(file_path, "r", encoding="utf8") as lines:
         for raw_line in lines:
             # Both containers are created for every line, including
             # blank ones, before the line is inspected.
             multi_pack = MultiPack()
             data_pack = DataPack(doc_id=file_path)
             text = raw_line.strip()
             if text:
                 data_pack.add_entry(Sentence(data_pack, 0, len(text)))
                 data_pack.set_text(text)
                 self.count += 1
                 multi_pack.update_pack({"pack": data_pack})
                 yield multi_pack  # type: ignore
コード例 #4
0
    def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
        """
        Wrap a raw utterance string, plus any stored context, in a MultiPack.

        For each of the resource keys ``"user_utterance"`` and
        ``"bot_utterance"`` that has a truthy value in ``self.resource``,
        the last element of that value is added to the MultiPack under the
        same key. A new DataPack is then created for ``data_source``: it
        gets an ``Utterance`` spanning the full input length, its text is
        set with ``self.text_replace_operation`` as the replace function,
        and it is stored under ``self.config.pack_name``.

        Args:
            data_source: str that contains text of a document

        Returns: MultiPack containing a datapack for the current query

        """

        result = MultiPack()

        # Carry over the most recent entry of each context resource, if any
        # (presumably earlier packs of the conversation -- confirm with the
        # code that fills these resources).
        for context_key in ("user_utterance", "bot_utterance"):
            if self.resource.get(context_key):
                latest = self.resource.get(context_key)[-1]
                result.update_pack({context_key: latest})

        current = DataPack()
        # The utterance span uses the length of the raw input and is added
        # before the text is set.
        current.add_entry(Utterance(current, 0, len(data_source)))

        current.set_text(
            data_source, replace_func=self.text_replace_operation
        )
        result.update_pack({self.config.pack_name: current})

        yield result