Esempio n. 1
0
    def _parse_pack(self, data_source: str) -> Iterator[MultiPack]:
        r"""Wrap a raw query string into a single ``MultiPack``.

        For each of the resource keys ``"user_utterance"`` and
        ``"bot_utterance"`` (in that order), if ``self.resources`` is set and
        holds a truthy value under the key, the last element of that value
        is passed to ``add_pack_`` with the key as its name. A new pack named
        ``self.configs.pack_name`` is then added, its text is set from
        ``data_source`` (with ``self.text_replace_operation`` as the replace
        function), and an ``Utterance`` is created on it.

        Args:
            data_source: The text of the current query.

        Yields:
            One ``MultiPack`` built as described above.
        """
        multi_pack = MultiPack()

        # Use the conversation context (if any) to build the query.
        for context_key in ("user_utterance", "bot_utterance"):
            if (self.resources is not None
                    and self.resources.get(context_key)):
                latest = self.resources.get(context_key)[-1]
                multi_pack.add_pack_(latest, context_key)

        query_pack = multi_pack.add_pack(self.configs.pack_name)
        query_pack.set_text(
            data_source, replace_func=self.text_replace_operation)

        # NOTE(review): the span end is the length of the raw input, not of
        # the text stored after `text_replace_operation` ran -- confirm the
        # two always agree.
        Utterance(query_pack, 0, len(data_source))

        yield multi_pack
Esempio n. 2
0
    def cast(self, pack: DataPack) -> MultiPack:
        """
        Box a data pack into a newly created multi pack.

        Args:
            pack: The data pack to be boxed.

        Returns: A new multi pack to which ``pack`` has been added under
            the name ``self.configs.pack_name``.

        """
        boxed = MultiPack()
        boxed.add_pack_(pack, self.configs.pack_name)
        return boxed
Esempio n. 3
0
    def cast(self, pack: DataPack) -> MultiPack:
        """
        Auto-box the data pack into a multi pack by simple wrapping.

        Args:
            pack: The data pack to be boxed.

        Returns: A new multi pack to which ``pack`` has been added under
            the name ``self.configs.pack_name``.

        """
        wrapper = MultiPack()
        wrapper.add_pack_(pack, self.configs.pack_name)
        return wrapper
Esempio n. 4
0
    def _process(self, input_pack: MultiPack):
        r"""Run the query stored in the multipack against the index.

        The query is read from the pack named
        ``self.configs.query_pack_name`` as that pack's single ``Query``
        entry, and its ``value`` is handed to ``self.index.search``.

        Every hit is recorded on the query entry via ``add_result`` and is
        added to ``input_pack`` as a pack named
        ``f"{self.configs.response_pack_name_prefix}_{idx}"``, whose
        ``pack_name`` is set to the hit's ``doc_id``:

        - if ``self.configs.indexed_text_only`` is truthy, a new pack is
          created, its text is taken from the hit's ``self.configs.field``
          field, and a ``Document`` covering the whole text is created;
        - otherwise the pack is deserialized from the hit's ``pack_info``
          field.

        Args:
             input_pack: A multipack containing query as a pack.

        Raises:
            ValueError: If the query value is not a dictionary.
        """
        query_pack = input_pack.get_pack(self.configs.query_pack_name)

        # ElasticSearchQueryCreator adds a Query entry to query pack. We now
        # fetch it as the first element.
        first_query: Query = query_pack.get_single(Query)
        # pylint: disable=isinstance-second-argument-not-valid-type
        # TODO: until fix: https://github.com/PyCQA/pylint/issues/3507
        if not isinstance(first_query.value, Dict):
            raise ValueError(
                "The query to the elastic indexer need to be a dictionary.")

        results = self.index.search(first_query.value)

        for rank, hit in enumerate(results["hits"]["hits"]):
            source = hit["_source"]
            doc_id = source["doc_id"]
            first_query.add_result(doc_id, hit["_score"])

            result_name = f"{self.configs.response_pack_name_prefix}_{rank}"

            if not self.configs.indexed_text_only:
                # The whole serialized pack was indexed: restore it as is.
                restored = DataPack.deserialize(source["pack_info"])
                input_pack.add_pack_(restored, result_name)
                restored.pack_name = doc_id
                continue

            # Only the text was indexed: build a fresh pack around it.
            fresh: DataPack = input_pack.add_pack(result_name)
            fresh.pack_name = doc_id

            text = source[self.configs.field]
            fresh.set_text(text)

            Document(pack=fresh, begin=0, end=len(text))
Esempio n. 5
0
    def _process(self, input_pack: MultiPack):
        r"""Augment selected data packs of ``input_pack`` and add the results.

        The packs to augment are the keys of
        ``self.configs["augment_pack_names"]["kwargs"]`` that are present in
        ``input_pack.pack_names``; when that mapping has no keys, every pack
        in ``input_pack`` is augmented. Each augmented pack is added to
        ``input_pack`` under the name mapped to its source pack, defaulting
        to ``"augmented_" + <source pack name>``. Afterwards the existing
        ``MultiPackLink`` and ``MultiPackGroup`` entries are passed to
        ``self._copy_multi_pack_link_or_group`` and the internal states are
        cleared.

        Args:
            input_pack: The multi pack to augment; new packs are added to it.
        """
        name_mapping = self.configs["augment_pack_names"]["kwargs"]

        if len(name_mapping.keys()) == 0:
            # Augment all the DataPacks if not specified.
            aug_pack_names: List[str] = list(input_pack.pack_names)
        else:
            # Keep only the configured packs that exist in the multi pack.
            aug_pack_names = [
                name for name in name_mapping.keys()
                if name in input_pack.pack_names
            ]

        self._augment(input_pack, aug_pack_names)

        # Build every augmented pack first; they are added to the multi pack
        # only once all of them have been created.
        augmented: List[Tuple[str, DataPack]] = []
        for source_name in aug_pack_names:
            target_name: str = name_mapping.get(
                source_name, "augmented_" + source_name)
            source_pack = input_pack.get_pack(source_name)
            aligned_pack = self._auto_align_annotations(
                data_pack=source_pack,
                replaced_annotations=self._replaced_annos[
                    source_pack.meta.pack_id])
            augmented.append((target_name, aligned_pack))

        for target_name, aligned_pack in augmented:
            input_pack.add_pack_(aligned_pack, target_name)

        # Copy the MultiPackLinks first, then the MultiPackGroups.
        for entry_type in (MultiPackLink, MultiPackGroup):
            for entry in input_pack.get(entry_type):
                self._copy_multi_pack_link_or_group(entry, input_pack)

        # Must be called after processing each multipack
        # to reset internal states.
        self._clear_states()